## setup
Sys.time()

# Sets up width of output
options(width = 76)

# Loads current version of package
# devtools::install_github("giabaio/survHE", ref = "devel")
library("survHE")

# Set seed for replication
set.seed(240920)

# Reads the data in & shows a selection of records. 
data <- read.csv("data.csv")


## show_data
# Shows an excerpt of the data
rbind(head(data), tail(data))


## run_mle
# Run a selection of models using the fit.models function in survHE
# First, defines the vector of models to be used
mods <- c("exp", "weibull", "gamma", "lnorm", "llogis", "gengamma") 
# Then the formula specifying the linear predictor
formula <- Surv(time, event) ~ as.factor(arm)
# And then runs the models using MLE via flexsurv & shows the elements of the
# resulting survHE object
m1 <- fit.models(formula = formula, data = data, distr = mods)
# Explores the output
class(m1)
names(m1)
names(m1$models)
names(m1$models[[1]])
names(m1$models$Exponential)
m1$model.fitting
m1$method


## run_inla
# Needs to define the names of the distribution from the set that INLA can handle
# (notice that we can use the abbreviations defined in Table 1)
distr.inla <- c("exp", "wei", "llo", "lno")
m2 <- fit.models(formula = formula, data = data, distr = distr.inla, method = "inla",
                 control.family = list(weibull = list(prior = "gamma", param = c(.1, .1)),
                                       lognormal = list(initial = 0.1)))

m2 <- fit.models(formula, data, c("exp", "weibull", "llogis", "lnorm"), method = "inla",
                 control.family = list(exponential = list(),
                                       weibull = list(prior = "gamma", param = c(.1,.1)),
                                       loglogistic = list(), 
                                       lognormal = list(initial = 1)))

## run_hmc
# NB: it is possible to use the option 'refresh = 0' to prevent rstan from printing the progress indicator 
m3 <- fit.models(formula, data, distr = mods, "hmc")


## Traceplots
rstan::traceplot(m3$models[[2]])


## ACF
rstan::stan_ac(m3$models[[2]])


## run_hmc2
# This runs the Bayesian model via HMC using the Generalised Gamma and with a specific 
# setting for the prior of the linear predictor naming convention is consistent with 
# survHE (as in Table 1)
m4 <- fit.models(formula, data, distr = "gengamma", method = "hmc", 
                 priors = list(gengamma = list(sigma_beta = rep(5, 2))))

# But can also define priors on a set of models
priors <- list(exp = list(sigma_beta = rep(4, 2)), wei = list(mu_beta = rep(2, 2)))
m4 <- fit.models(formula, data, distr = mods, method = "hmc", priors = priors)
# Or construct a full list of priors & then re-run the models using fit.models
priors <- vector("list", 6)
# But still needs to name the list (at least for the non-null elements)
names(priors) <- mods
priors[[6]] <- list(a_sigma = 2, b_sigma = 4)

priors <- replicate(5, list())
priors[[6]] <- list(a_sigma = 2, b_sigma = 4)
names(priors) <- mods

m4 <- fit.models(formula, data, distr = mods, method = "hmc", priors = priors)

priors <- replicate(6, list())
names(priors) <- mods
priors[[4]] <- list(mu_alpha = 1, sigma_alpha = 4)

## Visualising the results
# Use the print method for survHE objects. This uses the first model stored in m1, 
# by default
print(m1)
# But can choose to show the fifth model
print(m1, mod = 5)
# Or use the original flexsurv table instead
print(m1, mod = 6, original = TRUE, digits = 3)
# And it works just as well for other inferential engines - this is for HMC, 
# using survHE notation
print(m3, 6)
# Or the original rstan one
print(m3, mod = 2, original = TRUE, digits = 6)


## makeKM_models
# Can also plot the survival curves for each object, eg.
plot(m1, add.km = TRUE)


## makeKM_models2
# This plots the survival curves for some of the models included in m1 and m3
plot(m1, m3, mods = c(2, 3, 8, 9), colour = c("blue", "green", "red", "yellow"), 
     lab.model = c("Weibull (MLE)", "Gamma (MLE)", "Weibull (HMC)", "Gamma (HMC)"),
     lab.profile = c("Control","Treatment"),
     add.km = TRUE)


## add_covariate
# With more complex models, things are not as straightforward: for example, we 
# could add a covariate:
m5 <- fit.models(Surv(time, event) ~ as.factor(arm) + as.factor(sex), 
                 data = data, distr = "exp")
print(m5)


## makeKM_models3
# KM_models3, fig.width=10, fig.height=9, out.width="100%"
plot(m5, add.km = TRUE)


## makeKM_models4
# In cases like this, better use 'newdata' and selects only a given profile to plot:
m5 <- fit.models(Surv(time, event) ~ as.factor(arm) + age,
                    data = data, distr = "exp")
newdata <- list(list(arm = 0, age = mean(data$age)), list(arm = 1, age = mean(data$age)))
# KM_model4, fig.width=10, fig.height=9, out.width="100%"
plot(m5, newdata = newdata)

## ggplot_customisation
p <- plot(m1)
p + theme_classic()

## summary
# The summary method computes the mean survival time
summary(m1)
# We need to make sure, however, that the range of times is long enough as to 
# characterise the full survival curve
summary(m1, t = seq(0, 60))
# And we can also include a specific profile to compute the mean
summary(m1, t = seq(0, 60), newdata = list(list(arm = 1)))


## makemodel_fit_1
# Another important issue is to do model fitting - we can use survHE 
# function 'model.fit.plot'
model.fit.plot(m1)


## makemodel_fit_2
# Selecting the type of statistic ("aic" vs "bic" vs "dic")
model.fit.plot(m1, type = "bic")


## makemodel_fit_3
# And we can fully customise the plot, mixing different objects and selecting 
# which models we want. 
model.fit.plot(m1, m3, mods = c(1, 2, 3, 7, 8, 9),
               models = c("Exponential (MLE)", "Weibull (MLE)", "Gamma (MLE)", 
                         "Exponential (HMC)", "Weibull (HMC)", "Gamma (HMC)"))


## makemodel_fit_4
model.fit.plot(MLE = m1, HMC = m3, stacked = TRUE, mods = c(1, 2, 3, 7, 8, 9),
               name_legend = "Inferential method")


## makemodel_fit_5
model.fit.plot(MLE = m1, HMC = m3, stacked = TRUE, mods = c(1, 2, 3, 7, 8, 9), type = "dic")


## psa
# PSA - that's crucial to economic evaluation and survHE as well. We can use the 
# function 'make.surv'
psa <- make.surv(fit = m3, nsim = 1000, t = seq(.1, 63))
# Show its elements
names(psa)


## makepsa_plot
# And then visualise the results
psa.plot(psa)


## makepsa_plot2
# And fully customise the call to this function
psa.plot(psa, xlab = "Extrapolated time", ylab = "Estimation of the survival curves", 
         alpha = 0.2, col = c("dark grey", "black"), main = "PSA to survival curves",
         cex.txt = .95)


## makepsa_plot3
# Can also use the standard "plot" function to get the same picture...
plot(m3, mods = 1, nsim = 1000, t = seq(.1, 63))


## write_table
# And finally write the results to an Excel file
write.surv(psa, file = "temp.xlsx")


## rps
# We can also do "advanced" models, ie Royston-Parmar splines & Poly-Weibull
# RPS is available under MLE & Bayesian HMC modelling:
formula <- Surv(time, event) ~ as.factor(arm)
m6 <- fit.models(formula = formula, data = data, distr = "rps", k = 2)
m7 <- fit.models(formula = formula, data = data, distr = "rps", k = 2, method = "hmc")
# We can use the methods print, plot and summary, eg
print(m6)
print(m7)


## poly-weibull
# For the Poly-Weibull, we need to specify a list of formulae before running the 
# Bayesian/HMC model
formula.pw <- list(Surv(time, event) ~ 1, Surv(time, event) ~ as.factor(arm))
m8  <- poly.weibull(formula.pw, data, cores = 4)
# But the results aren't great...
print(m8)
# And possibly use more advanced options in the background call to rstan
m9 <- poly.weibull(formula.pw, data, cores = 4, iter = 4000,
                   control = list(adapt_delta = .95, stepsize = .005, max_treedepth = 100)
)
# Now the results are better
print(m9)


## digitising
# Digitising --- based on some input txt files with data obtained using DigitizeIt
surv_inp <- "survival.txt"
nrisk_inp <- "nrisk.txt"
km_out <- "KMdata.txt"
ipd_out <- "IPDdata.txt"
digitise(surv_inp = surv_inp, nrisk_inp = nrisk_inp, 
         km_output = km_out, ipd_output = ipd_out)


## makeIPD
# Can use the output from 'digitise' to create pseudo-data
data <- make.ipd(list("IPDdata.txt"), ctr = 1, 
                 var.labs = c("time", "event", "arm"))

## final
sessionInfo()