## setup Sys.time() # Sets up width of output options(width = 76) # Loads current version of package # devtools::install_github("giabaio/survHE", ref = "devel") library("survHE") # Set seed for replication set.seed(240920) # Reads the data in & shows a selection of records. data <- read.csv("data.csv") ## show_data # Shows an excerpt of the data rbind(head(data), tail(data)) ## run_mle # Run a selection of models using the fit.models function in survHE # First, defines the vector of models to be used mods <- c("exp", "weibull", "gamma", "lnorm", "llogis", "gengamma") # Then the formula specifying the linear predictor formula <- Surv(time, event) ~ as.factor(arm) # And then runs the models using MLE via flexsurv & shows the elements of the # resulting survHE object m1 <- fit.models(formula = formula, data = data, distr = mods) # Explores the output class(m1) names(m1) names(m1$models) names(m1$models[[1]]) names(m1$models$Exponential) m1$model.fitting m1$method ## run_inla # Needs to define the names of the distribution from the set that INLA can handle # (notice that we can use the abbreviations defined in Table 1) distr.inla <- c("exp", "wei", "llo", "lno") m2 <- fit.models(formula = formula, data = data, distr = distr.inla, method = "inla", control.family = list(weibull = list(prior = "gamma", param = c(.1, .1)), lognormal = list(initial = 0.1))) m2 <- fit.models(formula, data, c("exp", "weibull", "llogis", "lnorm"), method = "inla", control.family = list(exponential = list(), weibull = list(prior = "gamma", param = c(.1,.1)), loglogistic = list(), lognormal = list(initial = 1))) ## run_hmc # NB: it is possible to use the option 'refresh = 0' to prevent rstan from printing the progress indicator m3 <- fit.models(formula, data, distr = mods, "hmc") ## Traceplots rstan::traceplot(m3$models[[2]]) ## ACF rstan::stan_ac(m3$models[[2]]) ## run_hmc2 # This runs the Bayesian model via HMC using the Generalised Gamma and with a specific # setting for the prior of the linear predictor naming convention is consistent with # survHE (as in Table 1) m4 <- fit.models(formula, data, distr = "gengamma", method = "hmc", priors = list(gengamma = list(sigma_beta = rep(5, 2)))) # But can also define priors on a set of models priors <- list(exp = list(sigma_beta = rep(4, 2)), wei = list(mu_beta = rep(2, 2))) m4 <- fit.models(formula, data, distr = mods, method = "hmc", priors = priors) # Or construct a full list of priors & then re-run the models using fit.models priors <- vector("list", 6) # But still needs to name the list (at least for the non-null elements) names(priors) <- mods priors[[6]] <- list(a_sigma = 2, b_sigma = 4) priors <- replicate(5, list()) priors[[6]] <- list(a_sigma = 2, b_sigma = 4) names(priors) <- mods m4 <- fit.models(formula, data, distr = mods, method = "hmc", priors = priors) priors <- replicate(6, list()) names(priors) <- mods priors[[4]] <- list(mu_alpha = 1, sigma_alpha = 4) ## Visualising the results # Use the print method for survHE objects. This uses the first model stored in m1, # by default print(m1) # But can choose to show the fifth model print(m1, mod = 5) # Or use the original flexsurv table instead print(m1, mod = 6, original = TRUE, digits = 3) # And it works just as well for other inferential engines - this is for HMC, # using survHE notation print(m3, 6) # Or the original rstan one print(m3, mod = 2, original = TRUE, digits = 6) ## makeKM_models # Can also plot the survival curves for each object, eg. plot(m1, add.km = TRUE) ## makeKM_models2 # This plots the survival curves for some of the models included in m1 and m3 plot(m1, m3, mods = c(2, 3, 8, 9), colour = c("blue", "green", "red", "yellow"), lab.model = c("Weibull (MLE)", "Gamma (MLE)", "Weibull (HMC)", "Gamma (HMC)"), lab.profile = c("Control","Treatment"), add.km = TRUE) ## add_covariate # With more complex models, things are not as straightforward: for example, we # could add a covariate: m5 <- fit.models(Surv(time, event) ~ as.factor(arm) + as.factor(sex), data = data, distr = "exp") print(m5) ## makeKM_models3 # KM_models3, fig.width=10, fig.height=9, out.width="100%" plot(m5, add.km = TRUE) ## makeKM_models4 # In cases like this, better use 'newdata' and selects only a given profile to plot: m5 <- fit.models(Surv(time, event) ~ as.factor(arm) + age, data = data, distr = "exp") newdata <- list(list(arm = 0, age = mean(data$age)), list(arm = 1, age = mean(data$age))) # KM_model4, fig.width=10, fig.height=9, out.width="100%" plot(m5, newdata = newdata) ## ggplot_customisation p <- plot(m1) p + theme_classic() ## summary # The summary method computes the mean survival time summary(m1) # We need to make sure, however, that the range of times is long enough as to # characterise the full survival curve summary(m1, t = seq(0, 60)) # And we can also include a specific profile to compute the mean summary(m1, t = seq(0, 60), newdata = list(list(arm = 1))) ## makemodel_fit_1 # Another important issue is to do model fitting - we can use survHE # function 'model.fit.plot' model.fit.plot(m1) ## makemodel_fit_2 # Selecting the type of statistic ("aic" vs "bic" vs "dic") model.fit.plot(m1, type = "bic") ## makemodel_fit_3 # And we can fully customise the plot, mixing different objects and selecting # which models we want. model.fit.plot(m1, m3, mods = c(1, 2, 3, 7, 8, 9), models = c("Exponential (MLE)", "Weibull (MLE)", "Gamma (MLE)", "Exponential (HMC)", "Weibull (HMC)", "Gamma (HMC)")) ## makemodel_fit_4 model.fit.plot(MLE = m1, HMC = m3, stacked = TRUE, mods = c(1, 2, 3, 7, 8, 9), name_legend = "Inferential method") ## makemodel_fit_5 model.fit.plot(MLE = m1, HMC = m3, stacked = TRUE, mods = c(1, 2, 3, 7, 8, 9), type = "dic") ## psa # PSA - that's crucial to economic evaluation and survHE as well. We can use the # function 'make.surv' psa <- make.surv(fit = m3, nsim = 1000, t = seq(.1, 63)) # Show its elements names(psa) ## makepsa_plot # And then visualise the results psa.plot(psa) ## makepsa_plot2 # And fully customise the call to this function psa.plot(psa, xlab = "Extrapolated time", ylab = "Estimation of the survival curves", alpha = 0.2, col = c("dark grey", "black"), main = "PSA to survival curves", cex.txt = .95) ## makepsa_plot3 # Can also use the standard "plot" function to get the same picture... plot(m3, mods = 1, nsim = 1000, t = seq(.1, 63)) ## write_table # And finally write the results to an Excel file write.surv(psa, file = "temp.xlsx") ## rps # We can also do "advanced" models, ie Royston-Parmar splines & Poly-Weibull # RPS is available under MLE & Bayesian HMC modelling: formula <- Surv(time, event) ~ as.factor(arm) m6 <- fit.models(formula = formula, data = data, distr = "rps", k = 2) m7 <- fit.models(formula = formula, data = data, distr = "rps", k = 2, method = "hmc") # We can use the methods print, plot and summary, eg print(m6) print(m7) ## poly-weibull # For the Poly-Weibull, we need to specify a list of formulae before running the # Bayesian/HMC model formula.pw <- list(Surv(time, event) ~ 1, Surv(time, event) ~ as.factor(arm)) m8 <- poly.weibull(formula.pw, data, cores = 4) # But the results aren't great... print(m8) # And possibly use more advanced options in the background call to rstan m9 <- poly.weibull(formula.pw, data, cores = 4, iter = 4000, control = list(adapt_delta = .95, stepsize = .005, max_treedepth = 100) ) # Now the results are better print(m9) ## digitising # Digitising --- based on some input txt files with data obtained using DigitizeIt surv_inp <- "survival.txt" nrisk_inp <- "nrisk.txt" km_out <- "KMdata.txt" ipd_out <- "IPDdata.txt" digitise(surv_inp = surv_inp, nrisk_inp = nrisk_inp, km_output = km_out, ipd_output = ipd_out) ## makeIPD # Can use the output from 'digitise' to create pseudo-data data <- make.ipd(list("IPDdata.txt"), ctr = 1, var.labs = c("time", "event", "arm")) ## final sessionInfo()