### Simulation study -- Appendix
library("survival")
library("eventglm")

library("data.table")
library("ggplot2")
library("xtable")

### run simulations

source("appendix-functions.R")

setup <- rbind(expand.grid(nn = c(500),
                           cens.rate = c(.5, .8),
                           b2.cens = c(.1),
                           b3.cens = c(.1),
                           b4.cens = c(.05),
                           gamma.cens = c(1,.5),
                           link = c("identity"),
                           scenario = c("0", "3", "4"),
                           stringsAsFactors = FALSE),
               expand.grid(nn = c(500),
                           cens.rate = c(.5, .8),
                           b2.cens = c(.1),
                           b3.cens = c(0),
                           b4.cens = c(0),
                           gamma.cens = c(1,.5),
                           link = c("identity"),
                           scenario = c("0", "3", "4"),
                           stringsAsFactors = FALSE),
               expand.grid(nn = c(500),
                           cens.rate = c(.5, .8),
                           b2.cens = c(0),
                           b3.cens = c(0),
                           b4.cens = c(0),
                           gamma.cens = c(1),
                           link = c("identity"),
                           scenario = c("0", "3", "4"),
                           stringsAsFactors = FALSE)
)

library("parallel")
cl <- makeCluster(8)
clusterSetRNGStream(cl, iseed = 20201013)

init <- clusterEvalQ(cl, {

  library("survival")
  library("eventglm")

  source("appendix-functions.R")

  setup <- rbind(expand.grid(nn = c(500),
                             cens.rate = c(.5, .8),
                             b2.cens = c(.1),
                             b3.cens = c(.1),
                             b4.cens = c(.05),
                             gamma.cens = c(1,2),
                             link = c("identity"),
                             scenario = c("0", "3", "4"),
                             stringsAsFactors = FALSE),
                 expand.grid(nn = c(500),
                             cens.rate = c(.5, .8),
                             b2.cens = c(.1),
                             b3.cens = c(0),
                             b4.cens = c(0),
                             gamma.cens = c(1,2),
                             link = c("identity"),
                             scenario = c("0", "3", "4"),
                             stringsAsFactors = FALSE),
                 expand.grid(nn = c(500),
                             cens.rate = c(.5, .8),
                             b2.cens = c(0),
                             b3.cens = c(0),
                             b4.cens = c(0),
                             gamma.cens = c(1),
                             link = c("identity"),
                             scenario = c("0", "3", "4"),
                             stringsAsFactors = FALSE)
  )


})

## WARNING: This takes a long time to run. Example results are already included in the tests subdirectory

for(j in 1:nrow(setup)) {

  clusterExport(cl, "j")
  res <- do.call(rbind, clusterApply(cl, 1:1000, function(i){

    inres <- tryCatch(simulate_data(setup$nn[j], scenario = setup$scenario[j],
                                    beta.cens = c(setup$b2.cens[j],setup$b3.cens[j],setup$b4.cens[j]),
                                    cens.rate = setup$cens.rate[j], gamma.cens = setup$gamma.cens[j],
                                    link = setup$link[j]),
                      error = function(e) {
                        data.frame(estimate = NA, std.err.rob = NA, std.err.cor = NA,
                                   std.err.nai = NA,
                                   parameter = NA, method = "failed", cmod = "fail",
                                   n = setup$nn[j],
                                   cens.rate = setup$cens.rate[j], scenario = setup$scenario[j],
                                   link = setup$link[j],
                                   beta.cens = paste(c(setup$b2.cens[j],setup$b3.cens[j],setup$b4.cens[j]),
                                                     collapse = "-"),
                                   gamma.cens = setup$gamma.cens[j])
                      })

    inres$replicate <- i
    inres

  }))

  if(j == 1) {
    write.csv(res, "sims-all-type-2b.csv", row.names = FALSE)
  } else {
    write.table(res, "sims-all-type-2b.csv", row.names = FALSE, append = TRUE,
                col.names = FALSE, sep = ",")
  }
}

stopCluster(cl)

set.seed(20201013)
true.values <- NULL
for(li in c("identity")) {
  for(sc in c( "3", "4")){

    tv <- true_values(scenario = sc, link = li,
                      nchunk = 500, chunks = 100)

    true.values <- rbind(true.values,
                         data.frame(scenario = sc, link = li,
                                    tv.cuminc = tv$ci.coef[2],
                                    tv.rmean = tv$rmean.coef[2]))
  }}


write.csv(true.values, "true-coefficients.csv")

### analysis of simulation results

sim2 <- data.table(read.csv("sims-all-type-2b.csv"))
truev <- data.table(read.csv("true-coefficients.csv"))
truev <- rbind(truev, data.table(X = "tr3", scenario = 0, link = "identity",
                                 tv.cuminc = 0, tv.rmean = 0))


sim2 <- merge(sim2,
              rbind(truev[, .(scenario, link, trueval = tv.cuminc, parameter = "ci")],
                    truev[, .(scenario, link, trueval = tv.rmean, parameter = "rmean")]),
              by = c("parameter", "link", "scenario"))

sim2[, method := factor(method, levels = rev(c("jack", "strat", "ipcwcoxph", "ipcwaalen")),
                        ordered = TRUE)]


## bias

sim2[, bias := ifelse(scenario == 0, (estimate - trueval), (estimate - trueval) / trueval)]

res2 <- sim2[!is.na(estimate), .(mest = median(bias),
                                 lower = quantile(bias, .05) ,
                                 upper = quantile(bias, .95)),
             by = .(scenario, n, parameter, cens.rate, beta.cens, gamma.cens, link, method, cmod)]



sim2[, emp.sd := sd(estimate, na.rm = TRUE),
     by = .(parameter, method, cmod, n, cens.rate, scenario, link, beta.cens, gamma.cens)]

empsd2 <- sim2[, .(emp.sd = sd(estimate, na.rm = TRUE)),
               by = .(parameter, method, cmod, n, cens.rate, scenario, link, beta.cens, gamma.cens)]


empsd2$method <- factor(empsd2$method, levels = (c("jack", "strat", "ipcwcoxph", "ipcwaalen")),
                        ordered = TRUE)



## coverage
covertrue <- function(est, se, true) {

  (true >= est - 1.96 * se) &
    (true <= est + 1.96 * se)

}

emperr <- sim2[, .(estvar = c(median((std.err.rob - emp.sd) / emp.sd, na.rm = TRUE),
                              median((std.err.cor - emp.sd) / emp.sd, na.rm = TRUE),
                              median((std.err.nai - emp.sd) / emp.sd)),
                   estvar.low = c(quantile((std.err.rob - emp.sd) / emp.sd, .05, na.rm = TRUE),
                                  quantile((std.err.cor - emp.sd) / emp.sd, .05, na.rm = TRUE),
                                  quantile((std.err.nai - emp.sd) / emp.sd, .05)),
                   estvar.high = c(quantile((std.err.rob - emp.sd) / emp.sd, .95, na.rm = TRUE),
                                   quantile((std.err.cor - emp.sd) / emp.sd, .95, na.rm = TRUE),
                                   quantile((std.err.nai - emp.sd) / emp.sd, .95)),
                   coverage = c(mean(covertrue(estimate, std.err.rob, trueval), na.rm = TRUE),
                                mean(covertrue(estimate, std.err.cor, trueval), na.rm = TRUE),
                                mean(covertrue(estimate, std.err.nai, trueval))),
                   varmethod = c("robust", "corrected", "naive")),
               by = .(parameter, method, cmod, n, cens.rate, scenario, link, beta.cens, gamma.cens)]



table(res2$beta.cens)


type1 <- sim2[scenario == "0", .(type1err = c(
  mean(2 * pnorm(-abs(estimate / std.err.rob)) < .05, na.rm = TRUE),
  mean(2 * pnorm(-abs(estimate / std.err.cor)) < .05, na.rm = TRUE),
  mean(2 * pnorm(-abs(estimate / std.err.nai)) < .05, na.rm = TRUE)),
  se.method = c("robust", "corrected", "naive")
), .(parameter, link, method, cmod, cens.rate, beta.cens, gamma.cens)]


resraw <- merge(res2[parameter == "ci" & gamma.cens == 2 & scenario != "0" &
                       method %in% c("strat", "jack", "ipcwaalen", "ipcwcoxph") &
                       cmod %in% c("reg", "twohaj")],
                empsd2[parameter == "ci" & gamma.cens == 2 & scenario != "0" &
                         method %in% c("strat", "jack", "ipcwaalen", "ipcwcoxph") &
                         cmod %in% c("reg", "twohaj")])

resraw[, mary := sprintf("%.3f (%.3f)", mest, emp.sd)]
resraw$mary <- ifelse(substr(resraw$mary, 1, 1) == "-",
                      gsub("-", "$-$", resraw$mary, fixed = TRUE),
                      paste0("\\phantom{$-$}", resraw$mary))

res2base <- dcast(resraw,
                  scenario + cens.rate + beta.cens  ~ method + cmod, value.var = "mary")

res2base$scenario <- ifelse(res2base$scenario == "3", "moderate", "large")
res2base$beta.cens <- sapply(strsplit(res2base$beta.cens, "-"),
                             function(x) {
                               sprintf("$(%s, %s, %s)$", x[1], x[2], x[3])
                             })
colnames(res2base)[c(1:3, 7,6,4,5)] <- c("Coeff." , "Cens.\\ rate",
                        "\\code{beta.cens}" , "Independent" , "Stratified" , "\\code{ipcw.aalen}" , "\\code{ipcw.coxph}")

print(xtable(res2base[, c(1:3, 7,6,4,5)]),
      include.rownames = FALSE, digits = 3, sanitize.text.function = function(x) {x})

res2raw <- merge(res2[parameter == "ci" &  scenario != "0" &
                        beta.cens == "0.1-0.1-0.05" &
                        method %in% c("ipcwaalen", "ipcwcoxph") &
                        cmod %in% c("twobin", "twohaj")],
                 empsd2[parameter == "ci" &  scenario != "0" &
                          beta.cens == "0.1-0.1-0.05" &
                          method %in% c("ipcwaalen", "ipcwcoxph") &
                          cmod %in% c("twobin", "twohaj")])

res2raw[, mary := sprintf("%.3f (%.3f)", mest, emp.sd)]
res2raw$mary <- ifelse(substr(res2raw$mary, 1, 1) == "-",
                       gsub("-", "$-$", res2raw$mary, fixed = TRUE),
                      paste0("\\phantom{$-$}", res2raw$mary))

res22 <- dcast(res2raw,
               scenario +  gamma.cens + cens.rate +cmod  ~ method , value.var = "mary")

res22$scenario <- ifelse(res22$scenario == "3", "moderate", "large")
res22$gamma.cens <- ifelse(res22$gamma.cens == 1, "PH", "nonPH")
res22$cmod <- ifelse(res22$cmod == "twobin", "Binder", "Hajek")

print(xtable(res22),
      include.rownames = FALSE, digits = 3,
      sanitize.text.function = function(x) x)


res3 <- emperr[
  beta.cens == "0.1-0-0"  &
    method %in% c("jack")]
res3[, mary := sprintf("%.2f (%.2f)", estvar, coverage)]
res3$mary <- ifelse(res3$mary == "NA (NaN)", "\\phantom{$-$}---",
                    ifelse(substr(res3$mary, 1, 1) == "-",
                           gsub("-", "$-$", res3$mary, fixed = TRUE),
                       paste0("\\phantom{$-$}", res3$mary)))


res33 <- dcast(res3,
               parameter + scenario +  gamma.cens + cens.rate  ~ varmethod , value.var = "mary")

res33$scenario <- ifelse(res33$scenario == "0" , "null",
                         ifelse(res33$scenario == "3",  "moderate", "large"))
res33$gamma.cens <- ifelse(res33$gamma.cens == 1, "PH", "nonPH")

print(xtable(res33[, -1]), include.rownames = FALSE, sanitize.text.function = function(x) x)
