## Example analyses from Dynamic Treatment Regimen Estimation via ## Regression-Based Techniques: Introducing R Package DTRreg by ## Wallace et al. library("DTRreg") # Section 4.1: single run of a 2-stage dWOLS analysis using IPTW # weights set.seed(1) # expit function expit <- function(x) 1/(1 + exp(-x)) # sample size n <- 10000 # variables (X = patient information, A = treatment) X1 <- rnorm(n) A1 <- rbinom(n, 1, expit(X1)) X2 <- rnorm(n) A2 <- rbinom(n, 1, expit(X2)) # blip functions gamma1 <- A1 * (1 + X1) gamma2 <- A2 * (1 + X2) # observed outcome: treatment-free outcome plus blip functions Y <- exp(X1) + exp(X2) + gamma1 + gamma2 + rnorm(n) # models to be passed to DTRreg # blip model blip.mod <- list(~ X1, ~ X2) # treatment model (correctly specified) treat.mod <- list(A1~ X1, A2~ X2) # treatment-free model (incorrectly specified) tf.mod <- list(~ X1, ~ X2) # weight function (IPTW) weight.fun <- function(w) 1/w # analysis example1a <- DTRreg(Y, blip.mod, treat.mod, tf.mod, method = "dwols", weight = weight.fun, var.estim = "bootstrap") example1a # example 1b: 2-stage binary treatment via G-estimation, dWOLS, and # Q-learning with both, one, or neither treatment and treatment-free # model correctly specified simfun1 <- function(n = 10000, t = 1, tf = 1, method = 1) { # data setup # expit function expit <- function(x) 1/(1 + exp(-x)) # variables (X = patient information, A = treatment) X1 <- rnorm(n) A1 <- rbinom(n, 1, expit(X1)) X2 <- rnorm(n) A2 <- rbinom(n, 1, expit(X2)) # exponents to be used later eX1 <- exp(X1) eX2 <- exp(X2) # blip functions gamma1 <- A1 * (1 + X1) gamma2 <- A2 * (1 + X2) # observed outcome: treatment-free outcome plus each blip Y <- rnorm(n, exp(X1) + exp(X2) + gamma1 + gamma2, 1) # analysis # models to be passed to dWOLS # blip model bl.mod <- list(~ X1, ~ X2) # treatment model (correct if t = 1) if (t == 1) { tr.mod <- list(A1 ~ X1, A2 ~ X2) } else { tr.mod <- list(A1 ~ 1, A2 ~ 1) } # treatment free model (correct if tf = 1) if (tf == 1) { trf.mod <- list(~ eX1, ~ eX2) } else { trf.mod <- list(~ X1, ~ X2) } # perform dynamic WOLS weight.fun <- function(w) 1/w if (method == 1) { mod1 <- DTRreg(Y, bl.mod, tr.mod, trf.mod, method = "gest", weight = weight.fun, var.estim = "none") } if (method == 2) { mod1 <- DTRreg(Y, bl.mod, tr.mod, trf.mod, method = "dwols", weight = weight.fun, var.estim = "none") } if (method == 3) { mod1 <- DTRreg(Y, bl.mod, tr.mod, trf.mod, method = "qlearn", weight = weight.fun, var.estim = "none") } # output parameter estimates c(mod1$psi[[1]], mod1$psi[[2]]) } # simulation runs: run <- 1000 n <- 10000 # for storage psi.yy.g <- psi.yn.g <- psi.ny.g <- psi.nn.g <- matrix(nrow = run, ncol = 4) psi.yy.d <- psi.yn.d <- psi.ny.d <- psi.nn.d <- matrix(nrow = run, ncol = 4) psi.y.q <- psi.n.q <- matrix(nrow = run, ncol = 4) # (note that Q-learning does not depend on choice of treatment model) for (i in 1:run) { if (i %% 100 == 0) { cat(i, "\n") } # reset seed for each analysis within each run set.seed(i) # both incorrect psi.nn.g[i, ] <- simfun1(n, t = 0, tf = 0, method = 1) psi.nn.d[i, ] <- simfun1(n, t = 0, tf = 0, method = 2) psi.n.q[i, ] <- simfun1(n, t = 0, tf = 0, method = 3) set.seed(i) # treatment incorrect, treatment-free correct psi.ny.g[i, ] <- simfun1(n, t = 0, tf = 1, method = 1) psi.ny.d[i, ] <- simfun1(n, t = 0, tf = 1, method = 2) set.seed(i) # treatment correct, treatment-free incorrect psi.yn.g[i, ] <- simfun1(n, t = 1, tf = 0, method = 1) psi.yn.d[i, ] <- simfun1(n, t = 1, tf = 0, method = 2) set.seed(i) # both correct psi.yy.g[i, ] <- simfun1(n, t = 1, tf = 1, method = 1) psi.yy.d[i, ] <- simfun1(n, t = 1, tf = 1, method = 2) psi.y.q[i, ] <- simfun1(n, t = 1, tf = 1, method = 3) } # return table of mean estimates for each method and set-up example1b <- rbind(colMeans(psi.yy.g), colMeans(psi.yn.g), colMeans(psi.ny.g), colMeans(psi.nn.g), colMeans(psi.yy.d), colMeans(psi.yn.d), colMeans(psi.ny.d), colMeans(psi.nn.d), colMeans(psi.y.q), colMeans(psi.n.q)) dimnames(example1b) <- list(paste0(rep(c("G-estimation", "dWOLS", "Q-learning"), c(4, 4, 2)), ":", c(rep(c("Both correct", "Treatment correct", "Treatment-free correct", "Neither correct"), 2), c("Treatment-free correct", "Treatment-free incorrect"))), paste0("psi", c("10", "11", "20", "21"))) round(example1b, digits = 3) # example 1c: diagnostic plots with treatment-free incorrectly, and # correctly, specified set.seed(1) # expit function expit <- function(x) 1/(1 + exp(-x)) # sample size n <- 1000 # variables (X = patient information, A = treatment) X1 <- rnorm(n) A1 <- rbinom(n, 1, expit(X1)) X2 <- rnorm(n) A2 <- rbinom(n, 1, expit(X2)) # blip functions gamma1 <- A1 * (1 + X1) gamma2 <- A2 * (1 + X2) # observed outcome: treatment-free outcome plus blip functions Y <- exp(X1) + exp(X2) + gamma1 + gamma2 + rnorm(n) # models to be passed to DTRreg # blip model blip.mod <- list(~ X1, ~ X2) # treatment model (correctly specified) treat.mod <- list(A1~ X1, A2~ X2) # treatment-free models (one incorrect, one correct) tf.mod.incorrect <- list(~ X1, ~ X2) eX1 <- exp(X1) eX2 <- exp(X2) tf.mod.correct <- list(~ eX1, ~ eX2) # weight function (IPTW) weight.fun <- function(w) 1/w # analysis example1c.incorrect <- DTRreg(Y, blip.mod, treat.mod, tf.mod.incorrect, method = "dwols", weight = weight.fun) example1c.correct <- DTRreg(Y, blip.mod, treat.mod, tf.mod.correct, method = "dwols", weight = weight.fun) # model plots plot(example1c.incorrect) plot(example1c.correct) ## Section 4.2: 2-stage continuous treatment with quadratic (in A) blip # estimation via G-estimation with bootstrap variance estimation # example 2a: single simulation run set.seed(1) # sample size n <- 10000 # variables (X = patient information, A = treatment) X1 <- rnorm(n) A1 <- rnorm(n, X1, 0.5) X2 <- rnorm(n) A2 <- rnorm(n, X2, 0.5) # blip functions gamma1 <- A1 * (1 + X1 - A1) gamma2 <- A2 * (1 + X2 - A2) # outcome: treatment-free outcome plus blip functions Y <- exp(X1) + exp(X2) + gamma1 + gamma2 + rnorm(n) # models to be passed to DTRreg blip.mod <- list(~ X1 + A1, ~ X2 + A2) treat.mod <- list(A1 ~ X1, A2 ~ X2) tf.mod <- list(~ X1, ~ X1 + A1 + X2) # perform G-estimation example2a <- DTRreg(Y, blip.mod, treat.mod, tf.mod, var.estim = "bootstrap", B = 200, treat.range = c(-5, 5)) example2a # example 2b: multiple runs to demonstrate consistency simfun2 <- function(n = 1000) { # data setup # variables (X = patient information, A = treatment) X1 <- rnorm(n) A1 <- rnorm(n, X1, 0.5) X2 <- rnorm(n) A2 <- rnorm(n, X2, 0.5) # outcome: treatment-free plus blips Y <- exp(X1) + exp(X2) + A1 * (1 + X1 - A1) + A2 * (1 + X2 - A2) + rnorm(n) # analysis # models to be passed to reg.dtr bl.mod <- list(~ X1 + A1, ~ X2 + A2) tr.mod <- list(A1~ X1, A2~ X2) trf.mod <- list(~ X1, ~ X1 + A1 + X2) # perform g-estimation (no var.estim as we're just looking at parameter estimates) mod2 <- DTRreg(Y, bl.mod, tr.mod, trf.mod, var.estim = "none", treat.range = c(-5, 5)) # output blip parameters c(mod2$psi[[1]], mod2$psi[[2]]) } # simulation runs run <- 1000 psi <- matrix(nrow = run, ncol = 6) set.seed(1) for (i in 1:run) { if (i %% 100 == 0) { cat(i, "\n") } psi[i, ] <- simfun2(10000) } round(colMeans(psi), digits = 3) # Section 4.3: PROBIT analysis # read in simulated dataset PROBIT.sim <- read.csv("Simulated_PROBIT_data.csv") # analysis # models to be passed to DTRreg bl.mod <- list(~ Wgt0, ~ Wgt3 + Infc + Hosp) tr.mod <- list( Brf1 ~ Intervention + factor(Educ) + Age + BF.prev + BF.not + Sex, Brf2 ~ Brf1 + Intervention + factor(Educ) + Age + BF.prev + BF.not + Sex) trf.mod <- list( ~ Intervention + factor(Location) + factor(Educ) + Smoke + Allergy + Age + I(Age^2) + BF.prev + BF.not + Cesarean + Sex + Wgt0, ~ Intervention + factor(Location) + factor(Educ) + Smoke + Allergy + Age + I(Age^2) + BF.prev + BF.not + Cesarean + Sex + Infc + Hosp + Wgt0 + Brf1 + Wgt3) # perform G-estimation verb.mod <- DTRreg(verbal.iq, bl.mod, tr.mod, trf.mod, data = PROBIT.sim, method = "gest", var.estim = "sandwich") verb.mod