## Run simulation.R before to obtain the results of the simulation
## study which are save in file "subgroup_id_sim_no_rct_results.rda"
## and loaded in this script.

## sim_data_1
## 1st block
library("personalized")

# use previous PRNG
RNGkind(sample.kind = "Rounding")

set.seed(123)
n.obs <- 1000
n.vars <- 50
x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars)

# simulate non-randomized treatment assignment
xbetat <- 0.5 + 0.25 * x[, 21] - 0.25 * x[, 41]
trt.prob <- plogis(xbetat)
trt <- rbinom(n.obs, 1, prob = trt.prob)

# simulate Delta(x)
delta <- (0.5 + x[, 2] - 0.5 * x[, 3] - 1 * x[, 11] + 1 * x[, 1] * x[, 12])

# simulate main effects g(X)
xbeta <- x[, 1] + x[, 11] - 2 * x[, 12] ** 2 + x[, 13] + 0.5 * x[, 15] ** 2

# add both main effects and interactions
xbeta <- xbeta + delta * (2 * trt - 1)


trt <- as.factor(ifelse(trt == 1, "Trt", "Ctrl"))

# simulate continuous outcomes
y <- xbeta + rnorm(n.obs)

# simulate binary outcomes
y.binary <- 1 * (xbeta + rnorm(n.obs, sd = 2) > 0)

# create time-to-event outcomes
surv.time <- exp(-20 - xbeta + rnorm(n.obs, sd = 1))
cens.time <- exp(rnorm(n.obs, sd = 3))
y.time.to.event <- pmin(surv.time, cens.time)
status <- 1 * (surv.time <= cens.time)

## change_basesize
theme_set(theme_grey(base_size = 9))

## propens_func_example_glm
## 2nd block
propensity.func <- function(x, trt) {
    # save data in a data.frame
    data.fr <- data.frame(trt = trt, x)

    # fit propensity score model using binomial GLM
    propensity.model <- glm(trt ~ ., family = binomial(), data = data.fr)

    # create estimated probabilities
    pi.x <- predict(propensity.model, type = "response")
    return(pi.x)
}

propensity.func(x, trt)[101:105]
trt[101:105]

## plot_overlap
## 3rd block
check.overlap(x, trt, propensity.func)

## const_propensity
## 4th block
constant.propensity.func <- function(x, trt) 0.5

## fit_model2
## 5th block
subgrp.model <- fit.subgroup(x = x, y = y, trt = trt,
    propensity.func = propensity.func,
    method = "a_learning",
    loss   = "sq_loss_lasso",
    nfolds = 10) # option for cv.glmnet

summary(subgrp.model)

## create_propensity
## 6th block
# create function for fitting propensity score model
propensity.func.lasso <- function(x, trt) {
    # fit propensity score model
    # with binomial model and lasso penalty.
    # tuning parameter selected by 10-fold CV
    propens.model <- cv.glmnet(y = trt, x = x,
        family = "binomial")
    pi.x <- predict(propens.model, s = "lambda.min",
        newx = x, type = "response")[, 1]
    pi.x
}

## propens_func_example_const
## 7th block
propensity.func.const <- function(x, trt) 0.5

## multinom_propens
## 8th block
propensity.func.multinom <- function(x, trt) {
    require("nnet")
    df <- data.frame(trt = trt, x)
    mfit <- nnet::multinom(trt ~ . -trt, data = df)
    # predict returns a matrix of probabilities:
    # one column for each treatment level
    propens <- predict(mfit, type = "probs")

    if (is.factor(trt)) {
        values <- levels(trt)[trt]
    } else {
        values <- trt
    }
    # return the probability corresponding to the
    # treatment that was observed
    probs <- propens[cbind(1:nrow(propens),
        match(values, colnames(propens)))]
    probs
}

## multinom_propens2
## 9th block
propensity.func.multinom <- function(x, trt) {
    require("nnet")
    df <- data.frame(trt = trt, x)
    mfit <- multinom(trt ~ . -trt, data = df)
    # predict returns a matrix of probabilities:
    # one column for each treatment level
    propens <- predict(mfit, type = "probs")

    if (is.factor(trt)) {
        levels <- levels(trt)
    } else {
        levels <- sort(unique(trt))
    }
    # return the probability corresponding to the
    # treatment that was observed
    probs <- propens[,match(levels, colnames(propens))]
    probs
}

## efficiency_func_example_b
## 10th block
augment.func.simple <- function(x, y) {
    cvmod <- cv.glmnet(y = y, x = x, nfolds = 10)
    predictions <- predict(cvmod, newx = x, s = "lambda.min")
    predictions
}

## efficiency_func_example_cc
## 11th block
augment.func <- function(x, y, trt) {
    data <- data.frame(x, y, trt = ifelse(trt == "Trt", 1, -1))
    xm <- model.matrix(y~trt*x-1, data = data)

    cvmod <- cv.glmnet(y = y, x = xm)
    ## get predictions when trt = 1
    data$trt <- 1
    xm1 <- model.matrix(y~trt*x-1, data = data)
    preds_1  <- predict(cvmod, xm1, s = "lambda.min")

    ## get predictions when trt = -1
    data$trt <- -1
    xm2 <- model.matrix(y~trt*x-1, data = data)
    preds_n1  <- predict(cvmod, xm2, s = "lambda.min")

    ## return predictions averaged over trt
    return(0.5 * (preds_1 + preds_n1))
}

## efficiency_func_example_binary
## 12th block
augment.func.bin <- function(x, y) {
    cvmod <- cv.glmnet(y = y, x = x, family = "binomial")
    predict(cvmod, newx = x, s = "lambda.min", type = "link")
}

## efficiency_ex
## 13th block
subgrp.model.eff <- fit.subgroup(x = x, y = y, trt = trt,
    propensity.func = propensity.func,
    loss = "sq_loss_lasso",
    augment.func = augment.func,
    nfolds = 10) # option for cv.glmnet

summary(subgrp.model.eff)

## expolosscustom
## 14th block
fit.expo.loss <- function(x, y, weights, ...) {
    ## define loss
    expo.loss <- function(beta, x, y, weights) {
        sum(weights * y * exp(-drop(x %*% beta)))
    }

    ## use optim() to minimize loss function
    opt <- optim(rep(0, NCOL(x)), fn = expo.loss,
        x = x, y = y, weights = weights)

    coefs <- opt$par

    ## define prediction function which
    ## inputs a design matrix
    ## and returns benefit scores
    pred <- function(x, type = "response") {
        cbind(1, x) %*% coefs
    }

    # return list of required components
    list(predict = pred, model = opt, coefficients = coefs)
}

## fit_model_gbm
## 15th block
subgrp.model.gbm <- fit.subgroup(x = x, y = y, trt = trt,
    propensity.func = propensity.func.lasso,
    loss   = "sq_loss_gbm",
    shrinkage = 0.025, # options for gbm
    n.trees = 1000,
    interaction.depth = 2,
    cv.folds = 5)

## fit_binary_1
## 16th block
subgrp.bin <- fit.subgroup(x = x, y = y.binary, trt = trt,
    propensity.func = propensity.func.lasso,
    loss   = "logistic_loss_lasso",
    nfolds = 10) # option for cv.glmnet

## tte_model_example
## 17th block
library("survival")
set.seed(123)
subgrp.cox <- fit.subgroup(x = x, y = Surv(y.time.to.event, status),
    trt = trt, propensity.func = propensity.func.lasso,
    loss   = "cox_loss_lasso",
    nfolds = 10)      # option for cv.glmnet

## print_tte_model
## 18th block
summary(subgrp.cox)

## summarize_sub
## 19th block
comp <- summarize.subgroups(subgrp.cox)

## print_summarize
## 20th block
print(comp, p.value = 0.01)

## train_test_ex
## 21st block
# check that the object is an object returned by fit.subgroup()
class(subgrp.model.eff)

validation.eff <- validate.subgroup(subgrp.model.eff,
    B = 25,  # specify the number of replications
    method = "training_test_replication",
    benefit.score.quantiles = c(0.5, 0.75, 0.9),
    train.fraction = 0.75)

validation.eff

## print_traintest
## 22nd block
print(validation.eff, which.quant = c(2, 3))

## boot_bias_ex
## 28th block
validation.boot <- validate.subgroup(subgrp.model.eff,
    B = 100,  # specify the number of replications
    method = "boot_bias_correction")

validation.boot

## gen_test_data
## 29th block
x.test <- matrix(rnorm(10 * n.obs * n.vars, sd = 3), 10 * n.obs, n.vars)


# simulate non-randomized treatment
xbetat.test   <- 0.5 + 0.25 * x.test[, 21] - 0.25 * x.test[, 41]
trt.prob.test <- plogis(xbetat.test)
trt.test    <- rbinom(10 * n.obs, 1, prob = trt.prob.test)

# simulate response
delta.test <- (0.5 + x.test[, 2] - 0.5 * x.test[, 3] -
    x.test[, 11] + x.test[, 1] * x.test[, 12])
xbeta.test <- x.test[, 1] + x.test[, 11] - 2 * x.test[, 12] ** 2 +
    x.test[, 13] + 0.5 * x.test[, 15] ** 2

xbeta.test <- xbeta.test + delta.test * (2 * trt.test - 1)

y.test <- xbeta.test + rnorm(10 * n.obs, sd = 2)

## predict_bene_score
## 30th block
bene.score.test <- predict(subgrp.model.eff, newx = x.test)

## compare_with_test
## 31st block

## Effect of control among those recommended control
mean(y.test[bene.score.test <= 0 & trt.test == 0]) -
       mean(y.test[bene.score.test <= 0 & trt.test == 1])

quantile(validation.boot$boot.results[[1]][, 1], c(0.025, 0.975), na.rm = TRUE)

## Trt effect among those recommended the treatment

mean(y.test[bene.score.test > 0 & trt.test == 1]) -
       mean(y.test[bene.score.test > 0 & trt.test == 0])

quantile(validation.boot$boot.results[[1]][, 2], c(0.025,  0.975), na.rm = TRUE)

## plot_ex_model_1
## 32nd block
plot(subgrp.model)

## plot_ex_model_2
## 33rd block
plot(subgrp.model, type = "density")

## plot_ex_model_3
## 34th block
plot(subgrp.model, type = "interaction")

## plot_ex_model_4
## 35th block
plot(subgrp.model, type = "conditional")

## train_test_ex2
## 36th block
validation <- validate.subgroup(subgrp.model,
    B = 100,  # specify the number of replications
    method = "training_test_replication",
    train.fraction = 0.75)

validation

## plot_ex_model_1a
## 37th block
plot(validation)

## plot_ex_model_1b
## 38th block
plot(validation, type = "density")

## plot_ex_model_1c
## 39th block
plot(validation, type = "conditional")

## plot_compare_validation_ex
## 40th block
plotCompare(validation, validation.eff)

## sim_three_trt_data
## 41st block
set.seed(123)
n.obs  <- 1000
n.vars <- 100
x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars)

# simulated non-randomized treatment with multiple levels
# based off of a multinomial logistic model
xbetat_1 <- 0.1 + 0.5 * x[, 21] - 0.25 * x[, 25]
xbetat_2 <- 0.1 - 0.5 * x[, 11] + 0.25 * x[, 15]
trt.1.prob <- exp(xbetat_1) / (1 + exp(xbetat_1) + exp(xbetat_2))
trt.2.prob <- exp(xbetat_2) / (1 + exp(xbetat_1) + exp(xbetat_2))
trt.3.prob <- 1 - (trt.1.prob + trt.2.prob)

prob.mat <- cbind(trt.1.prob, trt.2.prob, trt.3.prob)
trt.mat <- apply(prob.mat, 1, function(rr) rmultinom(1, 1, prob = rr))
trt.num <- apply(trt.mat, 2, function(rr) which(rr == 1))
trt <- as.factor(paste0("Trt_", trt.num))

# simulate response

# effect of treatment 1 relative to treatment 3
delta1 <- 2 * (0.5 + x[, 2] - 2 * x[, 3] )
# effect of treatment 2 relative to treatment 3
delta2 <- (0.5 + x[, 6] - 2 * x[, 5])

# main covariate effects with nonlinearities
xbeta <- x[, 1] + x[, 11] - 2 * x[, 12]^2 + x[, 13] +
    0.5 * x[, 15] ^ 2 + 2 * x[, 2] - 3 * x[, 5]

# create entire functional form of E(Y|T,X)
xbeta <- xbeta +
    delta1 * ((trt.num == 1) - (trt.num == 3)) +
    delta2 * ((trt.num == 2) - (trt.num == 3))


# simulate continuous outcomes E(Y|T,X)
y <- xbeta + rnorm(n.obs, sd = 2)


## display_mult_trt_vector
## 42nd block
trt[1:5]
table(trt)

## define_multi_propens
## 43rd block
propensity.multinom.lasso <- function(x, trt) {
    if (!is.factor(trt)) trt <- as.factor(trt)
    gfit <- cv.glmnet(y = trt, x = x, family = "multinomial")

    # predict returns a matrix of probabilities:
    # one column for each treatment level
    propens <- drop(predict(gfit, newx = x,
        type = "response", s = "lambda.min"))

    # return the matrix probability of treatment assignments
    probs <- propens[,match(levels(trt), colnames(propens))]

    probs
}

## check_overlap_multitreat
## 44th block
check.overlap(x = x, trt = trt, propensity.multinom.lasso)

## fit_multi_trt_model
## 45th block
set.seed(123)
subgrp.multi <- fit.subgroup(x = x, y = y,
    trt = trt, propensity.func = propensity.multinom.lasso,
    reference.trt = "Trt_3",
    loss   = "sq_loss_lasso")

summary(subgrp.multi)

## plot_multi_trt_model
## 46th block
pl <- plot(subgrp.multi)
pl + theme(axis.text.x = element_text(angle = 90, hjust = 1))

## validate_multi_trt_model
## 47th block
set.seed(123)
validation.multi <- validate.subgroup(subgrp.multi,
    B = 100,  # specify the number of replications
    method = "training_test_replication",
    train.fraction = 0.5)

print(validation.multi, digits = 2, sample.pct = TRUE)

## plotcomparemultivalidated
## 48th block
plv <- plot(validation.multi)
plv + theme(axis.text.x = element_text(angle = 90, hjust = 1))

## readsimresults

## Run simulation.R first to create this object and save it.
load("subgroup_id_sim_no_rct_results.rda")

auc.res <- do.call(rbind, res.list2[[3]])

cor.res <- do.call(rbind, res.list2[[1]])

## ALL OWL methods are dominated by their FOWL versions,
## so let's only plot FOWL results
owls <- c("OWL-Loss-W-Aug", "OWL-Logistic-Loss-W", "OWL-Logistic-Loss-W-Aug", "OWL-Loss-W")

cor.res <- cor.res[!(cor.res$Method %in% owls), ]
auc.res <- auc.res[!(auc.res$Method %in% owls), ]


if (is.factor(cor.res$Method))
{
    cor.res$Method <- droplevels(cor.res$Method)
} else
{
    cor.res$Method <- as.factor(cor.res$Method)
}

# reorder levels so personalized methods are all last
orders <- c(8, 1, 7:6, 9, 10:13, 2:5)

cor.res$Method <- factor(cor.res$Method, levels = levels(cor.res$Method)[orders])

if (is.factor(auc.res$Method))
{
    auc.res$Method <- droplevels(auc.res$Method)
} else
{
    auc.res$Method <- as.factor(auc.res$Method)
}

auc.res$Method <- factor(auc.res$Method, levels = levels(auc.res$Method)[orders])


cols <- c("#53354ACC", "#66CD00", "darkorange", "#55DDE0",
    "#574AE2FF", "#574AE266", "#D12600", "#D1260066", "#CEFF1AFF", "#CEFF1A66", "#943CB4FF", "#943CB466", "#FE6900FF")

levels(auc.res$Method)[c(6:13)] <- c("Sq-A", "Sq-A-Aug", "Sq-W","Sq-W-Aug","FOWL-L-W","FOWL-L-W-Aug", "FOWL-H-W", "FOWL-H-W-Aug")
levels(cor.res$Method)[c(6:13)] <- c("Sq-A", "Sq-A-Aug", "Sq-W","Sq-W-Aug","FOWL-L-W","FOWL-L-W-Aug", "FOWL-H-W", "FOWL-H-W-Aug")

## drop model4you-Tree since it is significantly
## and consistently worse than model4you-Forest and
## as such makes it difficult to look at the results
cor.res <- cor.res[cor.res$Method != "model4you-Tree", ]
auc.res <- auc.res[auc.res$Method != "model4you-Tree", ]

cor.res$Method <- droplevels(cor.res$Method)
auc.res$Method <- droplevels(auc.res$Method)

auc.res$Model <- ifelse(auc.res$main.effect.misspec, "Model~2", "Model~1")
auc.res$n <- paste0("n==", auc.res$sample.size)
auc.res$nu <- paste0("ME~size:", auc.res$main.effect.size)

cor.res$Model <- ifelse(cor.res$main.effect.misspec, "Model~2", "Model~1")
cor.res$n <- paste0("n==", cor.res$sample.size)
cor.res$nu <- paste0("ME~size:", cor.res$main.effect.size)


## plotsimres2
plotsimres2 <- ggplot(aes(x = Method, y = (Value), group = Method, fill = Method), data = cor.res) +
    geom_boxplot() +
    ylab("Correlation") +
    facet_grid(Model + nu ~ n,
               scales = "free_y", labeller = label_parsed) +
    scale_fill_manual(values = cols) +
    coord_cartesian(ylim = c(0, 1)) +
    theme(legend.position = "bottom",
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text = element_text(size = 12),
          axis.title = element_text(size = 16),
          legend.text = element_text(size = 12),
          legend.title = element_blank(),
          strip.text = element_text(size = 12),
          legend.key.width = unit(2.5,"cm")) +
    geom_vline(xintercept = 4.5,
               color = scales::alpha("black", 0.5),
               linetype = "dashed")
plotsimres2

pdf("Figures/plotsimres2-1.pdf",
    height = 10, width = 9)
print(plotsimres2)
dev.off()

## plotsimres4
plotsimres4 <- ggplot(aes(x = Method, y = (Value), group = Method, fill = Method), data = auc.res) +
    geom_boxplot() +
    ylab("AUC") +
    facet_grid(Model + nu ~ n,
               scales = "free_y", labeller = label_parsed) +
    scale_fill_manual(values = cols) +
    coord_cartesian(ylim = c(0.48, 1)) +
    theme(legend.position = "bottom",
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text = element_text(size = 12),
          axis.title = element_text(size = 16),
          legend.text = element_text(size = 12),
          legend.title = element_blank(),
          strip.text = element_text(size = 12),
          legend.key.width = unit(2.5,"cm")) +
    geom_vline(xintercept = 4.5,
               color = scales::alpha("black", 0.5),
               linetype = "dashed")
plotsimres4

pdf("Figures/plotsimres4-1.pdf",
    height = 10, width = 9)
print(plotsimres4)
dev.off()

## data_prep
## 49th block
data("LaLonde", package = "personalized")

# whether an individual had a higher salary
# in 1978 than 1975
y <- LaLonde$outcome

# treatment assignment (employment training vs not)
trt <- LaLonde$treat

x.varnames <- c("age", "educ", "black", "hisp", "white",
                "marr", "nodegr", "log.re75", "u75")

# covariates
data.x <- LaLonde[, x.varnames]

# construct design matrix (with no intercept)
x <- model.matrix(~ -1 + ., data = data.x)

## const_propens_func
## 50th block
const.propens <- function(x, trt) {
    mean.trt <- mean(trt == "Trt")
    rep(mean.trt, length(trt))
}

## fit_model_logreg_nsw
## 51st block
set.seed(1)
subgrp_fit_w <- fit.subgroup(x = x, y = y, trt = trt,
    loss = "logistic_loss_lasso",
    propensity.func = const.propens,
    type.measure = "auc",
    nfolds = 10)
summary(subgrp_fit_w)

## validate_subgrps
## 52nd block
val_subgrp_w <- validate.subgroup(subgrp_fit_w, B = 500,
    method = "training",
    train.fraction = 0.80)

## print_results_val
## 53rd block
print(val_subgrp_w, digits = 4, sample.pct = TRUE)

## plot
## 54th block
plot(val_subgrp_w)

pdf("Figures/plot-1.pdf",
    height = 3.75, width = 6)
print(plot(val_subgrp_w))
dev.off()

## boot_validate_subgrps
## 55th block
val_subgrp_w_boot <- validate.subgroup(subgrp_fit_w, B = 500,
    method = "boot")


## print_results_bootval
## 56th block
print(val_subgrp_w_boot, digits = 4, sample.pct = TRUE)

## plot_validate_subgrps
## 57th block
plotCompare(subgrp_fit_w, val_subgrp_w, val_subgrp_w_boot, type = "int")


pdf("Figures/plot_validate_subgrps-1.pdf",
    height = 3.75, width = 5)
print(plotCompare(subgrp_fit_w, val_subgrp_w, val_subgrp_w_boot, type = "int"))
dev.off()