######################################################################
## Section 6: Timings
######################################################################

######################################################################
## 6.1. Cross validation
######################################################################

## run code-section-6A.R to
## - create input files for the Python code
## - create the R output files 
load("res.mat.rda")
load("loss.diff.mat.oem.rda")
## run Python script "code-section-6B.py" to create the csv file
pyres <- read.csv("sklearn_lasso_times_10fold.csv", header = FALSE)

res.list <- vector(mode = "list", length = 14)
res.list[[1]] <- data.frame(Method = "oem", Penalty = "lasso",                      res.mat[, c(1:3, 4)])
res.list[[2]] <- data.frame(Method = "glmnet", Penalty = "lasso",                   res.mat[, c(1:3, 5)])
res.list[[3]] <- data.frame(Method = "cv.oem", Penalty = "lasso",                   res.mat[, c(1:3, 6)])
res.list[[4]] <- data.frame(Method = "xval.oem", Penalty = "lasso",                 res.mat[, c(1:3, 7)])
res.list[[5]] <- data.frame(Method = "cv.oem", Penalty = "MCP",                     res.mat[, c(1:3, 8)])
res.list[[6]] <- data.frame(Method = "xval.oem", Penalty = "MCP",                   res.mat[, c(1:3, 9)])
res.list[[7]] <- data.frame(Method = "cv.oem", Penalty = "group lasso",             res.mat[, c(1:3, 10)])
res.list[[8]] <- data.frame(Method = "xval.oem", Penalty = "group lasso",           res.mat[, c(1:3, 11)])
res.list[[9]] <- data.frame(Method = "cv.oem", Penalty = "lasso/group lasso/MCP",   res.mat[, c(1:3, 12)])
res.list[[10]] <- data.frame(Method = "xval.oem", Penalty = "lasso/group lasso/MCP", res.mat[, c(1:3, 13)])
res.list[[11]] <- data.frame(Method = "cv.glmnet", Penalty = "lasso",                res.mat[, c(1:3, 14)])
res.list[[12]] <- data.frame(Method = "cv.ncvreg", Penalty = "MCP",                  res.mat[, c(1:3, 15)])
res.list[[13]] <- data.frame(Method = "cv.grpreg", Penalty = "group lasso",          res.mat[, c(1:3, 16)])
res.list[[14]] <- data.frame(Method = "cv.gglasso", Penalty = "group lasso",         res.mat[, c(1:3, 17)])

res.list <- lapply(res.list, function(x) {
    colnames(x)[6] <- "Seconds"
    x
})

colnames(pyres) <- c("nobs", "nvars", "nfolds", "Seconds")
pyres2plot <- data.frame(Method = "sklearn-LassoCV", Penalty = "lasso", pyres[, c(3, 1, 2, 4)])
res2plot <- do.call(rbind, res.list)
res2plot <- rbind(res2plot, pyres2plot)

res2plot$nobs <- factor(res2plot$nobs)
levels(res2plot$nobs) <- c("n == 10^5", "n == 10^6")

res2plot$Method <- as.factor(res2plot$Method)
res2plot$Method <- factor(res2plot$Method, levels = levels(res2plot$Method)[c(7, 6, 5, 9, 2, 4, 3, 1, 8)])

res2plot$Penalty <- as.factor(res2plot$Penalty)
res2plot$Penalty <- factor(res2plot$Penalty, levels = levels(res2plot$Penalty)[c(2, 4, 1, 3)])

library("ggplot2")
science_theme <- theme(panel.grid.major = element_line(size = 0.25, color = "grey80", linetype = "dashed"), 
                       axis.line = element_line(size = 0.7, color = "black"), text = element_text(size = 14), 
                       strip.background = element_blank())

my_label_parsed <- function (labels, multi_line = TRUE) {
    labels <- label_value(labels, multi_line = multi_line)
    if (multi_line) {
        if (any(unlist(unname(labels)) %in% c("lasso", "group lasso"))) {
            lapply(unname(labels), lapply, function(values) {
                c(as.character(values))
            })
        } else {
            lapply(unname(labels), lapply, function(values) {
                c(parse(text = as.character(values)))
            })
        }
    } else {
        if (any(unlist(unname(labels)) %in% c("lasso", "group lasso"))) {
            lapply(labels, function(values) {
                values <- paste0("list(", values, ")")
                lapply(values, function(expr) c(parse(text = expr)))
            })
        } else {
            lapply(labels, function(values) {
                values <- paste0("list(", values, ")")
                lapply(values, function(expr) c(expr))
            })
        }
    }
}

plt_times <- ggplot(aes(x = nvars, y = log(Seconds), group = Method, color = Method), 
       data = res2plot[which(res2plot$nfolds == 10), ]) +
    geom_line(aes(linetype = Method), size = 1) + 
    geom_point(aes(shape = Method), size = 2) + 
    facet_grid(nobs ~ Penalty, labeller = my_label_parsed) + 
    scale_colour_manual(values = c("#7570b3", "#e7298a", "#33a02c", "#ff7f00", "purple", 
                                   "#ff7f00", "grey50", "chartreuse1", "cyan", "seagreen1")) +
    scale_linetype_manual(values = c("dashed", "dashed", "solid", "solid", "solid", "solid", "solid", "solid", "solid")) +
    scale_shape_manual(values = c(19, 19, 19, 19, 19, 25, 17, 15, 19)) +
    xlab("Number of Variables") +
    ylab("Time in log(Seconds)") +
    theme_bw(base_size = 12) + science_theme + 
    scale_x_continuous(breaks = sort(unique(res2plot$nvars))) + 
    theme(legend.position = "bottom",
          legend.key.width = unit(1, 'cm'),
          axis.text.x = element_text(angle = 90, hjust = 1))

plt_times

ggsave(
    filename = "plotCV-1.pdf",
    plot = plt_times,
    units = "in", height = 360/70, width = 576/70
)


## for accuracies
loss.oem.linear <- data.frame(loss.diff.mat.oem)
loss.oem.linear[, 1:2] <- res.mat[, 2:3]
loss.oem.linear$nobs <- factor(loss.oem.linear$nobs)
levels(loss.oem.linear$nobs) <- c("$10^5$", "$10^6$")

loss.oem.diffs.linear <- format(loss.oem.linear[, 3:ncol(loss.oem.linear)], scientific = TRUE, digits = 3)
loss.oem.diffs.linear <- data.frame(lapply(loss.oem.diffs.linear, as.character))
loss.oem.diffs.linear <- data.frame(lapply(loss.oem.diffs.linear, as.factor))
for (i in 1:ncol(loss.oem.diffs.linear)) {
    levels(loss.oem.diffs.linear[, i]) <- paste0("$", gsub("e", "\\\\times 10^{", levels(loss.oem.diffs.linear[, i])), "}$")
    loss.oem.diffs.linear[, i] <- levels(loss.oem.diffs.linear[, i])[loss.oem.diffs.linear[, i]]
}

loss.oem.linear[, 3:ncol(loss.oem.linear)] <- loss.oem.diffs.linear

library("tables")
latexTable(tabular(RowFactor(nobs) * RowFactor(nvars) ~ (glmnet+gglasso+ncvreg) * Heading()*identity, 
                   data = (loss.oem.linear)))


################
## parallel CV simulation

## WARNING: TAKES A LONG TIME AND REQUIRES 10 CORES

library("oem")
library("glmnet")
library("mvtnorm")
library("ncvreg")
library("gglasso")
library("grpreg")
library("microbenchmark")
set.seed(123)

k.vec <- c(5, 10)
nvars.vec <- c(50, 100, 250, 500)
nobs.vec <- c(1e5, 1e6)

n.runs <- 10
rho <- 0.5

res.mat <- array(NA, dim = c(length(k.vec) * length(nvars.vec) * length(nobs.vec), 3 + 8))
ct <- 0

for (j in 1:length(nvars.vec)) {
    nvars <- nvars.vec[j]
    sigma <- rho ^ abs(outer(1:nvars, 1:nvars, FUN = "-"))
    for (i in 1:length(nobs.vec)) {
        for (k in 1:length(k.vec)) {
            K <- k.vec[k]
            
            ct <- ct + 1
            nobs <- nobs.vec[i]
            x <- rmvnorm(nobs, mean = numeric(nvars), sigma = sigma)
            y <- drop(x %*% c(-0.5, -0.5, 0.5, 0.5, 1, rep(0, nvars - 5))) + rnorm(nobs, sd = 2)
            oefit <- oem(x = x, y = y, penalty = "lasso")
            
            grps <- rep(1:(nvars/25), each = 25)
            lam <- oefit$lambda[[1]]
            mb <- microbenchmark(
                "xval.oem[lasso][1]" = xval.oem(x = x, y = y, nfolds = K, penalty = "lasso", lambda = lam, tol = 1e-8),
                "xval.oem[lasso][5]" = xval.oem(x = x, y = y, nfolds = K, penalty = "lasso", lambda = lam, tol = 1e-8, ncores = 5),
                "xval.oem[lasso][10]" = xval.oem(x = x, y = y, nfolds = K, penalty = "lasso", lambda = lam, tol = 1e-8, ncores = 10),
                "xval.oem[lasso][20]" = xval.oem(x = x, y = y, nfolds = K, penalty = "lasso", lambda = lam, tol = 1e-8, ncores = 20),
                "xval.oem[all][1]" = xval.oem(x = x, y = y, nfolds = K, penalty = c("lasso", "mcp", "grp.lasso"), lambda = lam, gamma = 3, groups = grps, tol = 1e-8),
                "xval.oem[all][5]" = xval.oem(x = x, y = y, nfolds = K, penalty = c("lasso", "mcp", "grp.lasso"), 
                                                      lambda = lam, gamma = 3, groups = grps, tol = 1e-8, ncores = 5),
                "xval.oem[all][10]" = xval.oem(x = x, y = y, nfolds = K, penalty = c("lasso", "mcp", "grp.lasso"), 
                                                      lambda = lam, gamma = 3, groups = grps, tol = 1e-8, ncores = 10),
                "xval.oem[all][20]" = xval.oem(x = x, y = y, nfolds = K, penalty = c("lasso", "mcp", "grp.lasso"), 
                                                      lambda = lam, gamma = 3, groups = grps, tol = 1e-8, ncores = 10),
                times = n.runs,
                unit = "s")
            
            if (ct == 1) {
                names <- levels(summary(mb)$expr)[summary(mb)$expr]
                colnames(res.mat) <- c("nfolds", "nobs", "nvars", names)
            }
            res.mat[ct, 1:3] <- c(K, nobs, nvars)
            res.mat[ct, 4:(length(summary(mb)$mean) + 3)] <- summary(mb)$mean
            print(res.mat[ct, ])
        }
    }
}

res2 <- res.mat[,-grep("20", colnames(res.mat))]
res2.lasso <- data.frame(Penalty = "lasso", res2[, 1:6])
res2.all <- data.frame(Penalty = "all", res2[, c(1:3, 7:9)])

res2.lasso.1 <- data.frame(Cores = 1, res2.lasso[, c(1:4, 5)])
res2.lasso.5 <- data.frame(Cores = 5, res2.lasso[, c(1:4, 6)])
res2.lasso.10 <- data.frame(Cores = 10, res2.lasso[, c(1:4, 7)])

colnames(res2.lasso.1)[6] <- colnames(res2.lasso.5)[6] <- colnames(res2.lasso.10)[6] <- "Seconds"

res2.all.1 <- data.frame(Cores = 1, res2.all[, c(1:4, 5)])
res2.all.5 <- data.frame(Cores = 5, res2.all[, c(1:4, 6)])
res2.all.10 <- data.frame(Cores = 10, res2.all[, c(1:4, 7)])

colnames(res2.all.1)[6] <- colnames(res2.all.5)[6] <- colnames(res2.all.10)[6] <- "Seconds"

res2plot <- rbind(res2.lasso.1, res2.lasso.5, res2.lasso.10,
                  res2.all.1, res2.all.5, res2.all.10)

res2plot$nobs <- factor(res2plot$nobs)
levels(res2plot$nobs) <- c("n == 10^5", "n == 10^6")
res2plot$nvars <- factor(res2plot$nvars)
levels(res2plot$nvars) <- paste0("p == ", levels(res2plot$nvars))
levels(res2plot$Penalty) <- c("lasso", "lasso/group lasso/MCP")

library("ggplot2")
science_theme <- theme(panel.grid.major = element_line(size = 0.25, color = "grey80", linetype = "dashed"), 
                       axis.line = element_line(size = 0.7, color = "black"), text = element_text(size = 14), 
                       strip.background = element_blank())
ggplot(aes(x = Cores, y = log(Seconds), group = Penalty, color = Penalty), 
       data = res2plot[which(res2plot$nfolds == 10), ]) + 
    geom_line(size = 1) + 
    geom_point(size = 2) + 
    facet_grid(nobs ~ nvars, labeller = label_parsed) + 
    scale_colour_manual(values = c("#7570b3", "#ff7f00", "#e7298a", "#33a02c", "purple")) +
    xlab("Number of Cores") +
    ylab("Time in log(Seconds)") +
    theme_bw(base_size = 12) + science_theme + 
    scale_x_continuous(breaks = sort(unique(res2plot$Cores))) + 
    theme(legend.position = "bottom")

######################################################################
## 6.2. Sparse matrices
######################################################################

library("Matrix")
library("oem")
library("glmnet")
library("mvtnorm")
library("microbenchmark")
set.seed(123)

s.vec <- c(0.01, 0.05, 0.1)
nvars.vec <- c(100, 250, 500, 1000)
nobs.vec <- c(1e5, 1e6)

n.runs <- 10
rho <- 0.5

res.mat <- array(NA, dim = c(length(s.vec) * length(nvars.vec) * length(nobs.vec), 6))
ct <- 0

for (j in 1:length(nvars.vec)) {
    nvars <- nvars.vec[j]
    sigma <- rho ^ abs(outer(1:nvars, 1:nvars, FUN = "-"))
    for (i in 1:length(nobs.vec)) {
        for (k in 1:length(s.vec)) {
            density <- s.vec[k]
            
            ct <- ct + 1
            nobs <- nobs.vec[i]
            xs <- rsparsematrix(nobs, nvars, density = density)
            ys <- drop(xs %*% c(-0.5, -0.5, 0.5, 0.5, 1, rep(0, nvars - 5))) + rnorm(nobs, sd = 2)
            x.dense <- as.matrix(xs)
            
            oefit <- oem(x = xs, y = ys, penalty = "lasso")
            
            grps <- rep(1:(nvars/25), each = 25)
            lam <- oefit$lambda[[1]]
            mb <- microbenchmark("oem[sparse]" = oem(x = xs, y = ys, penalty = "lasso", lambda = lam, tol = 1e-8),
                                 "glmnet[sparse]" = glmnet(x = xs, y = ys, lambda = lam, thresh = 1e-8),
                                 "oem[dense]" = oem(x = x.dense, y = ys, penalty = "lasso", lambda = lam, tol = 1e-8),
                                 times = n.runs,
                                 unit = "s")
            names <- levels(summary(mb)$expr)[summary(mb)$expr]
            if (ct == 1) {
                colnames(res.mat) <- c("sparsity", "nobs", "nvars", names)
            }
            res.mat[ct, 1:3] <- c(1-density, nobs, nvars)
            res.mat[ct, 4:ncol(res.mat)] <- summary(mb)$mean
            print(res.mat[ct, ])
        }
    }
}

res2 <- res.mat[, -6]
colnames(res2)[4:5] <- c("oem", "glmnet")

library("tidyr")
sp.dat2plot <- as.data.frame(res2) %>% gather(Method, Seconds, -sparsity, -nobs, -nvars)

sp.dat2plot$nobs <- factor(sp.dat2plot$nobs)
levels(sp.dat2plot$nobs) <- c("n == 10^5", "n == 10^6")

sp.dat2plot$sparsity <- factor(sp.dat2plot$sparsity)
levels(sp.dat2plot$sparsity) <- paste0("sparsity == ", levels(sp.dat2plot$sparsity))

ggplot(aes(x = nvars, y = log(Seconds), group = Method, color = Method), 
       data = sp.dat2plot) +
    geom_line(aes(linetype = Method), size = 1) + 
    geom_point(aes(shape = Method), size = 2) + 
    facet_grid(nobs ~ sparsity, labeller = my_label_parsed) + 
    scale_colour_manual(values = c("#7570b3", "#e7298a", "#33a02c", "#ff7f00", "purple", 
                                   "#ff7f00", "grey50", "chartreuse1", "cyan")) +
    scale_linetype_manual(values = c("dashed", "dashed", "solid", "solid", "solid", "solid", "solid", "solid")) +
    scale_shape_manual(values = c(19, 19, 19, 19, 19, 25, 17, 15)) +
    xlab("Number of Variables") +
    ylab("Time in log(Seconds)") +
    theme_bw(base_size = 12) + science_theme + 
    scale_x_continuous(breaks = sort(unique(sp.dat2plot$nvars))) + 
    theme(legend.position = "bottom",
          axis.text.x = element_text(angle = 90, hjust = 1))

######################################################################
## 6.3. Penalized logistic regression
######################################################################

## WARNING: TAKES A LONG TIME!

library("oem")
library("glmnet")
library("mvtnorm")
library("ncvreg")
library("gglasso")
library("grpreg")
library("microbenchmark")
set.seed(123)

nvars.vec <- c(50, 100, 250, 500)
nobs.vec <- c(1e4, 1e5, 1e6)

n.runs <- 10
rho <- 0.5

res.mat <- array(NA, dim = c(1 * length(nvars.vec) * length(nobs.vec), 13))
loss.diff.mat.oem <- loss.diff.mat.oem.ub <- array(NA, dim = c(1 * length(nvars.vec) * length(nobs.vec), 7))
ct <- 0

mcp.pen <- function(t, lambda, gamma) {   
    ifelse(abs(t) < lambda * gamma, lambda * (abs(t) - t ^ 2 / (2 * lambda * gamma)),
           (lambda ^ 2) * gamma * 0.5)
}

grp.pen <- function(t, lambda, groups, intercept = FALSE) {
    unique.groups <- unique(groups)
    ng <- length(unique.groups)
    
    pen <- 0
    for (g in 1:ng) {
        grp.idx <- which(groups == unique.groups[g])
        grp.size <- length(grp.idx)
        if (intercept & g == 1) {
        } else {
            pen <- pen + lambda * sqrt(grp.size) * sqrt(sum((t[grp.idx]) ^ 2))
        }
    }
    pen
}

logistic.loss <- function(beta, x, y) {
    xbeta <- x %*% beta
    exb <- exp( xbeta )
    prob1 <- exb / (1 + exb)
    logexb <- log(prob1)
    
    y0 <- 1 - y
    
    logexb0 <- log(1 - prob1)
    
    logL <- as.vector(apply(y %*% logexb + y0 %*% logexb0, 2, sum))
    -logL
}

sq.err.loss <- function(beta, x, y) {
    xbeta <- x %*% beta
    0.5 * sum((y - xbeta) ^ 2) / nrow(x)
}

mcp.logistic <- function(beta, x, y, lambda, gamma) {
    nobs <- NROW(x)
    loss <- logistic.loss(beta, x, y)
    if (sd(x[, 1]) == 0) {
        pen <- sum(mcp.pen(beta[-1], lambda, gamma))
    } else {
        pen <- sum(mcp.pen(beta, lambda, gamma))
    }
    loss / nobs + pen
}

lasso.linear <- function(beta, x, y, lambda) {
    nobs <- NROW(x)
    loss <- sq.err.loss(beta, x, y)
    if (sd(x[, 1]) == 0) {
        pen <- lambda * sum(abs(beta[-1]))
    } else {
        pen <- lambda * sum(abs(beta))
    }
    loss + pen
}

lasso.logistic <- function(beta, x, y, lambda) {
    nobs <- NROW(x)
    loss <- logistic.loss(beta, x, y)
    if (sd(x[, 1]) == 0) {
        pen <- lambda * sum(abs(beta[-1]))
    } else {
        pen <- lambda * sum(abs(beta))
    }
    loss / nobs + pen
}

grp.logistic <- function(beta, x, y, lambda, groups) {
    nobs <- NROW(x)
    loss <- logistic.loss(beta, x, y)
    pen <- grp.pen(beta, lambda, groups, intercept = sd(x[, 1]) == 0)
    loss / nobs + pen
}

mcp.linear <- function(beta, x, y, lambda, gamma) {
    nobs <- NROW(x)
    loss <- sq.err.loss(beta, x, y)
    if (sd(x[, 1]) == 0) {
        pen <- sum(mcp.pen(beta[-1], lambda, gamma))
    } else {
        pen <- sum(mcp.pen(beta, lambda, gamma))
    }
    loss + pen
}

grp.linear <- function(beta, x, y, lambda, groups) {
    nobs <- NROW(x)
    loss <- sq.err.loss(beta, x, y)
    pen <- grp.pen(beta, lambda, groups, intercept = sd(x[, 1]) == 0)
    loss + pen
}

for (j in 1:length(nvars.vec)) {
    nvars <- nvars.vec[j]
    sigma <- rho ^ abs(outer(1:nvars, 1:nvars, FUN = "-"))
    for (i in 1:length(nobs.vec)) {
        ct <- ct + 1
        nobs <- nobs.vec[i]
        x.unscaled <- rmvnorm(nobs, mean = numeric(nvars), sigma = sigma)
        
        ## need some way to get around the fact that 
        ## ncvreg standardizes no matter what and gglasso
        ## does not allow standardization. we need to do 
        ## this so comparisons of the loss functions
        ## are on the same scale for all methods
        x <- scale(x.unscaled)
        true.beta <- c(-0.5, -0.5, 0.5, 0.5, 1, rep(0, nvars - 5))
        xbeta <- drop(x %*% true.beta)
        y <- rbinom(nobs, 1, prob = 1 / (1 + exp(-xbeta)))
        
        grp.size <- 25
        grps <- rep(1:(nvars/grp.size), each = grp.size)
        wts <- rep(sqrt(grp.size), nvars/grp.size)
        
        oefit <- oem(x = x, y = y, penalty = c("lasso", "mcp", "grp.lasso"), 
                     family = "binomial", hessian.type = "upper.bound",
                     nlambda = 25L,
                     tol = 1e-12, irls.tol = 1e-10, gamma = 3, 
                     groups = grps, group.weights = wts,
                     standardize = FALSE)
        lam <- unique(oefit$lambda)[[1]]
        
        mb <- microbenchmark("oem[lasso]" = {oem_lasso <- oem(x = x, y = y, penalty = "lasso", lambda = lam, tol = 1e-6, irls.tol = 1e-3, family = "binomial", standardize = FALSE)},
                             "oem[mcp]" = {oem_mcp <- oem(x = x, y = y, penalty = "mcp", lambda = lam, tol = 1e-6, irls.tol = 1e-3, gamma = 3, family = "binomial", standardize = FALSE)},
                             "oem[grp.lasso]" = {oem_grp.lasso <- oem(x = x, y = y, penalty = "grp.lasso", lambda = lam, tol = 1e-6, irls.tol = 1e-3, groups = grps, family = "binomial", group.weights = wts, standardize = FALSE)},
                             "glmnet[lasso]" = {glmnet_lasso <- glmnet(x = x, y = y, lambda = lam, thresh = 1e-11, family = "binomial", standardize = FALSE)},
                             "oem.ub[lasso]" = {oem.ub_lasso <- oem(x = x, y = y, penalty = "lasso", lambda = lam, tol = 1e-7, irls.tol = 1e-5, family = "binomial", hessian.type = "upper.bound", standardize = FALSE)},
                             "oem.ub[mcp]" = {oem.ub_mcp <- oem(x = x, y = y, penalty = "mcp", lambda = lam, tol = 1e-7, irls.tol = 1e-5, gamma = 3, family = "binomial", hessian.type = "upper.bound", standardize = FALSE)},
                             "oem.ub[grp.lasso]" = {oem.ub_grp.lasso <- oem(x = x, y = y, penalty = "grp.lasso", lambda = lam, tol = 1e-7, irls.tol = 1e-5, groups = grps, family = "binomial", hessian.type = "upper.bound", group.weights = wts, standardize = FALSE)},
                             "glmnet.ub[lasso]" = {glmnet.ub_lasso <- glmnet(x = x, y = y, lambda = lam, thresh = 1e-11, family = "binomial", type.logistic = "modified.Newton", standardize = FALSE)},
                             "ncvreg[mcp]" = {ncvreg_mcp <- ncvreg(X = x, y = y, lambda = lam, gamma = 3, eps = 1e-8, family = "binomial")},
                             "grpreg[grp.lasso]" = {grpreg_grp.lasso <- grpreg(X = x, y = y, lambda = lam, group = grps, eps = 1e-8, family = "binomial", group.multiplier = wts)},
                             "gglasso[grp.lasso]" = {gglasso_grp.lasso <- gglasso(x = x, y = 2 * y - 1, lambda = lam, group = grps, eps = 1e-10, loss = "logit", pf = wts)},
                             times = n.runs,
                             unit = "s")
        names <- levels(summary(mb)$expr)[summary(mb)$expr]
        
        mcp.ncvreg_lf <- sapply(1:length(lam), function(ii) mcp.logistic(ncvreg_mcp$beta[, ii],      cbind(1, x), y, lam[ii], 3))
        mcp.oem_lf <- sapply(1:length(lam), function(ii) mcp.logistic(oem_mcp$beta[[1]][, ii],    cbind(1, x), y, lam[ii], 3))
        mcp.oem.ub_lf <- sapply(1:length(lam), function(ii) mcp.logistic(oem.ub_mcp$beta[[1]][, ii], cbind(1, x), y, lam[ii], 3))
        
        ggbeta <- rbind(gglasso_grp.lasso$b0, gglasso_grp.lasso$beta)
        
        grpreg_lf <- sapply(1:length(lam), function(ii) grp.logistic(grpreg_grp.lasso$beta[, ii],      cbind(1, x), y, lam[ii], grps))
        oem_lf <- sapply(1:length(lam), function(ii) grp.logistic(oem_grp.lasso$beta[[1]][, ii],    cbind(1, x), y, lam[ii], grps))
        oem.ub_lf <- sapply(1:length(lam), function(ii) grp.logistic(oem.ub_grp.lasso$beta[[1]][, ii], cbind(1, x), y, lam[ii], grps))
        gglasso_lf <- sapply(1:length(lam), function(ii) grp.logistic(ggbeta[, ii],                     cbind(1, x), y, lam[ii], grps))
        
        oem.m_lf <- sapply(1:length(lam), function(ii) grp.logistic(oefit$beta[[3]][, ii], cbind(1, x), y, lam[ii], grps))
        
        glbeta <- rbind(glmnet_lasso$a0,    as.matrix(glmnet_lasso$beta))
        gl.ubbeta <- rbind(glmnet.ub_lasso$a0, as.matrix(glmnet.ub_lasso$beta))
        
        lasso.glmnet_lf <- sapply(1:length(lam), function(ii) lasso.logistic(glbeta[, ii],                 cbind(1, x), y, lam[ii]))
        lasso.glmnet.ub_lf <- sapply(1:length(lam), function(ii) lasso.logistic(gl.ubbeta[, ii],              cbind(1, x), y, lam[ii]))
        lasso.oem_lf <- sapply(1:length(lam), function(ii) lasso.logistic(oem_lasso$beta[[1]][, ii],    cbind(1, x), y, lam[ii]))
        lasso.oem.ub_lf <- sapply(1:length(lam), function(ii) lasso.logistic(oem.ub_lasso$beta[[1]][, ii], cbind(1, x), y, lam[ii]))
        
        lasso.oem.m_lf <- sapply(1:length(lam), function(ii) lasso.logistic(oefit$beta[[1]][, ii],      cbind(1, x), y, lam[ii]))
        
        mean(lasso.oem.m_lf - lasso.glmnet_lf)
        mean(lasso.oem_lf - lasso.glmnet_lf)
        mean(lasso.oem.ub_lf - lasso.glmnet_lf)
        mean(lasso.oem_lf - lasso.glmnet.ub_lf)
        mean(lasso.oem.ub_lf - lasso.glmnet.ub_lf)
        
        ## LASSO
        max(abs(oefit$beta[[1]][-1, ] - oem_lasso$beta[[1]][-1, ]))
        max(abs(oefit$beta[[1]][-1, ] - glmnet_lasso$beta))
        
        max(abs(oefit$beta[[1]][-1, ] - oem.ub_lasso$beta[[1]][-1, ]))
        max(abs(oefit$beta[[1]][-1, ] - glmnet.ub_lasso$beta))
        
        ## MCP
        max(abs(oefit$beta[[2]][-1, ] - oem_mcp$beta[[1]][-1, ]))
        max(abs(oefit$beta[[2]][-1, ] - oem.ub_mcp$beta[[1]][-1, ]))
        max(abs(oefit$beta[[2]][-1, ] - ncvreg_mcp$beta[-1, ]))
        
        ## grp lasso
        max(abs(oefit$beta[[3]][-1, ] - oem_grp.lasso$beta[[1]][-1, ]))
        max(abs(oefit$beta[[3]][-1, ] - oem.ub_grp.lasso$beta[[1]][-1, ]))
        max(abs(oefit$beta[[3]][-1, ] - gglasso_grp.lasso$beta))
        ## grpreg standardizes *within* groups and thus results are different 
        max(abs(oefit$beta[[3]][-1, ] - grpreg_grp.lasso$beta[-1, ]))      
        
        if (ct == 1) {
            colnames(res.mat) <- c("nobs", "nvars", names)
            colnames(loss.diff.mat.oem) <- c("nobs", "nvars", "glmnet", "glmnet.ub",
                                             "gglasso", "grpreg", "ncvreg")
            colnames(loss.diff.mat.oem.ub) <- colnames(loss.diff.mat.oem)
        }
        res.mat[ct, 1:2] <- loss.diff.mat.oem[ct, 1:2] <- loss.diff.mat.oem.ub[ct, 1:2] <- c(nobs, nvars)
        res.mat[ct, 3:ncol(res.mat)] <- summary(mb)$mean
        
        ## LASSO
        loss.diff.mat.oem[ct, 3] <- mean(lasso.oem_lf - lasso.glmnet_lf)
        loss.diff.mat.oem[ct, 4] <- mean(lasso.oem_lf - lasso.glmnet.ub_lf)
        
        loss.diff.mat.oem.ub[ct, 3] <- mean(lasso.oem.ub_lf - lasso.glmnet_lf)
        loss.diff.mat.oem.ub[ct, 4] <- mean(lasso.oem.ub_lf - lasso.glmnet.ub_lf)
        
        ## grp lasso
        loss.diff.mat.oem.ub[ct, 5] <- mean(oem.ub_lf - gglasso_lf)
        loss.diff.mat.oem[ct, 5] <- mean(oem_lf - gglasso_lf)
        
        ## grpreg minimizes a slightly different loss function
        ## (orthogonalization occurs within groups)
        ## and thus the results are not comparable.
        loss.diff.mat.oem.ub[ct, 6] <- mean(oem.ub_lf - grpreg_lf)
        loss.diff.mat.oem[ct, 6] <- mean(oem_lf - grpreg_lf)
        
        
        ## MCP
        loss.diff.mat.oem[ct, 7] <- mean(mcp.oem_lf - mcp.ncvreg_lf)
        loss.diff.mat.oem.ub[ct, 7] <- mean(mcp.oem.ub_lf - mcp.ncvreg_lf)
        
        print("times:")
        print(res.mat[ct, ])
        print(loss.diff.mat.oem[ct, ])
        print(loss.diff.mat.oem.ub[ct, ])
    }
}

loss.oem <- data.frame(loss.diff.mat.oem)
loss.oem.ub <- data.frame(loss.diff.mat.oem.ub)

loss.oem$nobs <- factor(loss.oem$nobs)
levels(loss.oem$nobs) <- c("$10^4$", "$10^5$")

loss.oem.ub$nobs <- factor(loss.oem.ub$nobs)
levels(loss.oem.ub$nobs) <- c("$10^4$", "$10^5$")

library("stargazer")
library("tables")

loss.oem.diffs <- format(loss.oem[, 3:ncol(loss.oem)], scientific = TRUE, digits = 3)
loss.oem.diffs <- data.frame(lapply(loss.oem.diffs, as.character))
for (i in 1:ncol(loss.oem.diffs)) {
    levels(loss.oem.diffs[, i]) <- paste0("$", gsub("e", "^{", levels(loss.oem.diffs[, i])), "}$")
    loss.oem.diffs[, i] <- levels(loss.oem.diffs[, i])[loss.oem.diffs[, i]]
}

loss.oem[, 3:ncol(loss.oem)] <- loss.oem.diffs

loss.oem.diffs.ub <- format(loss.oem.ub[, 3:ncol(loss.oem.ub)], scientific = TRUE, digits = 3)
loss.oem.diffs.ub <- data.frame(lapply(loss.oem.diffs.ub, as.character))
for (i in 1:ncol(loss.oem.diffs.ub)) {
    levels(loss.oem.diffs.ub[, i]) <- paste0("$", gsub("e", "^{", levels(loss.oem.diffs.ub[, i])), "}$")
    loss.oem.diffs.ub[, i] <- levels(loss.oem.diffs.ub[, i])[loss.oem.diffs.ub[, i]]
}

loss.oem.ub[, 3:ncol(loss.oem.ub)] <- loss.oem.diffs.ub

res.list <- vector(mode = "list", length = 11)
res.list[[1]] <- data.frame(Method = "oem", Penalty = "lasso",                    res.mat[, c(1:2, 3)])
res.list[[2]] <- data.frame(Method = "glmnet", Penalty = "lasso",                 res.mat[, c(1:2, 6)])
res.list[[3]] <- data.frame(Method = "oem-upper-bound", Penalty = "lasso",        res.mat[, c(1:2, 7)])
res.list[[4]] <- data.frame(Method = "glmnet-upper-bound", Penalty = "lasso",     res.mat[, c(1:2, 10)])
res.list[[5]] <- data.frame(Method = "oem", Penalty = "MCP",                      res.mat[, c(1:2, 4)])
res.list[[6]] <- data.frame(Method = "oem-upper-bound", Penalty = "MCP",          res.mat[, c(1:2, 8)])
res.list[[7]] <- data.frame(Method = "ncvreg", Penalty = "MCP",                   res.mat[, c(1:2, 11)])
res.list[[8]] <- data.frame(Method = "oem", Penalty = "group lasso",              res.mat[, c(1:2, 5)])
res.list[[9]] <- data.frame(Method = "oem-upper-bound", Penalty = "group lasso",  res.mat[, c(1:2, 9)])
res.list[[10]] <- data.frame(Method = "grpreg", Penalty = "group lasso",          res.mat[, c(1:2, 12)])
res.list[[11]] <- data.frame(Method = "gglasso", Penalty = "group lasso",         res.mat[, c(1:2, 13)])

res.list <- lapply(res.list, function(x) {
    colnames(x)[5] <- "Seconds"
    x
})

res2plot <- do.call(rbind, res.list)

res2plot$nobs <- factor(res2plot$nobs)
levels(res2plot$nobs) <- c("n == 10^5", "n == 10^6")

ggplot(aes(x = nvars, y = log(Seconds), group = Method, color = Method), 
       data = res2plot) +
    geom_line(aes(linetype = Method), size = 1) + 
    geom_point(aes(shape = Method), size = 2) + 
    facet_grid(nobs ~ Penalty, labeller = my_label_parsed) + 
    scale_colour_manual(values = c("#7570b3", "#e7298a", "#7570b3", "#e7298a", "purple", 
                                   "#ff7f00", "grey50", "chartreuse1", "cyan", "seagreen1")) +
    scale_linetype_manual(values = c("solid", "solid", "dashed", "dashed", "solid", "solid", "solid", "solid", "solid")) +
    scale_shape_manual(values = c(19, 17, 19, 17, 19, 25, 17, 15, 19)) +
    xlab("Number of Variables") +
    ylab("Time in log(Seconds)") +
    theme_bw(base_size = 12) + science_theme + 
    scale_x_continuous(breaks = sort(unique(res2plot$nvars))) + 
    theme(legend.position = "bottom",
          axis.text.x = element_text(angle = 90, hjust = 1))

loss.oem <- data.frame(loss.diff.mat.oem)
loss.oem.ub <- data.frame(loss.diff.mat.oem.ub)

loss.oem$nobs <- factor(loss.oem$nobs)
levels(loss.oem$nobs) <- c("$10^4$", "$10^5$")

loss.oem.ub$nobs <- factor(loss.oem.ub$nobs)
levels(loss.oem.ub$nobs) <- c("$10^4$", "$10^5$")

library("stargazer")
library("tables")

loss.oem.diffs <- format(loss.oem[, 3:ncol(loss.oem)], scientific = TRUE, digits = 3)
loss.oem.diffs <- data.frame(lapply(loss.oem.diffs, as.character))
for (i in 1:ncol(loss.oem.diffs)) {
    levels(loss.oem.diffs[, i]) <- paste0("$", gsub("e", "^{", levels(loss.oem.diffs[, i])), "}$")
    loss.oem.diffs[, i] <- levels(loss.oem.diffs[, i])[loss.oem.diffs[, i]]
}

loss.oem[, 3:ncol(loss.oem)] <- loss.oem.diffs

loss.oem.diffs.ub <- format(loss.oem.ub[, 3:ncol(loss.oem.ub)], scientific = TRUE, digits = 3)
loss.oem.diffs.ub <- data.frame(lapply(loss.oem.diffs.ub, as.character))
for (i in 1:ncol(loss.oem.diffs.ub)) {
    levels(loss.oem.diffs.ub[, i]) <- paste0("$", gsub("e", "^{", levels(loss.oem.diffs.ub[, i])), "}$")
    loss.oem.diffs.ub[, i] <- levels(loss.oem.diffs.ub[, i])[loss.oem.diffs.ub[, i]]
}

loss.oem.ub[, 3:ncol(loss.oem.ub)] <- loss.oem.diffs.ub

latexTable(tabular(RowFactor(nobs) * RowFactor(nvars) ~ (glmnet + glmnet.ub + gglasso + ncvreg) * Heading() * identity, 
                   data = loss.oem))

latexTable(tabular(RowFactor(nobs) * RowFactor(nvars) ~ (glmnet + glmnet.ub + gglasso + ncvreg) * Heading() * identity, 
                   data = loss.oem.ub))


