pkgs <- c("dendextend", "pls", "cluster", "corrplot", "glmnet", "MLGL", "gglasso")
for (pkg in pkgs) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
    }
}
if (!requireNamespace("standGL", quietly = TRUE)) {
    # this package is archived on CRAN: 
    install.packages("https://cran.r-project.org/src/contrib/Archive/standGL/standGL_1.1.tar.gz")
}

library("dendextend")
library("MLGL")
stopifnot(packageVersion("MLGL") >= "0.6.7")
library("glmnet")
library("cluster")
library("standGL")
library("gglasso")

source("crl.R")
source("avg.R")

outDir <- "."

###  Run compute the resulting colored dendrogram representing selected clusters of variables.
MLGLDend <- function(res) {
  palette("default")
  hc <- res$res$hc
  dend <- as.dendrogram(hc)
  ind_lambdaopt <- which(res$res$lambda == max(res$lambdaOpt))
  allgroup <- MLGL:::groupHier(res$res$group[[ind_lambdaopt]], res$res$var[[ind_lambdaopt]])
  index <- 2 # starting color for coloring dendrogram labels
  colorGroup <- vector()
  allVar <- vector()
  for (currentGroup in res$selectedGroups) {
    if (currentGroup < 0) {
      color <- index
      currentVar <- allgroup$varlm[which(allgroup$grouplm == currentGroup)]
      dend <- color_labels(dend, col = color, labels = labels(dend)[which(hc$order %in% currentVar)])
      colorGroup <- c(colorGroup,rep(index, length(currentVar)))
      allVar <- c(allVar, currentVar)
      index <- index+1
    } else {
      color <- index
      currentVar <- res$var[which(res$group == currentGroup)]
      dend <- color_labels(dend, col = color, labels = labels(dend)[which(hc$order %in% currentVar)])
      colorGroup <- c(colorGroup, rep(index, length(currentVar)))
      allVar <- c(allVar, currentVar)
      index <- index+1
    }
  }
  res$color <- colorGroup
  res$Allvar <- allVar
  res$dend <- dend
  return(res)
}

## -------------------------------- Section 3 --------------------------------

set.seed(42)
X <- simuBlockGaussian(n = 50, nBlock = 12, sizeBlock = 5, rho = 0.7)
y <- X[, c(2, 7, 12)] %*% c(2, 2, -2) + rnorm(50, 0, 0.5)

res <- MLGL(X, y)
res.HMT <- HMT(res, X, y, control = "FWER", alpha = 0.05)
res.cv <- cv.MLGL(X, y)
res.stab <- stability.MLGL(X, y)

## Figure 3
pdf("fig3a.pdf")
plot(res)
dev.off()
pdf("fig3b.pdf")
plot(res.HMT)
dev.off()
pdf("fig3c.pdf")
plot(res.cv)
dev.off()
pdf("fig3d.pdf")
plot(res.stab)
dev.off()

res <- fullProcess(X, y)
res <- fullProcess(y ~ X)

seed <- 42
set.seed(seed)

# Data
data("gasoline", package = "pls")
gasNIR <- as.matrix(gasoline$NIR)
scaleGasNIR <- as.matrix(apply(gasNIR, 2, scale))
octane <- gasoline$octane

# Parameters
fractionSampleMLGL <- 0.5
method <- "average"
currentNB <- 50
description <- paste0("boostrap_", currentNB, "_gasoline_", seed, "_", method = method)

# MLGL steps
hc <- bootstrapHclust(scaleGasNIR, frac = 1, method = "average", B = 50)
groupWeight <- computeGroupSizeWeight(hc, sizeMax = 100)

# set seed to initial value to run MLGL on the same training/test samples
set.seed(seed)
res <- fullProcess(scaleGasNIR, gasoline$octane, hc = hc, fractionSampleMLGL = 0.5, weightSizeGroup = groupWeight)

summary(res)
plot(res)

# --------------------------------Figure 5 --------------------------------
resGasoline_average <- MLGLDend(res)

# Drawing figure 5
pdf(file = paste0(outDir, "/", description, "_wide_", round(fractionSampleMLGL, digits = 2), ".pdf"),
    width = 27.5, height = 15)
plot(resGasoline_average$dend)
rect(xleft = 0, ybottom = 3.30, xright = 401, ytop = 5.90, col = rgb(0, 255, 0, max = 255, alpha = 100), lty = 0)
rect(xleft = 0, ybottom = 6.60, xright = 401, ytop = 9.50, col = rgb(0, 0, 255, max = 255, alpha = 100), lty = 0)
rect(xleft = 0, ybottom = 1.04, xright = 401, ytop = 4.86, col = rgb(255, 0, 0, max = 255, alpha = 100), lty = 0)
dev.off()

# --------------------------------Figure 6 --------------------------------
if (length(resGasoline_average$var) != 0) {
  palette("default")
  matcor <- cor(scaleGasNIR[, resGasoline_average$var])
  pdf(file = paste0(outDir, "/boostrap_", currentNB, "_corrplot_MLGL_", seed, "_", method, round(fractionSampleMLGL, digits = 2), ".pdf"),
      width = 11, height = 11)
  corrplot::corrplot(matcor, tl.col = resGasoline_average$color)
  dev.off()
}

# --------------------------------Figure 7 --------------------------------

# Parameters
nb <- c(20, 50, 100, 300)
nDraw <- 100
varIndices <- seq(1:ncol(gasoline$NIR))
fullResultsAll <- list()
varSumAll <- list()

# Computing
for (currentNB in nb) {
  fullResults <- list()
  for (seed in 1:nDraw) {
    cat(paste0("current nb = ", currentNB, "\n"))
    cat(paste0("current seed = ", seed, "\n"))
    set.seed(seed)
    description <- paste0("boostrap_", currentNB, "_gasoline_", seed, "_", method = method)
    hc <- MLGL:::bootstrapHclust(scaleGasNIR, frac = 1, method = method, B = currentNB)
    groupWeight <- MLGL:::computeGroupSizeWeight(hc, sizeMax = 100)
    res <- MLGL::fullProcess(as.matrix(scaleGasNIR), gasoline$octane, hc = hc,
                             fractionSampleMLGL = fractionSampleMLGL, weightSizeGroup = groupWeight)
    resGasoline <- MLGLDend(res)
    fullResults[[seed]] <- resGasoline
  }
  fullResultsAll[[currentNB]] <- fullResults
  currentDf <- as.data.frame(lapply(fullResults, function(x) varIndices %in% x$var))
  colnames(currentDf) <- seq(1:nDraw)
  varSum <- apply(currentDf, MARGIN = 1, FUN = sum)
  varSumAll[[currentNB]] <- varSum
}

# Drawing figure 7
pdf(file = paste0(outDir, "/bootstrap_selection_", method, ".pdf"), width = 14, height = 7)
matplot(x = seq(900, 1700, 2), y = t(gasNIR), type = "l",
        xlab = "Wavelength", ylab = "Selection rate",
        col = "darkgray", xlim = c(900, 1700), ylim = c(0, 1.4),
        main = "Selection rates of variables (wavelength) for several value of B")
points(x = 900 + 2 * (rep(1:401)), y = varSumAll[[20]] / 100, col = "green")
points(x = 900 + 2 * (rep(1:401)), y = varSumAll[[50]] / 100, col = "black")
points(x = 900 + 2 * (rep(1:401)), y = varSumAll[[100]] / 100, col = "blue")
points(x = 900 + 2 * (rep(1:401)), y = varSumAll[[300]] / 100, col = "red")
legend(
  x = "topleft", c(
    "NIR spectra",
    paste0("Selected variables, B=20"),
    paste0("Selected variables, B=50"),
    paste0("Selected variables, B=100"),
    paste0("Selected variables, B=300")
  ),
  col = c("darkgray", "green", "black", "blue", "red"), lty = c(1, 0, 0, 0, 0, 0, 0),
  pch = c(NA, 1, 1, 1, 1, 1, 1), bty = "n"
)

dev.off()

# ------------------------------- Figure 11 --------------------------------- 

d <- dist(t(scaleGasNIR))
hcAll <- fastcluster::hclust(d, method = method)

# ## CGL Method
resCSGLasso <- CRLasso(scaleGasNIR[1:(nrow(scaleGasNIR)-1), ], gasoline$octane[1:(nrow(scaleGasNIR)-1)],
                       hc = hcAll, method = "standglasso")
selectedCSGVar <- which(resCSGLasso$CRL$beta[, which.min(resCSGLasso$CRLcv$cv-resCSGLasso$CRLcv$cvsd)][-1] != 0)
# 

# ## CRL Method
resCRLasso <- CRLasso(scaleGasNIR, gasoline$octane, hc = hcAll, method = "lasso")
selectedCRLasso <- which(resCRLasso$CRL$beta[, which.min(resCRLasso$CRLcv$cvlm)] != 0)
selectedCRVar <- which(resCRLasso$group %in% selectedCRLasso)

# 
# ## HCAR Method
resAvg <- avg(gasNIR, gasoline$octane, hc = hcAll, K = 10, s = c("lambda.1se", "lambda.min"))
selectedGroupAvg <- which(resAvg$beta != 0)
selectedAvgVar <- which(resAvg$group %in% selectedGroupAvg)
# 
# ## Lasso Method
resLasso <- cv.glmnet(scaleGasNIR, gasoline$octane)
selectedLassoVar <- which(resLasso$glmnet.fit$beta[, which.min(resLasso$cvm)] != 0)

# ########## Naive group lasso Method
# 
# # cut at biggest jump
p <- ncol(scaleGasNIR)
ngroup <- p - which.max(diff(hcAll$height))
group <- cutree(hcAll, ngroup)
# # gglasso need consecutive number in group
groupord <- order(group)
group2 <- group[groupord]
resgg.cv <- cv.gglasso(scaleGasNIR[,groupord], gasoline$octane, group2, nfolds = 10)
selectedGroupLassoVar <- which(resgg.cv$gglasso.fit$beta[order(groupord), ][, which.min(resgg.cv$cvm)] != 0)

# ## plot spectra and selected variables by several method
pdf(file = "bootstrap_selected_42_average.pdf", width = 14, height = 7)
matplot(x = seq(900, 1700, 2), y = t(gasNIR), type = "l", xlab = "Wavelength",
        ylab = "NIR Value", col = "darkgray", xlim = c(900, 1700),
        ylim = c(0, 1.5), main = "Selected variables by several methods")
points(x = 900 + 2 * (selectedLassoVar - 1), y = rep(0.60, length(selectedLassoVar)), col = "black")
points(x = 900 + 2 * (resGasoline_average$var - 1), y = rep(0.65, length(resGasoline_average$var)), col = "blue")
points(x = 900 + 2 * (selectedCSGVar - 1), y = rep(0.7, length(selectedCSGVar)), col = "red")
points(x = 900 + 2 * (selectedCRVar - 1), y = rep(0.75, length(selectedCRVar)), col = "yellow")
points(x = 900 + 2 * (selectedAvgVar - 1), y = rep(0.8, length(selectedAvgVar)), col = "green")
points(x = 900 + 2 * (selectedGroupLassoVar - 1), y = rep(0.85, length(selectedGroupLassoVar)), col = "pink")
legend(x = "topleft",
       c("NIR spectra", paste0("Selected variables, Lasso : ", length(selectedLassoVar), " variables"),
         paste0("Selected variables, MLGL : ", length(resGasoline_average$var), " variables"),
         paste0("Selected variables, CGL : ", length(selectedCSGVar), " variables"),
         paste0("Selected variables, CRL: ", length(selectedCRVar), " variables"),
         paste0("Selected variables, HCAR : ", length(selectedAvgVar), " variables"),
         paste0("Selected variables, Group Lasso : ", length(selectedGroupLassoVar), " variables")),
       col = c("darkgray", "black", "blue", "red", "yellow", "green", "pink"), 
       lty = c(1, 0, 0, 0, 0, 0, 0), pch = c(NA, 1, 1, 1, 1, 1, 1), bty = "n")
dev.off() 


