# supervised group lasso

#' Supervised group lasso
#'
#' Three-stage variable selection:
#' 1. the columns of the standardised design matrix are clustered with
#'    k-means, the number of clusters being the one with the largest gap
#'    statistic reported by `cluster::clusGap()`;
#' 2. inside every cluster holding more than one variable, a cross-validated
#'    lasso (`glmnet::cv.glmnet()`) pre-selects variables (at least one
#'    variable is always kept per cluster);
#' 3. a cross-validated group lasso (`gglasso::cv.gglasso()`) is fitted on the
#'    pre-selected variables, the clusters being used as groups.
#'
#' @param X Numeric design matrix (anything `as.matrix()` turns into a numeric
#'   matrix), observations in rows and variables in columns.
#' @param y Numeric response with one value per row of `X`.
#' @param M Largest number of clusters tried (passed to `clusGap()` as
#'   `K.max`).
#' @param B Number of bootstrap samples used by `clusGap()`.
#' @param K Number of cross-validation folds.
#' @param s Which penalty to extract coefficients at: `"lambda.1se"`
#'   (default), `"lambda.min"`, or numeric value(s) forwarded as-is to
#'   `coef()`.
#' @param plot If `TRUE`, the `clusGap()` result is plotted.
#'
#' @return A list with components
#'   * `sel`: column indices of `X` with a non-zero group-lasso coefficient;
#'   * `group`: cluster label of every column of `X`;
#'   * `groupGL`: cluster label of every variable given to the group lasso;
#'   * `varGL`: column indices of `X` given to the group lasso.
SGLasso <- function(X, y, M = floor(nrow(X)/2), B = 50, K = 10, s = c("lambda.1se","lambda.min"), plot = FALSE)
{
  #### input validation (done before any package is touched)
  X <- as.matrix(X)
  if (!is.numeric(X)) {
    stop("X must be a numeric matrix.", call. = FALSE)
  }
  n <- nrow(X)
  if (!is.numeric(y) || length(y) != n) {
    stop("y must be a numeric vector of length nrow(X).", call. = FALSE)
  }
  if (is.character(s)) {
    s <- match.arg(s)
  } else if (!is.numeric(s)) {
    stop("s must be \"lambda.1se\", \"lambda.min\" or numeric.", call. = FALSE)
  }

  # Centre the response and standardise the predictors: every model below is
  # fitted without an intercept.
  yb <- y - mean(y)
  Xb <- scale(X, center = TRUE, scale = TRUE)
  if (anyNA(Xb)) {
    # A zero-variance column is turned into NaN by scale().
    stop("X must not contain missing values or constant columns.",
         call. = FALSE)
  }

  # Fail with a clear message when a dependency is missing (require() would
  # only return FALSE and let a later call fail).
  for (pkg in c("cluster", "glmnet", "gglasso")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required by SGLasso() but is not installed.",
           call. = FALSE)
    }
  }

  #### clustering of the variables (rows of t(Xb))
  resgap <- cluster::clusGap(t(Xb), FUN = stats::kmeans, K.max = M, B = B)

  # Number of clusters = the one with the largest gap statistic.
  nbGroup <- which.max(resgap$Tab[, "gap"])

  group <- stats::kmeans(t(Xb), nbGroup)$cluster

  if (plot) {
    # Namespaced call: the argument `plot` shadows the function name here.
    graphics::plot(resgap)
  }

  #### lasso inside each cluster
  # Balanced random fold assignment shared by all per-cluster lasso fits.
  foldcv <- rep(seq_len(K), ceiling(n / K))[seq_len(n)]
  foldcv <- foldcv[sample(n)]

  groupParts <- vector("list", nbGroup)
  varParts <- vector("list", nbGroup)

  for (i in seq_len(nbGroup)) {
    groupi <- which(group == i)

    if (length(groupi) == 1) {
      # A singleton cluster is kept as is.
      groupParts[[i]] <- i
      varParts[[i]] <- groupi
      next
    }

    Xi <- Xb[, groupi, drop = FALSE]
    res <- glmnet::cv.glmnet(Xi, yb, foldid = foldcv, intercept = FALSE)
    varToKeep <- which(stats::coef(res, s = s)[-1] != 0)

    # Always keep at least one variable per cluster.
    if (length(varToKeep) == 0) {
      firstActive <- which(res$glmnet.fit$df != 0)[1]
      if (is.na(firstActive)) {
        # Last resort when no fit on the path has an active variable: keep the
        # variable with the largest absolute inner product with the response.
        varToKeep <- which.max(abs(crossprod(Xi, yb)))
      } else {
        # Variables active in the first non-empty fit of the path.
        varToKeep <- which(res$glmnet.fit$beta[, firstActive] != 0)
      }
    }

    groupParts[[i]] <- rep(i, length(varToKeep))
    varParts[[i]] <- groupi[varToKeep]
  } # end for lasso group

  groupGL <- unlist(groupParts)
  varGL <- unlist(varParts)

  #### group-lasso part
  # Use the centred response (no intercept is fitted) and keep a matrix even
  # when a single variable was pre-selected.
  res <- gglasso::cv.gglasso(Xb[, varGL, drop = FALSE], yb, group = groupGL,
                             nfolds = K, intercept = FALSE)
  beta <- stats::coef(res, s = s)[-1]

  return(list(sel = varGL[which(beta != 0)], group = group, groupGL = groupGL, varGL = varGL))
}
