###############################################################################
###############################################################################
## 
## Description:
## Replication script for: Peeters, C.F.W., Bilgrau, A.E., & van Wieringen, W.N.
## 'rags2ridges: A One-Stop-L2-Shop for Graphical Modeling of High-Dimensional 
##  Precision Matrices'
##
## Setup:
## The structure of the script follows the structure of the related article.
## The preliminaries cover invoking the package and accessing the data.
## Section I covers the extraction, visualization and analysis of single 
## networks. Section II covers the joint extraction, visualization and 
## analysis of multiple networks.
##
###############################################################################
###############################################################################

###############################################################################
###############################################################################
## 
## Preliminaries
##
###############################################################################
###############################################################################

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Invoke package**
#' **------------------------------------------------------------------------**

## Needed package
library("rags2ridges")

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 1: Get acquainted with the data (Preamble Section 3)**
#' **------------------------------------------------------------------------**

## Data packaged as 3 data objects in ADdata object
data("ADdata", package = "rags2ridges")

## To probe objects one could use:
# - objects()
# - head(ADmetabolites)
# - head(sampleInfo)
# - colnames(variableInfo); table(variableInfo)

###############################################################################
###############################################################################
## 
## Code Section 1: Single Network
## Corresponding to Section 3.1 of the text
##
###############################################################################
###############################################################################

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 2: Extract and scale data for AD Class 2**
#' **------------------------------------------------------------------------**

ADclass2 <- ADmetabolites[, sampleInfo$ApoEClass == "Class 2"]
ADclass2 <- scale(t(ADclass2))

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 3: Precision matrix estimation with given penalty value**
#' **------------------------------------------------------------------------**

P <- ridgeP(covML(ADclass2), lambda = .3)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 4: Find optimal regularized precision matrix**
#' **------------------------------------------------------------------------**

## Use kCVauto to find optimal regularized precision matrix
## Uses ridgeP() internally in search or optimal penalty value
set.seed(1234)
OPT <- optPenalty.kCVauto(
  ADclass2, 
  fold      = 10,
  lambdaMin = 1e-07, 
  lambdaMax = 20,
  target    = default.target(covML(ADclass2), type = "DUPV"))

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 5: Have a look at optimal penalty**
#' **------------------------------------------------------------------------**

OPT$optLambda

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 6: Assess Conditioning (Figure 3)**
#' **------------------------------------------------------------------------**

## Assess conditioning of optimal precision using CN plot
CNplot(covML(ADclass2), 
       lambdaMin = 1e-07, 
       lambdaMax = 20, 
       step      = 5000,
       target    = default.target(covML(ADclass2), type = "DUPV"),
       Iaids     = TRUE,
       vertical  = TRUE,
       value     = OPT$optLambda,
       verbose   = FALSE)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 6: Extract Network**
#' **------------------------------------------------------------------------**

## Threshold the optimal precision matrix
## Retain those elements whose posterior probability of being present >= .999
P0 <- sparsify(OPT$optPrec,
               threshold = "localFDR",
               FDRcut    = .999,
               verbose   = FALSE)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 7: Heatmap of sparsified partial correlation matrix (Figure 4)**
#' **------------------------------------------------------------------------**

plot(P0$sparseParCor, diag = FALSE, textsize = 3)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 8: Visualize Network with calls to Ugraph (Figure 5)**
#' **------------------------------------------------------------------------**

opar <- par(mfrow = c(1, 3))
## Simple visualization
## Circular layout, no pruning
Ugraph(P0$sparseParCor, 
       type  = "fancy", 
       Vsize = 3, 
       Vcex  = .1)

## Pruning can possibly give more clear idea of structure
Ugraph(P0$sparseParCor, 
       type  = "fancy", 
       Vsize = 3, 
       Vcex  = .1, 
       prune = TRUE)

## Type of layout is important
## Layout with FR algorithm
set.seed(1567)
Coords <- Ugraph(P0$sparseParCor, 
                 type  = "fancy", 
                 lay   = "layout_with_fr", 
                 prune = TRUE, 
                 Vsize = 7, 
                 Vcex  = .3)
par(opar)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 9: Visualize Network with node-coloring (Figure 6)**
#' **------------------------------------------------------------------------**

## Colorings may add additional information
## Creating vector of colors (simple way) to indicate compound family
PcorP  <- pruneMatrix(P0$sparseParCor)
Colors <- rownames(PcorP)
Colors[grep("Amine", rownames(PcorP))]     <- "lightblue"
Colors[grep("Org.Acid", rownames(PcorP))]  <- "orange"
Colors[grep("Lip", rownames(PcorP))]       <- "yellow"
Colors[grep("Ox.Stress", rownames(PcorP))] <- "purple"

Ugraph(PcorP, 
       type   = "fancy", 
       lay    = NULL, 
       coords = Coords,
       Vcolor = Colors, 
       Vsize  = 7, 
       Vcex   = .3)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 10: Obtain simple network statistics**
#' **------------------------------------------------------------------------**

## Simple network statistics
## The warning can be ignored: standard warning when calculating closeness 
##     centrality for a disconnected graph
NwkSTATS <- GGMnetworkStats(PcorP)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 11: Looking at Node Metrics: degree**
#' **------------------------------------------------------------------------**

## Look at degree
head(sort(NwkSTATS$degree, decreasing = TRUE))

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 12: Looking at Node Metrics: betweenness**
#' **------------------------------------------------------------------------**

## Look at Betweennes Centrality
head(sort(NwkSTATS$betweenness, decreasing = TRUE))

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 13: Simple Network Analysis: Path Metrics (Figure 7)**
#' **------------------------------------------------------------------------**

## Find 2 strongest paths between Amines 1 and 3
## Determine if these are mediating or moderating paths
## The print out is not included in the paper
Paths <- GGMpathStats(P0$sparsePrecision, 
                      node1   = 1, 
                      node2   = 2, 
                      lay     = NULL, 
                      coords  = Coords,
                      nrPaths = 2, 
                      Vsize   = 4, 
                      Vcex    = .2, 
                      prune   = TRUE,
                      scale   = 2,
                      legend  = FALSE)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 14: Simple Network Analysis: Community Search (Figure 8)**
#' **------------------------------------------------------------------------**

## Find and visualize the communities for the extracted network
## Finding communities on the basis of betweenness-centrality
set.seed(24354)
Commy <- Communities(PcorP, Vcolor = Colors, Vsize = 7,  Vcex = .3)

## Peeking at memberships can be achieved by calling Commy$membership

###############################################################################
###############################################################################
## 
## Section 2: Multiple Networks
## Corresponding to Section 3.2 of the text
##
###############################################################################
###############################################################################

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 15: Lists of Class Data**
#' **------------------------------------------------------------------------**

## Subset
ADclass1 <- ADmetabolites[, sampleInfo$ApoEClass == "Class 1"]
ADclass2 <- ADmetabolites[, sampleInfo$ApoEClass == "Class 2"]

## Transpose and scale data
ADclass1 <- scale(t(ADclass1))
ADclass2 <- scale(t(ADclass2))

## Correlations for subsets
rAD1 <- cor(ADclass1)
rAD2 <- cor(ADclass2)

## Constructing list of correlation matrices
Rlist <- list(rAD1 = rAD1, rAD2 = rAD2)
samps <- c(nrow(ADclass1), nrow(ADclass2))

## Constructing list of target matrices and data
Tlist <- default.target.fused(Slist = Rlist, ns = samps, type = "DUPV")
Ylist <- list(AD1data = ADclass1, AD2data = ADclass2)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 16: Fused class-specific Precision Matrices**
#' **------------------------------------------------------------------------**

## Finding optimal penalty parameters and precision matrices
## Will take a few minutes
set.seed(8910)
OPTf <- optPenalty.fused(
  Ylist     = Ylist, 
  Tlist     = Tlist, 
  lambda    = as.matrix(cbind(c("ridge1", "fusion"),c("fusion", "ridge2"))),
  cv.method = "kCV",
  k         = 10,
  verbose   = FALSE)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 17: Peeking at optimal penalty-values**
#' **------------------------------------------------------------------------**

OPTf$lambda.unique

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 18: Extract Networks**
#' **------------------------------------------------------------------------**

## Threshold the optimal precision matrices
## Retain those elements whose posterior probability of being present >= .999
P0s <- sparsify.fused(OPTf$Plist, 
                      threshold = "localFDR", 
                      FDRcut    = .999,
                      verbose   = FALSE)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 19: Visualize Networks (Figure 9)**
#' **------------------------------------------------------------------------**

## Visualizing classes with same coordinates helps (visual) comparison
## First, need to retain union of features over class-networks
TST <- Union(P0s$AD1data$sparseParCor, P0s$AD2data$sparseParCor)

## Convenience, rename sparsified partial correlation matrices over union
PCclass1 <- TST$M1subset
PCclass2 <- TST$M2subset

## Set coloring
Colors <- rownames(PCclass2)
Colors[grep("Amine", rownames(PCclass2))]     <- "lightblue"
Colors[grep("Org.Acid", rownames(PCclass2))]  <- "orange"
Colors[grep("Lip", rownames(PCclass2))]       <- "yellow"
Colors[grep("Ox.Stress", rownames(PCclass2))] <- "purple"

set.seed(111213)
opar <- par(mfrow = c(1, 3))
## Visualize first network, retain coordinates
Coords <- Ugraph(PCclass2, 
                 type   = "fancy", 
                 lay    = "layout_with_fr", 
                 Vcolor = Colors, 
                 prune  = FALSE, 
                 Vsize  = 7, 
                 Vcex   = .3,
                 main   = "AD Class 2")

## Visualize second network with retained coordinates
## Printed output refers to the node coordinates
Ugraph(PCclass1, 
       type   = "fancy",
       lay    = NULL,
       coords = Coords,
       Vcolor = Colors, 
       prune  = FALSE, 
       Vsize  = 7, 
       Vcex   = .3,
       main   = "AD Class 1")

## Visualize the edges unique to each class
DiffGraph(PCclass1, PCclass2,
          lay    = NULL,
          coords = Coords,
          Vcolor = Colors, 
          Vsize  = 7, 
          Vcex   = .3,
          main   = "Differential Network")
par(opar)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 20: Simple Network Analysis: Node metrics**
#' **------------------------------------------------------------------------**

## Make list of sparse Partial correlation (or precision) matrices
PC0list <- list(PCclass1 = PCclass1, PCclass2 = PCclass2)

## GGMnetworks.fused
## The warning can be ignored: standard warning when calculating closeness 
##     centrality for a disconnected graph
NwkSTATSList <- GGMnetworkStats.fused(PC0list)

## Compare top degree centralities
DegreesAD1  <- data.frame(rownames(NwkSTATSList), 
                          NwkSTATSList$PCclass1.degree)
DegreesAD2  <- data.frame(rownames(NwkSTATSList), 
                          NwkSTATSList$PCclass2.degree)
DegreesAD1o <- DegreesAD1[order(DegreesAD1[, 2], decreasing = TRUE), ]
DegreesAD2o <- DegreesAD2[order(DegreesAD2[, 2], decreasing = TRUE), ]
head(DegreesAD1o, 7)
head(DegreesAD2o, 7)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 21: Simple Network Analysis: Communities (Figure 10)**
#' **------------------------------------------------------------------------**

## Finding feature-communities on the basis of betweenness-centrality
set.seed(141516)
opar <- par(mfrow = c(1, 2))
CommyC1 <- Communities(PCclass1, Vcolor = Colors, Vsize = 7,  Vcex = .3,
                       main = "Modules AD Class 1")
CommyC2 <- Communities(PCclass2, Vcolor = Colors, Vsize = 7,  Vcex = .3,
                       main = "Modules AD Class 2")
par(opar)

## Peeking at memberships can be done through:
#- CommyC1$membership
#- CommyC2$membership

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 22: Plotting degree distributions (Figure 11)**
#' **------------------------------------------------------------------------**

## Testing closely corresponds to assessing global network properties
## Plot degree distributions
plot(density(DegreesAD1[, 2]), col = "red",  xlim = c(-1, 8),
     xlab = "Degree", main = "")
lines(density(DegreesAD2[, 2]), col = "blue")
legenda <- c("AD class 1", "AD class 2")
legend(5, .5, legend = legenda, lwd = rep(1, 2), lty = rep(1, 2), 
       col = c("red", "blue"), cex = .9)

#'#############################################################################
#'#############################################################################
#' **------------------------------------------------------------------------**
#' **Chunk 23: Simple Network Analysis: Testing degree distributions**
#' **------------------------------------------------------------------------**

## Testing degree distributions with dependent 2-group Wilcoxon Signed Rank Test
wilcox.test(DegreesAD1[, 2], DegreesAD2[, 2], paired = TRUE,
            alternative = "less")