## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#   Script to replicate the results presented in the manuscript: 
#   "BDgraph: An R Package for Bayesian Structure Learning in Graphical Models"
#   Authors: Reza Mohammadi (a.mohammadi@uva.nl) 
#            Ernst C. Wit   (e.c.wit@rug.nl)
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#  Section 2: User interface
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

if (!require("BDgraph")) {
    install.packages("BDgraph")
}

## Loading the "BDgraph" package
library("BDgraph")
    
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#  Section 3.1: Sampling from G-Wishart distribution, based on Algorithm 1
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

adj <- matrix(c(0, 0, 1, 0, 0, 0, 1, 0, 0), 3, 3)
adj   # adjacency matrix 
   
set.seed(1)

# Sampling from G-Wishart distribution with parameters b and D
sample <- rgwish(n = 1, adj = adj, b = 3, D = diag(3))
round(sample, 2)
     
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#  Section 5: An example on simulated data
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

set.seed(5)

# Generating multivariate Gaussian data based on a 'scale-free' graph structure
data.sim <- bdgraph.sim(n = 60, p = 8, graph = "scale-free", type = "Gaussian")  
round(head(data.sim $ data, 4), 2)
    
# Running the BDMCMC sampling algorithm for Gaussian graphical models (ggm) 
sample.bdmcmc <- bdgraph(data = data.sim, method = "ggm", algorithm = "bdmcmc", 
                         iter = 5000, save = TRUE, cores = 1)
   
## - - - - Figure 3 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
# Summary result of the BDMCMC algorithm
pdf("Figure_3.pdf", width = 7, height = 7)
summary(sample.bdmcmc) 
dev.off()  
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
   
# Running the RJMCMC sampling algorithm for Gaussian graphical models (ggm) 
sample.rjmcmc <- bdgraph(data = data.sim, method = "ggm", algorithm = "rjmcmc", 
                          iter = 5000, save = TRUE, cores = 1)
   
## - - - - Figure 4 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
# ROC plot to compare the performance of the BDMCMC and RJMCMC algorithms
pdf("Figure_4.pdf", width = 5, height = 5)
par(mar = c(3.9, 4.1, 1.9, 0.2))  
plotroc(data.sim, sample.bdmcmc, sample.rjmcmc, smooth = TRUE, label = FALSE)
legend("bottomright", c("BDMCMC", "RJMCMC"), lty = 1:2, col = c(1, 4), lwd = c(2, 2), cex = 1.5)
dev.off()  
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
   
# Comparing the performance of the BDMCMC and RJMCMC algorithms
compare(data.sim, sample.bdmcmc, sample.rjmcmc, main = c("Target", "BDMCMC", "RJMCMC") )
   
## - - - - Figure 5 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
# Monitoring the convergence of the both algorithms
pdf("Figure_5.pdf", width = 10, height = 4.5)
op <- par(mfrow = c(1, 2), mar = c(4, 4.1, 2.1, 1.5))
plotcoda(sample.bdmcmc)      # for BDMCMC algorithm  
plotcoda(sample.rjmcmc)      # for RJMCMC algorithm 
par(op)   
dev.off()  
   
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#  Section 6.1: Application to labor force survey data
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

data("surveyData", package = "BDgraph")     # Loading the labor force survey dataset
head(surveyData, 5)

## The data is stored in an integer matrix with the ordinal variables
## already having suitable scores assigned. Alternatively also a
## data.frame where ordinal variables are coded as factors could be
## bused and might be preferable.

set.seed(3)

# Running the BDMCMC sampling algorithm for Gaussian copula graphical models (gcgm) 
sample.bdmcmc <- bdgraph(data = surveyData, method = "gcgm", iter = 10000, burnin = 7000, cores = 1)      
summary(sample.bdmcmc)
      
## - - - - Figure 6 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
set.seed(6)

edge.label <- c("  +", "  +", "  +", "  -", "  +", "  -", "  -", "  +", "  +", "  +", "  -", "  -")

pdf("Figure_6.pdf", width = 10, height = 10)
par(mar = c(0, 0, 0, 0)) 
plot(sample.bdmcmc, layout = igraph::layout.fruchterman.reingold, vertex.color = "white", 
     edge.color = "gray50", edge.label = edge.label, edge.label.cex = 2, edge.label.color = "gray10", 
     vertex.size = 24, vertex.label.font = 2, vertex.label.color = "black", vertex.label.cex = 1.6, 
     vertex.label.dist = 0, margin = -0.05 , edge.arrow.width = 5)
dev.off()  
     
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#  Section 6.2: Application to human gene expression
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

data("geneExpression", package = "BDgraph")  # Loading the human gene expression dataset
dim(geneExpression)
    
## - - - - Figure 7 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
pdf("Figure_7.pdf", width = 10, height = 6)
op <- par(mfrow = c(2, 3), mar = c(5, 2.8, 0.3, 0.2))
for(i in 1 : 6)
    hist(geneExpression[ , i ], xlab = colnames(geneExpression)[ i ], ylim = c(0, 40), 
          xlim = c(5, 16), cex.lab = 2, cex.axis = 1.7, main = NULL, col = "grey90")
par(op) 
dev.off()  
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
    
set.seed(4)
# Running the BDMCMC sampling algorithm for Gaussian copula graphical models (gcgm) 
sample.bdmcmc <- bdgraph(data = geneExpression, method = "gcgm", g.prior = 0.1, 
                          iter = 10000, burnin = 7000, cores = 1)      
   
## - - - - Figure 8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
if (!require("Matrix")) {
    install.packages("Matrix")
}
library("Matrix") 
p_links <- Matrix(plinks(sample.bdmcmc)) 

pdf("Figure_8.pdf", width = 7, height = 7)
image(p_links + t(p_links), useAbs = F, col.regions = grey(seq(1, 0, length = 256)), 
       xlab = NULL, ylab = NULL, sub = NULL, lwd = 0.01, 
       main = "Posterior Probabilities of all Links")
dev.off()  
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
    
selected_graph <- select(sample.bdmcmc, cut = 0.5)
      
colnames(selected_graph) <- substr(colnames(geneExpression), 1, 7) 
    
# for removing nodes without links
nodes_size      <- apply(selected_graph + t(selected_graph), 2, sum)
nodes_size_zero <- which(nodes_size == 0)
selected_graph  <- selected_graph[ -nodes_size_zero, -nodes_size_zero ] 

## - - - - Figure 9 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
set.seed(9)
pdf("Figure_9.pdf", width = 10, height = 10)
par(mar = c(1.2, 1.2, 1.2, 1.2))   
plot.graph(selected_graph, layout = igraph::layout.fruchterman.reingold, edge.color = "gray20", 
            vertex.color = "red", vertex.size = 2, vertex.label.font = 1, 
            vertex.label.color = "gray50", vertex.label.dist = 0.15, 
            margin = -0.05 , edge.arrow.width = 5)
dev.off()  
    
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#  Appendix: Dealing with memory usage restriction 
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |

iter   <- 100000
burnin <- 50000
p      <- 100
    
graph  <- matrix(1, nrow = p, ncol = p) 
print((iter - burnin) * object.size(graph), units = "auto") 
# 3.7 Gb
    
string_graph <- paste(graph[ upper.tri(graph) ], collapse = "")
print((iter - burnin) * object.size(string_graph), units = "auto") 
# 241.1 Mb
       
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |