## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Script to replicate the results presented in the manuscript: # "BDgraph: An R Package for Bayesian Structure Learning in Graphical Models" # Authors: Reza Mohammadi (a.mohammadi@uva.nl) # Ernst C. Wit (e.c.wit@rug.nl) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Section 2: User interface ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | if (!require("BDgraph")) { install.packages("BDgraph") } ## Loading the "BDgraph" package library("BDgraph") ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Section 3.1: Sampling from G-Wishart distribution, based on Algorithm 1 ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | adj <- matrix(c(0, 0, 1, 0, 0, 0, 1, 0, 0), 3, 3) adj # adjacency matrix set.seed(1) # Sampling from G-Wishart distribution with parameters b and D sample <- rgwish(n = 1, adj = adj, b = 3, D = diag(3)) round(sample, 2) ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Section 5: An example on simulated data ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | set.seed(5) # Generating multivariate Gaussian data based on a 'scale-free' graph structure data.sim <- bdgraph.sim(n = 60, p = 8, graph = "scale-free", type = "Gaussian") round(head(data.sim $ data, 4), 2) # Running the BDMCMC sampling algorithm for Gaussian graphical models (ggm) sample.bdmcmc <- bdgraph(data = data.sim, method = "ggm", algorithm = "bdmcmc", iter = 5000, save = TRUE, cores = 1) ## - - - - Figure 3 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -| # Summary result of the BDMCMC algorithm pdf("Figure_3.pdf", width = 7, height = 7) summary(sample.bdmcmc) dev.off() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Running the RJMCMC sampling algorithm for Gaussian graphical models (ggm) sample.rjmcmc <- bdgraph(data = data.sim, method = "ggm", algorithm = "rjmcmc", iter = 5000, save = TRUE, cores = 1) ## - - - - Figure 4 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -| # ROC plot to compare the performance of the BDMCMC and RJMCMC algorithms pdf("Figure_4.pdf", width = 5, height = 5) par(mar = c(3.9, 4.1, 1.9, 0.2)) plotroc(data.sim, sample.bdmcmc, sample.rjmcmc, smooth = TRUE, label = FALSE) legend("bottomright", c("BDMCMC", "RJMCMC"), lty = 1:2, col = c(1, 4), lwd = c(2, 2), cex = 1.5) dev.off() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Comparing the performance of the BDMCMC and RJMCMC algorithms compare(data.sim, sample.bdmcmc, sample.rjmcmc, main = c("Target", "BDMCMC", "RJMCMC") ) ## - - - - Figure 5 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -| # Monitoring the convergence of the both algorithms pdf("Figure_5.pdf", width = 10, height = 4.5) op <- par(mfrow = c(1, 2), mar = c(4, 4.1, 2.1, 1.5)) plotcoda(sample.bdmcmc) # for BDMCMC algorithm plotcoda(sample.rjmcmc) # for RJMCMC algorithm par(op) dev.off() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Section 6.1: Application to labor force survey data ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | data("surveyData", package = "BDgraph") # Loading the labor force survey dataset head(surveyData, 5) ## The data is stored in an integer matrix with the ordinal variables ## already having suitable scores assigned. Alternatively also a ## data.frame where ordinal variables are coded as factors could be ## bused and might be preferable. set.seed(3) # Running the BDMCMC sampling algorithm for Gaussian copula graphical models (gcgm) sample.bdmcmc <- bdgraph(data = surveyData, method = "gcgm", iter = 10000, burnin = 7000, cores = 1) summary(sample.bdmcmc) ## - - - - Figure 6 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -| set.seed(6) edge.label <- c(" +", " +", " +", " -", " +", " -", " -", " +", " +", " +", " -", " -") pdf("Figure_6.pdf", width = 10, height = 10) par(mar = c(0, 0, 0, 0)) plot(sample.bdmcmc, layout = igraph::layout.fruchterman.reingold, vertex.color = "white", edge.color = "gray50", edge.label = edge.label, edge.label.cex = 2, edge.label.color = "gray10", vertex.size = 24, vertex.label.font = 2, vertex.label.color = "black", vertex.label.cex = 1.6, vertex.label.dist = 0, margin = -0.05 , edge.arrow.width = 5) dev.off() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Section 6.2: Application to human gene expression ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | data("geneExpression", package = "BDgraph") # Loading the human gene expression dataset dim(geneExpression) ## - - - - Figure 7 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -| pdf("Figure_7.pdf", width = 10, height = 6) op <- par(mfrow = c(2, 3), mar = c(5, 2.8, 0.3, 0.2)) for(i in 1 : 6) hist(geneExpression[ , i ], xlab = colnames(geneExpression)[ i ], ylim = c(0, 40), xlim = c(5, 16), cex.lab = 2, cex.axis = 1.7, main = NULL, col = "grey90") par(op) dev.off() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | set.seed(4) # Running the BDMCMC sampling algorithm for Gaussian copula graphical models (gcgm) sample.bdmcmc <- bdgraph(data = geneExpression, method = "gcgm", g.prior = 0.1, iter = 10000, burnin = 7000, cores = 1) ## - - - - Figure 8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -| if (!require("Matrix")) { install.packages("Matrix") } library("Matrix") p_links <- Matrix(plinks(sample.bdmcmc)) pdf("Figure_8.pdf", width = 7, height = 7) image(p_links + t(p_links), useAbs = F, col.regions = grey(seq(1, 0, length = 256)), xlab = NULL, ylab = NULL, sub = NULL, lwd = 0.01, main = "Posterior Probabilities of all Links") dev.off() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | selected_graph <- select(sample.bdmcmc, cut = 0.5) colnames(selected_graph) <- substr(colnames(geneExpression), 1, 7) # for removing nodes without links nodes_size <- apply(selected_graph + t(selected_graph), 2, sum) nodes_size_zero <- which(nodes_size == 0) selected_graph <- selected_graph[ -nodes_size_zero, -nodes_size_zero ] ## - - - - Figure 9 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -| set.seed(9) pdf("Figure_9.pdf", width = 10, height = 10) par(mar = c(1.2, 1.2, 1.2, 1.2)) plot.graph(selected_graph, layout = igraph::layout.fruchterman.reingold, edge.color = "gray20", vertex.color = "red", vertex.size = 2, vertex.label.font = 1, vertex.label.color = "gray50", vertex.label.dist = 0.15, margin = -0.05 , edge.arrow.width = 5) dev.off() ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | # Appendix: Dealing with memory usage restriction ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | iter <- 100000 burnin <- 50000 p <- 100 graph <- matrix(1, nrow = p, ncol = p) print((iter - burnin) * object.size(graph), units = "auto") # 3.7 Gb string_graph <- paste(graph[ upper.tri(graph) ], collapse = "") print((iter - burnin) * object.size(string_graph), units = "auto") # 241.1 Mb ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |