set.seed(4419) library("BiDAG") ### MAP Discovery # Load data data("gsim100", package = "BiDAG") # Initialize score object score100 <- scoreparameters("bge", gsim100) ## MAP DAG # Base DAG using the PC algorithm basefit <- learnBN(scorepar = score100, algorithm = "order", plus1 = FALSE) getMCMCscore(basefit) # True DAG data("gsimmat", package = "BiDAG") DAGscore(scorepar = score100, incidence = gsimmat) # PC vs True DAG compareDAGs(getDAG(basefit), gsimmat, cpdag = TRUE)[c("TPR", "FPRn", "SHD")] # Iterative MCMC iterativefit <- learnBN(score100, algorithm = "orderIter", scoreout = TRUE, verbose = FALSE) summary(iterativefit) plot(iterativefit) # Assess performance of iterative fit it100 <- itercomp(iterativefit, gsimmat) plot(it100, vars = c("score", "TPR"), showit = c(1:6)) plot(it100, vars = c("FP", "SHD"), col = 2, showit = c(1:6)) ### Sampling from the posterior iterSpace <- getSpace(iterativefit) orderfit <- sampleBN(score100, algorithm = "order", scoretable = iterSpace) plot(orderfit) # Repeat MCMC to assess convergence orderfit2 <- sampleBN(score100, algorithm = "order", scoretable = iterSpace) epd <- lapply(list(orderfit, orderfit2), edgep, pdag = TRUE) plotpcor(epd, xlab = "run 1", ylab = "run 2") ## Assess convergence of partition MCMC partitionfit <- sampleBN(score100, algorithm = "partition", scoretable = iterSpace) partitionfit2 <- sampleBN(score100, algorithm = "partition", scoretable = iterSpace) epd2 <- lapply(list(partitionfit, partitionfit2), edgep, pdag = TRUE) plotpcor(epd2, xlab = "run 1", ylab = "run 2") # Compare DAGs with a certain posterior probability to true DAG samplecomp(orderfit, gsimmat, pdag = TRUE, p = c(0.5, 0.7, 0.9, 0.95)) ## Rerun comparison for a simulated dataset with N = 1000 data("gsim", package = "BiDAG") score1k <- scoreparameters("bge", gsim) iterativefit1k <- learnBN(score1k, algorithm = "orderIter", scoreout = TRUE, verbose = FALSE) iterSpace1k <- getSpace(iterativefit1k) orderfit1k <- sampleBN(score1k, algorithm = "order", scoretable = iterSpace1k) samplecomp(orderfit1k, gsimmat, pdag = TRUE, p = c(0.5, 0.7, 0.9, 0.95)) # MAP compareDAGs(getDAG(iterativefit), gsimmat, cpdag = TRUE) compareDAGs(getDAG(iterativefit1k), gsimmat, cpdag = TRUE) ### Learning DBNs set.seed(4419) data("DBNdata", package = "BiDAG") data("DBNmat", package = "BiDAG") DBNscore <- scoreparameters("bge", DBNdata, DBN = TRUE, dbnpar = list(samestruct = TRUE, slices = 5, b = 3)) DBNfit <- learnBN(DBNscore, algorithm = "orderIter", verbose = FALSE, scoreout = TRUE) plotdiffsDBN(getDAG(DBNfit), DBNmat, "trans", b = 3) ### Application set.seed(4419) data(kirc, package = "BiDAG") data(kirp, package = "BiDAG") data(interactions, package = "BiDAG") data(mapping, package = "BiDAG") # Construct prior matrix from a list of interactions geneNames <- colnames(kirp) edgepmat <- string2mat(geneNames, interactions, mapping, type = "pf", pf = 2) # Initialize score objects KIRPscore <- scoreparameters("bde", kirp, bdepar = list(chi = 0.5, edgepf = 16), edgepmat = edgepmat) KIRCscore <- scoreparameters("bde", kirc, bdepar = list(chi = 0.5, edgepf = 16), edgepmat = edgepmat) # MAP models itKirp <- learnBN(KIRPscore, algorithm = "orderIter", verbose = FALSE, scoreout = TRUE) itKirc <- learnBN(KIRCscore, algorithm = "orderIter", verbose = FALSE, scoreout = TRUE) # Extract MAP equivalence classes kircMAP <- getDAG(itKirc, cp = TRUE) kirpMAP <- getDAG(itKirp, cp = TRUE) # Joint MAP graph plot2in1(kirpMAP, kircMAP, fontsize = 20, name1 = "KIRP", name2 = "KIRC", cex.main = 1.5, main="") # Extract 
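## Optional check (not part of the original analysis): a minimal sketch that
## quantifies the structural agreement between the two MAP equivalence classes,
## complementing the joint plot above; it reuses compareDAGs() as elsewhere in
## this script, with kircMAP treated as the reference graph.
compareDAGs(kirpMAP, kircMAP)[c("TPR", "FPRn", "SHD")]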
# Extract search spaces
kirpSpace <- getSpace(itKirp)
kircSpace <- getSpace(itKirc)

# Run pairs of MCMC chains for convergence diagnostics
partKirp <- list()
partKirp[[1]] <- sampleBN(KIRPscore, algorithm = "partition", scoretable = kirpSpace)
partKirp[[2]] <- sampleBN(KIRPscore, algorithm = "partition", scoretable = kirpSpace)
partKirc <- list()
partKirc[[1]] <- sampleBN(KIRCscore, algorithm = "partition", scoretable = kircSpace)
partKirc[[2]] <- sampleBN(KIRCscore, algorithm = "partition", scoretable = kircSpace)

# Posterior probabilities of single edges
epKirc <- lapply(partKirc, edgep, pdag = TRUE)
epKirp <- lapply(partKirp, edgep, pdag = TRUE)

# Concordance visualization
plotpcor(epKirc, xlab = "run 1", ylab = "run 2", cex.lab = 1.5, cex.axis = 1.5,
         main = "KIRC")
plotpcor(epKirp, xlab = "run 1", ylab = "run 2", cex.lab = 1.5, cex.axis = 1.5,
         main = "KIRP")

# Posterior traces
plotpedges(partKirc[[1]], pdag = TRUE, cut = 0, highlight = itKirc$CPDAG,
           cex.lab = 1.5, cex.axis = 1.5, main = "KIRC")
plotpedges(partKirp[[1]], pdag = TRUE, cut = 0, highlight = itKirp$CPDAG,
           cex.lab = 1.5, cex.axis = 1.5, main = "KIRP")

# Consensus graphs
consKirp <- modelp(partKirp[[1]], p = 0.5, pdag = TRUE)
consKirc <- modelp(partKirc[[1]], p = 0.5, pdag = TRUE)

# Union of non-isolated nodes in the two KIRP models
mutKIRP <- unique(c(colnames(connectedSubGraph(consKirp)),
                    colnames(connectedSubGraph(kirpMAP))))

# Visualize comparison between KIRP MAP and consensus models
plotdiffs(getSubGraph(kirpMAP, mutKIRP), getSubGraph(consKirp, mutKIRP),
          name1 = "KIRP MAP", name2 = "KIRP p>0.5", estimated = FALSE,
          main = "", cex.main = 1.5, fontsize = 20)

# Compare KIRC MAP and consensus models
compareDAGs(consKirc, kircMAP)

# Compare KIRP and KIRC consensus graphs
plot2in1(consKirp, consKirc, fontsize = 20, name1 = "KIRP", name2 = "KIRC")

### Runtime

set.seed(4419)

# Number of nodes in the network
n <- 100
# Number of observations
N <- 1000
# Maximum numbers of parents considered
npar <- c(1, 3, 5, 7, 9, 10, 12, 14, 16, 18, 20)

# Generate DAG
myDAG <- pcalg::randomDAG(n, 3/n, lB = 0.4, uB = 2)
# Generate data
dta <- pcalg::rmvDAG(N, myDAG)

# Empty search space
ss0 <- matrix(0, nrow = n, ncol = n)

# Search spaces in which node 1 has i = 1, ..., K potential parents
ss <- list()
K <- 20
for (i in 1:K) {
  ss[[i]] <- ss0
  ss[[i]][2:(i + 1), 1] <- 1
}

# Initialize score object
param <- scoreparameters("bge", dta, bgnodes = c(3:n))

# Data frame to store results
K <- c(npar, npar[1:8], npar[1:6])
algorithm <- c(rep("order", 19), rep("partition", 6))
plus1 <- c(rep(FALSE, 11), rep(TRUE, 8), rep("", 6))
hardlim <- c(rep(20, 11), rep(14, 8), rep(11, 6))
timeK <- data.frame(K, algorithm, plus1, hardlim)

# Run simulation
for (i in 1:nrow(timeK)) {
  samplefit <- sampleBN(param, algorithm = timeK[i, 2], plus1 = timeK[i, 3],
                        startspace = ss[[timeK[i, 1]]], iterations = 1000,
                        stepsave = 500, hardlimit = timeK[i, 4])
  timeK$'time (sec)'[i] <- as.numeric(getRuntime(samplefit, 1))
}

# Label each search-space scheme for plotting
timeK$scheme <- paste(timeK$algorithm, timeK$plus1, sep = " ")

# R package ggplot2 is needed for plotting
library(ggplot2)
lab_H <- c(expression(paste("order MCMC, ", H)),
           expression(paste("order MCMC, ", H^{'+'})),
           expression(paste("partition MCMC, ", H^{'+'})))
ggplot(timeK, aes(K, `time (sec)`, col = scheme)) +
  geom_point() +
  geom_line() +
  scale_color_discrete(labels = lab_H) +
  scale_shape_discrete(labels = lab_H)

sessionInfo()
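## Optional (not part of the original script): a minimal sketch for archiving the
## runtime results and session details for reproducibility; the file names are
## illustrative assumptions.
write.csv(timeK, "BiDAG_runtime.csv", row.names = FALSE)
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")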