set.seed(4419) library("BiDAG") ### MAP Discovery # Load data data("gsim100", package = "BiDAG") # Initialize score object score100 <- scoreparameters("bge", gsim100) ## MAP DAG # Base DAG using the PC algorithm basefit <- learnBN(scorepar = score100, algorithm = "order", plus1 = FALSE) getMCMCscore(basefit) # True DAG data("gsimmat", package = "BiDAG") DAGscore(scorepar = score100, incidence = gsimmat) # PC vs True DAG compareDAGs(getDAG(basefit), gsimmat, cpdag = TRUE)[c("TPR", "FPRn", "SHD")] # Iterative MCMC iterativefit <- learnBN(score100, algorithm = "orderIter", scoreout = TRUE, verbose = FALSE) summary(iterativefit) plot(iterativefit) # Assess performance of iterative fit it100 <- itercomp(iterativefit, gsimmat) plot(it100, vars = c("score", "TPR"), showit = c(1:6)) plot(it100, vars = c("FP", "SHD"), col = 2, showit = c(1:6)) ### Sampling from the posterior iterSpace <- getSpace(iterativefit) orderfit <- sampleBN(score100, algorithm = "order", scoretable = iterSpace) plot(orderfit) # Repeat MCMC to assess convergence orderfit2 <- sampleBN(score100, algorithm = "order", scoretable = iterSpace) epd <- lapply(list(orderfit, orderfit2), edgep, pdag = TRUE) plotpcor(epd, xlab = "run 1", ylab = "run 2") ## Assess convergence of partition MCMC partitionfit <- sampleBN(score100, algorithm = "partition", scoretable = iterSpace) partitionfit2 <- sampleBN(score100, algorithm = "partition", scoretable = iterSpace) epd2 <- lapply(list(partitionfit, partitionfit2), edgep, pdag = TRUE) plotpcor(epd2, xlab = "run 1", ylab = "run 2") # Compare DAGs with a certain posterior probability to true DAG samplecomp(orderfit, gsimmat, pdag = TRUE, p = c(0.5, 0.7, 0.9, 0.95)) ## Rerun comparison for a simulated dataset with N = 1000 data("gsim", package = "BiDAG") score1k <- scoreparameters("bge", gsim) iterativefit1k <- learnBN(score1k, algorithm = "orderIter", scoreout = TRUE, verbose = FALSE) iterSpace1k <- getSpace(iterativefit1k) orderfit1k <- sampleBN(score1k, algorithm = "order", scoretable = iterSpace1k) samplecomp(orderfit1k, gsimmat, pdag = TRUE, p = c(0.5, 0.7, 0.9, 0.95)) # MAP compareDAGs(getDAG(iterativefit), gsimmat, cpdag = TRUE) compareDAGs(getDAG(iterativefit1k), gsimmat, cpdag = TRUE) ### Learning DBNs set.seed(4419) data("DBNdata", package = "BiDAG") data("DBNmat", package = "BiDAG") DBNscore <- scoreparameters("bge", DBNdata, DBN = TRUE, dbnpar = list(samestruct = TRUE, slices = 5, b = 3)) DBNfit <- learnBN(DBNscore, algorithm = "orderIter", verbose = FALSE, scoreout = TRUE) plotdiffsDBN(getDAG(DBNfit), DBNmat, "trans", b = 3) ### Application set.seed(4419) data(kirc, package = "BiDAG") data(kirp, package = "BiDAG") data(interactions, package = "BiDAG") data(mapping, package = "BiDAG") # Construct prior matrix from a list of interactions geneNames <- colnames(kirp) edgepmat <- string2mat(geneNames, interactions, mapping, type = "pf", pf = 2) # Initialize score objects KIRPscore <- scoreparameters("bde", kirp, bdepar = list(chi = 0.5, edgepf = 16), edgepmat = edgepmat) KIRCscore <- scoreparameters("bde", kirc, bdepar = list(chi = 0.5, edgepf = 16), edgepmat = edgepmat) # MAP models itKirp <- learnBN(KIRPscore, algorithm = "orderIter", verbose = FALSE, scoreout = TRUE) itKirc <- learnBN(KIRCscore, algorithm = "orderIter", verbose = FALSE, scoreout = TRUE) # Extract MAP equivalence classes kircMAP <- getDAG(itKirc, cp = TRUE) kirpMAP <- getDAG(itKirp, cp = TRUE) # Joint MAP graph plot2in1(kirpMAP, kircMAP, fontsize = 20, name1 = "KIRP", name2 = "KIRC", cex.main = 1.5, main="") # Extract 
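## Optional check (not part of the original analysis): a minimal sketch that
## quantifies the structural agreement between the two MAP equivalence classes,
## complementing the joint plot above; it reuses compareDAGs() as elsewhere in
## this script, with kircMAP treated as the reference graph.
compareDAGs(kirpMAP, kircMAP)[c("TPR", "FPRn", "SHD")]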
# Extract search spaces
kirpSpace <- getSpace(itKirp)
kircSpace <- getSpace(itKirc)

# Run pairs of MCMC chains for convergence diagnostics
partKirp <- list()
partKirp[[1]] <- sampleBN(KIRPscore, algorithm = "partition", scoretable = kirpSpace)
partKirp[[2]] <- sampleBN(KIRPscore, algorithm = "partition", scoretable = kirpSpace)
partKirc <- list()
partKirc[[1]] <- sampleBN(KIRCscore, algorithm = "partition", scoretable = kircSpace)
partKirc[[2]] <- sampleBN(KIRCscore, algorithm = "partition", scoretable = kircSpace)

# Posterior probabilities of single edges
epKirc <- lapply(partKirc, edgep, pdag = TRUE)
epKirp <- lapply(partKirp, edgep, pdag = TRUE)

# Concordance visualization
plotpcor(epKirc, xlab = "run 1", ylab = "run 2", cex.lab = 1.5, cex.axis = 1.5,
         main = "KIRC")
plotpcor(epKirp, xlab = "run 1", ylab = "run 2", cex.lab = 1.5, cex.axis = 1.5,
         main = "KIRP")

# Posterior traces
plotpedges(partKirc[[1]], pdag = TRUE, cut = 0, highlight = itKirc$CPDAG,
           cex.lab = 1.5, cex.axis = 1.5, main = "KIRC")
plotpedges(partKirp[[1]], pdag = TRUE, cut = 0, highlight = itKirp$CPDAG,
           cex.lab = 1.5, cex.axis = 1.5, main = "KIRP")

# Consensus graphs
consKirp <- modelp(partKirp[[1]], p = 0.5, pdag = TRUE)
consKirc <- modelp(partKirc[[1]], p = 0.5, pdag = TRUE)

# Union of non-isolated nodes in the two KIRP models
mutKIRP <- unique(c(colnames(connectedSubGraph(consKirp)),
                    colnames(connectedSubGraph(kirpMAP))))

# Visualize comparison between KIRP MAP and consensus models
plotdiffs(getSubGraph(kirpMAP, mutKIRP), getSubGraph(consKirp, mutKIRP),
          name1 = "KIRP MAP", name2 = "KIRP p>0.5", estimated = FALSE,
          main = "", cex.main = 1.5, fontsize = 20)

# Compare KIRC MAP and consensus models
compareDAGs(consKirc, kircMAP)

# Compare KIRP and KIRC consensus graphs
plot2in1(consKirp, consKirc, fontsize = 20, name1 = "KIRP", name2 = "KIRC")

### Runtime

set.seed(4419)

# Number of nodes in the network
n <- 100
# Number of observations
N <- 1000
# Maximum numbers of parents considered
npar <- c(1, 3, 5, 7, 9, 10, 12, 14, 16, 18, 20)

# Generate DAG
myDAG <- pcalg::randomDAG(n, 3/n, lB = 0.4, uB = 2)
# Generate data
dta <- pcalg::rmvDAG(N, myDAG)

# Empty search space
ss0 <- matrix(0, nrow = n, ncol = n)

# Search spaces in which node 1 has i = 1, ..., K potential parents
ss <- list()
K <- 20
for (i in 1:K) {
  ss[[i]] <- ss0
  ss[[i]][2:(i + 1), 1] <- 1
}

# Initialize score object
param <- scoreparameters("bge", dta, bgnodes = c(3:n))

# Data frame to store results
K <- c(npar, npar[1:8], npar[1:6])
algorithm <- c(rep("order", 19), rep("partition", 6))
plus1 <- c(rep(FALSE, 11), rep(TRUE, 8), rep("", 6))
hardlim <- c(rep(20, 11), rep(14, 8), rep(11, 6))
timeK <- data.frame(K, algorithm, plus1, hardlim)

# Run simulation
for (i in 1:nrow(timeK)) {
  samplefit <- sampleBN(param, algorithm = timeK[i, 2], plus1 = timeK[i, 3],
                        startspace = ss[[timeK[i, 1]]], iterations = 1000,
                        stepsave = 500, hardlimit = timeK[i, 4])
  timeK$'time (sec)'[i] <- as.numeric(getRuntime(samplefit, 1))
}

# Label each search-space scheme for plotting
timeK$scheme <- paste(timeK$algorithm, timeK$plus1, sep = " ")

# R package ggplot2 is needed for plotting
library(ggplot2)
lab_H <- c(expression(paste("order MCMC, ", H)),
           expression(paste("order MCMC, ", H^{'+'})),
           expression(paste("partition MCMC, ", H^{'+'})))
ggplot(timeK, aes(K, `time (sec)`, col = scheme)) +
  geom_point() +
  geom_line() +
  scale_color_discrete(labels = lab_H) +
  scale_shape_discrete(labels = lab_H)

sessionInfo()
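## Optional (not part of the original script): a minimal sketch for archiving the
## runtime results and session details for reproducibility; the file names are
## illustrative assumptions.
write.csv(timeK, "BiDAG_runtime.csv", row.names = FALSE)
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")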