#####################################################################
#                                                                   #
#                   Replication material for                        # 
#     BSL: An R Package for Efficient Parameter Estimation for      #
#     Simulation-Based Models via Bayesian Synthetic Likelihood     #
#        Ziwen An, Leah F. South and Christopher Drovandi           #
#                                                                   #
#####################################################################

# Note: Some code chunks may take hours to run. Results (used in the
# paper) have been provided using the same parameter settings below to
# save running time.  Users may set run to TRUE to replicate the
# results and save them in a specified folder or to FALSE to only load
# the prepared results which can be found in folder
# "prepared_results":
run <- FALSE
folder <- "prepared_results"

# Install dependencies if not already installed
list.of.packages <- c("glasso", "ggplot2", "MASS", "mvtnorm", "copula", 
                      "whitening", "graphics", "gridExtra", "foreach", "coda", 
                      "Rcpp", "doRNG", "methods", "stringr", "Rcpp", 
                      "RcppArmadillo", "ggcorrplot", "doParallel")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages)) {
    install.packages(new.packages)
}

# Install the BSL package if necessary
if (!require("BSL")) {
    install.packages("BSL")
}

###################################################
### Preliminaries
###################################################
# install the packages if you have not already installed them
library("MASS")
library("BSL")
library("ggcorrplot")
library("gridExtra")
library("doParallel")
library("doRNG")

###################################################
### 3. Using the BSL package
###################################################
### 3.1. Description of the MA(2) example
###################################################
data("ma2", package = "BSL")

###################################################
### 3.2. The model object
###################################################
ma2Model <- newModel(fnSim = ma2_sim, fnSum = function(x) x, 
  simArgs = list(T = 50), theta0 = c(0.6, 0.2), 
  fnLogPrior = ma2_prior)

ma2_sim
ma2_sim_vec
ma2_prior

###################################################
### 3.3. The main function
###################################################
if (run) {
    set.seed(1)
    resultMa2BSL <- bsl(y = ma2$data, n = 500, M = 100000, model = ma2Model, 
                        covRandWalk = ma2$cov, method = "BSL", verbose = 0L)    
    saveRDS(resultMa2BSL, file = file.path(folder, "resultMa2BSL.rds"))

    resultMa2uBSL <- bsl(y = ma2$data, n = 500, M = 100000, 
                         model = ma2Model, covRandWalk = ma2$cov, method = "uBSL",
                         verbose = 0L)
    saveRDS(resultMa2uBSL, file = file.path(folder, "resultMa2uBSL.rds"))

    resultMa2SemiBSL <- bsl(y = ma2$data, n = 500, M = 100000, 
                            model = ma2Model, covRandWalk = ma2$cov, method = "semiBSL",
                            verbose = 0L)
    saveRDS(resultMa2SemiBSL, file = file.path(folder, "resultMa2SemiBSL.rds"))

    resultMa2rBSLM <- bsl(y = ma2$data, n = 500, M = 100000, 
                          model = ma2Model, covRandWalk = ma2$cov, method = "BSLmisspec", 
                          misspecType = "mean", tau = 0.5, verbose = 0L)
    slot(resultMa2rBSLM, "gamma") <- matrix(NA_real_, 1, 1)
    saveRDS(resultMa2rBSLM, file = file.path(folder, "resultMa2rBSLM.rds"))
    
    resultMa2rBSLV <- bsl(y = ma2$data, n = 500, M = 100000, 
                          model = ma2Model, covRandWalk = ma2$cov, method = "BSLmisspec", 
                          misspecType = "variance", tau = 0.5, verbose = 0L)
    slot(resultMa2rBSLV, "gamma") <- matrix(NA_real_, 1, 1)
    saveRDS(resultMa2rBSLV, file = file.path(folder, "resultMa2rBSLV.rds"))
}

###################################################
### Figure 1
###################################################
d <- 50
entry_diag <- 1 + 0.6^2 + 0.2^2
entry_subdiag <- 0.6 + 0.6*0.2
entry_subsubdiag <- 0.2
cov_ma2 <- diag(rep(entry_diag, d))
for (i in 1:(d-1)) {
  cov_ma2[i, i+1] <- cov_ma2[i+1, i] <- entry_subdiag
}

for (i in 1:(d-2)) {
  cov_ma2[i, i+2] <- cov_ma2[i+2, i] <- entry_subsubdiag
}

inv_cov_ma2 <- solve(cov_ma2)
cor_ma2 <- cov2cor(cov_ma2)
inv_cor_ma2 <- solve(cor_ma2)

gg1 <- ggcorrplot(cor_ma2) + 
  ggtitle("correlation matrix") + 
  theme(plot.title = element_text(hjust = 0.5))
gg2 <- ggcorrplot(inv_cor_ma2) + 
  ggtitle("inverse correlation matrix") + 
  theme(plot.title = element_text(hjust = 0.5))
grid.arrange(gg1, gg2, ncol = 2)

# Shrinkage of the likelihood estimator
if (run) {
    set.seed(1)
    resultMa2BSLasso <- bsl(y = ma2$data, n = 300, M = 100000, 
                            model = ma2Model, covRandWalk = ma2$cov, method = "BSL", 
                            shrinkage = "glasso", penalty = 0.027, verbose = 0L)
    saveRDS(resultMa2BSLasso, file = file.path(folder, "resultMa2BSLasso.rds"))

    resultMa2BSLWarton <- bsl(y = ma2$data, n = 300, M = 100000, 
                              model = ma2Model, covRandWalk = ma2$cov, method = "BSL", 
                              shrinkage = "Warton", penalty = 0.75, verbose = 0L)
    saveRDS(resultMa2BSLWarton, file = file.path(folder, "resultMa2BSLWarton.rds"))
    
    W <- estimateWhiteningMatrix(20000, ma2Model, method = "PCA", 
                                 thetaPoint = c(0.6, 0.2))
    
    resultMa2BSLWhitening <- bsl(y = ma2$data, n = 300, M = 100000, 
                                 model = ma2Model, covRandWalk = ma2$cov, method = "BSL", 
                                 shrinkage = "Warton", penalty = 0.6, whitening = W,
                                 verbose = 0L)
    saveRDS(resultMa2BSLWhitening, file = file.path(folder, "resultMa2BSLWhitening.rds"))
}

# Parallel computation
if (run) {
    ncores <- detectCores()
    cl <- makeCluster(ncores - 1)
    registerDoParallel(cl)
    registerDoRNG(1)

    resultMa2BSLParallel <- bsl(y = ma2$data, n = 500, M = 100000, 
                                model = ma2Model, covRandWalk = ma2$cov, method = "BSL", 
                                parallel = TRUE, verbose = 0L)
    saveRDS(resultMa2BSLParallel, file = file.path(folder, "resultMa2BSLParallel.rds"))
    
    stopCluster(cl)
    registerDoSEQ()
}

###################################################
### 3.4. Interpret and visualize the BSL result
###################################################
# Load the prepared results for the MA(2) example to save the computation time.
# The following results are all replicable with the example code above.
resultMa2BSL          <- readRDS(file.path(folder, "resultMa2BSL.rds"))
resultMa2uBSL         <- readRDS(file.path(folder, "resultMa2uBSL.rds"))
resultMa2SemiBSL      <- readRDS(file.path(folder, "resultMa2SemiBSL.rds"))
resultMa2rBSLM        <- readRDS(file.path(folder, "resultMa2rBSLM.rds"))
resultMa2rBSLV        <- readRDS(file.path(folder, "resultMa2rBSLV.rds"))
resultMa2BSLasso      <- readRDS(file.path(folder, "resultMa2BSLasso.rds"))
resultMa2BSLWarton    <- readRDS(file.path(folder, "resultMa2BSLWarton.rds"))
resultMa2BSLWhitening <- readRDS(file.path(folder, "resultMa2BSLWhitening.rds"))

resultMa2BSL
summary(resultMa2BSL)
plot.ts(getLoglike(resultMa2BSL))

plot(resultMa2BSL, which = 1, thetaTrue = c(0.6, 0.2), thin = 30)
mtext("Approximate Univariate Posteriors", line = 1, cex = 1.5)

plot(resultMa2BSL, which = 2, thetaTrue = c(0.6, 0.2), thin = 30, 
  options.density = list(color = "coral4", fill = "coral", alpha = 0.5),
  options.theme = list(panel.background = element_rect(fill = "beige"), 
    plot.margin = grid::unit(rep(0.05, 4), "npc")))

ma2Results <- list(resultMa2BSL, resultMa2uBSL, resultMa2SemiBSL, 
  resultMa2rBSLM, resultMa2rBSLV, resultMa2BSLasso, 
  resultMa2BSLWarton, resultMa2BSLWhitening)
names(ma2Results) <- c("BSL", "uBSL", "semiBSL", "rBSLM", "rBSLV", 
  "BSLasso", "BSLWarton", "BSLWhitening")
t(sapply(ma2Results, summary))

par(mar = c(5, 4, 1, 2), oma = c(0, 1, 2, 0))
combinePlotsBSL(ma2Results, which = 1, thetaTrue = c(0.6, 0.2), 
  thin = 30, lty = 1:8, lwd = rep(2, 8), legendNcol = 4)
mtext("Approximate Univariate Posteriors", outer = TRUE, cex = 1.5)

combinePlotsBSL(ma2Results, which = 2, thetaTrue = c(0.6, 0.2), 
  thin = 30, options.linetype = list(values = 1:8), 
  options.size = list(values = rep(1, 8)),
  options.theme = list(plot.margin = grid::unit(rep(0.03, 4), "npc"),
    axis.title = ggplot2::element_text(size = 12), 
    axis.text = ggplot2::element_text(size = 8),
    legend.text = ggplot2::element_text(size = 12)))


###################################################
### 3.5. Selecting the penalty parameter for shrinkage
###################################################
ssy <- ma2_sum(ma2$data)
lambda_all <- list(exp(seq(-3, 0.5, length.out = 20)), 
  exp(seq(-4, -0.5, length.out = 20)), 
  exp(seq(-5.5, -1.5, length.out = 20)), 
  exp(seq(-7, -2, length.out = 20)))
if (run) {
    set.seed(100)
    selectPenaltyMA2 <- selectPenalty(ssy = ssy, n = c(50, 150, 300, 500), 
                                      lambda_all, theta = c(0.6, 0.2), M = 100, sigma = 1.5, 
                                      model = ma2Model, method = "BSL", shrinkage = "glasso",
                                      verbose = 0L)
    saveRDS(selectPenaltyMA2, file = file.path(folder, "selectPenaltyMA2.rds"))
}
selectPenaltyMA2      <- readRDS(file.path(folder, "selectPenaltyMA2.rds"))

selectPenaltyMA2
plot(selectPenaltyMA2)

###################################################
### 4. Toad example
###################################################
### 4.2. Approximate the posterior with BSL
###################################################
set.seed(1)
data("toad", package = "BSL")
toadModel <- newModel(fnSim = toad_sim, fnSum = toad_sum, 
  theta0 = toad$theta0, fnLogPrior = toad_prior, 
  simArgs = toad$sim_args_real,
  thetaNames = expression(alpha, gamma, p[0]))

ncores <- detectCores()
cl <- makeCluster(ncores - 1)
registerDoParallel(cl)

sim <- simulation(toadModel, n = 1000, theta = toad$theta0, seed = 10, 
  parallel = TRUE)
par(mfrow = c(6, 8), mar = c(3, 1.5, 0.5, 0.5))
for (i in 1:48) plot(density(sim$ssx[, i]), main = "", xlab = "")

stopCluster(cl)
registerDoSEQ()

covWalk <- toad$cov
paraBound <- matrix(c(1, 2, 0, 100, 0, 0.9), 3, 2, byrow = TRUE)

if (run) {
    ncores <- detectCores()
    cl <- makeCluster(ncores - 1)
    registerDoParallel(cl)
    registerDoRNG(1)

    resultToadBSL <- bsl(toad$data_real, n = 500, M = 50000, 
                         model = toadModel, method = "BSL", covRandWalk = covWalk, 
                         logitTransformBound = paraBound, parallel = TRUE,
                         verbose = 0L)
    saveRDS(resultToadBSL, file = file.path(folder, "resultToadBSL.rds"))

    resultToaduBSL <- bsl(toad$data_real, n = 500, M = 50000, 
                          model = toadModel, method = "uBSL", covRandWalk = covWalk, 
                          logitTransformBound = paraBound, parallel = TRUE,
                          verbose = 0L)
    saveRDS(resultToaduBSL, file = file.path(folder, "resultToaduBSL.rds"))

    resultToadSemiBSL <- bsl(toad$data_real, n = 500, M = 50000, 
                             model = toadModel, method = "semiBSL", covRandWalk = covWalk, 
                             logitTransformBound = paraBound, parallel = TRUE,
                             verbose = 0L)
    saveRDS(resultToadSemiBSL, file = file.path(folder, "resultToadSemiBSL.rds"))

    resultToadrBSLM <- bsl(toad$data_real, n = 500, M = 50000, 
                           model = toadModel, method = "BSLmisspec", misspecType = "mean", 
                           tau = 0.5, covRandWalk = covWalk, logitTransformBound = paraBound, 
                           parallel = TRUE, verbose = 0L)
    slot(resultToadrBSLM, "gamma") <- matrix(NA_real_, 1, 1)    
    saveRDS(resultToadrBSLM, file = file.path(folder, "resultToadrBSLM.rds"))

    resultToadrBSLV <- bsl(toad$data_real, n = 500, M = 50000, 
                           model = toadModel, method = "BSLmisspec", misspecType = "variance", 
                           tau = 0.5,  covRandWalk = covWalk, logitTransformBound = paraBound, 
                           parallel = TRUE, verbose = 0L)
    slot(resultToadrBSLV, "gamma") <- matrix(NA_real_, 1, 1)        
    saveRDS(resultToadrBSLV, file = file.path(folder, "resultToadrBSLV.rds"))

    resultToadBSLWhitening <- bsl(toad$data_real, n = 500, M = 50000, 
                                  model = toadModel, method = "BSL", shrinkage = "Warton", 
                                  penalty = 0.12, whitening = TRUE,  covRandWalk = covWalk, 
                                  logitTransformBound = paraBound, parallel = TRUE,
                                  verbose = 0L)
    saveRDS(resultToadBSLWhitening, file = file.path(folder, "resultToadBSLWhitening.rds"))

    stopCluster(cl)
    registerDoSEQ()
}

# Load the prepared results for the toad example to save the computation time.
# The following results are all replicable with the example code above.
resultToadBSL          <- readRDS(file.path(folder, "resultToadBSL.rds"))
resultToaduBSL         <- readRDS(file.path(folder, "resultToaduBSL.rds"))
resultToadSemiBSL      <- readRDS(file.path(folder, "resultToadSemiBSL.rds"))
resultToadrBSLM        <- readRDS(file.path(folder, "resultToadrBSLM.rds"))
resultToadrBSLV        <- readRDS(file.path(folder, "resultToadrBSLV.rds"))
resultToadBSLWhitening <- readRDS(file.path(folder, "resultToadBSLWhitening.rds"))

toadResults <- list(resultToadBSL, resultToaduBSL, resultToadSemiBSL, 
  resultToadBSLWhitening, resultToadrBSLM, resultToadrBSLV)
names(toadResults) <- c("BSL", "uBSL", "semiBSL", "BSLWhitening", 
  "rBSLM", "rBSLV")
t(sapply(toadResults, summary))

combinePlotsBSL(toadResults, which = 1, thin = 30, burnin = 5000)


