## Section 3 ##

library("MfUSampler")

my.seed <- 0
nsmp <- 1000

# Section 3.1 #

# Dose levels (z) and binomial counts (m1 = successes, m2 = failures)
# for the prior (historical) and current studies.
z <- c(1, 4, 7, 10, 13, 16, 19, 24)
m1.prior <- c(17, 26, 39, 27, 35, 37, 26, 23)
m2.prior <- c(215, 218, 137, 62, 36, 16, 13, 15)
m1.current <- c(46, 52, 44, 54, 38, 39, 23, 52)
m2.current <- c(290, 211, 134, 91, 53, 42, 23, 32)
# Quadratic design matrix: intercept, z, z^2.
X <- cbind(1, z, z^2)

# Section 3.2 #

# Binomial-logit log-likelihood of coefficient vector beta (up to an
# additive constant), given design matrix X and counts m1/m2.
loglike <- function(beta, X, m1, m2) {
  beta <- as.numeric(beta)
  Xbeta <- X %*% beta
  return(-sum((m1 + m2) * log(1 + exp(-Xbeta)) + m2 * Xbeta))
}

# Multivariate-normal log-prior (up to an additive constant) with mean
# beta0 and covariance W; solve(W, .) avoids forming the inverse.
logprior <- function(beta, beta0, W) {
  return(-0.5 * t(beta - beta0) %*% solve(W, beta - beta0))
}

# Unnormalized log-posterior = log-prior + log-likelihood.
# FIX: default beta0 was "rep(0,0, 3)" (comma typo for rep(0.0, 3)); it
# happened to evaluate to c(0, 0, 0) only because the 3 was taken as
# length.out. Spelled correctly now; behavior is unchanged.
logpost <- function(beta, X, m1, m2, beta0 = rep(0.0, 3),
                    W = diag(1e+6, nrow = 3)) {
  return(logprior(beta, beta0, W) + loglike(beta, X, m1, m2))
}

# Informative prior derived from the historical study.
beta0.prior <- c(-3.17, 0.33, -0.007)
W.prior <- 1e-4 * matrix(c(638, -111, 3.9,
                           -111, 24.1, -0.9,
                           3.9, -0.9, 0.04), ncol = 3)

# Draw nsmp posterior samples with the default univariate slice sampler.
set.seed(my.seed)
beta.ini <- c(0.0, 0.0, 0.0)
beta.smp <- MfU.Sample.Run(beta.ini, logpost, nsmp = nsmp,
                           X = X, m1 = m1.current, m2 = m2.current,
                           beta0 = beta0.prior, W = W.prior)
summ.slice <- summary(beta.smp)
save(summ.slice, file = "summ.slice")
summ.slice
summ.slice$covar

pdf(file = "figure2.pdf")
plot(beta.smp)
dev.off()

# Section 3.3 #

# Log-posterior with optional analytic gradient (grad = TRUE), as
# required by the adaptive rejection sampler.
# NOTE(review): the default W here is diag(1e+3) while logpost uses
# diag(1e+6); harmless because W is always supplied by callers, but the
# inconsistency looks unintentional — confirm against the paper.
logpost.fg <- function(beta, X, m1, m2, beta0 = rep(0.0, 3),
                       W = diag(1e+3, nrow = 3), grad = FALSE) {
  Xbeta <- X %*% beta
  if (grad) {
    log.prior.d <- -solve(W, beta - beta0)
    log.like.d <- t(X) %*% ((m1 + m2) / (1 + exp(Xbeta)) - m2)
    return(log.prior.d + log.like.d)
  }
  log.prior <- -0.5 * t(beta - beta0) %*% solve(W, beta - beta0)
  log.like <- -sum((m1 + m2) * log(1 + exp(-Xbeta)) + m2 * Xbeta)
  log.post <- log.prior + log.like
  return(log.post)
}

# Please note that even after fixing the random seed, the results (also
# those not about the timings) are not exactly reproducible. We suspect
# that the samples drawn are getting increasingly different due to slight
# differences in floating-point arithmetic for different platforms, which
# accumulate over the sampling.
# Note that time-dependent results (e.g. effective sampling rate or
# speedups) cannot be replicated exactly, for obvious reasons.

# Re-run the sampler using adaptive rejection sampling (ARS), which needs
# the gradient supplied by logpost.fg; ars.x gives per-coordinate initial
# abscissae for the three coefficients.
set.seed(my.seed)
beta.ini <- c(0.0, 0.0, 0.0)
beta.smp <- MfU.Sample.Run(beta.ini, logpost.fg, nsmp = nsmp,
                           uni.sampler = "ars",
                           control = MfU.Control(3, ars.x = list(
                             c(-10, 0, 10), c(-1, 0, 1),
                             c(-0.1, 0.0, 0.1))),
                           X = X, m1 = m1.current, m2 = m2.current,
                           beta0 = beta0.prior, W = W.prior)
summ.ars <- summary(beta.smp)
save(summ.ars, file = "summ.ars")
summ.ars

# Section 3.4 #

# Posterior predictive mean probability at each row of X.
predfunc.mean <- function(beta, X) {
  return(1 / (1 + exp(-X %*% beta)))
}
pred.mean <- predict(beta.smp, predfunc.mean, X)
predmean.summ <- summary(pred.mean)
print(predmean.summ, n = 8)

# Posterior predictive binary draws (one Bernoulli draw per row of X).
predfunc.binary <- function(beta, X) {
  return(as.numeric(runif(nrow(X)) < 1 / (1 + exp(-X %*% beta))))
}
pred.binary <- predict(beta.smp, predfunc.binary, X)
predbinary.summ <- summary(pred.binary)
print(predbinary.summ, n = 8)

## Section 4 ##

# Section 4.1 #

library("mvtnorm")

# Simulate an expanded, hierarchical data set: jittered dose levels
# sampled in proportion to the current-study totals, ngrp groups with
# group-level coefficients drawn from the prior.
set.seed(my.seed)
nrep <- 50
m.current <- m1.current + m2.current
nz.exp <- nrep * sum(m.current)
jitter <- 1.0
z.exp <- sample(z, size = nz.exp, replace = TRUE, prob = m.current) +
  (2 * runif(nz.exp) - 1) * jitter
X.exp <- cbind(1, z.exp, z.exp^2)
ngrp <- 5
beta.mat <- t(rmvnorm(ngrp, mean = beta0.prior, sigma = W.prior))
y.mat.exp <- matrix(as.numeric(runif(ngrp * nz.exp) <
                                 1 / (1 + exp(-X.exp %*% beta.mat))),
                    ncol = ngrp)

# Section 4.2 #

# Log-prior of the flattened coefficient matrix: independent MVN priors,
# one per group (column).
hb.logprior <- function(beta.flat, beta0, W) {
  beta.mat <- matrix(beta.flat, nrow = 3)
  return(sum(apply(beta.mat, 2, logprior, beta0, W)))
}

# Log-likelihood of the flattened coefficient matrix: sum of per-group
# Bernoulli-logit log-likelihoods.
hb.loglike <- function(beta.flat, X, y) {
  beta.mat <- matrix(beta.flat, nrow = 3)
  ngrp <- ncol(beta.mat)
  return(sum(sapply(1:ngrp, function(n) {
    xbeta <- X %*% beta.mat[, n]
    return(-sum((1 - y[, n]) * xbeta + log(1 + exp(-xbeta))))
  })))
}

# Unnormalized log-posterior for the naive (all-coefficients-at-once)
# sampler.
hb.logpost <- function(beta.flat, X, y, beta0, W) {
  return(hb.logprior(beta.flat, beta0, W) + hb.loglike(beta.flat, X, y))
}

nsmp <- 10
set.seed(my.seed)
beta.flat.ini <- rep(0.0, 3 * ngrp)
# FIX: in the source this assignment was split as "beta.flat.smp" /
# "<- MfU.Sample.Run(...)", which is a syntax error in R (the arrow
# cannot start a new line after a complete expression); rejoined here.
beta.flat.smp <- MfU.Sample.Run(beta.flat.ini, hb.logpost,
                                X = X.exp, y = y.mat.exp,
                                beta0 = beta0.prior, W = W.prior,
                                nsmp = nsmp)
t.naive <- attr(beta.flat.smp, "t")
save(t.naive, file = "t.naive")
cat("hb sampling time - naive method:", t.naive, "sec\n")

# Per-group log-likelihood, exploiting conditional independence of the
# groups given their own coefficients.
hb.loglike.grp <- function(beta, X, y) {
  beta <- as.numeric(beta)
  xbeta <- X %*% beta
  return(-sum((1 - y) * xbeta + log(1 + exp(-xbeta))))
}
hb.logprior.grp <- logprior

# Per-group log-posterior.
# FIX: default beta0 was "rep(0,0, 3)" (comma typo for rep(0.0, 3)); it
# only evaluated to c(0, 0, 0) by accident via length.out. Behavior is
# unchanged; callers always supply beta0 anyway.
hb.logpost.grp <- function(beta, X, y, beta0 = rep(0.0, 3),
                           W = diag(1e+6, nrow = 3)) {
  return(hb.logprior.grp(beta, beta0, W) + hb.loglike.grp(beta, X, y))
}

# Revised method: Gibbs-cycle over groups, sampling each group's
# 3 coefficients with its own univariate slice sampler.
set.seed(my.seed)
beta.mat.buff <- matrix(rep(0.0, 3 * ngrp), nrow = 3)
beta.mat.smp <- array(NA, dim = c(nsmp, 3, ngrp))
t.revised <- proc.time()[3]
for (i in 1:nsmp) {
  for (n in 1:ngrp) {
    beta.mat.buff[, n] <- MfU.Sample(beta.mat.buff[, n], hb.logpost.grp,
                                     uni.sampler = "slice",
                                     X = X.exp, y = y.mat.exp[, n],
                                     beta0 = beta0.prior, W = W.prior)
  }
  beta.mat.smp[i, , ] <- beta.mat.buff
}
t.revised <- proc.time()[3] - t.revised
save(t.revised, file = "t.revised")
cat("hb sampling time - revised method:", t.revised, "sec\n")
cat("incremental speedup:", t.naive / t.revised, "\n")

# Parallel variant: groups are conditionally independent, so the inner
# loop can run across cores within each Gibbs cycle.
library("doParallel")
ncores <- 2
registerDoParallel(ncores)

set.seed(my.seed)
beta.mat.buff <- matrix(rep(0.0, 3 * ngrp), nrow = 3)
beta.mat.smp <- array(NA, dim = c(nsmp, 3, ngrp))
t.parallel <- proc.time()[3]
for (i in 1:nsmp) {
  beta.mat.buff <- foreach(n = 1:ngrp, .combine = cbind,
                           .options.multicore =
                             list(preschedule = TRUE)) %dopar% {
    MfU.Sample(beta.mat.buff[, n], hb.logpost.grp,
               uni.sampler = "slice",
               X = X.exp, y = y.mat.exp[, n],
               beta0 = beta0.prior, W = W.prior)
  }
  beta.mat.smp[i, , ] <- beta.mat.buff
}
t.parallel <- proc.time()[3] - t.parallel
save(t.parallel, file = "t.parallel")
cat("hb sampling time - revised & parallel method:", t.parallel, "sec\n")
cat("incremental speedup:", t.revised / t.parallel, "\n")

# Section 4.3 #

library("RcppArmadillo")
library("inline")
# NOTE(review): the C++ source string below is truncated in this view
# (it continues on the following line of the file, apparently with
# template arguments to Rcpp::as stripped by text extraction); the
# opening fragment is preserved verbatim so the continuation still joins.
code <- " arma::vec beta_cpp = Rcpp::as(beta); arma::mat
X_cpp = Rcpp::as(X); arma::vec y_cpp = Rcpp::as(y); arma::vec xbeta = X_cpp * beta_cpp; int n = X_cpp.n_rows; double logp = 0.0; for (int i=0; i