## Section 3 ##

library("MfUSampler")

my.seed <- 0
nsmp <- 1000

# Section 3.1 #

# Dose levels (z) and binomial counts (m1 = successes, m2 = failures)
# for the prior (historical) and current studies.
z <- c(1, 4, 7, 10, 13, 16, 19, 24)
m1.prior <- c(17, 26, 39, 27, 35, 37, 26, 23)
m2.prior <- c(215, 218, 137, 62, 36, 16, 13, 15)
m1.current <- c(46, 52, 44, 54, 38, 39, 23, 52)
m2.current <- c(290, 211, 134, 91, 53, 42, 23, 32)
# Quadratic design matrix: intercept, z, z^2.
X <- cbind(1, z, z^2)

# Section 3.2 #

# Binomial-logit log-likelihood of coefficient vector beta (up to an
# additive constant), given design matrix X and counts m1/m2.
loglike <- function(beta, X, m1, m2) {
  beta <- as.numeric(beta)
  Xbeta <- X %*% beta
  return(-sum((m1 + m2) * log(1 + exp(-Xbeta)) + m2 * Xbeta))
}

# Multivariate-normal log-prior (up to an additive constant) with mean
# beta0 and covariance W; solve(W, .) avoids forming the inverse.
logprior <- function(beta, beta0, W) {
  return(-0.5 * t(beta - beta0) %*% solve(W, beta - beta0))
}

# Unnormalized log-posterior = log-prior + log-likelihood.
# FIX: default beta0 was "rep(0,0, 3)" (comma typo for rep(0.0, 3)); it
# happened to evaluate to c(0, 0, 0) only because the 3 was taken as
# length.out. Spelled correctly now; behavior is unchanged.
logpost <- function(beta, X, m1, m2, beta0 = rep(0.0, 3),
                    W = diag(1e+6, nrow = 3)) {
  return(logprior(beta, beta0, W) + loglike(beta, X, m1, m2))
}

# Informative prior derived from the historical study.
beta0.prior <- c(-3.17, 0.33, -0.007)
W.prior <- 1e-4 * matrix(c(638, -111, 3.9,
                           -111, 24.1, -0.9,
                           3.9, -0.9, 0.04), ncol = 3)

# Draw nsmp posterior samples with the default univariate slice sampler.
set.seed(my.seed)
beta.ini <- c(0.0, 0.0, 0.0)
beta.smp <- MfU.Sample.Run(beta.ini, logpost, nsmp = nsmp,
                           X = X, m1 = m1.current, m2 = m2.current,
                           beta0 = beta0.prior, W = W.prior)
summ.slice <- summary(beta.smp)
save(summ.slice, file = "summ.slice")
summ.slice
summ.slice$covar

pdf(file = "figure2.pdf")
plot(beta.smp)
dev.off()

# Section 3.3 #

# Log-posterior with optional analytic gradient (grad = TRUE), as
# required by the adaptive rejection sampler.
# NOTE(review): the default W here is diag(1e+3) while logpost uses
# diag(1e+6); harmless because W is always supplied by callers, but the
# inconsistency looks unintentional — confirm against the paper.
logpost.fg <- function(beta, X, m1, m2, beta0 = rep(0.0, 3),
                       W = diag(1e+3, nrow = 3), grad = FALSE) {
  Xbeta <- X %*% beta
  if (grad) {
    log.prior.d <- -solve(W, beta - beta0)
    log.like.d <- t(X) %*% ((m1 + m2) / (1 + exp(Xbeta)) - m2)
    return(log.prior.d + log.like.d)
  }
  log.prior <- -0.5 * t(beta - beta0) %*% solve(W, beta - beta0)
  log.like <- -sum((m1 + m2) * log(1 + exp(-Xbeta)) + m2 * Xbeta)
  log.post <- log.prior + log.like
  return(log.post)
}

# Please note that even after fixing the random seed, the results (also
# those not about the timings) are not exactly reproducible. We suspect
# that the samples drawn are getting increasingly different due to slight
# differences in floating-point arithmetic for different platforms, which
# accumulate over the sampling.
# Note that time-dependent results (e.g. effective sampling rate or
# speedups) cannot be replicated exactly, for obvious reasons.

# Re-run the sampler using adaptive rejection sampling (ARS), which needs
# the gradient supplied by logpost.fg; ars.x gives per-coordinate initial
# abscissae for the three coefficients.
set.seed(my.seed)
beta.ini <- c(0.0, 0.0, 0.0)
beta.smp <- MfU.Sample.Run(beta.ini, logpost.fg, nsmp = nsmp,
                           uni.sampler = "ars",
                           control = MfU.Control(3, ars.x = list(
                             c(-10, 0, 10), c(-1, 0, 1),
                             c(-0.1, 0.0, 0.1))),
                           X = X, m1 = m1.current, m2 = m2.current,
                           beta0 = beta0.prior, W = W.prior)
summ.ars <- summary(beta.smp)
save(summ.ars, file = "summ.ars")
summ.ars

# Section 3.4 #

# Posterior predictive mean probability at each row of X.
predfunc.mean <- function(beta, X) {
  return(1 / (1 + exp(-X %*% beta)))
}
pred.mean <- predict(beta.smp, predfunc.mean, X)
predmean.summ <- summary(pred.mean)
print(predmean.summ, n = 8)

# Posterior predictive binary draws (one Bernoulli draw per row of X).
predfunc.binary <- function(beta, X) {
  return(as.numeric(runif(nrow(X)) < 1 / (1 + exp(-X %*% beta))))
}
pred.binary <- predict(beta.smp, predfunc.binary, X)
predbinary.summ <- summary(pred.binary)
print(predbinary.summ, n = 8)

## Section 4 ##

# Section 4.1 #

library("mvtnorm")

# Simulate an expanded, hierarchical data set: jittered dose levels
# sampled in proportion to the current-study totals, ngrp groups with
# group-level coefficients drawn from the prior.
set.seed(my.seed)
nrep <- 50
m.current <- m1.current + m2.current
nz.exp <- nrep * sum(m.current)
jitter <- 1.0
z.exp <- sample(z, size = nz.exp, replace = TRUE, prob = m.current) +
  (2 * runif(nz.exp) - 1) * jitter
X.exp <- cbind(1, z.exp, z.exp^2)
ngrp <- 5
beta.mat <- t(rmvnorm(ngrp, mean = beta0.prior, sigma = W.prior))
y.mat.exp <- matrix(as.numeric(runif(ngrp * nz.exp) <
                                 1 / (1 + exp(-X.exp %*% beta.mat))),
                    ncol = ngrp)

# Section 4.2 #

# Log-prior of the flattened coefficient matrix: independent MVN priors,
# one per group (column).
hb.logprior <- function(beta.flat, beta0, W) {
  beta.mat <- matrix(beta.flat, nrow = 3)
  return(sum(apply(beta.mat, 2, logprior, beta0, W)))
}

# Log-likelihood of the flattened coefficient matrix: sum of per-group
# Bernoulli-logit log-likelihoods.
hb.loglike <- function(beta.flat, X, y) {
  beta.mat <- matrix(beta.flat, nrow = 3)
  ngrp <- ncol(beta.mat)
  return(sum(sapply(1:ngrp, function(n) {
    xbeta <- X %*% beta.mat[, n]
    return(-sum((1 - y[, n]) * xbeta + log(1 + exp(-xbeta))))
  })))
}

# Unnormalized log-posterior for the naive (all-coefficients-at-once)
# sampler.
hb.logpost <- function(beta.flat, X, y, beta0, W) {
  return(hb.logprior(beta.flat, beta0, W) + hb.loglike(beta.flat, X, y))
}

nsmp <- 10
set.seed(my.seed)
beta.flat.ini <- rep(0.0, 3 * ngrp)
# FIX: in the source this assignment was split as "beta.flat.smp" /
# "<- MfU.Sample.Run(...)", which is a syntax error in R (the arrow
# cannot start a new line after a complete expression); rejoined here.
beta.flat.smp <- MfU.Sample.Run(beta.flat.ini, hb.logpost,
                                X = X.exp, y = y.mat.exp,
                                beta0 = beta0.prior, W = W.prior,
                                nsmp = nsmp)
t.naive <- attr(beta.flat.smp, "t")
save(t.naive, file = "t.naive")
cat("hb sampling time - naive method:", t.naive, "sec\n")

# Per-group log-likelihood, exploiting conditional independence of the
# groups given their own coefficients.
hb.loglike.grp <- function(beta, X, y) {
  beta <- as.numeric(beta)
  xbeta <- X %*% beta
  return(-sum((1 - y) * xbeta + log(1 + exp(-xbeta))))
}
hb.logprior.grp <- logprior

# Per-group log-posterior.
# FIX: default beta0 was "rep(0,0, 3)" (comma typo for rep(0.0, 3)); it
# only evaluated to c(0, 0, 0) by accident via length.out. Behavior is
# unchanged; callers always supply beta0 anyway.
hb.logpost.grp <- function(beta, X, y, beta0 = rep(0.0, 3),
                           W = diag(1e+6, nrow = 3)) {
  return(hb.logprior.grp(beta, beta0, W) + hb.loglike.grp(beta, X, y))
}

# Revised method: Gibbs-cycle over groups, sampling each group's
# 3 coefficients with its own univariate slice sampler.
set.seed(my.seed)
beta.mat.buff <- matrix(rep(0.0, 3 * ngrp), nrow = 3)
beta.mat.smp <- array(NA, dim = c(nsmp, 3, ngrp))
t.revised <- proc.time()[3]
for (i in 1:nsmp) {
  for (n in 1:ngrp) {
    beta.mat.buff[, n] <- MfU.Sample(beta.mat.buff[, n], hb.logpost.grp,
                                     uni.sampler = "slice",
                                     X = X.exp, y = y.mat.exp[, n],
                                     beta0 = beta0.prior, W = W.prior)
  }
  beta.mat.smp[i, , ] <- beta.mat.buff
}
t.revised <- proc.time()[3] - t.revised
save(t.revised, file = "t.revised")
cat("hb sampling time - revised method:", t.revised, "sec\n")
cat("incremental speedup:", t.naive / t.revised, "\n")

# Parallel variant: groups are conditionally independent, so the inner
# loop can run across cores within each Gibbs cycle.
library("doParallel")
ncores <- 2
registerDoParallel(ncores)

set.seed(my.seed)
beta.mat.buff <- matrix(rep(0.0, 3 * ngrp), nrow = 3)
beta.mat.smp <- array(NA, dim = c(nsmp, 3, ngrp))
t.parallel <- proc.time()[3]
for (i in 1:nsmp) {
  beta.mat.buff <- foreach(n = 1:ngrp, .combine = cbind,
                           .options.multicore =
                             list(preschedule = TRUE)) %dopar% {
    MfU.Sample(beta.mat.buff[, n], hb.logpost.grp,
               uni.sampler = "slice",
               X = X.exp, y = y.mat.exp[, n],
               beta0 = beta0.prior, W = W.prior)
  }
  beta.mat.smp[i, , ] <- beta.mat.buff
}
t.parallel <- proc.time()[3] - t.parallel
save(t.parallel, file = "t.parallel")
cat("hb sampling time - revised & parallel method:", t.parallel, "sec\n")
cat("incremental speedup:", t.revised / t.parallel, "\n")

# Section 4.3 #

library("RcppArmadillo")
library("inline")
# NOTE(review): the C++ source string below is truncated in this view
# (it continues on the following line of the file, apparently with
# template arguments to Rcpp::as stripped by text extraction); the
# opening fragment is preserved verbatim so the continuation still joins.
code <- " arma::vec beta_cpp = Rcpp::as(beta); arma::mat
X_cpp = Rcpp::as(X); arma::vec y_cpp = Rcpp::as(y); arma::vec xbeta = X_cpp * beta_cpp; int n = X_cpp.n_rows; double logp = 0.0; for (int i=0; i