# devtools::install_github("quentingronau/bridgesampling@master") # install.packages("bridgesampling") # install.packages("BayesFactor") # install.packages("rjags") # install.packages("R2jags") # install.packages("rstan") # install.packages("sm") library("bridgesampling") library("BayesFactor") library("R2jags") library("rstan") library("sm") #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #------------------------------------------------------------------------------- #@@@@@@@@@@@@@@@@@ Toy example: Bayesian t-test @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #------------------------------------------------------------------------------- #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ data("sleep", package = "datasets") y <- sleep$extra[sleep$group == 1] x <- sleep$extra[sleep$group == 2] # compute difference scores d <- x - y n <- length(d) #------------------------------------------------------------------------------- # plot data #------------------------------------------------------------------------------- # by Henrik Singmann customized violinplot function (singmann.org) # the original violinplot function stems from the 'vioplot' package # Copyright (c) 2004, Daniel Adler. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the # distribution. # # * Neither the name of the University of Goettingen nor the names of # its contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # 'AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # OF THE POSSIBILITY OF SUCH DAMAGE. vioplot_singmann <- function(x, ..., range = 1.5, h = NULL, ylim = NULL, names = NULL, horizontal = FALSE, col = NULL, border = "black", lty = 1, lwd = 1, rectCol = "black", colMed = "white", pchMed = 19, at, add = FALSE, wex = 1, mark.outlier = TRUE, pch.mean = 4, ids = NULL, drawRect = TRUE, yaxt = "s", drawBox = FALSE) { # process multiple datas datas <- list(x, ...) n <- length(datas) if (missing(at)) at <- 1:n # pass 1 - calculate base range - estimate density setup parameters for # density estimation upper <- vector(mode = "numeric", length = n) lower <- vector(mode = "numeric", length = n) q1 <- vector(mode = "numeric", length = n) q3 <- vector(mode = "numeric", length = n) med <- vector(mode = "numeric", length = n) base <- vector(mode = "list", length = n) height <- vector(mode = "list", length = n) outliers <- vector(mode = "list", length = n) baserange <- c(Inf, -Inf) # global args for sm.density function-call args <- list(display = "none") if (!(is.null(h))) args <- c(args, h = h) for (i in 1:n) { data <- datas[[i]] if (!is.null(ids)) names(data) <- ids if (is.null(names(data))) names(data) <- as.character(1:(length(data))) # calculate plot parameters 1- and 3-quantile, median, IQR, upper- and # lower-adjacent data.min <- min(data) data.max <- max(data) q1[i] <- quantile(data, 0.25) q3[i] <- quantile(data, 0.75) med[i] <- median(data) iqd <- q3[i] - q1[i] upper[i] <- min(q3[i] + range * iqd, data.max) lower[i] <- max(q1[i] - range * iqd, data.min) # strategy: xmin = min(lower, data.min)) ymax = max(upper, data.max)) est.xlim <- c(min(lower[i], data.min), max(upper[i], data.max)) # estimate density curve smout <- do.call("sm.density", c(list(data, xlim = est.xlim), args)) # calculate stretch factor the plots density heights is defined in range 0.0 # ... 0.5 we scale maximum estimated point to 0.4 per data hscale <- 0.4/max(smout$estimate) * wex # add density curve x,y pair to lists base[[i]] <- smout$eval.points height[[i]] <- smout$estimate * hscale t <- range(base[[i]]) baserange[1] <- min(baserange[1], t[1]) baserange[2] <- max(baserange[2], t[2]) min.d <- boxplot.stats(data)[["stats"]][1] max.d <- boxplot.stats(data)[["stats"]][5] height[[i]] <- height[[i]][(base[[i]] > min.d) & (base[[i]] < max.d)] height[[i]] <- c(height[[i]][1], height[[i]], height[[i]][length(height[[i]])]) base[[i]] <- base[[i]][(base[[i]] > min.d) & (base[[i]] < max.d)] base[[i]] <- c(min.d, base[[i]], max.d) outliers[[i]] <- list(data[(data < min.d) | (data > max.d)], names(data[(data < min.d) | (data > max.d)])) # calculate min,max base ranges } # pass 2 - plot graphics setup parameters for plot if (!add) { xlim <- if (n == 1) at + c(-0.5, 0.5) else range(at) + min(diff(at))/2 * c(-1, 1) if (is.null(ylim)) { ylim <- baserange } } if (is.null(names)) { label <- 1:n } else { label <- names } boxwidth <- 0.05 * wex # setup plot if (!add) plot.new() if (!horizontal) { if (!add) { plot.window(xlim = xlim, ylim = ylim) axis(2) axis(1, at = at, label = label) } for (i in 1:n) { # plot left/right density curve polygon(c(at[i] - height[[i]], rev(at[i] + height[[i]])), c(base[[i]], rev(base[[i]])), col = col, border = border, lty = lty, lwd = lwd) if (drawRect) { # plot IQR boxplot(datas[[i]], at = at[i], add = TRUE, yaxt = yaxt, pars = list(boxwex = 0.6 * wex, outpch = if (mark.outlier) "" else 1)) if ((length(outliers[[i]][[1]]) > 0) & mark.outlier) text(rep(at[i], length(outliers[[i]][[1]])), outliers[[i]][[1]], labels = outliers[[i]][[2]]) } points(at[i], mean(datas[[i]]), pch = pch.mean, cex = 1.3) } } else { if (!add) { plot.window(xlim = ylim, ylim = xlim) axis(1) axis(2, at = at, label = label) } if (drawBox) box() for (i in 1:n) { # plot left/right density curve polygon(c(base[[i]], rev(base[[i]])), c(at[i] - height[[i]], rev(at[i] + height[[i]])), col = col, border = border, lty = lty, lwd = lwd) if (drawRect) { # plot IQR boxplot(datas[[i]], yaxt = yaxt, at = at[i], add = TRUE, pars = list(boxwex = 0.8 * wex, outpch = if (mark.outlier) "" else 1)) if ((length(outliers[[i]][[1]]) > 0) & mark.outlier) text(rep(at[i], length(outliers[[i]][[1]])), outliers[[i]][[1]], labels = outliers[[i]][[2]]) } points(at[i], mean(datas[[i]]), pch = pch.mean, cex = 1.3) } } invisible(list(upper = upper, lower = lower, median = med, q1 = q1, q3 = q3)) } set.seed(1) yticks <- pretty(sleep$extra) ylim <- range(yticks) cex.lab <- 1.8 cairo_ps("sleep.eps", width = 500/72, height = 420/72) op <- par(bty = "n", mar = c(5, 3, 3, 0)) plot(1, type = "n", axes = FALSE, ylab = "", xlab = "", ylim = ylim, xlim = c(.3, 2.7), bty = "n") axis(1, at = c(1, 2), cex.axis = 1.4) axis(2, las = 1, cex.axis = 1.4, line = -2) vioplot_singmann(y, add = TRUE, mark.outlier = TRUE, at = 1, wex = 1, col = "grey", border = "black", rectCol = "black", colMed = "grey", yaxt = "n") vioplot_singmann(x, add = TRUE, mark.outlier = TRUE, at = 2, wex = 1, col = "white", border = "black", rectCol = "black", colMed = "grey", yaxt = "n") one_x <- rep(1, length(y)) two_x <- rep(2, length(x)) for (i in seq_along(one_x)) { lines(c(one_x[i], two_x[i]), c(y[i], x[i]), lwd = 1.2) } points(one_x, y, pch = 21, bg = "white") points(two_x, x, pch = 21, bg = "grey") mtext("Increase in Sleep (Hours)", side = 2, cex = cex.lab, line = 1) mtext("Drug", side = 1, cex = cex.lab, line = 2) par(op) dev.off() #------------------------------------------------------------------------------- # JAGS models #------------------------------------------------------------------------------- code_H1 <- "model { delta ~ dt(0, 1 / r ^ 2, 1) # prior inv_sigma2 ~ dgamma(0.0001, 0.0001) # prior sigma <- 1 / sqrt(inv_sigma2) # convert precision to sigma for (i in 1:n) { d[i] ~ dnorm(sigma * delta, inv_sigma2) # likelihood } }" code_H0 <- "model { inv_sigma2 ~ dgamma(0.0001, 0.0001) # prior for (i in 1:n) { d[i] ~ dnorm(0, inv_sigma2) # likelihood } }" #------------------------------------------------------------------------------- # fit models #------------------------------------------------------------------------------- set.seed(1) jags_H1 <- jags(data = list(d = d, n = n, r = 1 / sqrt(2)), parameters.to.save = c("delta", "inv_sigma2"), model.file = textConnection(code_H1), n.chains = 3, n.iter = 16000, n.burnin = 1000, n.thin = 1) jags_H0 <- jags(data = list(d = d, n = n), parameters.to.save = "inv_sigma2", model.file = textConnection(code_H0), n.chains = 3, n.iter = 16000, n.burnin = 1000, n.thin = 1) #------------------------------------------------------------------------------- # specify unnormalized log posterior functions #------------------------------------------------------------------------------- log_posterior_H1 <- function(pars, data) { delta <- pars["delta"] # extract parameter inv_sigma2 <- pars["inv_sigma2"] # extract parameter sigma <- 1 / sqrt(inv_sigma2) # convert precision to sigma out <- dcauchy(delta, scale = data$r, log = TRUE) + # prior dgamma(inv_sigma2, 0.0001, 0.0001, log = TRUE) + # prior sum(dnorm(data$d, sigma * delta, sigma, log = TRUE)) # likelihood return(out) } log_posterior_H0 <- function(pars, data) { inv_sigma2 <- pars["inv_sigma2"] # extract parameter sigma <- 1 / sqrt(inv_sigma2) # convert precision to sigma out <- dgamma(inv_sigma2, 0.0001, 0.0001, log = TRUE) + # prior sum(dnorm(data$d, 0, sigma, log = TRUE)) # likelihood return(out) } #------------------------------------------------------------------------------- # specify lower and upper bounds for the parameters #------------------------------------------------------------------------------- lb_H1 <- rep(-Inf, 2) ub_H1 <- rep(Inf, 2) names(lb_H1) <- names(ub_H1) <- c("delta", "inv_sigma2") lb_H1[["inv_sigma2"]] <- 0 lb_H0 <- 0 ub_H0 <- Inf names(lb_H0) <- names(ub_H0) <- "inv_sigma2" #------------------------------------------------------------------------------- # compute log marginal likelihoods #------------------------------------------------------------------------------- set.seed(12345) bridge_H1 <- bridge_sampler(samples = jags_H1, log_posterior = log_posterior_H1, data = list(d = d, n = n, r = 1 / sqrt(2)), lb = lb_H1, ub = ub_H1) bridge_H0 <- bridge_sampler(samples = jags_H0, log_posterior = log_posterior_H0, data = list(d = d, n = n), lb = lb_H0, ub = ub_H0) #------------------------------------------------------------------------------- # print results #------------------------------------------------------------------------------- bridge_H1 bridge_H0 #------------------------------------------------------------------------------- # compute approximate percentage errors of estimates #------------------------------------------------------------------------------- error_measures(bridge_H1)$percentage error_measures(bridge_H0)$percentage #------------------------------------------------------------------------------- # summary #------------------------------------------------------------------------------- summary(bridge_H1) summary(bridge_H0) #------------------------------------------------------------------------------- # compute Bayes factor #------------------------------------------------------------------------------- bf(bridge_H1, bridge_H0) #------------------------------------------------------------------------------- # compare to BayesFactor result #------------------------------------------------------------------------------- extractBF(ttestBF(x = x, y = y, paired = TRUE), onlybf = TRUE, log = FALSE) #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #------------------------------------------------------------------------------- #@@@@@@@@@@@@@@@@@ Stan example 1: Bayesian GLMM @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #------------------------------------------------------------------------------- #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # Note that results are dependent on the compiler and the optimization settings. # Thus, even with identical seeds results can differ slightly from the results # reported in the manuscript. data("turtles", package = "bridgesampling") #------------------------------------------------------------------------------- # plot data #------------------------------------------------------------------------------- # reproduce Figure 1 from Sinharay & Stern (2005) xticks <- pretty(turtles$clutch) yticks <- pretty(turtles$x) cairo_ps("turtles.eps", width = 520/72, height = 400/72) plot(1, type = "n", axes = FALSE, ylab = "", xlab = "", xlim = range(xticks), ylim = range(yticks)) points(turtles$clutch, turtles$x, pch = ifelse(turtles$y, 21, 4), cex = 1.3, col = ifelse(turtles$y, "black", "darkred"), bg = "grey", lwd = 1.3) axis(1, cex.axis = 1.4) mtext("Clutch Identifier", side = 1, line = 2.9, cex = 1.8) axis(2, las = 1, cex.axis = 1.4) mtext("Birth Weight (Grams)", side = 2, line = 2.6, cex = 1.8) dev.off() #------------------------------------------------------------------------------- # Analysis: Natural Selection Study (compute same BF as Sinharay & Stern, 2005) #------------------------------------------------------------------------------- # H0 (model without random intercepts) ## H0_code <- "data { int N; int y[N]; real x[N]; } parameters { real alpha0_raw; real alpha1_raw; } transformed parameters { real alpha0 = sqrt(10.0) * alpha0_raw; real alpha1 = sqrt(10.0) * alpha1_raw; } model { target += normal_lpdf(alpha0_raw | 0, 1); // prior target += normal_lpdf(alpha1_raw | 0, 1); // prior for (i in 1:N) { // likelihood target += bernoulli_lpmf(y[i] | Phi(alpha0 + alpha1 * x[i])); } }" # H1 (model with random intercepts) ## H1_code <- "data { int N; int y[N]; real x[N]; int C; int clutch[N]; } parameters { real alpha0_raw; real alpha1_raw; vector[C] b_raw; real sigma2; } transformed parameters { vector[C] b; real sigma = sqrt(sigma2); real alpha0 = sqrt(10.0) * alpha0_raw; real alpha1 = sqrt(10.0) * alpha1_raw; b = sigma * b_raw; } model { target += - 2 * log(1 + sigma2); // p(sigma2) = 1 / (1 + sigma2) ^ 2 target += normal_lpdf(alpha0_raw | 0, 1); // prior target += normal_lpdf(alpha1_raw | 0, 1); // prior target += normal_lpdf(b_raw | 0, 1); // random effects for (i in 1:N) { // likelihood target += bernoulli_lpmf(y[i] | Phi(alpha0 + alpha1 * x[i] + b[clutch[i]])); } }" # fit models ## set.seed(1) stanfit_H0 <- stan(model_code = H0_code, data = list(y = turtles$y, x = turtles$x, N = nrow(turtles)), iter = 15500, warmup = 500, chains = 4, seed = 1) stanfit_H1 <- stan(model_code = H1_code, data = list(y = turtles$y, x = turtles$x, N = nrow(turtles), C = max(turtles$clutch), clutch = turtles$clutch), iter = 15500, warmup = 500, chains = 4, seed = 1) # compute (log) marginal likelihoods ## set.seed(1) bridge_H0 <- bridge_sampler(stanfit_H0) bridge_H1 <- bridge_sampler(stanfit_H1) # compute approximate percentage errors ## error_measures(bridge_H0)$percentage error_measures(bridge_H1)$percentage # compute Bayes factor ("true" value: BF01 = 1.273) ## bf(bridge_H0, bridge_H1) #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #------------------------------------------------------------------------------- #@@@@@@@@@@@@@@ Stan example 2: Bayesian factor analysis @@@@@@@@@@@@@@@@@@@@@@@ #------------------------------------------------------------------------------- #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # Note that results are dependent on the compiler and the optimization settings. # Thus, even with identical seeds results can differ slightly from the results # reported in the manuscript. data("ier", package = "bridgesampling") # enable parallel fitting in rstan cores <- 4 options(mc.cores = cores) #------------------------------------------------------------------------------- # plot data #------------------------------------------------------------------------------- currency <- colnames(ier) label <- c("US Dollar", "Canadian Dollar", "Yen", "Franc", "Lira", "Mark") cairo_ps("ier.eps", width = 2*320/72, height = 3*260/72) op <- par(mfrow = c(3, 2), mar = c(6, 6, 3, 3)) for (i in seq_along(currency)) { plot(ier[,currency[i]], type = "l", col = "darkblue", axes = FALSE, ylim = c(-4, 4), ylab = "", xlab = "", lwd = 2) axis(1, at = 0:12*12, labels = 1975:1987, cex.axis = 1.7) axis(2, at = pretty(c(-4, 4)), las = 1, cex.axis = 1.7) mtext("Year", 1, cex = 1.5, line = 3.2) mtext("Exchange Rate Changes", 2, cex = 1.4, line = 3.2) mtext(label[i], 3, cex = 1.6, line = .1) } par(op) dev.off() #------------------------------------------------------------------------------- # stan model #------------------------------------------------------------------------------- model_code <- "data { int T; // number of observations int m; // number of variables int k; // number of factors matrix[T,m] Y; // data matrix } transformed data { int r; vector[m] zeros; r = m * k - k * (k - 1) / 2; // number of non-zero factor loadings zeros = rep_vector(0.0, m); } parameters { real beta_lower[r - k]; // lower-diagonal elements of beta real beta_diag [k]; // diagonal elements of beta vector[m] sigma2; // residual variances } transformed parameters { matrix[m,k] beta; cov_matrix[m] Omega; { int index_lower = 1; for (j in 1:k) { // construct lower-triangular factor loadings matrix for (i in 1:m) { if (i == j) { beta[j,j] = beta_diag[j]; } else if (i >= j) { beta[i,j] = beta_lower[index_lower]; index_lower = index_lower + 1; } else { beta[i,j] = 0.0; } } } } Omega = beta * beta' + diag_matrix(sigma2); } model { target += normal_lpdf(beta_diag | 0, 1) - k * normal_lccdf(0 | 0, 1); target += normal_lpdf(beta_lower | 0, 1); // prior target += inv_gamma_lpdf(sigma2 | 2.2 / 2.0, 0.1 / 2.0); // prior for(t in 1:T) { target += multi_normal_lpdf(Y[t] | zeros, Omega); // likelihood } }" # compile model model <- stan_model(model_code = model_code) #------------------------------------------------------------------------------- # fit models and compute log marginal likelihoods #------------------------------------------------------------------------------- # function for generating starting values init_fun <- function(nchains, k, m) { r <- m * k - k * (k - 1) / 2 out <- vector("list", nchains) for (i in seq_len(nchains)) { beta_lower <- array(runif(r - k, 0.05, 1), dim = r - k) beta_diag <- array(runif(k, .05, 1), dim = k) sigma2 <- array(runif(m, .05, 1.5), dim = m) out[[i]] <- list(beta_lower = beta_lower, beta_diag = beta_diag, sigma2 = sigma2) } return(out) } set.seed(1) stanfit <- bridge <- vector("list", 3) for (k in 1:3) { stanfit[[k]] <- sampling(model, data = list(Y = ier, T = nrow(ier), m = ncol(ier), k = k), iter = 11000, warmup = 1000, chains = 4, init = init_fun(nchains = 4, k = k, m = ncol(ier)), cores = cores, seed = 1) bridge[[k]] <- bridge_sampler(stanfit[[k]], method = "warp3", repetitions = 10, cores = cores) } # example output summary(bridge[[2]]) #------------------------------------------------------------------------------- # compute posterior model probabilities #------------------------------------------------------------------------------- post_prob(bridge[[1]], bridge[[2]], bridge[[3]], model_names = c("k = 1", "k = 2", "k = 3"))