## NOTES
## - The timing and efficiency results vary by hardware, and also by JAGS and Stan versions.
##
## - The simulation-based calibration section (near the end) can take days to run. Also,
##   at the time of writing, rstan does not allow the user to set a random seed for sbc(),
##   so those results are not exactly reproducible.

## Seed
set.seed(123)

## formatting for paper
library("knitr")

## global knitr options
opts_chunk$set(echo = FALSE, fig.path = '../Figures/sem-', fig.align = 'center',
               fig.show = 'hold', size = 'footnotesize', cache.path = "cache/",
               warning = FALSE, message = FALSE, highlight = FALSE,
               background = 'white', comment = ' ', prompt = TRUE)
render_sweave()

## some options
options(prompt = "R> ", continue = "+ ", digits = 3,
        useFancyQuotes = FALSE, show.signif.stars = FALSE)

## packages and functions
library("blavaan")
library("rstan")
library("runjags")
library("ggplot2")

## Function for simulating growth data
growthdata <- function(sampsize){
  ## Simulate data for a Univariate Latent Change Score model.
  ## (Code from Kievit et al 2018)
  MILCS_simulate <- '
  #### The following two lines specify the measurement model for multiple indicators (X1-X3)
  #### measured on two occasions (T1-T2)
  COG_T1=~.8*T1X1+.9*T1X2+.7*T1X3   # This specifies the measurement model for COG_T1
  COG_T2=~.8*T2X1+.9*T2X2+.7*T2X3   # This specifies the measurement model for COG_T2

  ##### The following lines specify the core assumptions of the LCS
  ##### and should not generally be modified
  COG_T2 ~ 1*COG_T1      # Fixed regression of COG_T2 on COG_T1
  dCOG1 =~ 1*COG_T2      # Fixed regression of dCOG1 on COG_T2
  COG_T2 ~ 0*1           # This line constrains the intercept of COG_T2 to 0
  COG_T2 ~~ 0*COG_T2     # This fixes the variance of the COG_T2 to 0

  T1X1~0*1               # This fixes the intercept of X1 to 0
  T1X2~1*1               # This fixes the intercept of X2 to 1
  T1X3~.5*1              # This fixes the intercept of X3 to 0.5
  T2X1~0*1               # This fixes the intercept of X1 to 0
  T2X2~1*1               # This fixes the intercept of X2 to 1
  T2X3~.5*1              # This fixes the intercept of X3 to 0.5

  ###### The following five parameters will be estimated in the model.
  ###### Values can be modified manually to examine the effect on the model
  dCOG1 ~ 10*1           # This fixes the intercept of the change score to 10
  COG_T1 ~ 50*1          # This fixes the intercept of COG_T1 to 50.
  dCOG1 ~~ 5*dCOG1       # This fixes the variance of the change scores to 5.
  COG_T1 ~~ 8*COG_T1     # This fixes the variance of the COG_T1 to 8.
  dCOG1~-0.1*COG_T1      # This fixes the self-feedback parameter to -0.1.
  '

  # Generate data
  set.seed(1234)
  simdatMILCS <- simulateData(MILCS_simulate, sample.nobs = sampsize,
                              meanstructure = TRUE)

  simdatMILCS
}
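## Optional sanity check of the simulation function (an illustrative sketch only,
## not part of the reported analyses; the object name demo_dat is arbitrary).
## Because growthdata() sets its own seed internally, repeated calls with the same
## sample size return identical data, so this check does not affect later results.
demo_dat <- growthdata(100)
round(colMeans(demo_dat), 2)
rm(demo_dat)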
## compiled model for new Stan approach
stanmarg <- blavaan:::stanmodels$stanmarg

## Example code for Political Democracy model
## model <- '
##   # measurement model
##   ind60 =~ x1 + x2 + x3
##   dem60 =~ y1 + y2 + y3 + y4
##   dem65 =~ y5 + y6 + y7 + y8
##   # regressions
##   dem60 ~ ind60
##   dem65 ~ ind60 + dem60
##   # residual correlations
##   y1 ~~ y5
##   y2 ~~ y4 + y6
##   y3 ~~ y7
##   y4 ~~ y8
##   y6 ~~ y8
## '
## fit1 <- bsem(model, data = PoliticalDemocracy, target = "jags")
## fit2 <- bsem(model, data = PoliticalDemocracy, target = "stanclassic")
## fit3 <- bsem(model, data = PoliticalDemocracy, target = "stan")

## Political Democracy Models
## This is the first example in the paper, and it compares the methods' sampling
## efficiencies for the Bollen political democracy data
if(file.exists("fit1jags2.rda") & file.exists("fit2stan2.rda") & file.exists("fit3lers2.rda")){
  load("fit1jags2.rda")
  load("fit2stan2.rda")
  load("fit3lers2.rda")
  load("times_ex2.rda")
} else {
  model <- '
    # latent variable definitions
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
    dem65 =~ y5 + y6 + y7 + y8

    # regressions
    dem60 ~ ind60
    dem65 ~ ind60 + dem60

    # residual correlations
    y1 ~~ y5
    y2 ~~ y4 + y6
    y3 ~~ y7
    y4 ~~ y8
    y6 ~~ y8
  '

  ## JAGS method
  fit1 <- bsem(model, data = PoliticalDemocracy, target = "jags",
               dp = dpriors(nu = "dnorm(5,1e-2)", target = "jags"),
               bcontrol = list(method = "rjparallel"), do.fit = FALSE,
               mcmcfile = TRUE, seed = c(6862, 4557, 3036))
  file.rename("lavExport/sem.jag", "lavExport/sem2.jag")
  file.rename("lavExport/semjags.rda", "lavExport/sem2jags.rda")

  ## model estimation
  load("lavExport/sem2jags.rda")
  sysj2 <- system.time(fit1jags2 <- run.jags("lavExport/sem2.jag",
                                             monitor = jagtrans$monitors,
                                             data = jagtrans$data, n.chains = 3,
                                             adapt = 0, burnin = 1000, sample = 1000,
                                             inits = jagtrans$inits))
  save(fit1jags2, file = "fit1jags2.rda")

  ## Stan method
  fit2 <- bsem(model, data = PoliticalDemocracy, target = "stanclassic",
               dp = dpriors(nu = "normal(5,10)"), bcontrol = list(cores = 3),
               do.fit = FALSE, mcmcfile = TRUE)
  file.rename("lavExport/sem.stan", "lavExport/sem2.stan")
  file.rename("lavExport/semstan.rda", "lavExport/sem2stan.rda")
  load("lavExport/sem2stan.rda")
  stanm2 <- stan_model("lavExport/sem2.stan")
  syss2 <- system.time(fit2stan2 <- sampling(stanm2, data = stantrans$data,
                                             pars = stantrans$monitors, chains = 3,
                                             iter = 1200, warmup = 300,
                                             init = stantrans$inits, seed = 6862))
  save(fit2stan2, file = "fit2stan2.rda")

  ## Benchmark
  fit3 <- bsem(model, data = PoliticalDemocracy, target = "stan", do.fit = FALSE,
               mcmcfile = TRUE, dp = dpriors(nu = "normal(5,10)"))
  file.rename("lavExport/semstan.rda", "lavExport/sem1stanmarg2.rda")
  load("lavExport/sem1stanmarg2.rda")

  ## starting values
  ini <- list(Lambda_y_free = rep(1, 8), Theta_sd_free = rep(1, 11))
  ini <- list(c1 = ini, c2 = ini, c3 = ini)
  sysb2 <- system.time(fit3lers2 <- sampling(stanmarg, data = stantrans$data,
                                             pars = stantrans$monitors, iter = 1200,
                                             warmup = 300, chains = 3, init = ini,
                                             seed = 7974))
  save(fit3lers2, file = "fit3lers2.rda")

  ## hard-coded for reproducibility:
  ## times_ex2 <- c(sysj2[3], syss2[3], sysb2[3])
  times_ex2 <- c(7.943, 149.548, 20.471)
  save(times_ex2, file = "times_ex2.rda")
}

jag_samples <- fit1jags2$mcmc
jag_array <- array(NA, dim = c(nrow(jag_samples[[1]]), length(jag_samples),
                               ncol(jag_samples[[1]])))
for(i in 1:length(jag_samples)){
  jag_array[,i,] <- jag_samples[[i]]
}
jag2res <- monitor(jag_array, print = FALSE)
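## The conversion above (runjags samples to an iterations x chains x parameters
## array, as expected by rstan::monitor) is repeated verbatim for the later examples.
## A small helper like the one below could replace those loops; it is an illustrative
## sketch only (the name jags_to_array is ours) and is not used in the code that
## follows, which keeps the explicit loops for fidelity to the original script.
jags_to_array <- function(runjags_fit){
  mc <- runjags_fit$mcmc  # list of mcmc matrices, one per chain
  out <- array(NA, dim = c(nrow(mc[[1]]), length(mc), ncol(mc[[1]])))
  for(i in seq_along(mc)) out[, i, ] <- mc[[i]]
  out
}
## e.g., monitor(jags_to_array(fit1jags2), print = FALSE) should reproduce jag2res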
stan2res <- monitor(fit2stan2, print = FALSE)
stan2res <- subset(stan2res, sd > 0)
fitlers2summ <- summary(fit3lers2)
fitlers2res <- monitor(fit3lers2, print = FALSE)

## isolating effective sample size
jn_eff2 <- jag2res[,'n_eff']
names(jn_eff2) <- colnames(jag_samples[[1]])
## omitting all the na's, keeping only the draws we need
jn_eff2 <- jn_eff2[!is.na(jn_eff2)]
jn_eff2 <- jn_eff2[c(1:20, 55:62, 29:42)]

## isolating effective sample size
sn_eff2 <- stan2res$n_eff
names(sn_eff2) <- rownames(stan2res)
## removing lp__ and keeping only what we need
rows2 <- which(names(sn_eff2) == "lp__")
sn_eff2 <- sn_eff2[-rows2]
sn_eff2 <- sn_eff2[1:42]

## isolating effective sample size
ln_eff2 <- fitlers2res$n_eff
names(ln_eff2) <- rownames(fitlers2res)
## removing lp__ and log_lik effective sample sizes
rows3 <- which(grepl("lp__", names(ln_eff2)) | grepl("log_lik", names(ln_eff2)))
ln_eff2 <- ln_eff2[-rows3]

## setting up effective sample size per second
res2 <- round(c(jn_eff2/times_ex2[1], sn_eff2/times_ex2[2], ln_eff2/times_ex2[3]), 1)

## dataframe with effective sample size per second for each method
res2 <- data.frame(npersec = res2,
                   param = factor(rep(1:(length(res2)/3), 3)),
                   method = rep(c("JAGS", "Old Stan", "New Stan"),
                                each = length(res2)/3))
res2$method <- as.factor(res2$method)

## Figure 1: Sampling efficiency of the MCMC procedures for the Bollen example
## colorblind-friendly palette
cbbPalette <- c("#0072B2", "#E69F00", "#000000", "#009E73", "#F0E442", "#56B4E9",
                "#D55E00", "#CC79A7")
ggplot(res2, aes(x = param, y = npersec, group = method)) +
  geom_line(aes(linetype = method, color = method)) +
  geom_point(aes(shape = method, color = method)) +
  scale_color_manual(values = cbbPalette) +
  labs(x = "Parameter Number", y = "ESS/s") +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, size = rel(0.75)))

## This is the second example in the paper and involves a confirmatory factor
## analysis of the Holzinger and Swineford 1939 data
if(file.exists("fit1jags1.rda") & file.exists("fit2stan1.rda") & file.exists("fit3lers1.rda")){
  load("fit1jags1.rda")
  load("fit2stan1.rda")
  load("fit3lers1.rda")
  load("times_ex1.rda")
} else {
  HS.model <- ' visual =~ x1 + x2 + x3
                textual =~ x4 + x5 + x6
                speed =~ x7 + x8 + x9 '

  ## JAGS method
  fit1 <- bcfa(HS.model, data = HolzingerSwineford1939, target = "jags",
               bcontrol = list(method = "rjparallel"), do.fit = FALSE,
               mcmcfile = TRUE, seed = c(6863, 4558, 3037))
  file.rename("lavExport/sem.jag", "lavExport/sem1.jag")
  file.rename("lavExport/semjags.rda", "lavExport/sem1jags.rda")
  load("lavExport/sem1jags.rda")
  sysj <- system.time(fit1jags1 <- run.jags("lavExport/sem1.jag",
                                            monitor = jagtrans$monitors,
                                            data = jagtrans$data, n.chains = 3,
                                            adapt = 0, burnin = 1000, sample = 1000,
                                            inits = jagtrans$inits))
  save(fit1jags1, file = "fit1jags1.rda")

  ## Stan method
  fit2 <- bcfa(HS.model, data = HolzingerSwineford1939, target = "stanclassic",
               do.fit = FALSE, mcmcfile = TRUE)
  file.rename("lavExport/sem.stan", "lavExport/sem1.stan")
  file.rename("lavExport/semstan.rda", "lavExport/sem1stan.rda")
  load("lavExport/sem1stan.rda")
  stanm1 <- stan_model("lavExport/sem1.stan")
  syss <- system.time(fit2stan1 <- sampling(stanm1, data = stantrans$data,
                                            pars = stantrans$monitors, chains = 3,
                                            init = stantrans$inits, iter = 1200,
                                            warmup = 300, seed = 6863))
  save(fit2stan1, file = "fit2stan1.rda")

  ## Benchmark
  fit3 <- bcfa(HS.model, data = HolzingerSwineford1939, do.fit = FALSE,
               target = "stan", mcmcfile = TRUE)
  file.rename("lavExport/semstan.rda", "lavExport/sem1stanmarg.rda")
  load("lavExport/sem1stanmarg.rda")
  sysb <- system.time(fit3lers1 <- sampling(stanmarg, data = stantrans$data,
                                            pars = stantrans$monitors, iter = 1200,
                                            warmup = 300, init = stantrans$init,
                                            chains = 3, seed = 6863))
  save(fit3lers1, file = "fit3lers1.rda")

  ## hard-coded for reproducibility:
  ## times_ex1 <- c(sysj[3], syss[3], sysb[3])
  times_ex1 <- c(13.318, 112.437, 34.757)
  save(times_ex1, file = "times_ex1.rda")
}

jag_samples <- fit1jags1$mcmc
jag_array <- array(NA, dim = c(nrow(jag_samples[[1]]), length(jag_samples),
                               ncol(jag_samples[[1]])))
for(i in 1:length(jag_samples)){
  jag_array[,i,] <- jag_samples[[i]]
}
jag1res <- monitor(jag_array, print = FALSE)

## stan summary
stan1res <- monitor(fit2stan1, print = FALSE)
stan1res <- subset(stan1res, sd > 0)

## benchmark (new Stan) summary
fitlerssumm <- summary(fit3lers1)
fitlersres <- monitor(fit3lers1, print = FALSE)

## isolating effective sample size
jn_eff <- jag1res[,'n_eff']
names(jn_eff) <- colnames(jag_samples[[1]])
## omitting all the na's
jn_eff <- na.omit(jn_eff)

## setting up dataframe
stan1resdf <- as.data.frame(stan1res)
## isolating effective sample size
sn_eff <- stan1resdf['n_eff']
## removing effective sample sizes not found in the JAGS or benchmark set
sn_eff <- subset(sn_eff, !(rownames(sn_eff) %in% c("lvrho[1,2,1]", "lvrho[1,3,1]",
                                                   "lvrho[2,3,1]", "lp__")))
## rearrange parameters to match jags order
sn_eff <- sn_eff[match(names(jn_eff), rownames(sn_eff)),]

## isolating effective sample size & rearrange
fitlersresdf <- as.data.frame(fitlersres)
ln_eff <- fitlersresdf['n_eff']
ln_eff <- subset(ln_eff, !(grepl("lp__", rownames(ln_eff)) |
                           grepl("log_lik", rownames(ln_eff))))
ln_eff <- ln_eff[c(1:15,19:21,16:18,22:30),]

## setting up effective sample size per second
res <- round(c(jn_eff/times_ex1[1], sn_eff/times_ex1[2], ln_eff/times_ex1[3]), 1)

## dataframe with effective sample size per second for each method
res <- data.frame(npersec = res,
                  param = factor(rep(1:(length(res)/3), 3)),
                  method = rep(c("JAGS", "Old Stan", "New Stan"),
                               each = length(res)/3))
res$method <- as.factor(res$method)

## This is displayed code in the paper for example 2
## HS.model <- ' visual =~ x1 + x2 + x3
##               textual =~ x4 + x5 + x6
##               speed =~ x7 + x8 + x9 '
## fit <- bcfa(HS.model, data = HolzingerSwineford1939)

## Figure 2: Sampling efficiency of the MCMC procedures for the Holzinger & Swineford example
## colorblind-friendly palette
cbbPalette <- c("#0072B2", "#E69F00", "#000000", "#009E73", "#F0E442", "#56B4E9",
                "#D55E00", "#CC79A7")
ggplot(res, aes(x = param, y = npersec, group = method)) +
  geom_line(aes(linetype = method, color = method)) +
  geom_point(aes(shape = method, color = method)) +
  scale_color_manual(values = cbbPalette) +
  labs(x = "Parameter Number", y = "ESS/s") +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, size = rel(0.75)))

## This is the third example in the paper and uses a multiple indicator univariate
## latent change score model (Kievit et al, 2018)
set.seed(100)
simdatMILCS <- growthdata(500)

if(file.exists("fit1jags3.rda") & file.exists("fit2stan3.rda") & file.exists("fit3lers3.rda")){
  load("fit1jags3.rda")
  load("fit2stan3.rda")
  load("fit3lers3.rda")
  load("times_ex3.rda")
} else {
  # Multiple indicator Univariate Latent Change Score model
  # (Kievit et al, 2018)
  MILCS <- '
  COG_T1 =~ 1*T1X1 + T1X2 + T1X3   # This specifies the measurement model for COG_T1
  COG_T2 =~ 1*T2X1 + equal("COG_T1 =~ T1X2")*T2X2 + equal("COG_T1 =~ T1X3")*T2X3   # This specifies the measurement model for COG_T2 with the equality constrained factor loadings
  COG_T2 ~ 1*COG_T1     # Fixed regression of COG_T2 on COG_T1
  dCOG1 =~ 1*COG_T2     # Fixed regression of dCOG1 on COG_T2
  COG_T2 ~ 0*1          # This line constrains the intercept of COG_T2 to 0
  COG_T2 ~~ 0*COG_T2    # This fixes the variance of the COG_T2 to 0

  dCOG1 ~ 1             # This estimates the intercept of the change score
  COG_T1 ~ 1            # This estimates the intercept of COG_T1
  dCOG1 ~~ dCOG1        # This estimates the variance of the change scores
  COG_T1 ~~ COG_T1      # This estimates the variance of the COG_T1
  dCOG1 ~ COG_T1        # This estimates the self-feedback parameter

  T1X1 ~~ T2X1          # This allows residual covariance on indicator X1 across T1 and T2
  T1X2 ~~ T2X2          # This allows residual covariance on indicator X2 across T1 and T2
  T1X3 ~~ T2X3          # This allows residual covariance on indicator X3 across T1 and T2

  T1X1 ~~ T1X1          # This allows residual variance on indicator X1
  T1X2 ~~ T1X2          # This allows residual variance on indicator X2
  T1X3 ~~ T1X3          # This allows residual variance on indicator X3

  T2X1 ~~ equal("T1X1 ~~ T1X1")*T2X1   # This allows residual variance on indicator X1 at T2
  T2X2 ~~ equal("T1X2 ~~ T1X2")*T2X2   # This allows residual variance on indicator X2 at T2
  T2X3 ~~ equal("T1X3 ~~ T1X3")*T2X3   # This allows residual variance on indicator X3 at T2

  T1X1 ~ 0*1            # This constrains the intercept of X1 to 0 at T1
  T1X2 ~ 1              # This estimates the intercept of X2 at T1
  T1X3 ~ 1              # This estimates the intercept of X3 at T1
  T2X1 ~ 0*1            # This constrains the intercept of X1 to 0 at T2
  T2X2 ~ equal("T1X2 ~ 1")*1   # This estimates the intercept of X2 at T2
  T2X3 ~ equal("T1X3 ~ 1")*1   # This estimates the intercept of X3 at T2
  '

  ## JAGS method
  fit1 <- blavaan(MILCS, data = simdatMILCS, target = "jags", fixed.x = FALSE,
                  do.fit = FALSE, mcmcfile = TRUE, seed = c(6863, 4558, 3037))
  file.rename("lavExport/sem.jag", "lavExport/sem3.jag")
  file.rename("lavExport/semjags.rda", "lavExport/sem3jags.rda")
  load("lavExport/sem3jags.rda")
  sysj3 <- system.time(fit1jags3 <- run.jags("lavExport/sem3.jag", n.chains = 3,
                                             adapt = 200, burnin = 800, sample = 2000,
                                             monitor = jagtrans$monitors,
                                             data = jagtrans$data,
                                             inits = jagtrans$inits, thin = 20))
  save(fit1jags3, file = "fit1jags3.rda")

  ## Stan method
  fit2 <- blavaan(MILCS, data = simdatMILCS, fixed.x = FALSE, target = 'stanclassic',
                  do.fit = FALSE, mcmcfile = TRUE)
  file.rename("lavExport/sem.stan", "lavExport/sem3.stan")
  file.rename("lavExport/semstan.rda", "lavExport/sem3stan.rda")
  load("lavExport/sem3stan.rda")
  stanm3 <- stan_model("lavExport/sem3.stan")
  syss3 <- system.time(fit2stan3 <- try(sampling(stanm3, data = stantrans$data,
                                                 pars = stantrans$monitors,
                                                 chains = 3, init = stantrans$inits,
                                                 iter = 3000, warmup = 300,
                                                 seed = 4558)))
  save(fit2stan3, file = "fit2stan3.rda")

  ## Benchmark
  fit3 <- blavaan(MILCS, data = simdatMILCS, fixed.x = FALSE, do.fit = FALSE,
                  target = "stan", mcmcfile = TRUE)
  file.rename("lavExport/semstan.rda", "lavExport/sem1stanmarg3.rda")
  load("lavExport/sem1stanmarg3.rda")

  ## starting values
  ini <- list(Lambda_y_free = rep(1,2), Theta_sd_free = rep(1,3))
  ini <- list(c1 = ini, c2 = ini, c3 = ini)
  sysb3 <- system.time(fit3lers3 <- sampling(stanmarg, data = stantrans$data,
                                             pars = stantrans$monitors, iter = 3000,
                                             warmup = 300, chains = 3, init = ini,
                                             seed = 4558))
  save(fit3lers3, file = "fit3lers3.rda")

  ## hard-coded for reproducibility:
  ## times_ex3 <- c(sysj3[3], syss3[3], sysb3[3])
  times_ex3 <- c(1078.296, 15384.675, 678.74)
  save(times_ex3, file = "times_ex3.rda")
}

jag_samples <- fit1jags3$mcmc
jag_array <- array(NA, dim = c(nrow(jag_samples[[1]]), length(jag_samples),
                               ncol(jag_samples[[1]])))
for(i in 1:length(jag_samples)){
  jag_array[,i,] <- jag_samples[[i]]
}
jag3res <- monitor(jag_array, print = FALSE)
stan3res <- monitor(fit2stan3, print = FALSE)
stan3res <- subset(stan3res, sd > 0)
fitlers3res <- monitor(fit3lers3, print = FALSE)

## isolating effective sample size
jn_eff3 <- jag3res[,'n_eff']
names(jn_eff3) <- colnames(jag_samples[[1]])
## omitting all the na's, removing equality constrained parameters, reordering
jn_eff3 <- na.omit(jn_eff3)
jn_eff3 <- jn_eff3[c('lambda[2,1,1]', 'lambda[3,1,1]', 'beta[3,1,1]', 'theta[1,1,1]',
                     'theta[2,2,1]', 'theta[3,3,1]', 'theta[1,4,1]', 'theta[2,5,1]',
                     'theta[3,6,1]', 'psi[1,1,1]', 'psi[3,3,1]', 'nu[2,1,1]',
                     'nu[3,1,1]', 'alpha[1,1,1]', 'alpha[3,1,1]')]

## setting up dataframe
stan3resdf <- as.data.frame(stan3res)
## isolating effective sample size
sn_eff3 <- stan3resdf['n_eff']
## reorder
sn_eff3 <- sn_eff3[c('lambda[2,1,1]', 'lambda[3,1,1]', 'beta[3,1,1]', 'theta[1,1,1]',
                     'theta[2,2,1]', 'theta[3,3,1]', 'theta[1,4,1]', 'theta[2,5,1]',
                     'theta[3,6,1]', 'psi[1,1,1]', 'psi[3,3,1]', 'nu[2,1,1]',
                     'nu[3,1,1]', 'alpha[1,1,1]', 'alpha[3,1,1]'),]

## setting up dataframe
l3resdf <- as.data.frame(fitlers3res)
## isolating effective sample size
ln_eff3 <- l3resdf['n_eff']
## reorder
ln_eff3 <- ln_eff3[c('ly_sign[1]', 'ly_sign[2]', 'bet_sign[1]', 'Theta_var[1]',
                     'Theta_var[2]', 'Theta_var[3]', 'Theta_cov[1]', 'Theta_cov[2]',
                     'Theta_cov[3]', 'Psi_var[1]', 'Psi_var[2]', 'Nu_free[1]',
                     'Nu_free[2]', 'Alpha_free[1]', 'Alpha_free[2]'),]

## setting up effective sample size per second
res3 <- round(c(jn_eff3/times_ex3[1], sn_eff3/times_ex3[2], ln_eff3/times_ex3[3]), 1)

## dataframe with effective sample size per second for each method
res3 <- data.frame(npersec = res3,
                   param = factor(rep(1:(length(res3)/3), 3)),
                   method = rep(c("JAGS", "Old Stan", "New Stan"),
                                each = length(res3)/3))
res3$method <- as.factor(res3$method)

## This is displayed code in the paper for example 3
## MILCS <- '
##   COG_T1 =~ 1*T1X1 + T1X2 + T1X3
##   COG_T2 =~ 1*T2X1 + equal("COG_T1 =~ T1X2")*T2X2 + equal("COG_T1 =~ T1X3")*T2X3
##
##   COG_T2 ~ 1*COG_T1
##   dCOG1 =~ 1*COG_T2
##   COG_T2 ~ 0*1
##   COG_T2 ~~ 0*COG_T2
##
##   dCOG1 ~ 1
##   COG_T1 ~ 1
##   dCOG1 ~~ dCOG1
##   COG_T1 ~~ COG_T1
##   dCOG1 ~ COG_T1
##
##   T1X1 ~~ T2X1
##   T1X2 ~~ T2X2
##   T1X3 ~~ T2X3
##
##   T1X1 ~~ T1X1
##   T1X2 ~~ T1X2
##   T1X3 ~~ T1X3
##
##   T2X1 ~~ equal("T1X1 ~~ T1X1")*T2X1
##   T2X2 ~~ equal("T1X2 ~~ T1X2")*T2X2
##   T2X3 ~~ equal("T1X3 ~~ T1X3")*T2X3
##
##   T1X1 ~ 0*1
##   T1X2 ~ 1
##   T1X3 ~ 1
##   T2X1 ~ 0*1
##   T2X2 ~ equal("T1X2 ~ 1")*1
##   T2X3 ~ equal("T1X3 ~ 1")*1
## '
## fit <- blavaan(MILCS, data = simdatMILCS, fixed.x = FALSE)

## Figure 3: Sampling efficiency of the MCMC procedures for the growth model example
## colorblind-friendly palette
cbbPalette <- c("#0072B2", "#E69F00", "#000000", "#009E73", "#F0E442", "#56B4E9",
                "#D55E00", "#CC79A7")
ggplot(res3, aes(x = param, y = npersec, group = method)) +
  geom_line(aes(linetype = method, color = method)) +
  geom_point(aes(shape = method, color = method)) +
  scale_color_manual(values = cbbPalette) +
  labs(x = "Parameter Number", y = "ESS/s") +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, size = rel(0.75)))
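## Optional convergence check across the three examples (an illustrative sketch, not
## part of the reported results; the object name rhatmax is ours). rstan::monitor()
## reports an Rhat column for every monitored quantity, and maximum values near 1.00
## for each method suggest that the chains above have converged.
rhatmax <- rbind(
  "Example 1" = c(JAGS = max(jag2res[, 'Rhat'], na.rm = TRUE),
                  "Old Stan" = max(stan2res$Rhat, na.rm = TRUE),
                  "New Stan" = max(fitlers2res$Rhat, na.rm = TRUE)),
  "Example 2" = c(max(jag1res[, 'Rhat'], na.rm = TRUE),
                  max(stan1res$Rhat, na.rm = TRUE),
                  max(fitlersres$Rhat, na.rm = TRUE)),
  "Example 3" = c(max(jag3res[, 'Rhat'], na.rm = TRUE),
                  max(stan3res$Rhat, na.rm = TRUE),
                  max(fitlers3res$Rhat, na.rm = TRUE)))
round(rhatmax, 3)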
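## Optional numerical companion to Figures 1-3 (a summary sketch, not part of the
## reported results; the object name esssumm is ours): median effective sample size
## per second by method, using the data frames res2 (Example 1), res (Example 2),
## and res3 (Example 3) built above.
esssumm <- rbind(data.frame(example = "Example 1", res2),
                 data.frame(example = "Example 2", res),
                 data.frame(example = "Example 3", res3))
aggregate(npersec ~ example + method, data = esssumm, FUN = median)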
## Simulation-based calibration. This takes days to run, and rstan::sbc() does not allow for
## a random seed. We do not see a way to make this exactly reproducible.

## compile sbc file
if(file.exists("sbc.rda")){
  load("sbc.rda")
} else {
  sem.sbc <- stan_model(stanc_ret = stanc(file = "sbc.stan"))
  save(sem.sbc, file = "sbc.rda")
}

## get data
model <- '
  # measurement model
  ind60 =~ x1 + x2 + x3
  dem60 =~ y1 + y2 + y3 + y4
  dem65 =~ y5 + y6 + y7 + y8

  # regressions
  dem60 ~ ind60
  dem65 ~ ind60 + dem60

  # residual correlations
  y1 ~~ y5
  y2 ~~ y4 + y6
  y3 ~~ y7
  y4 ~~ y8
  y6 ~~ y8
'

## data formatted for Stan
set.seed(101)
tmp <- bsem(model, data = PoliticalDemocracy, do.fit = FALSE, mcmcfile = TRUE)
load("lavExport/semstan.rda")
sbc.dat <- stantrans$data
sbc.dat$YX <- sbc.dat$S <- NULL  # Not necessary
sbc.dat$has_data <- 1            # takes time; 0 is faster

if(file.exists("sbcfitni.rda")){
  load("sbcfitni.rda")
} else {
  ini <- stantrans$inits[[1]]
  sbcfitni <- sbc(sem.sbc, data = sbc.dat, M = 5e2, iter = 500, warmup = 200,
                  init = list(c1 = ini))  # apparently unable to set seed
  save(sbcfitni, file = "sbcfitni.rda")
}

if(file.exists("sbcfit.rda")){
  load("sbcfit.rda")
} else {
  ini <- stantrans$inits[[1]]

  ## tighter priors to avoid errors of npd covariance matrices during data generation
  sbc.dat$b_mn <- rep(1.5, sbc.dat$len_b)
  sbc.dat$b_sd <- rep(.25, sbc.dat$len_b)
  sbc.dat$lambda_y_mn <- rep(1.25, sbc.dat$len_lam_y)
  sbc.dat$lambda_y_sd <- rep(.25, sbc.dat$len_lam_y)
  sbc.dat$psi_sd_shape <- rep(10, sbc.dat$len_psi_sd)
  sbc.dat$psi_sd_rate <- rep(10, sbc.dat$len_psi_sd)
  sbc.dat$theta_r_alpha <- rep(5, sbc.dat$len_thet_r)
  sbc.dat$theta_r_beta <- rep(5, sbc.dat$len_thet_r)
  sbc.dat$theta_sd_shape <- rep(10, sbc.dat$len_thet_sd)
  sbc.dat$theta_sd_rate <- rep(10, sbc.dat$len_thet_sd)

  sbcfit <- sbc(sem.sbc, data = sbc.dat, M = 5e2, iter = 500, warmup = 200,
                init = list(c1 = ini))  # apparently unable to set seed
  save(sbcfit, file = "sbcfit.rda")
}

## Figure 4: sbc results for default priors
df <- as.data.frame(t(as.data.frame(lapply(sbcfitni$ranks, colMeans))))
rownames(df) <- NULL
names(df) <- gsub("_free", "", names(df))
rstan::plot(sbcfitni, thin = 3, binwidth = 5) + facet_wrap(~ rep(names(df))) +
  theme_classic() + scale_x_continuous(breaks = NULL) +
  theme(strip.text.x = element_text(size = 8)) + xlab("")

## alternative
## df.l <- cbind.data.frame(param = rep(names(df), each = nrow(df)),
##                          dist = as.numeric(as.matrix(df)))
## ggplot(df.l, aes(dist)) + facet_wrap(~ param) + geom_freqpoly(binwidth = .05) +
##   theme_classic() + scale_x_continuous(breaks = NULL) +
##   theme(strip.text.x = element_text(size = 8)) + xlab("")

## Figure 5: sbc results for informative priors
df <- as.data.frame(t(as.data.frame(lapply(sbcfit$ranks, colMeans))))
rownames(df) <- NULL
names(df) <- gsub("_free", "", names(df))
rstan::plot(sbcfit, thin = 3, binwidth = 5) + facet_wrap(~ rep(names(df))) +
  theme_classic() + scale_x_continuous(breaks = NULL) +
  theme(strip.text.x = element_text(size = 8)) + xlab("")

## alternative
## df.l <- cbind.data.frame(param = rep(names(df), each = nrow(df)),
##                          dist = as.numeric(as.matrix(df)))
## ggplot(df.l, aes(dist)) + facet_wrap(~ param) + geom_freqpoly(binwidth = .05) +
##   theme_classic() + scale_x_continuous(breaks = NULL) +
##   theme(strip.text.x = element_text(size = 8)) + xlab("")
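## Rough numerical complement to the rank histograms (an illustrative sketch, not
## part of the reported results, and ignoring the thinning used in the plots): bin
## each parameter's rank statistics -- the columns of the df object recomputed just
## above for the informative-prior run -- and compare the bin counts to uniformity
## with a chi-square test. Small p-values would flag parameters whose ranks deviate
## from uniform. The object name unif_p is ours.
unif_p <- apply(df, 2, function(r){
  counts <- table(cut(r, breaks = seq(0, 1, by = .1), include.lowest = TRUE))
  chisq.test(counts)$p.value
})
round(sort(unif_p), 3)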