## NOTES
## - The timing and efficiency results vary by hardware, and also by JAGS and Stan versions.
##
## - The simulation-based calibration section (near the end) can take days to run. Also,
##   at the time of writing, rstan does not allow the user to set a random seed for sbc(),
##   so those results are not exactly reproducible.

## Seed
set.seed(123)

## formatting for paper
library("knitr")

## global knitr options
opts_chunk$set(echo = FALSE, fig.path = '../Figures/sem-', fig.align = 'center',
               fig.show = 'hold', size = 'footnotesize', cache.path = "cache/",
               warning = FALSE, message = FALSE, highlight = FALSE,
               background = 'white', comment = ' ', prompt = TRUE)
render_sweave()

## some options
options(prompt = "R> ", continue = "+ ", digits = 3,
        useFancyQuotes = FALSE, show.signif.stars = FALSE)

## packages and functions
library("blavaan")
library("rstan")
library("runjags")
library("ggplot2")

## Function for simulating growth data
growthdata <- function(sampsize){
  ## Simulate data for a Univariate Latent Change Score model.
  ## (Code from Kievit et al 2018)
  MILCS_simulate <- '
  #### The following two lines specify the measurement model for multiple indicators (X1-X3)
  #### measured on two occasions (T1-T2)
  COG_T1=~.8*T1X1+.9*T1X2+.7*T1X3   # This specifies the measurement model for COG_T1
  COG_T2=~.8*T2X1+.9*T2X2+.7*T2X3   # This specifies the measurement model for COG_T2

  ##### The following lines specify the core assumptions of the LCS
  ##### and should not generally be modified
  COG_T2 ~ 1*COG_T1      # Fixed regression of COG_T2 on COG_T1
  dCOG1 =~ 1*COG_T2      # Fixed regression of dCOG1 on COG_T2
  COG_T2 ~ 0*1           # This line constrains the intercept of COG_T2 to 0
  COG_T2 ~~ 0*COG_T2     # This fixes the variance of the COG_T2 to 0

  T1X1~0*1               # This fixes the intercept of X1 to 0
  T1X2~1*1               # This fixes the intercept of X2 to 1
  T1X3~.5*1              # This fixes the intercept of X3 to 0.5
  T2X1~0*1               # This fixes the intercept of X1 to 0
  T2X2~1*1               # This fixes the intercept of X2 to 1
  T2X3~.5*1              # This fixes the intercept of X3 to 0.5

  ###### The following five parameters will be estimated in the model.
  ###### Values can be modified manually to examine the effect on the model
  dCOG1 ~ 10*1           # This fixes the intercept of the change score to 10
  COG_T1 ~ 50*1          # This fixes the intercept of COG_T1 to 50.
  dCOG1 ~~ 5*dCOG1       # This fixes the variance of the change scores to 5.
  COG_T1 ~~ 8*COG_T1     # This fixes the variance of the COG_T1 to 8.
  dCOG1~-0.1*COG_T1      # This fixes the self-feedback parameter to -0.1.
  '

  # Generate data
  set.seed(1234)
  simdatMILCS <- simulateData(MILCS_simulate, sample.nobs = sampsize,
                              meanstructure = TRUE)

  simdatMILCS
}
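## Optional sanity check of the simulation function (an illustrative sketch only,
## not part of the reported analyses; the object name demo_dat is arbitrary).
## Because growthdata() sets its own seed internally, repeated calls with the same
## sample size return identical data, so this check does not affect later results.
demo_dat <- growthdata(100)
round(colMeans(demo_dat), 2)
rm(demo_dat)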
## compiled model for new Stan approach
stanmarg <- blavaan:::stanmodels$stanmarg

## Example code for Political Democracy model
## model <- '
##   # measurement model
##   ind60 =~ x1 + x2 + x3
##   dem60 =~ y1 + y2 + y3 + y4
##   dem65 =~ y5 + y6 + y7 + y8
##   # regressions
##   dem60 ~ ind60
##   dem65 ~ ind60 + dem60
##   # residual correlations
##   y1 ~~ y5
##   y2 ~~ y4 + y6
##   y3 ~~ y7
##   y4 ~~ y8
##   y6 ~~ y8
## '
## fit1 <- bsem(model, data = PoliticalDemocracy, target = "jags")
## fit2 <- bsem(model, data = PoliticalDemocracy, target = "stanclassic")
## fit3 <- bsem(model, data = PoliticalDemocracy, target = "stan")

## Political Democracy Models
## This is the first example in the paper, and it compares the methods' sampling
## efficiencies for the Bollen political democracy data
if(file.exists("fit1jags2.rda") & file.exists("fit2stan2.rda") & file.exists("fit3lers2.rda")){
  load("fit1jags2.rda")
  load("fit2stan2.rda")
  load("fit3lers2.rda")
  load("times_ex2.rda")
} else {
  model <- '
    # latent variable definitions
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
    dem65 =~ y5 + y6 + y7 + y8

    # regressions
    dem60 ~ ind60
    dem65 ~ ind60 + dem60

    # residual correlations
    y1 ~~ y5
    y2 ~~ y4 + y6
    y3 ~~ y7
    y4 ~~ y8
    y6 ~~ y8
  '

  ## JAGS method
  fit1 <- bsem(model, data = PoliticalDemocracy, target = "jags",
               dp = dpriors(nu = "dnorm(5,1e-2)", target = "jags"),
               bcontrol = list(method = "rjparallel"), do.fit = FALSE,
               mcmcfile = TRUE, seed = c(6862, 4557, 3036))
  file.rename("lavExport/sem.jag", "lavExport/sem2.jag")
  file.rename("lavExport/semjags.rda", "lavExport/sem2jags.rda")

  ## model estimation
  load("lavExport/sem2jags.rda")
  sysj2 <- system.time(fit1jags2 <- run.jags("lavExport/sem2.jag",
                                             monitor = jagtrans$monitors,
                                             data = jagtrans$data, n.chains = 3,
                                             adapt = 0, burnin = 1000, sample = 1000,
                                             inits = jagtrans$inits))
  save(fit1jags2, file = "fit1jags2.rda")

  ## Stan method
  fit2 <- bsem(model, data = PoliticalDemocracy, target = "stanclassic",
               dp = dpriors(nu = "normal(5,10)"), bcontrol = list(cores = 3),
               do.fit = FALSE, mcmcfile = TRUE)
  file.rename("lavExport/sem.stan", "lavExport/sem2.stan")
  file.rename("lavExport/semstan.rda", "lavExport/sem2stan.rda")
  load("lavExport/sem2stan.rda")
  stanm2 <- stan_model("lavExport/sem2.stan")
  syss2 <- system.time(fit2stan2 <- sampling(stanm2, data = stantrans$data,
                                             pars = stantrans$monitors, chains = 3,
                                             iter = 1200, warmup = 300,
                                             init = stantrans$inits, seed = 6862))
  save(fit2stan2, file = "fit2stan2.rda")

  ## Benchmark
  fit3 <- bsem(model, data = PoliticalDemocracy, target = "stan", do.fit = FALSE,
               mcmcfile = TRUE, dp = dpriors(nu = "normal(5,10)"))
  file.rename("lavExport/semstan.rda", "lavExport/sem1stanmarg2.rda")
  load("lavExport/sem1stanmarg2.rda")

  ## starting values
  ini <- list(Lambda_y_free = rep(1, 8), Theta_sd_free = rep(1, 11))
  ini <- list(c1 = ini, c2 = ini, c3 = ini)
  sysb2 <- system.time(fit3lers2 <- sampling(stanmarg, data = stantrans$data,
                                             pars = stantrans$monitors, iter = 1200,
                                             warmup = 300, chains = 3, init = ini,
                                             seed = 7974))
  save(fit3lers2, file = "fit3lers2.rda")

  ## hard-coded for reproducibility:
  ## times_ex2 <- c(sysj2[3], syss2[3], sysb2[3])
  times_ex2 <- c(7.943, 149.548, 20.471)
  save(times_ex2, file = "times_ex2.rda")
}

jag_samples <- fit1jags2$mcmc
jag_array <- array(NA, dim = c(nrow(jag_samples[[1]]), length(jag_samples),
                               ncol(jag_samples[[1]])))
for(i in 1:length(jag_samples)){
  jag_array[,i,] <- jag_samples[[i]]
}
jag2res <- monitor(jag_array, print = FALSE)
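## The conversion above (runjags samples to an iterations x chains x parameters
## array, as expected by rstan::monitor) is repeated verbatim for the later examples.
## A small helper like the one below could replace those loops; it is an illustrative
## sketch only (the name jags_to_array is ours) and is not used in the code that
## follows, which keeps the explicit loops for fidelity to the original script.
jags_to_array <- function(runjags_fit){
  mc <- runjags_fit$mcmc  # list of mcmc matrices, one per chain
  out <- array(NA, dim = c(nrow(mc[[1]]), length(mc), ncol(mc[[1]])))
  for(i in seq_along(mc)) out[, i, ] <- mc[[i]]
  out
}
## e.g., monitor(jags_to_array(fit1jags2), print = FALSE) should reproduce jag2res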
stan2res <- monitor(fit2stan2, print = FALSE)
stan2res <- subset(stan2res, sd > 0)
fitlers2summ <- summary(fit3lers2)
fitlers2res <- monitor(fit3lers2, print = FALSE)

## isolating effective sample size
jn_eff2 <- jag2res[,'n_eff']
names(jn_eff2) <- colnames(jag_samples[[1]])
## omitting all the na's, keeping only the draws we need
jn_eff2 <- jn_eff2[!is.na(jn_eff2)]
jn_eff2 <- jn_eff2[c(1:20, 55:62, 29:42)]

## isolating effective sample size
sn_eff2 <- stan2res$n_eff
names(sn_eff2) <- rownames(stan2res)
## removing lp__ and keeping only what we need
rows2 <- which(names(sn_eff2) == "lp__")
sn_eff2 <- sn_eff2[-rows2]
sn_eff2 <- sn_eff2[1:42]

## isolating effective sample size
ln_eff2 <- fitlers2res$n_eff
names(ln_eff2) <- rownames(fitlers2res)
## removing lp__ and log_lik effective sample sizes
rows3 <- which(grepl("lp__", names(ln_eff2)) | grepl("log_lik", names(ln_eff2)))
ln_eff2 <- ln_eff2[-rows3]

## setting up effective sample size per second
res2 <- round(c(jn_eff2/times_ex2[1], sn_eff2/times_ex2[2], ln_eff2/times_ex2[3]), 1)

## dataframe with effective sample size per second for each method
res2 <- data.frame(npersec = res2,
                   param = factor(rep(1:(length(res2)/3), 3)),
                   method = rep(c("JAGS", "Old Stan", "New Stan"),
                                each = length(res2)/3))
res2$method <- as.factor(res2$method)

## Figure 1: Sampling efficiency of the MCMC procedures for the Bollen example
## colorblind-friendly palette
cbbPalette <- c("#0072B2", "#E69F00", "#000000", "#009E73", "#F0E442", "#56B4E9",
                "#D55E00", "#CC79A7")
ggplot(res2, aes(x = param, y = npersec, group = method)) +
  geom_line(aes(linetype = method, color = method)) +
  geom_point(aes(shape = method, color = method)) +
  scale_color_manual(values = cbbPalette) +
  labs(x = "Parameter Number", y = "ESS/s") +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, size = rel(0.75)))

## This is the second example in the paper and involves a confirmatory factor
## analysis of the Holzinger and Swineford 1939 data
if(file.exists("fit1jags1.rda") & file.exists("fit2stan1.rda") & file.exists("fit3lers1.rda")){
  load("fit1jags1.rda")
  load("fit2stan1.rda")
  load("fit3lers1.rda")
  load("times_ex1.rda")
} else {
  HS.model <- ' visual =~ x1 + x2 + x3
                textual =~ x4 + x5 + x6
                speed =~ x7 + x8 + x9 '

  ## JAGS method
  fit1 <- bcfa(HS.model, data = HolzingerSwineford1939, target = "jags",
               bcontrol = list(method = "rjparallel"), do.fit = FALSE,
               mcmcfile = TRUE, seed = c(6863, 4558, 3037))
  file.rename("lavExport/sem.jag", "lavExport/sem1.jag")
  file.rename("lavExport/semjags.rda", "lavExport/sem1jags.rda")
  load("lavExport/sem1jags.rda")
  sysj <- system.time(fit1jags1 <- run.jags("lavExport/sem1.jag",
                                            monitor = jagtrans$monitors,
                                            data = jagtrans$data, n.chains = 3,
                                            adapt = 0, burnin = 1000, sample = 1000,
                                            inits = jagtrans$inits))
  save(fit1jags1, file = "fit1jags1.rda")

  ## Stan method
  fit2 <- bcfa(HS.model, data = HolzingerSwineford1939, target = "stanclassic",
               do.fit = FALSE, mcmcfile = TRUE)
  file.rename("lavExport/sem.stan", "lavExport/sem1.stan")
  file.rename("lavExport/semstan.rda", "lavExport/sem1stan.rda")
  load("lavExport/sem1stan.rda")
  stanm1 <- stan_model("lavExport/sem1.stan")
  syss <- system.time(fit2stan1 <- sampling(stanm1, data = stantrans$data,
                                            pars = stantrans$monitors, chains = 3,
                                            init = stantrans$inits, iter = 1200,
                                            warmup = 300, seed = 6863))
  save(fit2stan1, file = "fit2stan1.rda")

  ## Benchmark
  fit3 <- bcfa(HS.model, data = HolzingerSwineford1939, do.fit = FALSE,
               target = "stan", mcmcfile = TRUE)
  file.rename("lavExport/semstan.rda", "lavExport/sem1stanmarg.rda")
  load("lavExport/sem1stanmarg.rda")
  sysb <- system.time(fit3lers1 <- sampling(stanmarg, data = stantrans$data,
                                            pars = stantrans$monitors, iter = 1200,
                                            warmup = 300, init = stantrans$init,
                                            chains = 3, seed = 6863))
  save(fit3lers1, file = "fit3lers1.rda")

  ## hard-coded for reproducibility:
  ## times_ex1 <- c(sysj[3], syss[3], sysb[3])
  times_ex1 <- c(13.318, 112.437, 34.757)
  save(times_ex1, file = "times_ex1.rda")
}

jag_samples <- fit1jags1$mcmc
jag_array <- array(NA, dim = c(nrow(jag_samples[[1]]), length(jag_samples),
                               ncol(jag_samples[[1]])))
for(i in 1:length(jag_samples)){
  jag_array[,i,] <- jag_samples[[i]]
}
jag1res <- monitor(jag_array, print = FALSE)

## stan summary
stan1res <- monitor(fit2stan1, print = FALSE)
stan1res <- subset(stan1res, sd > 0)

## benchmark (new Stan) summary
fitlerssumm <- summary(fit3lers1)
fitlersres <- monitor(fit3lers1, print = FALSE)

## isolating effective sample size
jn_eff <- jag1res[,'n_eff']
names(jn_eff) <- colnames(jag_samples[[1]])
## omitting all the na's
jn_eff <- na.omit(jn_eff)

## setting up dataframe
stan1resdf <- as.data.frame(stan1res)
## isolating effective sample size
sn_eff <- stan1resdf['n_eff']
## removing effective sample sizes not found in the JAGS or benchmark set
sn_eff <- subset(sn_eff, !(rownames(sn_eff) %in% c("lvrho[1,2,1]", "lvrho[1,3,1]",
                                                   "lvrho[2,3,1]", "lp__")))
## rearrange parameters to match jags order
sn_eff <- sn_eff[match(names(jn_eff), rownames(sn_eff)),]

## isolating effective sample size & rearrange
fitlersresdf <- as.data.frame(fitlersres)
ln_eff <- fitlersresdf['n_eff']
ln_eff <- subset(ln_eff, !(grepl("lp__", rownames(ln_eff)) |
                           grepl("log_lik", rownames(ln_eff))))
ln_eff <- ln_eff[c(1:15,19:21,16:18,22:30),]

## setting up effective sample size per second
res <- round(c(jn_eff/times_ex1[1], sn_eff/times_ex1[2], ln_eff/times_ex1[3]), 1)

## dataframe with effective sample size per second for each method
res <- data.frame(npersec = res,
                  param = factor(rep(1:(length(res)/3), 3)),
                  method = rep(c("JAGS", "Old Stan", "New Stan"),
                               each = length(res)/3))
res$method <- as.factor(res$method)

## This is displayed code in the paper for example 2
## HS.model <- ' visual =~ x1 + x2 + x3
##               textual =~ x4 + x5 + x6
##               speed =~ x7 + x8 + x9 '
## fit <- bcfa(HS.model, data = HolzingerSwineford1939)

## Figure 2: Sampling efficiency of the MCMC procedures for the Holzinger & Swineford example
## colorblind-friendly palette
cbbPalette <- c("#0072B2", "#E69F00", "#000000", "#009E73", "#F0E442", "#56B4E9",
                "#D55E00", "#CC79A7")
ggplot(res, aes(x = param, y = npersec, group = method)) +
  geom_line(aes(linetype = method, color = method)) +
  geom_point(aes(shape = method, color = method)) +
  scale_color_manual(values = cbbPalette) +
  labs(x = "Parameter Number", y = "ESS/s") +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, size = rel(0.75)))

## This is the third example in the paper and uses a multiple indicator univariate
## latent change score model (Kievit et al, 2018)
set.seed(100)
simdatMILCS <- growthdata(500)

if(file.exists("fit1jags3.rda") & file.exists("fit2stan3.rda") & file.exists("fit3lers3.rda")){
  load("fit1jags3.rda")
  load("fit2stan3.rda")
  load("fit3lers3.rda")
  load("times_ex3.rda")
} else {
  # Multiple indicator Univariate Latent Change Score model
  # (Kievit et al, 2018)
  MILCS <- '
  COG_T1 =~ 1*T1X1 + T1X2 + T1X3   # This specifies the measurement model for COG_T1
  COG_T2 =~ 1*T2X1 + equal("COG_T1 =~ T1X2")*T2X2 + equal("COG_T1 =~ T1X3")*T2X3   # This specifies the measurement model for COG_T2 with the equality constrained factor loadings
  COG_T2 ~ 1*COG_T1     # Fixed regression of COG_T2 on COG_T1
  dCOG1 =~ 1*COG_T2     # Fixed regression of dCOG1 on COG_T2
  COG_T2 ~ 0*1          # This line constrains the intercept of COG_T2 to 0
  COG_T2 ~~ 0*COG_T2    # This fixes the variance of the COG_T2 to 0

  dCOG1 ~ 1             # This estimates the intercept of the change score
  COG_T1 ~ 1            # This estimates the intercept of COG_T1
  dCOG1 ~~ dCOG1        # This estimates the variance of the change scores
  COG_T1 ~~ COG_T1      # This estimates the variance of the COG_T1
  dCOG1 ~ COG_T1        # This estimates the self-feedback parameter

  T1X1 ~~ T2X1          # This allows residual covariance on indicator X1 across T1 and T2
  T1X2 ~~ T2X2          # This allows residual covariance on indicator X2 across T1 and T2
  T1X3 ~~ T2X3          # This allows residual covariance on indicator X3 across T1 and T2

  T1X1 ~~ T1X1          # This allows residual variance on indicator X1
  T1X2 ~~ T1X2          # This allows residual variance on indicator X2
  T1X3 ~~ T1X3          # This allows residual variance on indicator X3

  T2X1 ~~ equal("T1X1 ~~ T1X1")*T2X1   # This allows residual variance on indicator X1 at T2
  T2X2 ~~ equal("T1X2 ~~ T1X2")*T2X2   # This allows residual variance on indicator X2 at T2
  T2X3 ~~ equal("T1X3 ~~ T1X3")*T2X3   # This allows residual variance on indicator X3 at T2

  T1X1 ~ 0*1            # This constrains the intercept of X1 to 0 at T1
  T1X2 ~ 1              # This estimates the intercept of X2 at T1
  T1X3 ~ 1              # This estimates the intercept of X3 at T1
  T2X1 ~ 0*1            # This constrains the intercept of X1 to 0 at T2
  T2X2 ~ equal("T1X2 ~ 1")*1   # This estimates the intercept of X2 at T2
  T2X3 ~ equal("T1X3 ~ 1")*1   # This estimates the intercept of X3 at T2
  '

  ## JAGS method
  fit1 <- blavaan(MILCS, data = simdatMILCS, target = "jags", fixed.x = FALSE,
                  do.fit = FALSE, mcmcfile = TRUE, seed = c(6863, 4558, 3037))
  file.rename("lavExport/sem.jag", "lavExport/sem3.jag")
  file.rename("lavExport/semjags.rda", "lavExport/sem3jags.rda")
  load("lavExport/sem3jags.rda")
  sysj3 <- system.time(fit1jags3 <- run.jags("lavExport/sem3.jag", n.chains = 3,
                                             adapt = 200, burnin = 800, sample = 2000,
                                             monitor = jagtrans$monitors,
                                             data = jagtrans$data,
                                             inits = jagtrans$inits, thin = 20))
  save(fit1jags3, file = "fit1jags3.rda")

  ## Stan method
  fit2 <- blavaan(MILCS, data = simdatMILCS, fixed.x = FALSE, target = 'stanclassic',
                  do.fit = FALSE, mcmcfile = TRUE)
  file.rename("lavExport/sem.stan", "lavExport/sem3.stan")
  file.rename("lavExport/semstan.rda", "lavExport/sem3stan.rda")
  load("lavExport/sem3stan.rda")
  stanm3 <- stan_model("lavExport/sem3.stan")
  syss3 <- system.time(fit2stan3 <- try(sampling(stanm3, data = stantrans$data,
                                                 pars = stantrans$monitors,
                                                 chains = 3, init = stantrans$inits,
                                                 iter = 3000, warmup = 300,
                                                 seed = 4558)))
  save(fit2stan3, file = "fit2stan3.rda")

  ## Benchmark
  fit3 <- blavaan(MILCS, data = simdatMILCS, fixed.x = FALSE, do.fit = FALSE,
                  target = "stan", mcmcfile = TRUE)
  file.rename("lavExport/semstan.rda", "lavExport/sem1stanmarg3.rda")
  load("lavExport/sem1stanmarg3.rda")

  ## starting values
  ini <- list(Lambda_y_free = rep(1,2), Theta_sd_free = rep(1,3))
  ini <- list(c1 = ini, c2 = ini, c3 = ini)
  sysb3 <- system.time(fit3lers3 <- sampling(stanmarg, data = stantrans$data,
                                             pars = stantrans$monitors, iter = 3000,
                                             warmup = 300, chains = 3, init = ini,
                                             seed = 4558))
  save(fit3lers3, file = "fit3lers3.rda")

  ## hard-coded for reproducibility:
  ## times_ex3 <- c(sysj3[3], syss3[3], sysb3[3])
  times_ex3 <- c(1078.296, 15384.675, 678.74)
  save(times_ex3, file = "times_ex3.rda")
}

jag_samples <- fit1jags3$mcmc
jag_array <- array(NA, dim = c(nrow(jag_samples[[1]]), length(jag_samples),
                               ncol(jag_samples[[1]])))
for(i in 1:length(jag_samples)){
  jag_array[,i,] <- jag_samples[[i]]
}
jag3res <- monitor(jag_array, print = FALSE)
stan3res <- monitor(fit2stan3, print = FALSE)
stan3res <- subset(stan3res, sd > 0)
fitlers3res <- monitor(fit3lers3, print = FALSE)

## isolating effective sample size
jn_eff3 <- jag3res[,'n_eff']
names(jn_eff3) <- colnames(jag_samples[[1]])
## omitting all the na's, removing equality constrained parameters, reordering
jn_eff3 <- na.omit(jn_eff3)
jn_eff3 <- jn_eff3[c('lambda[2,1,1]', 'lambda[3,1,1]', 'beta[3,1,1]', 'theta[1,1,1]',
                     'theta[2,2,1]', 'theta[3,3,1]', 'theta[1,4,1]', 'theta[2,5,1]',
                     'theta[3,6,1]', 'psi[1,1,1]', 'psi[3,3,1]', 'nu[2,1,1]',
                     'nu[3,1,1]', 'alpha[1,1,1]', 'alpha[3,1,1]')]

## setting up dataframe
stan3resdf <- as.data.frame(stan3res)
## isolating effective sample size
sn_eff3 <- stan3resdf['n_eff']
## reorder
sn_eff3 <- sn_eff3[c('lambda[2,1,1]', 'lambda[3,1,1]', 'beta[3,1,1]', 'theta[1,1,1]',
                     'theta[2,2,1]', 'theta[3,3,1]', 'theta[1,4,1]', 'theta[2,5,1]',
                     'theta[3,6,1]', 'psi[1,1,1]', 'psi[3,3,1]', 'nu[2,1,1]',
                     'nu[3,1,1]', 'alpha[1,1,1]', 'alpha[3,1,1]'),]

## setting up dataframe
l3resdf <- as.data.frame(fitlers3res)
## isolating effective sample size
ln_eff3 <- l3resdf['n_eff']
## reorder
ln_eff3 <- ln_eff3[c('ly_sign[1]', 'ly_sign[2]', 'bet_sign[1]', 'Theta_var[1]',
                     'Theta_var[2]', 'Theta_var[3]', 'Theta_cov[1]', 'Theta_cov[2]',
                     'Theta_cov[3]', 'Psi_var[1]', 'Psi_var[2]', 'Nu_free[1]',
                     'Nu_free[2]', 'Alpha_free[1]', 'Alpha_free[2]'),]

## setting up effective sample size per second
res3 <- round(c(jn_eff3/times_ex3[1], sn_eff3/times_ex3[2], ln_eff3/times_ex3[3]), 1)

## dataframe with effective sample size per second for each method
res3 <- data.frame(npersec = res3,
                   param = factor(rep(1:(length(res3)/3), 3)),
                   method = rep(c("JAGS", "Old Stan", "New Stan"),
                                each = length(res3)/3))
res3$method <- as.factor(res3$method)

## This is displayed code in the paper for example 3
## MILCS <- '
##   COG_T1 =~ 1*T1X1 + T1X2 + T1X3
##   COG_T2 =~ 1*T2X1 + equal("COG_T1 =~ T1X2")*T2X2 + equal("COG_T1 =~ T1X3")*T2X3
##
##   COG_T2 ~ 1*COG_T1
##   dCOG1 =~ 1*COG_T2
##   COG_T2 ~ 0*1
##   COG_T2 ~~ 0*COG_T2
##
##   dCOG1 ~ 1
##   COG_T1 ~ 1
##   dCOG1 ~~ dCOG1
##   COG_T1 ~~ COG_T1
##   dCOG1 ~ COG_T1
##
##   T1X1 ~~ T2X1
##   T1X2 ~~ T2X2
##   T1X3 ~~ T2X3
##
##   T1X1 ~~ T1X1
##   T1X2 ~~ T1X2
##   T1X3 ~~ T1X3
##
##   T2X1 ~~ equal("T1X1 ~~ T1X1")*T2X1
##   T2X2 ~~ equal("T1X2 ~~ T1X2")*T2X2
##   T2X3 ~~ equal("T1X3 ~~ T1X3")*T2X3
##
##   T1X1 ~ 0*1
##   T1X2 ~ 1
##   T1X3 ~ 1
##   T2X1 ~ 0*1
##   T2X2 ~ equal("T1X2 ~ 1")*1
##   T2X3 ~ equal("T1X3 ~ 1")*1
## '
## fit <- blavaan(MILCS, data = simdatMILCS, fixed.x = FALSE)

## Figure 3: Sampling efficiency of the MCMC procedures for the growth model example
## colorblind-friendly palette
cbbPalette <- c("#0072B2", "#E69F00", "#000000", "#009E73", "#F0E442", "#56B4E9",
                "#D55E00", "#CC79A7")
ggplot(res3, aes(x = param, y = npersec, group = method)) +
  geom_line(aes(linetype = method, color = method)) +
  geom_point(aes(shape = method, color = method)) +
  scale_color_manual(values = cbbPalette) +
  labs(x = "Parameter Number", y = "ESS/s") +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, size = rel(0.75)))
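## Optional convergence check across the three examples (an illustrative sketch, not
## part of the reported results; the object name rhatmax is ours). rstan::monitor()
## reports an Rhat column for every monitored quantity, and maximum values near 1.00
## for each method suggest that the chains above have converged.
rhatmax <- rbind(
  "Example 1" = c(JAGS = max(jag2res[, 'Rhat'], na.rm = TRUE),
                  "Old Stan" = max(stan2res$Rhat, na.rm = TRUE),
                  "New Stan" = max(fitlers2res$Rhat, na.rm = TRUE)),
  "Example 2" = c(max(jag1res[, 'Rhat'], na.rm = TRUE),
                  max(stan1res$Rhat, na.rm = TRUE),
                  max(fitlersres$Rhat, na.rm = TRUE)),
  "Example 3" = c(max(jag3res[, 'Rhat'], na.rm = TRUE),
                  max(stan3res$Rhat, na.rm = TRUE),
                  max(fitlers3res$Rhat, na.rm = TRUE)))
round(rhatmax, 3)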
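## Optional numerical companion to Figures 1-3 (a summary sketch, not part of the
## reported results; the object name esssumm is ours): median effective sample size
## per second by method, using the data frames res2 (Example 1), res (Example 2),
## and res3 (Example 3) built above.
esssumm <- rbind(data.frame(example = "Example 1", res2),
                 data.frame(example = "Example 2", res),
                 data.frame(example = "Example 3", res3))
aggregate(npersec ~ example + method, data = esssumm, FUN = median)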
## Simulation-based calibration. This takes days to run, and rstan::sbc() does not allow for
## a random seed. We do not see a way to make this exactly reproducible.

## compile sbc file
if(file.exists("sbc.rda")){
  load("sbc.rda")
} else {
  sem.sbc <- stan_model(stanc_ret = stanc(file = "sbc.stan"))
  save(sem.sbc, file = "sbc.rda")
}

## get data
model <- '
  # measurement model
  ind60 =~ x1 + x2 + x3
  dem60 =~ y1 + y2 + y3 + y4
  dem65 =~ y5 + y6 + y7 + y8

  # regressions
  dem60 ~ ind60
  dem65 ~ ind60 + dem60

  # residual correlations
  y1 ~~ y5
  y2 ~~ y4 + y6
  y3 ~~ y7
  y4 ~~ y8
  y6 ~~ y8
'

## data formatted for Stan
set.seed(101)
tmp <- bsem(model, data = PoliticalDemocracy, do.fit = FALSE, mcmcfile = TRUE)
load("lavExport/semstan.rda")
sbc.dat <- stantrans$data
sbc.dat$YX <- sbc.dat$S <- NULL  # Not necessary
sbc.dat$has_data <- 1            # takes time; 0 is faster

if(file.exists("sbcfitni.rda")){
  load("sbcfitni.rda")
} else {
  ini <- stantrans$inits[[1]]
  sbcfitni <- sbc(sem.sbc, data = sbc.dat, M = 5e2, iter = 500, warmup = 200,
                  init = list(c1 = ini))  # apparently unable to set seed
  save(sbcfitni, file = "sbcfitni.rda")
}

if(file.exists("sbcfit.rda")){
  load("sbcfit.rda")
} else {
  ini <- stantrans$inits[[1]]

  ## tighter priors to avoid errors of npd covariance matrices during data generation
  sbc.dat$b_mn <- rep(1.5, sbc.dat$len_b)
  sbc.dat$b_sd <- rep(.25, sbc.dat$len_b)
  sbc.dat$lambda_y_mn <- rep(1.25, sbc.dat$len_lam_y)
  sbc.dat$lambda_y_sd <- rep(.25, sbc.dat$len_lam_y)
  sbc.dat$psi_sd_shape <- rep(10, sbc.dat$len_psi_sd)
  sbc.dat$psi_sd_rate <- rep(10, sbc.dat$len_psi_sd)
  sbc.dat$theta_r_alpha <- rep(5, sbc.dat$len_thet_r)
  sbc.dat$theta_r_beta <- rep(5, sbc.dat$len_thet_r)
  sbc.dat$theta_sd_shape <- rep(10, sbc.dat$len_thet_sd)
  sbc.dat$theta_sd_rate <- rep(10, sbc.dat$len_thet_sd)

  sbcfit <- sbc(sem.sbc, data = sbc.dat, M = 5e2, iter = 500, warmup = 200,
                init = list(c1 = ini))  # apparently unable to set seed
  save(sbcfit, file = "sbcfit.rda")
}

## Figure 4: sbc results for default priors
df <- as.data.frame(t(as.data.frame(lapply(sbcfitni$ranks, colMeans))))
rownames(df) <- NULL
names(df) <- gsub("_free", "", names(df))
rstan::plot(sbcfitni, thin = 3, binwidth = 5) + facet_wrap(~ rep(names(df))) +
  theme_classic() + scale_x_continuous(breaks = NULL) +
  theme(strip.text.x = element_text(size = 8)) + xlab("")

## alternative
## df.l <- cbind.data.frame(param = rep(names(df), each = nrow(df)),
##                          dist = as.numeric(as.matrix(df)))
## ggplot(df.l, aes(dist)) + facet_wrap(~ param) + geom_freqpoly(binwidth = .05) +
##   theme_classic() + scale_x_continuous(breaks = NULL) +
##   theme(strip.text.x = element_text(size = 8)) + xlab("")

## Figure 5: sbc results for informative priors
df <- as.data.frame(t(as.data.frame(lapply(sbcfit$ranks, colMeans))))
rownames(df) <- NULL
names(df) <- gsub("_free", "", names(df))
rstan::plot(sbcfit, thin = 3, binwidth = 5) + facet_wrap(~ rep(names(df))) +
  theme_classic() + scale_x_continuous(breaks = NULL) +
  theme(strip.text.x = element_text(size = 8)) + xlab("")

## alternative
## df.l <- cbind.data.frame(param = rep(names(df), each = nrow(df)),
##                          dist = as.numeric(as.matrix(df)))
## ggplot(df.l, aes(dist)) + facet_wrap(~ param) + geom_freqpoly(binwidth = .05) +
##   theme_classic() + scale_x_continuous(breaks = NULL) +
##   theme(strip.text.x = element_text(size = 8)) + xlab("")
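## Rough numerical complement to the rank histograms (an illustrative sketch, not
## part of the reported results, and ignoring the thinning used in the plots): bin
## each parameter's rank statistics -- the columns of the df object recomputed just
## above for the informative-prior run -- and compare the bin counts to uniformity
## with a chi-square test. Small p-values would flag parameters whose ranks deviate
## from uniform. The object name unif_p is ours.
unif_p <- apply(df, 2, function(r){
  counts <- table(cut(r, breaks = seq(0, 1, by = .1), include.lowest = TRUE))
  chisq.test(counts)$p.value
})
round(sort(unif_p), 3)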