# This script requires brms version 2.11.5 or higher to fully run.

# The current release version of brms can be installed via
# install.packages("brms")

# The current developmental version of brms can be installed via
# remotes::install_github("paul-buerkner/brms")


# load required packages
library("tidyverse")
library("brms")
# for comparison with brms
library("lme4")
library("TAM")

# set ggplot theme
theme_set(bayesplot::theme_default())

# set rstan options
rstan::rstan_options(auto_write = TRUE)
options(mc.cores = 2)

# create a "models" folder in the current working directory to store fitted
# model objects for easier re-usage
if (!dir.exists("models")) {
  dir.create("models")
}

# Although I set a seed for all models, the results are only exactly
# reproducible on the same operating system with the same C++ compiler (and
# version) as well as the same version of Stan. 
# Thus, when you run the code below, it will likely not produce exactly the
# same results as shown in the paper. For the latter, I have used Windows 10,
# The g++ compiler version 8.3.0 for C++ (shipped with Rtools 4) as well as
# rstan 2.19.3 with StanHeaders 2.19.2. R version is 4.0.4.

# ----------- Code for Section 5.1 ------------ Analysis of the VerbAgg data
# set using dichotomous IRT models
data("VerbAgg", package = "lme4")

# get an overview of the data
head(VerbAgg, 10)

# ---------- 1PL models ---------------------- specify a 1PL model in brms
formula_va_1pl <- bf(r2 ~ 1 + (1 | item) + (1 | id))

# specify some weakly informative priors
prior_va_1pl <- prior("normal(0, 3)", class = "sd", group = "id") +
  prior("normal(0, 3)", class = "sd", group = "item")

# fit the 1PL model
fit_va_1pl <- brm(formula = formula_va_1pl, data = VerbAgg, 
  family = brmsfamily("bernoulli", "logit"), prior = prior_va_1pl, seed = 1234,
  file = "models/fit_va_1pl")

# obtain basic summaries
summary(fit_va_1pl)
# Figure 1
plot(fit_va_1pl, ask = FALSE)

# extract person parameters
ranef_va_1pl <- ranef(fit_va_1pl)
(person_pars_va_1pl <- ranef_va_1pl$id)

# extract item parameters
(item_pars_va_1pl <- coef(fit_va_1pl)$item)

# plot item parameters
# Figure 2
item_pars_va_1pl[, , "Intercept"] %>%
  as_tibble() %>%
  rownames_to_column() %>%
  rename(item = "rowname") %>%
  mutate(item = as.numeric(item)) %>%
  ggplot(aes(item, Estimate, ymin = Q2.5, ymax = Q97.5)) +
    geom_pointrange() +
    coord_flip() + labs(x = "Item Number")

# plot person parameters
# Figure 3
person_pars_va_1pl[, , "Intercept"] %>%
  as_tibble() %>%
  rownames_to_column() %>%
  arrange(Estimate) %>%
  mutate(id = seq_len(n())) %>%
  ggplot(aes(id, Estimate, ymin = Q2.5, ymax = Q97.5)) + 
    geom_pointrange(alpha = 0.7) +
    coord_flip() + labs(x = "Person Number (Sorted)")

# specify a 1PL model with lme4 for comparison
lme4_va_1pl <- glmer(r2 ~ 1 + (1 | item) + (1 | id), data = VerbAgg,
  family = binomial())
summary(lme4_va_1pl)

# person and item parameters are similar to those obtained by brms
coef(lme4_va_1pl)$item
ranef(lme4_va_1pl)$id

# specify a 1PL model with TAM for comparison bring the data in wide structure
# first
VerbAgg_wide <- VerbAgg %>%
  select(item, id, r2) %>%
  mutate(r2 = ifelse(r2 == "Y", 1, 0)) %>%
  spread(key = "item", value = "r2") %>%
  select(-id)

# fit the model with TAM
tam_va_1pl <- tam(VerbAgg_wide, irtmodel = "1PL", verbose = FALSE)

# person and item parameters are similar to those obtained by brms
summary(tam_va_1pl)
IRT.factor.scores(tam_va_1pl)


# ---------- 2PL models ---------------------- specify a 2PL model
formula_va_2pl <- bf(r2 ~ exp(logalpha) * eta, eta ~ 1 + (1 | i | item) +
  (1 | id), logalpha ~ 1 + (1 | i | item), nl = TRUE)

# specify some weakly informative priors
prior_va_2pl <- prior("normal(0, 5)", class = "b", nlpar = "eta") +
  prior("normal(0, 1)", class = "b", nlpar = "logalpha") +
  prior("constant(1)", class = "sd", group = "id", nlpar = "eta") +
  prior("normal(0, 3)", class = "sd", group = "item", nlpar = "eta") +
  prior("normal(0, 1)", class = "sd", group = "item", nlpar = "logalpha")

# fit the 2PL model this models throws some convergence warnings which are
# false positives and can be safely ignored
fit_va_2pl <- brm(formula = formula_va_2pl, data = VerbAgg,
  family = brmsfamily("bernoulli", "logit"), prior = prior_va_2pl, seed = 1234,
  file = "models/fit_va_2pl")

# obtain some basic summaries
summary(fit_va_2pl)
# No figure in paper
plot(fit_va_2pl, ask = FALSE)

# extract item parameters
(item_pars_va_1pl <- coef(fit_va_2pl)$item)

# plot item parameters difficulties
eta <- item_pars_va_1pl[, , "eta_Intercept"] %>%
  as_tibble() %>%
  rownames_to_column()

# discriminations
alpha <- item_pars_va_1pl[, , "logalpha_Intercept"] %>%
  exp() %>%
  as_tibble() %>%
  rownames_to_column()

# plot difficulties and discrimination next to each other
# Figure 4
bind_rows(eta, alpha, .id = "nlpar") %>%
  rename(item = "rowname") %>%
  mutate(item = as.numeric(item)) %>%
  mutate(nlpar = factor(nlpar, labels = c("Easiness", "Discrimination"))) %>%
  ggplot(aes(item, Estimate, ymin = Q2.5, ymax = Q97.5)) +
    facet_wrap("nlpar", scales = "free_x") +
    geom_pointrange() +
    coord_flip() +
    labs(x = "Item Number")

# extract person parameters
ranef_va_2pl <- ranef(fit_va_2pl)
(person_pars_va_2pl <- ranef_va_2pl$id)

# plot person parameters
# Figure 5
person_pars_va_2pl[, , "eta_Intercept"] %>%
  as_tibble() %>%
  rownames_to_column() %>%
  select(-Est.Error) %>%
  arrange(Estimate) %>%
  mutate(id = seq_len(n())) %>%
  ggplot(aes(id, Estimate, ymin = Q2.5, ymax = Q97.5)) +
    geom_pointrange(alpha = 0.7) +
    coord_flip() +
    labs(x = "Person Number (Sorted)")

# perform model comparison via approximate LOO-CV
loo_va_1pl <- loo(fit_va_1pl)
loo_va_2pl <- loo(fit_va_2pl)
loo_va_compare <- loo_compare(loo_va_1pl, loo_va_2pl)
print(loo_va_compare, simplify = FALSE)


# ---------- 1PL models with covariates ---------------------- specify a model
# including item covariates
formula_va_1pl_cov1 <- bf(r2 ~ btype + situ + mode + (1 | item) + 
  (0 + mode | id))

fit_va_1pl_cov1 <- brm(formula = formula_va_1pl_cov1, data = VerbAgg,
  family = brmsfamily("bernoulli", "logit"), prior = prior_va_1pl, seed = 1234,
  file = "models/fit_va_1pl_cov1")

summary(fit_va_1pl_cov1)
# Figure 6
conditional_effects(fit_va_1pl_cov1, "mode")

# compare standard deviations
hyp <- "modedo - modewant > 0"
hypothesis(fit_va_1pl_cov1, hyp, class = "sd", group = "id")

# fit a more complex covariate model
formula_va_1pl_cov2 <- bf(r2 ~ Anger + Gender + btype + situ + mode +
  mode:Gender + (0 + Gender | item) + (0 + mode | id))
fit_va_1pl_cov2 <- brm(formula = formula_va_1pl_cov2, data = VerbAgg,
  family = brmsfamily("bernoulli", "logit"), prior = prior_va_1pl, seed = 1234,
  file = "models/fit_va_1pl_cov2")

summary(fit_va_1pl_cov2)
# Figure 7
plot(conditional_effects(fit_va_1pl_cov2, c("Anger", "mode:Gender")),
  ask = FALSE)


# perform explicit DIF analysis compute the DIF covariate
VerbAgg$dif <- as.numeric(with(VerbAgg, Gender == "F" & mode == "do" &
  btype %in% c("curse", "scold")))

# fit and summarize the DIF model
formula_va_1pl_dif1 <- bf(r2 ~ Gender + dif + (1 | item) + (1 | id))
fit_va_1pl_dif1 <- brm(formula = formula_va_1pl_dif1, data = VerbAgg,
  family = brmsfamily("bernoulli", "logit"), prior = prior_va_1pl, seed = 1234,
  file = "models/fit_va_1pl_dif1")
summary(fit_va_1pl_dif1)


# compare convergence of lme4 and brms for a complex covariate model does not
# converge well
glmer_va_1pl_cov_full <- lme4::glmer(r2 ~ 1 + Anger + Gender + btype + situ +
    mode + (1 + Anger + Gender | item) + (1 + btype + situ + mode | id), 
  data = VerbAgg,
  family = binomial("logit"))
summary(glmer_va_1pl_cov_full)

# converges nicely and shows sensible results
fit_va_1pl_cov_full <- brm(r2 ~ 1 + Anger + Gender + btype + situ + mode +
    (1 + Anger + Gender | item) + (1 + btype + situ + mode | id),
  data = VerbAgg, family = brmsfamily("bernoulli", "logit"),
  prior = prior_va_1pl, seed = 1234, file = "models/fit_va_1pl_cov_full")
summary(fit_va_1pl_cov_full)


# ---------- 3PL models ---------------------- 3PL model with known guessing
# parameter
formula_va_3pl <- bf(r2 ~ 0.25 + 0.75 * inv_logit(exp(logalpha) * eta),
  eta ~ 1 + (1 | i | item) + (1 | id), logalpha ~ 1 + (1 | i | item), nl = TRUE)
family_va_3pl <- brmsfamily("bernoulli", link = "identity")

# 3PL model with unknown guessing parameters
formula_va_3pl <- bf(r2 ~ gamma + (1 - gamma) * inv_logit(exp(logalpha) * eta),
  eta ~ 1 + (1 | i | item) + (1 | id), logalpha ~ 1 + (1 | i | item), 
  logitgamma ~ 1 + (1 | i | item), nlf(gamma ~ inv_logit(logitgamma)),
  nl = TRUE)


# ----------- Code for Section 5.2 ------------ fit ordinal models to the
# VerbAgg data

# specify a basic GRM this models throws some convergence warnings which are
# false positives and can be safely ignored
formula_va_ord_1pl <- bf(resp ~ 1 + (1 | item) + (1 | id))
fit_va_ord_1pl <- brm(formula = formula_va_ord_1pl, data = VerbAgg,
  family = brmsfamily("cumulative", "logit"), prior = prior_va_1pl, seed = 1234,
  file = "models/fit_va_ord_1pl")

summary(fit_va_ord_1pl)
plot(fit_va_ord_1pl, ask = FALSE)

# extract item and person parameters
(ranef_va_ord_1pl <- ranef(fit_va_ord_1pl))

# plot person parameters
# No figure in paper
ranef_va_ord_1pl$id[, , "Intercept"] %>%
  as_tibble() %>%
  rownames_to_column() %>%
  arrange(Estimate) %>%
  mutate(id = seq_len(n())) %>%
  ggplot(aes(id, Estimate, ymin = Q2.5, ymax = Q97.5)) +
    geom_pointrange(alpha = 0.7) +
    coord_flip() +
    labs(x = "Person Number (Sorted)")


# compare GRM with 1PL model
# figure 9
df_person_1pl <- as_tibble(cbind(
  mean_va_1pl = ranef_va_1pl$id[, "Estimate", "Intercept"],
  mean_va_ord_1pl = ranef_va_ord_1pl$id[, "Estimate", "Intercept"],
  se_va_1pl = ranef_va_1pl$id[, "Est.Error", "Intercept"],
  se_va_ord_1pl = ranef_va_ord_1pl$id[, "Est.Error", "Intercept"]
))

df_person_1pl %>%
  ggplot(aes(mean_va_1pl, mean_va_ord_1pl, color = se_va_1pl)) +
  geom_point() +
  geom_abline() +
  scale_color_viridis_c() +
  lims(x = c(-4, 5), y = c(-4, 5)) +
  labs(
    x = "Mean (binary)",
    y = "Mean (ordinal)",
    color = "SD (binary)"
  )

df_person_1pl %>%
  ggplot(aes(se_va_1pl, se_va_ord_1pl, color = mean_va_1pl)) +
  geom_point() +
  geom_abline() +
  scale_color_viridis_c() +
  lims(x = c(0.3, 0.8), y = c(0.3, 0.8)) +
  labs(
    x = "SD (binary)",
    y = "SD (ordinal)",
    color = "Mean (binary)"
  )


# -------------- ordinal 1PL models with varying thresholds ---------- specify
# a GRM with varying thresholds across items
formula_va_ord_thres_1pl <- bf(resp | thres(gr = item) ~ 1 + (1 | id))
prior_va_ord_thres_1pl <- prior("normal(0, 3)", class = "Intercept") +
  prior("normal(0, 3)", class = "sd", group = "id")

# this models throws some convergence warnings which are false positives and
# can be safely ignored
fit_va_ord_thres_1pl <- brm(formula = formula_va_ord_thres_1pl, data = VerbAgg,
  family = brmsfamily("cumulative", "logit"), prior = prior_va_ord_thres_1pl,
  inits = 0, chains = 2, seed = 1234, file = "models/fit_va_ord_thres_1pl")
summary(fit_va_ord_thres_1pl)

# perform model comparison
loo(fit_va_ord_1pl, fit_va_ord_thres_1pl)


# -------------- ordinal 2PL models --------------- specify a GRM with varying
# discriminations
formula_va_ord_2pl <- bf(resp ~ 1 + (1 | i | item) + (1 | id), disc ~ 1 +
  (1 | i | item))

# some weakly informative priors
prior_va_ord_2pl <- prior("constant(1)", class = "sd", group = "id") +
  prior("normal(0, 3)", class = "sd", group = "item") +
  prior("normal(0, 1)", class = "sd", group = "item", dpar = "disc")

# fit the model this models throws some convergence warnings which are false
# positives and can be safely ignored
fit_va_ord_2pl <- brm(formula = formula_va_ord_2pl, data = VerbAgg,
  family = brmsfamily("cumulative", "logit"), prior = prior_va_ord_2pl,
  seed = 1234, file = "models/fit_va_ord_2pl")
summary(fit_va_ord_2pl)

# extract item and person parameters
(ranef_va_ord_2pl <- ranef(fit_va_ord_2pl))

# plot person parameters item easinesses (deviations from thresholds)
eta <- ranef_va_ord_2pl$item[, , "Intercept"] %>%
  as_tibble() %>%
  rownames_to_column()

# discriminations
alpha <- ranef_va_ord_2pl$item[, , "disc_Intercept"] %>%
  exp() %>%
  as_tibble() %>%
  rownames_to_column()

# put easinesses and discriminations together
# Figure 10
bind_rows(eta, alpha, .id = "nlpar") %>%
  rename(item = "rowname") %>%
  mutate(item = as.numeric(item)) %>%
  mutate(nlpar = factor(nlpar, labels = c("Easiness", "Discrimination"))) %>%
  ggplot(aes(item, Estimate, ymin = Q2.5, ymax = Q97.5)) +
    facet_wrap("nlpar", scales = "free_x") +
    geom_pointrange() +
    coord_flip() +
    labs(x = "Item Number")

# compute correlations between person parameters across models
cbind(va_1pl = ranef_va_1pl$id[, "Estimate", "Intercept"],
  va_2pl = ranef_va_2pl$id[, "Estimate", "eta_Intercept"],
  va_ord_1pl = ranef_va_ord_1pl$id[, "Estimate", "Intercept"],
  va_ord_2pl = ranef_va_ord_2pl$id[, "Estimate", "Intercept"]) %>%
cor() %>%
round(3)


# ------- ordinal models with covariates ------------------- fit a GRM with
# person and item covariates this models throws some convergence warnings which
# are false positives and can be safely ignored
formula_va_ord_cov1 <- bf(resp ~ Anger + Gender + btype + situ + mode +
  mode:Gender + (0 + Gender | item) + (0 + mode | id))
fit_va_ord_cov1 <- brm(formula = formula_va_ord_cov1, data = VerbAgg,
  family = brmsfamily("cumulative", "logit"), prior = prior_va_1pl, seed = 1234,
  file = "models/fit_va_ord_cov1")
summary(fit_va_ord_cov1)

# plot effects of Anger
# Figure 10, 11
conditional_effects(fit_va_ord_cov1, effects = "Anger", categorical = TRUE)


# fit a PCM with covariates and a category specific effect of "Anger"
formula_va_ord_cov2 <- bf(resp ~ cs(Anger) + Gender + btype + situ + mode +
  mode:Gender + (0 + Gender | item) + (0 + mode | id))

# fit the model
fit_va_ord_cov2 <- brm(formula = formula_va_ord_cov2, data = VerbAgg,
  family = brmsfamily("acat", "logit"), prior = prior_va_1pl, seed = 1234,
  file = "models/fit_va_ord_cov2")

# summarize the results
summary(fit_va_ord_cov2)
# Figure 13
conditional_effects(fit_va_ord_cov2, effects = "Anger", categorical = TRUE)


# ----------- Code for Section 5.2 ------------ Analysis of the rotation data
# set using response times IRT models
data("rotation", package = "diffIRT")
rotation <- rotation %>%
  as_tibble() %>%
  mutate(person = seq_len(n())) %>%
  gather("key", "value", -person) %>%
  extract("key", into = c("type", "item"), regex = "(.)\\[(.+)\\]") %>%
  spread("type", "value") %>%
  rename(time = T, resp = X) %>%
  mutate(rotate = factor(case_when(item %in% c(2, 5, 8) ~ 50,
    item %in% c(3, 6, 10) ~ 100, item %in% c(1, 4, 7, 9) ~ 150)),
    item = as.numeric(item))

# get an overview of the data
head(rotation, 10)


# specify a distributional exgaussian model
bform_exg1 <- bf(time ~ rotate + (1 | p | person) + (1 | i | item),
  sigma ~ rotate + (1 | p | person) + (1 | i | item),
  beta ~ rotate + (1 | p | person) + (1 | i | item))

# fit the model
fit_exg1 <- brm(bform_exg1, data = rotation, family = brmsfamily("exgaussian",
  link_sigma = "log", link_beta = "log"), chains = 4, cores = 4, inits = 0,
  control = list(adapt_delta = 0.99), seed = 1234, file = "models/fit_exg1")

# summarize the results
summary(fit_exg1)
# Figure 14
pp_check(fit_exg1)

# visualize effects of "rotate"
# Figure 15
conditional_effects(fit_exg1, "rotate", dpar = "mu")
conditional_effects(fit_exg1, "rotate", dpar = "sigma")
conditional_effects(fit_exg1, "rotate", dpar = "beta")


# specify a 3-parameter drift diffusion model
bform_drift1 <- bf(time | dec(resp) ~ rotate + (1 | p | person) +
    (1 | i | item), bs ~ rotate + (1 | p | person) + (1 | i | item),
  ndt ~ rotate + (1 | p | person) + (1 | i | item), bias = 0.5)

# specify initial values to help the model start sampling diffusion models
# require quite a bit of working memory and running multiple chains in parallel
# may exceed memory capacity of some standard laptops for this reason, we run
# only a single chain here but, in practice, running multiple chains is
# recommended for increased estimation accuracy and better convergence
# diagnostics
chains <- 1
inits_drift <- list(Intercept_ndt = -3)
inits_drift <- replicate(chains, inits_drift, simplify = FALSE)

# fit the model
fit_drift1 <- brm(bform_drift1, data = rotation, family = brmsfamily("wiener",
    "log", link_bs = "log", link_ndt = "log"), chains = chains, cores = chains,
  inits = inits_drift, init_r = 0.05, control = list(adapt_delta = 0.99),
  seed = 1234, file = "models/fit_drift1")

# summarize the model
summary(fit_drift1)

# extract item specific parameters
coef(fit_drift1)$item

# plot the effect of "rotate"
# Figure 16
conditional_effects(fit_drift1, "rotate", dpar = "mu")
conditional_effects(fit_drift1, "rotate", dpar = "bs")
conditional_effects(fit_drift1, "rotate", dpar = "ndt")


# specify a drift diffusion model without "rotate" affecting the boundary
# separation
bform_drift2 <- bf(time | dec(resp) ~ rotate + (1 | p | person) +
   (1 | i | item),
  bs ~ 1 + (1 | p | person), ndt ~ rotate + (1 | p | person) + (1 | i | item),
  bias = 0.5)

# fit the model
fit_drift2 <- brm(bform_drift2, rotation, family = wiener("log",
  link_bs = "log", link_ndt = "log"), chains = chains, cores = chains,
  inits = inits_drift, init_r = 0.05, control = list(adapt_delta = 0.99),
  seed = 1234, file = "models/fit_drift2")

# perform model comparison via approximate LOO-CV
loo_drift1 <- loo(fit_drift1)
loo_drift2 <- loo(fit_drift2)
loo_drift_compare <- loo_compare(loo_drift1, loo_drift2)
print(loo_drift_compare, simplify = FALSE)