##########################################################
## Simulation DINA Model
##########################################################

library(MASS)
library(CDM)

# read model parameters
pars <- read.table("simulation_parameters.dat", header = TRUE)

set.seed(280715)

# define parameters of skill class distribution

# vector mu which indirectly defines skill distributions
mu <- c(0, 0, -0.4, 0.5, 0.3)

# covariance matrix of tetrachoric correlations among skills
Sigma <- diag(1, 5)
Sigma[1, 2] <- Sigma[1, 3] <- Sigma[2, 3] <- 0.5
Sigma[4, 5] <- 0.8
Sigma[1, 4] <- Sigma[1, 5] <- Sigma[2, 4] <- Sigma[2, 5] <- 0.1
Sigma[3, 4] <- Sigma[3, 5] <- 0.3
Sigma[lower.tri(Sigma)] <- t(Sigma)[lower.tri(t(Sigma))]

# extract Q-matrix and item parameters
qmatrix <- pars[, 2:6]
slip <- 1 - pars$slip
guess <- pars$guess

# number of replications
BB <- 1000

# define sample sizes
N.vec <- c(100, 300, 1000)
NL <- length(N.vec)

# define data frame for simulation results
dfr <- NULL

# loop over replications
for (bb in 1:BB) {
  for (nn in seq(1, NL)) {
    N <- N.vec[nn]
    dat <- sim.din(N, q.matrix = qmatrix, slip = slip, guess = guess, mean = mu, 
      Sigma = Sigma)$dat
    mod <- din(dat, q.matrix = qmatrix, conv.crit = 1e-04)
    # collect results: iterations simulation index, sample size, guessing and
    # slipping parameters
    l1 <- c(N, mod$guess$est, mod$slip$est)
    # skill class probabilities and marginal skill class probabilities
    l1 <- c(l1, mod$attribute.patt$class.prob, mod$skill.patt[, "skill.prob"])
    dfr <- rbind(dfr, l1)
  }
}

# define indices
I <- length(guess)
K <- ncol(qmatrix)
index_guess <- 1 + 1:I
index_slip <- 1 + I + 1:I
index_probs <- 1 + 2 * I + 1:(2^K)

# compute true parameters of skill class distribution this is conveniently done
# via simulation
N <- 3e+06
skills <- mvrnorm(N, mu = mu, Sigma = Sigma)
skills <- 1 * (skills > 0)  # dichotomize skills

# skill pattern in din output
skillpatt <- rownames(mod$attribute.patt)

# probabilities of skill patterns
a1 <- skills[, 1]
for (kk in 2:K) {
  a1 <- paste0(a1, skills[, kk])
}

# true skill probabilities
skillprobs <- table(a1)/N
skillprobs <- skillprobs[skillpatt]

# compute summary table

summ_table <- as.data.frame(matrix(NA, nrow = 9, ncol = 6))
colnames(summ_table) <- c(paste0("Bias_", c("mean", "|min|", "|max|")), paste0("RMSE_", 
  c("mean", "|min|", "|max|")))
rownames(summ_table) <- apply(expand.grid(N.vec, c("guess", "slip", "skillprobs")), 
  1, FUN = function(pp) paste0(pp[2], "_", pp[1]))

# Bias guessing
a1 <- t(aggregate(dfr[, index_guess], list(dfr[, 1]), mean)[, -1])
bias <- a1 - guess

i1 <- 1:NL  # vector of indices
summ_table[i1, "Bias_mean"] <- colMeans(bias)
summ_table[i1, "Bias_|min|"] <- apply(abs(bias), 2, min)
summ_table[i1, "Bias_|max|"] <- apply(abs(bias), 2, max)

# RMSE guessing
a1 <- t(aggregate(dfr[, index_guess], list(dfr[, 1]), var)[, -1])
rmse <- sqrt(a1 + bias^2)

summ_table[i1, "RMSE_mean"] <- colMeans(rmse)
summ_table[i1, "RMSE_|min|"] <- apply(abs(rmse), 2, min)
summ_table[i1, "RMSE_|max|"] <- apply(abs(rmse), 2, max)

# Bias slipping
a1 <- t(aggregate(dfr[, index_slip], list(dfr[, 1]), mean)[, -1])
bias <- a1 - slip

i1 <- NL + 1:NL
summ_table[i1, "Bias_mean"] <- colMeans(bias)
summ_table[i1, "Bias_|min|"] <- apply(abs(bias), 2, min)
summ_table[i1, "Bias_|max|"] <- apply(abs(bias), 2, max)

# RMSE slipping
a1 <- t(aggregate(dfr[, index_slip], list(dfr[, 1]), var)[, -1])
rmse <- sqrt(a1 + bias^2)

summ_table[i1, "RMSE_mean"] <- colMeans(rmse)
summ_table[i1, "RMSE_|min|"] <- apply(abs(rmse), 2, min)
summ_table[i1, "RMSE_|max|"] <- apply(abs(rmse), 2, max)

# Bias skillprobs
a1 <- t(aggregate(dfr[, index_probs], list(dfr[, 1]), mean)[, -1])
bias <- a1 - as.vector(skillprobs)

i1 <- 2 * NL + 1:NL
summ_table[i1, "Bias_mean"] <- colMeans(bias)
summ_table[i1, "Bias_|min|"] <- apply(abs(bias), 2, min)
summ_table[i1, "Bias_|max|"] <- apply(abs(bias), 2, max)

# RMSE skillprobs
a1 <- t(aggregate(dfr[, index_probs], list(dfr[, 1]), var)[, -1])
rmse <- sqrt(a1 + bias^2)

summ_table[i1, "RMSE_mean"] <- colMeans(rmse)
summ_table[i1, "RMSE_|min|"] <- apply(abs(rmse), 2, min)
summ_table[i1, "RMSE_|max|"] <- apply(abs(rmse), 2, max)

write.csv(summ_table, "jss_table4.csv", quote = FALSE)


##########################################################
## CDM software check
## CDM R routine and Ox code of de la Torre
##########################################################

# read data and Q-matrix
dat <- read.table("sample_data.dat")
q.matrix <- read.table("sample_qmatrix.dat")

# fit DINA model in CDM
mod <- CDM::din(dat = dat, q.matrix = q.matrix, conv = 1e-05)
mod$time

# obtain Ox code 'dina_em_beta.ox' from Jimmy de la Torre
# (j.delatorre@rutgers.edu) and adapt lines 5-9
  ##5 decl N=1000,J=30,K=5;
  ##6 decl data={"sample.dat"};
  ##7 decl qmatrix={"q_sample.txt"};
  ##8 decl emp_bayes=1,sim=0,seed=255;
  ##9 decl crit=0.00001,maxit=999;

# N ... number of students
# J ... items
# K ... number of skills
# data ... data file name
# qmatrix ... Q-matrix
# crit ... convergence criterion

# define call to Ox (MS Windows)
# user specific: define directory in which Ox console 'oxl.exe' is located
# ox_locate <- 'C:\\Programs\\OxMetrics6\\Ox\\bin\\'
# execute DINA routine 'dina_em_beta.ox' and save output in 'ox_output.log'
# ox_call <- paste0(ox_locate, 'oxl.exe\\ dina_em_beta.ox > ox_output.log')
# system(ox_call)

# read Ox output log file
out <- readLines("ox_output.log")

# compare computation times use assessed time difference in CDM and
# reported elapsed time in Ox

mod$time  # CDM
out[grep("Elapsed", out)]  # Ox
mod$time  # CDM
## Time difference of 0.272028 secs
out[grep("Elapsed" , out)]  # Ox
## "Elapsed Time: 1.06"

# compare guessing and slipping parameters of CDM and Ox

ind1 <- grep("Parameter Estimates", out) + 2
ind2 <- grep("Latent Classes", out) - 2

vec <- strsplit(out[ind1:ind2], " ")
vec <- lapply(vec, FUN = function(vv) vv[vv != ""])

# guessing
max(abs(mod$guess$est - as.numeric(unlist(lapply(vec, FUN = function(vv) vv[2])))))
## [1] 6.278001e-06

# slipping
max(abs(mod$slip$est - as.numeric(unlist(lapply(vec, FUN = function(vv) vv[4])))))
## [1] 5.454171e-06


# compare marginal skill proportions of CDM and Ox

ind <- grep("Estimates of Attribute Prevalence", out) + 1
vec <- strsplit(out[seq(ind, ind + 4)], " ")
vec <- lapply(vec, FUN = function(vv) {
  vv[vv != ""]
})
max(abs(mod$skill.patt - as.numeric(unlist(lapply(vec, FUN = function(vv) vv[2])))))
## [1] 4.332956e-06