########################################################## ## Simulation DINA Model ########################################################## library(MASS) library(CDM) # read model parameters pars <- read.table("simulation_parameters.dat", header = TRUE) set.seed(280715) # define parameters of skill class distribution # vector mu which indirectly defines skill distributions mu <- c(0, 0, -0.4, 0.5, 0.3) # covariance matrix of tetrachoric correlations among skills Sigma <- diag(1, 5) Sigma[1, 2] <- Sigma[1, 3] <- Sigma[2, 3] <- 0.5 Sigma[4, 5] <- 0.8 Sigma[1, 4] <- Sigma[1, 5] <- Sigma[2, 4] <- Sigma[2, 5] <- 0.1 Sigma[3, 4] <- Sigma[3, 5] <- 0.3 Sigma[lower.tri(Sigma)] <- t(Sigma)[lower.tri(t(Sigma))] # extract Q-matrix and item parameters qmatrix <- pars[, 2:6] slip <- 1 - pars$slip guess <- pars$guess # number of replications BB <- 1000 # define sample sizes N.vec <- c(100, 300, 1000) NL <- length(N.vec) # define data frame for simulation results dfr <- NULL # loop over replications for (bb in 1:BB) { for (nn in seq(1, NL)) { N <- N.vec[nn] dat <- sim.din(N, q.matrix = qmatrix, slip = slip, guess = guess, mean = mu, Sigma = Sigma)$dat mod <- din(dat, q.matrix = qmatrix, conv.crit = 1e-04) # collect results: iterations simulation index, sample size, guessing and # slipping parameters l1 <- c(N, mod$guess$est, mod$slip$est) # skill class probabilities and marginal skill class probabilities l1 <- c(l1, mod$attribute.patt$class.prob, mod$skill.patt[, "skill.prob"]) dfr <- rbind(dfr, l1) } } # define indices I <- length(guess) K <- ncol(qmatrix) index_guess <- 1 + 1:I index_slip <- 1 + I + 1:I index_probs <- 1 + 2 * I + 1:(2^K) # compute true parameters of skill class distribution this is conveniently done # via simulation N <- 3e+06 skills <- mvrnorm(N, mu = mu, Sigma = Sigma) skills <- 1 * (skills > 0) # dichotomize skills # skill pattern in din output skillpatt <- rownames(mod$attribute.patt) # probabilities of skill patterns a1 <- skills[, 1] for (kk in 2:K) { a1 <- paste0(a1, skills[, kk]) } # true skill probabilities skillprobs <- table(a1)/N skillprobs <- skillprobs[skillpatt] # compute summary table summ_table <- as.data.frame(matrix(NA, nrow = 9, ncol = 6)) colnames(summ_table) <- c(paste0("Bias_", c("mean", "|min|", "|max|")), paste0("RMSE_", c("mean", "|min|", "|max|"))) rownames(summ_table) <- apply(expand.grid(N.vec, c("guess", "slip", "skillprobs")), 1, FUN = function(pp) paste0(pp[2], "_", pp[1])) # Bias guessing a1 <- t(aggregate(dfr[, index_guess], list(dfr[, 1]), mean)[, -1]) bias <- a1 - guess i1 <- 1:NL # vector of indices summ_table[i1, "Bias_mean"] <- colMeans(bias) summ_table[i1, "Bias_|min|"] <- apply(abs(bias), 2, min) summ_table[i1, "Bias_|max|"] <- apply(abs(bias), 2, max) # RMSE guessing a1 <- t(aggregate(dfr[, index_guess], list(dfr[, 1]), var)[, -1]) rmse <- sqrt(a1 + bias^2) summ_table[i1, "RMSE_mean"] <- colMeans(rmse) summ_table[i1, "RMSE_|min|"] <- apply(abs(rmse), 2, min) summ_table[i1, "RMSE_|max|"] <- apply(abs(rmse), 2, max) # Bias slipping a1 <- t(aggregate(dfr[, index_slip], list(dfr[, 1]), mean)[, -1]) bias <- a1 - slip i1 <- NL + 1:NL summ_table[i1, "Bias_mean"] <- colMeans(bias) summ_table[i1, "Bias_|min|"] <- apply(abs(bias), 2, min) summ_table[i1, "Bias_|max|"] <- apply(abs(bias), 2, max) # RMSE slipping a1 <- t(aggregate(dfr[, index_slip], list(dfr[, 1]), var)[, -1]) rmse <- sqrt(a1 + bias^2) summ_table[i1, "RMSE_mean"] <- colMeans(rmse) summ_table[i1, "RMSE_|min|"] <- apply(abs(rmse), 2, min) summ_table[i1, "RMSE_|max|"] <- apply(abs(rmse), 2, max) # Bias skillprobs a1 <- t(aggregate(dfr[, index_probs], list(dfr[, 1]), mean)[, -1]) bias <- a1 - as.vector(skillprobs) i1 <- 2 * NL + 1:NL summ_table[i1, "Bias_mean"] <- colMeans(bias) summ_table[i1, "Bias_|min|"] <- apply(abs(bias), 2, min) summ_table[i1, "Bias_|max|"] <- apply(abs(bias), 2, max) # RMSE skillprobs a1 <- t(aggregate(dfr[, index_probs], list(dfr[, 1]), var)[, -1]) rmse <- sqrt(a1 + bias^2) summ_table[i1, "RMSE_mean"] <- colMeans(rmse) summ_table[i1, "RMSE_|min|"] <- apply(abs(rmse), 2, min) summ_table[i1, "RMSE_|max|"] <- apply(abs(rmse), 2, max) write.csv(summ_table, "jss_table4.csv", quote = FALSE) ########################################################## ## CDM software check ## CDM R routine and Ox code of de la Torre ########################################################## # read data and Q-matrix dat <- read.table("sample_data.dat") q.matrix <- read.table("sample_qmatrix.dat") # fit DINA model in CDM mod <- CDM::din(dat = dat, q.matrix = q.matrix, conv = 1e-05) mod$time # obtain Ox code 'dina_em_beta.ox' from Jimmy de la Torre # (j.delatorre@rutgers.edu) and adapt lines 5-9 ##5 decl N=1000,J=30,K=5; ##6 decl data={"sample.dat"}; ##7 decl qmatrix={"q_sample.txt"}; ##8 decl emp_bayes=1,sim=0,seed=255; ##9 decl crit=0.00001,maxit=999; # N ... number of students # J ... items # K ... number of skills # data ... data file name # qmatrix ... Q-matrix # crit ... convergence criterion # define call to Ox (MS Windows) # user specific: define directory in which Ox console 'oxl.exe' is located # ox_locate <- 'C:\\Programs\\OxMetrics6\\Ox\\bin\\' # execute DINA routine 'dina_em_beta.ox' and save output in 'ox_output.log' # ox_call <- paste0(ox_locate, 'oxl.exe\\ dina_em_beta.ox > ox_output.log') # system(ox_call) # read Ox output log file out <- readLines("ox_output.log") # compare computation times use assessed time difference in CDM and # reported elapsed time in Ox mod$time # CDM out[grep("Elapsed", out)] # Ox mod$time # CDM ## Time difference of 0.272028 secs out[grep("Elapsed" , out)] # Ox ## "Elapsed Time: 1.06" # compare guessing and slipping parameters of CDM and Ox ind1 <- grep("Parameter Estimates", out) + 2 ind2 <- grep("Latent Classes", out) - 2 vec <- strsplit(out[ind1:ind2], " ") vec <- lapply(vec, FUN = function(vv) vv[vv != ""]) # guessing max(abs(mod$guess$est - as.numeric(unlist(lapply(vec, FUN = function(vv) vv[2]))))) ## [1] 6.278001e-06 # slipping max(abs(mod$slip$est - as.numeric(unlist(lapply(vec, FUN = function(vv) vv[4]))))) ## [1] 5.454171e-06 # compare marginal skill proportions of CDM and Ox ind <- grep("Estimates of Attribute Prevalence", out) + 1 vec <- strsplit(out[seq(ind, ind + 4)], " ") vec <- lapply(vec, FUN = function(vv) { vv[vv != ""] }) max(abs(mod$skill.patt - as.numeric(unlist(lapply(vec, FUN = function(vv) vv[2]))))) ## [1] 4.332956e-06