## Journal of Statistical Software ## ## covsim: An R package for simulating non-normal data for Structural Equation Models Using Copulas ## S. Gronneberg, N. Foldnes, K. Marcoulides ## library("MASS") library("lavaan") library("copula") library("rvinecopulib") library("covsim") library("discnorm") library("ggExtra") library("GGally") library("psych") library("VineCopula") ## ---- Figure 1 and 2---- ## set.seed(1234) n <- 1000 ## Figs 1a and 2a tmp <- rCopula(n, normalCopula(param = 0.8)) # 14.1: corr 0.8 with chi-ssquare marginals p <- ggplot(data.frame(x = tmp[, 1], y = tmp[, 2]), aes(x, y)) + geom_point() + xlab(expr(U[1])) + ylab(expr(U[2])) ggExtra::ggMarginal(p, type = "histogram", bins = 15) tmp <- qnorm(tmp) # add N(0, 1) marginals p <- ggplot(data.frame(x = tmp[, 1], y = tmp[, 2]), aes(x, y)) + geom_point() + xlab(expr(Z[1])) + ylab(expr(Z[2])) ggExtra::ggMarginal(p, type = "histogram", bins = 15) set.seed(1234) n <- 1000 ## Figs 1b and 2b tmp <- rCopula(n, claytonCopula(param = 3.4)) p <- ggplot(data.frame(x = tmp[, 1], y = tmp[, 2]), aes(x, y)) + geom_point() + xlab(expr(U[1])) + ylab(expr(U[2])) ggExtra::ggMarginal(p, type = "histogram", bins = 15) tmp <- qnorm(tmp) # add N(0, 1) marginals p <- ggplot(data.frame(x = tmp[, 1], y = tmp[, 2]), aes(x, y)) + geom_point() + xlab(expr(Z[1])) + ylab(expr(Z[2])) ggExtra::ggMarginal(p, type = "histogram", bins = 15) ## --- VITA calibration for the clayton copula parameter 3.4 used above --- ## mnorm <- list(list(distr = "norm"), list(distr = "norm")) sigma.target <- matrix(c(1, 0.8, 0.8, 1), 2) set.seed(1) calibrated.vita <- vita(mnorm, sigma.target, family_set = "clayton") summary(calibrated.vita) ## simulate from vita and test accuracy library("rvinecopulib") cov(rvine(10^5, calibrated.vita)) ## --- Fig 5: VITA calibration in the 3-dimensional growth curve illustration --- ## sigma.target <- matrix(c(1, 0.4, 0.3, 0.4, 1, 0.4, 0.3, 0.4, 1), 3) margins <- list(list(distr = "norm"), list(distr = "chisq", df = 1), list(distr = "t", df = 5)) pcs <- list(list(bicop_dist("clayton"), bicop_dist("joe")), list(bicop_dist("frank"))) vine_cop<- vinecop_dist(pcs, structure = dvine_structure(1:3)) ## vita vector has these marginal variances: margin.variances <- c(1, 2, 5/3) ## the covariance matrix to be attained by vita must be scaled, therefore pre <- diag(sqrt(margin.variances/diag(sigma.target))) vita.target <- pre %*% sigma.target %*% pre set.seed(1) calibrated.vita <- vita(margins, vita.target, vc = vine_cop, verbose = TRUE) ## the vita vector must be post-multiplied to have unit variances post <- diag(1/diag(pre)) vita.sample <- rvine(10^5, calibrated.vita) %*% post round(cov(vita.sample)-sigma.target, 3) # approximately equal set.seed(1234) GGally::ggpairs(data.frame(rvine(10^3, calibrated.vita) %*% post), diag = list(continuous = "barDiag"), columnLabels = c("delta[1]", "delta[2]", "delta[3]"), labeller = "label_parsed") ## Fig 6 IG ## ## --- Subsection 3.1 on independent generator approach set.seed(1234) ig.sample <- rIG(N = 10^3, sigma.target = sigma.target, reps = 1, skewness = c(0, sqrt(8), 0), excesskurtosis = c(0, 12, 6)) GGally::ggpairs(data.frame(ig.sample[[1]]), diag = list(continuous = "barDiag"), columnLabels = c("delta[1]", "delta[2]", "delta[3]"), labeller = "label_parsed") ## simulation for growth model growth.mod <- " i = ~ 1*t1 + 1*t2 + 1*t3 s = ~ 0*t1 + 1*t2 + 2*t3 i~~1*i; s~~1*s; i~~start(0)*s; t1~~t2+t3; t2~~t3; t1~~c*t1; t2~~c*t2; t3~~c*t3; " set.seed(1) n <- 1000; reps <- 1000 par <- 9; parvalue <- 0 sim.df <- replicate(reps, { errors <- data.frame(MASS::mvrnorm(n, mu = rep(0, 3), Sigma = sigma.target)) i <- rnorm(n); s <- rnorm(n) t1 <- i + errors[, 1] t2 <- i + s + errors[, 2] t3 <- i + 2*s + errors[, 3] f <- sem(growth.mod, data.frame(t1, t2, t3)) cover.norm <- (parameterestimates(f)[par, "ci.lower"]- parvalue) * (parameterestimates(f)[par, "ci.upper"]- parvalue) < 0 errors <- data.frame(rvine(n, calibrated.vita) %*% post) i <- rnorm(n); s <- rnorm(n) t1 <- i + errors[, 1] t2 <- i + s + errors[, 2] t3 <- i + 2*s + errors[, 3] f <- sem(growth.mod, data.frame(t1, t2, t3)) cover.vita <- (parameterestimates(f)[par, "ci.lower"]- parvalue) * (parameterestimates(f)[par, "ci.upper"]- parvalue) < 0 c(cover.norm, cover.vita) }) sim.df <- data.frame(t(sim.df)) colMeans(sim.df, na.rm = TRUE) ## --- Fig 7: VITA calibration in the 6-dimensional growth curve illustration --- ## residual.covariance <- toeplitz(1:6) residual.covariance[residual.covariance > 3] <-0 residual.covariance[residual.covariance == 2] <- 0.5 residual.covariance[residual.covariance == 3] <- 0.2 margins.nonnorm <- list(list(distr = "norm"), list(distr = "chisq", df = 5), list(distr = "chisq", df = 4), list(distr = "chisq", df = 3), list(distr = "chisq", df = 2), list(distr = "chisq", df = 1)) margins.norm <- list(list(distr = "norm"), list(distr = "norm"), list(distr = "norm"), list(distr = "norm"), list(distr = "norm"), list(distr = "norm")) margin.variances <- c(1, 10, 8, 6, 4, 2) sigma.target <- diag(sqrt(margin.variances)) %*% residual.covariance %*% diag(sqrt(margin.variances)) ## calibration set.seed(1) vita1 <- vita(margins.norm, residual.covariance, family_set = "clayton") set.seed(1) vita2 <- vita(margins.nonnorm, sigma.target, family_set = "gauss") set.seed(1) vita3 <- vita(margins.nonnorm, sigma.target, family_set = "clayton") ## post multiplication matrix pre <- diag(sqrt(margin.variances/diag(residual.covariance))) post <- diag(1/diag(pre)) ## approximately equal at N = 10^5 round(cov(rvine(10^5, vita1))-residual.covariance, 2) round(cov(rvine(10^5, vita2) %*% post)-residual.covariance, 2) round(cov(rvine(10^5, vita3) %*% post)-residual.covariance, 2) ## DATA GENERATOR get_data <- function(n, cv) { residuals <- rvine(n, cv, cores = parallel::detectCores()) if (cv$margins[[6]]$distr != "norm") residuals <- residuals %*% post i <- rnorm(n); s <- rnorm(n) t1 <- i + residuals[, 1] t2 <- i + s + residuals[, 2] t3 <- i + 2*s + residuals[, 3] t4 <- i + 3*s + residuals[, 4] t5 <- i + 4*s + residuals[, 5] t6 <- i + 5*s + residuals[, 6] data.frame(t1 = t1, t2 = t2, t3 = t3, t4 = t4, t5 = t5, t6 = t6) } growth.model.toeplitz <- " i = ~ 1*t1 + 1*t2 + 1*t3 + 1*t4 + 1*t5 + 1*t6 s = ~ 0*t1 + 1*t2 + 2*t3 + 3*t4 + 4*t5 + 5*t6 i~~start(0)*s; t1~~b*t2+c*t3; t2~~b*t3+c*t4; t3~~b*t4+c*t5; t4~~b*t5+c*t6; t5~~b*t6 t1~~a*t1;t2~~a*t2; t3~~a*t3; t4~~a*t4; t5~~a*t5; t6~~a*t6; " ## ASYMPTOTIC POWER set.seed(1) big <- get_data(10^6, vita1) f <- sem(growth.model.toeplitz, big, estimator = "MLM") eigs1 <- Re(eigen(lavInspect(f, "UGamma"))$values) library("CompQuadForm") imhof(qchisq(0.95, 15), eigs1[1:15])$Qq # RR = 7.3% set.seed(1) big <- get_data(10^6, vita2) f <- sem(growth.model.toeplitz, big, estimator = "MLM") eigs2 <- Re(eigen(lavInspect(f, "UGamma"))$values) imhof(qchisq(0.95, 15), eigs2[1:15])$Qq # RR = 29.4% set.seed(1) big <- get_data(10^6, vita3) f <- sem(growth.model.toeplitz, big, estimator = "MLM") eigs3 <- Re(eigen(lavInspect(f, "UGamma"))$values) imhof(qchisq(0.95, 15), eigs3[1:15])$Qq # RR = 50.6% ## produce graphs get_density <- function(egs, x) { CDF.T <- NULL for (i in x) { CDF.T <- c(CDF.T, 1-imhof(i, egs)$Qq) } dens.val <- NULL for (i in 2:length(x)) { dens.val <- c(dens.val, CDF.T[i]- CDF.T[i-1]) } dens.val <- dens.val/(x[2]-x[1]) } Tvalues <- seq(0, 60, length.out = 500); x <- Tvalues[-1] my.df <- data.frame(x = Tvalues[-1], VITA1 = get_density(eigs1, Tvalues), VITA2 = get_density(eigs2, Tvalues), VITA3 = get_density(eigs3, Tvalues), nominal = dchisq(x, df = 15)) m <- reshape2::melt(my.df, id.vars = "x");m$Distribution <- m$variable ggplot(m, aes(x, value, color = Distribution, linetype = Distribution))+geom_line()+ geom_vline(xintercept = qchisq(0.95, df = 15))+ xlab(expr("T"["NTML"]))+ylab("Density")+ theme(legend.position = c(0.8, .6)) ## MODERATE SAMPLE SIZE, n = 500 SIMULATIONS, confirm the asymptotics set.seed(1) f1 <- replicate(1000, {sem(growth.model.toeplitz, data = get_data(500, vita1))}) f2 <- replicate(1000, {sem(growth.model.toeplitz, data = get_data(500, vita2))}) f3 <- replicate(1000, {sem(growth.model.toeplitz, data = get_data(500, vita3))}) ## Test of fit pval.df <- data.frame(p1 = sapply(f1, function(f) fitmeasures(f, "pvalue")), p2 = sapply(f2, function(f) fitmeasures(f, "pvalue")), p3 = sapply(f3, function(f) fitmeasures(f, "pvalue"))) colMeans(pval.df <0.05) ## --- SECTION 5 --- ### ## --- running times of original code from 2017 vs vita() based on rvinecopulib --- ### #### ## Auxiliary functions needed for original VITA implementation in Gronneberg and Foldnes (2017) ##### index.par <- function(A) { a <- A[1] b <- A[2] d <- dim(Matrix)[1] index.i <- NULL index.j <- NULL for (i in(1:d)) { if (Matrix[i, i] %in% c(a, b)) { ind <- Matrix[, i] %in% c(a, b) if (sum(ind) == 2) { index.i <- which(ind[-i] == TRUE) + 1 index.j <- i } } } return(c(index.i, index.j)) } parameter.valid = function (family, par.value1, par.value2) { # check if parameter value is valid if ((family == 1 || family == 2) && abs(par.value1) >= 1) return(FALSE) if (family == 2 && par.value2 <= 2) return(FALSE) if ((family == 3 || family == 13) && par.value1 <= 0) return(FALSE) if ((family == 4 || family == 14) && par.value1 < 1) return(FALSE) if ((family == 6 || family == 16) && par.value1 <= 1) return(FALSE) if (family == 5 && par.value1 == 0) return(FALSE) if ((family == 7 || family == 17) && par.value1 <= 0) return(FALSE) if ((family == 7 || family == 17) && par.value2 < 1) return(FALSE) if ((family == 8 || family == 18) && par.value1 <= 0) return(FALSE) if ((family == 8 || family == 18) && par.value2 < 1) return(FALSE) if ((family == 9 || family == 19) && par.value1 < 1) return(FALSE) if ((family == 9 || family == 19) && par.value2 <= 0) return(FALSE) if ((family == 10 || family == 20) && par.value1 < 1) return(FALSE) if ((family == 10 || family == 20) && (par.value2 <= 0 || par.value2 > 1)) return(FALSE) if ((family == 23 || family == 33) && par.value1 >= 0) return(FALSE) if ((family == 24 || family == 34) && par.value1 > -1) return(FALSE) if ((family == 26 || family == 36) && par.value1 >= -1) return(FALSE) if ((family == 27 || family == 37) && par.value1 >= 0) return(FALSE) if ((family == 27 || family == 37) && par.value2 > -1) return(FALSE) if ((family == 28 || family == 38) && par.value1 >= 0) return(FALSE) if ((family == 28 || family == 38) && par.value2 > -1) return(FALSE) if ((family == 29 || family == 39) && par.value1 > -1) return(FALSE) if ((family == 29 || family == 39) && par.value2 >= 0) return(FALSE) if ((family == 30 || family == 40) && par.value1 > -1) return(FALSE) if ((family == 30 || family == 40) && (par.value2 >= 0 || par.value2 < (-1))) return(FALSE) if ((family == 104 || family == 114 || family == 204 || family == 214) && par.value1 < 1) return(FALSE) if ((family == 104 || family == 114 || family == 204 || family == 214) && (par.value2 < 0 || par.value2 > 1)) return(FALSE) if ((family == 124 || family == 134 || family == 224 || family == 234) && par.value1 > -1) return(FALSE) if ((family == 124 || family == 134 || family == 224 || family == 234) && (par.value2 < 0 || par.value2 > 1)) return(FALSE) return(TRUE) } ## This function identifies the appropriate sub-vine and uses the R-vine array representation from the VineCopula package. create.submatrix <- function(I) { d <- dim(Matrix)[1] l <- length(I) for (i in (d:1)) { if (sum(Matrix[, i] %in% I) == l) break } sub.matrix <- Matrix[(i:d), (i:d)] sub.par <- par[(i:d), (i:d)] sub.family <- family[(i:d), (i:d)] remove.ind <- which(!(diag(sub.matrix) %in% I)) if (length(remove.ind) != 0) { sub.par <- sub.par[, -remove.ind] sub.family <- sub.family[, -remove.ind] sub.matrix <- sub.matrix[, -remove.ind] } new.sub.matrix <- NULL new.sub.par <- NULL new.sub.family <- NULL for (i in (1:(dim(sub.matrix)[2]))) { indx <- which(sub.matrix[, i] %in% I) tmp.mat <- sub.matrix[indx, i] new.sub.matrix <- cbind(new.sub.matrix, c(rep(0, length(I)-length(tmp.mat)), tmp.mat)) tmp.par <- sub.par[indx, i] new.sub.par <- cbind(new.sub.par, c(rep(0, length(I)-length(tmp.mat)), tmp.par)) tmp.family <- sub.family[indx, i] new.sub.family <- cbind(new.sub.family, c(rep(0, length(I)-length(tmp.mat)), tmp.family)) } sub.matrix <- new.sub.matrix sub.par <- new.sub.par sub.family <- new.sub.family ## add zeros to diagonals on par and family: diag(sub.par) <- rep(0, l) diag(sub.family) <- rep(0, l) return(list(sub.matrix = sub.matrix, sub.par = sub.par, sub.family = sub.family)) } ############## ## Specify parametric R-vine in five dimensions. ############# Matrix <- c(5, 2, 3, 1, 4, 0, 2, 3, 4, 1, 0, 0, 3, 4, 1, 0, 0, 0, 4, 1, 0, 0, 0, 0, 1) Matrix <- matrix(Matrix, 5, 5) family <- c(0, 3, 3, 3, 3, 0, 0, 3, 3, 3, 0, 0, 0, 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0) # 3 = Clayton copula family <- matrix(family, 5, 5) par <- family # parameter matrix for copulas. VITA fills in parameters sequentially in this matrix RVM <- RVineMatrix(Matrix, family, par) # the R vine specification. ## define an order in which to run through the vine structure, edge by edge pair.index <- NULL index.set <- list() d <- dim(Matrix)[1] for (i in ((d-1):1)) { for (j in (d:(i+1))) { cond.index <- NULL pair.index <- c(Matrix[i, i], Matrix[j, i]) if (j+1 <= d) { cond.index <- Matrix[((j+1):d), i] } index.set <- c(index.set, list(pair.index = pair.index, cond.index = cond.index)) } } ############### # Define target covariance matrix ############### model.true <- ' # latent variables F1 = ~ load*x1 +load* x2 F2 = ~ load*y1 + load*y2+load*y3 x1 ~~ resvar *x1 x2 ~~ resvar *x2 y1 ~~ resvar *y1 y2 ~~ resvar *y2 y3 ~~ resvar *y3 F1 ~~ phi *F2 F1 ~~ 1 *F1 F2 ~~ 1 *F2 ' load <- 0.95 resvar <- 1-load^2 phi <- .9 model.true <- gsub("resvar", as.character(resvar), model.true, fixed = TRUE) model.true <- gsub("load", as.character(load), model.true, fixed = TRUE) model.true <- gsub("phi", as.character(phi), model.true, fixed = TRUE) f <- cfa(model.true, data = NULL) sigma.target <- fitted(f)$cov ######## ## Original VITA for given sigma.target and standard normal marginals ####### ## the numerical routine that searches for copula parameter for a given edge = pair.index in the vine solve.param <- function(pair.index, cond.index) { I <- c(pair.index, cond.index) curr.family = family[index.par(pair.index)[1], index.par(pair.index)[2]] cat(" family: ", curr.family, "\n") sub.matrices <- create.submatrix(I) #the sub-vine we need for simulation ## Rename the RVineMatrix, to get the names from 1 and up: sub.matrix <- sub.matrices$sub.matrix recoded.index <- sort(unique(as.vector(sub.matrix)))[-1] recoded.sub <- as.vector(sub.matrix) recoded.pair.index <- pair.index for (i in (1:length(recoded.index))) { recoded.sub[which(recoded.sub == recoded.index[i])] <- i recoded.pair.index[which(recoded.pair.index == recoded.index[i])] <- i } sub.matrix <- matrix(recoded.sub, dim(sub.matrix)[1], dim(sub.matrix)[2]) RVM.sub <- RVineMatrix(Matrix = sub.matrix, par = sub.matrices$sub.par, par2 = sub.matrices$sub.par*0, family = sub.matrices$sub.family) root.function <- function(theta) { # this function returns the difference between target covariance and vine-implied covariance for theta if (!parameter.valid(curr.family, theta, NA)) { cat("not valid parameter: ", theta , "\n") return(10^3) } RVM.sub$par[2, 1] <<- theta simdata <- RVineSim(N, RVM.sub)[, recoded.pair.index] # Simulate large sample of size N return(sigma.target[pair.index[1], pair.index[2]] - cov(qnorm(simdata[, 1]), qnorm(simdata[, 2]))) # standard normal marginals } est <- uniroot(root.function, lower = 0.001, upper = 20)$root cat("Approximation:", est, "\n") par[index.par(pair.index)[1], index.par(pair.index)[2]] <<- est # replace VITA result into global vine structure } ## Original implementation timing N <- 10^6 set.seed(1) t1 <- system.time({ for (i in (1:(length(index.set)/2))) { pair.index <- index.set[[2*i-1]] cond.index <- index.set[[2*i]] cat("Optimizing for:", pair.index, "|", cond.index) solve.param(pair.index, cond.index) } }) RVM <- RVineMatrix(Matrix, family, par) ## Timing of covsim implementation of VITA bicop <- bicop_dist("clayton") pcs <- list(list(bicop, bicop, bicop, bicop), list(bicop, bicop, bicop), list(bicop, bicop), list(bicop)) ## set up vine copula model with Gaussian margins tmat <- Matrix[5:1, ] vc <- vine_dist(list(distr = "norm"), pcs, tmat) marginsnorm <- lapply(X = sqrt(diag(sigma.target)), function(X) list(distr = "norm", sd = X)) set.seed(1) t2 <- system.time(calvita <- vita(marginsnorm, sigma.target, vc = vc, family_set = "clayton")) ### vita is approx 13 times faster t1["elapsed"]/t2["elapsed"] ### ###a five-dimensional vine, on a computer with 4 CPU cores, #### simulation runtimes at a sample size of n = 1000 with rvinecopulib compared to VineCopula ## system.time(replicate(10000, qnorm(RVineSim(10^3, RVM)))) system.time(replicate(10000, rvine(10^3, calvita))) ## --- SECTION 6 --- ### ## --- Table 1 --- ### test_func <- function(d) { set.seed(1) ncores <- parallel::detectCores() marginsnorm <- lapply(X = rep(1, d), function(X) list(distr = "norm", sd = X)) mat <- diag(d)+0.3 diag(mat) <- 1 time.cal <- tryCatch(system.time(vita <- vita(marginsnorm, mat)), error = function(w) {-1}, warning = function(w) {-2}) if (length(time.cal) == 5) { time.sim <- tryCatch(system.time(replicate(10^3, rvine(500, vita))), error = function(w) {-1}, warning = function(w) {-2}) } return(list(time.cal = time.cal, time.sim = time.sim, vita = vita, ncores = ncores)) } ## Table 1. With 40 dimensions too slow; 15++ hours. for (d in c(5, 10, 15, 20, 25, 30)) { res <- test_func(d) print(res) } ##### ## Dimension 40 with reduced precision, Nmax=10^5 instead of default 10^6 #### ## very large and time consuming, must reduce Nmax (but still takes hours..) set.seed(1) d <- 40 marginsnorm <- lapply(X = rep(1, d), function(X) list(distr = "norm", sd = X)) mat <- diag(d)+0.3 diag(mat) <- 1 time.cal <- system.time(vita <- vita(marginsnorm, mat, Nmax =10^5, verbose=TRUE)) time.cal # precision is good set.seed(1) large.sample <- rvine(10^6, vita) discrepancies <- lavaan::lav_matrix_vech(cor(large.sample)-mat, diag=FALSE) summary(abs(discrepancies) < .005) summary(abs(discrepancies) < .01) ## --- SECTION 6 --- ## ## --- Continuous SEM data example --- ## sem.pop <- ' Ksi1 = ~ start(.8)*x1+start(.7)*x2+start(.6)*x3+start(.5)*x4 Ksi2 = ~ start(.8)*x5+start(.7)*x6+start(.6)*x7+start(.5)*x8 Eta1 = ~ start(.8)*y1+start(.7)*y2+start(.6)*y3+start(.5)*y4 Eta2 = ~ start(.8)*y5+start(.7)*y6+start(.6)*y7+start(.5)*y8 Eta3 = ~ start(.8)*y9+start(.7)*y10+start(.6)*y11+start(.5)*y12 Eta1 ~ start(.4)*Ksi1+start(.6)*Ksi2 Eta2 ~ start(.4)*Ksi1+start(.2)*Ksi2+start(.3)*Eta1 Eta3 ~ start(.1)*Ksi1+start(.1)*Ksi2+start(.2)*Eta1+start(.5)*Eta2 Ksi1~~start(.3)*Ksi2; Eta1~~start(.5)*Eta1; Eta2~~start(.5)*Eta2 Eta3~~start(.5)*Eta3; x1~~start(.5)*x1; x2~~start(.5)*x2; x3~~start(.5)*x3 x4~~start(.5)*x4; x5~~start(.5)*x5; x6~~start(.5)*x6; x7~~start(.5)*x7 x8~~start(.5)*x8; y1~~start(.5)*y1; y2~~start(.5)*y2; y3~~start(.5) *y3 y4~~start(.5)*y4; y5~~start(.5)*y5; y6~~start(.5)*y6; y7~~start(.5) *y7 y8~~start(.5)*y8; y9~~start(.5)*y9; y10~~start(.5)*y10 y11~~start(.5)*y11; y12~~start(.5)*y12' sigma.target <- lavInspect(sem(sem.pop, data = NULL), "sigma.hat") marginsnorm <- lapply(X = sqrt(diag(sigma.target)), function(X) list(distr = "norm", sd = sqrt(X))) set.seed(1) cal.time <- system.time(vitadist <- vita(marginsnorm, sigma.target)) cal.time["elapsed"]/3600 sim.time <- system.time(randomsamples <- replicate(10^3, rvine(10^3, vitadist))) sim.time ## --- Ordinal data simulation --- ## sigma.target <- matrix(c(1, 0.5, 0.5, 1), 2) set.seed(1) vita_clayton <- vita(list(list(distr = "norm"), list(distr = "norm")), sigma.target, family_set = "clayton") set.seed(1) vita_joe <- vita(list(list(distr = "norm"), list(distr = "norm")), sigma.target, family_set = "joe") ## generate data and discretize clayton.disc <- apply(rvine(10^3, vita_clayton), 2, cut, breaks = c(-Inf, 0, 1, Inf), labels = FALSE) ## --- Run bootstrap test --- ## bootTest(clayton.disc) ## TABLE 2 p1 <- p2 <- c(0, pnorm(0), pnorm(1), 1) ## clayton claytonpar <- as.numeric(vita_clayton$copula$pair_copulas[[1]][[1]]$parameters) my.cop <- copula::claytonCopula(param = claytonpar) my.tab <- matrix(0, 3, 3) for (row in 1:3) { for(col in 1:3) { my.tab[row, col] <- pCopula(c(p1[row+1], p2[col+1]), my.cop) - pCopula(c(p1[row], p2[col+1]), my.cop) - pCopula(c(p1[row+1], p2[col]), my.cop) + pCopula(c(p1[row], p2[col]), my.cop) } } round(my.tab, digits = 3) polychoric(as.table(my.tab))$rho ## joe joepar <- as.numeric(vita_joe$copula$pair_copulas[[1]][[1]]$parameters) my.cop <- copula::joeCopula(param = joepar) my.tab <- matrix(0, 3, 3) for (row in 1:3) { for(col in 1:3) { my.tab[row, col] <- pCopula(c(p1[row+1], p2[col+1]), my.cop) - pCopula(c(p1[row], p2[col+1]), my.cop) - pCopula(c(p1[row+1], p2[col]), my.cop) + pCopula(c(p1[row], p2[col]), my.cop) } } round(my.tab, digits = 3) polychoric(as.table(my.tab))$rho ## --- Figure 9 --- ## clayton_cop <- mvdc(copula = claytonCopula(param = claytonpar), margins = c("norm", "norm"), paramMargins = list(list(mean = 0, sd = 1), list(mean = 0, sd = 1))) copula::contour(clayton_cop,dMvdc, xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5)) abline(v = c(0, 1), h = c(0, 1), lty = 2) joe_cop <- mvdc(copula = joeCopula(param = joepar), margins = c("norm", "norm"), paramMargins = list(list(mean = 0, sd = 1), list(mean = 0, sd = 1))) copula::contour(joe_cop,dMvdc, xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5)) abline(v = c(0, 1), h = c(0, 1), lty = 2) ## --- Table 3 --- ## run_sim <- function(n) { norm <- MASS::mvrnorm(n, mu = c(0,0), Sigma = sigma.target) clayton <- rvine(n, vita_clayton) joe <- rvine(n, vita_joe) ## discretize norm.disc <- apply(norm, 2, cut, breaks = c(-Inf, 0, 1, Inf), labels = FALSE) clayton.disc <- apply(clayton, 2, cut, breaks = c(-Inf, 0, 1, Inf), labels = FALSE) joe.disc <- apply(joe, 2, cut, breaks = c(-Inf, 0, 1, Inf), labels = FALSE) c(bootTest(norm.disc, verbose = FALSE), bootTest(clayton.disc, verbose = FALSE), bootTest(joe.disc, verbose = FALSE)) } set.seed(1) sim100 <- replicate(500, run_sim(100)) sim500 <- replicate(500, run_sim(500)) rowMeans(sim100 < .05) rowMeans(sim500 < .05)