# Package library("MissMech") #-- Example 1: Data are MCAR and normally distributed n <- 300 p <- 5 pctmiss <- 0.2 set.seed(1010) y <- matrix(rnorm(n * p), nrow = n) missing <- matrix(runif(n * p), nrow = n) < pctmiss y[missing] <- NA out <- TestMCARNormality(data = y) out # --- Prints the p-value for both the Hawkins and the nonparametric test summary(out) # --- Uses more cases out1 <- TestMCARNormality(data = y, del.lesscases = 1) out1 #---- performs multiple imputation Out <- TestMCARNormality (data = y, imputation.number = 100) summary(Out) boxplot(Out) #-- Example 2: Data are MCAR and non-normally distributed (t distributed with d.f. = 5) n <- 300 p <- 5 pctmiss <- 0.2 set.seed(1010) y <- matrix(rt(n * p, 5), nrow = n) missing <- matrix(runif(n * p), nrow = n) < pctmiss y[missing] <- NA out <- TestMCARNormality(data = y) out # Perform multiple imputation Out_m <- TestMCARNormality(data = y, imputation.number = 100) boxplot(Out_m) # Use the k nearest neighbor method of the package imputation to impute the missing data library("imputation") yimputed <- kNNImpute(y, k = 3) out_k <- TestMCARNormality(data = y, imputed.data = yimputed$x) out_k #-- Example 3: Data are MAR (not MCAR), but are normally distributed n <- 300 p <- 5 r <- 0.3 mu <- rep(0, p) sigma <- r * (matrix(1, p, p) - diag(1, p)) + diag(1, p) set.seed(110) eig <- eigen(sigma) sig.sqrt <- eig$vectors %*% diag(sqrt(eig$values)) %*% solve(eig$vectors) sig.sqrt <- (sig.sqrt + t(sig.sqrt)) / 2 y <- matrix(rnorm(n * p), nrow = n) %*% sig.sqrt tmp <- y for (j in 2:p) y[tmp[, j - 1] > 0.8, j] <- NA out <- TestMCARNormality(data = y, alpha = 0.1) out #-- Example 4: Multiple imputation; data are MAR (not MCAR), but are normally distributed n <- 300 p <- 5 pctmiss <- 0.2 set.seed(1010) y <- matrix (rnorm(n * p), nrow = n) missing <- matrix(runif(n * p), nrow = n) < pctmiss y[missing] <- NA Out <- OrderMissing(y) y <- Out$data spatcnt <- Out$spatcnt g2 <- seq(spatcnt[1] + 1, spatcnt[2]) g4 <- seq(spatcnt[3] + 1, spatcnt[4]) y[c(g2, g4), ] <- 2 * y[c(g2, g4), ] out <- TestMCARNormality(data = y, imputation.number = 100) out boxplot(out) # Removing Groups 2 and 4 y1 <- y[-seq(spatcnt[1] + 1, spatcnt[2]), ] out <- TestMCARNormality(data = y1, imputation.number = 100) out boxplot(out) #-- Example 5: Test of homoscedasticity for complete data n <- 50 p <- 5 r <- 0.4 sigma <- r * (matrix(1, p, p) - diag(1, p)) + diag(1, p) set.seed(1010) eig <- eigen(sigma) sig.sqrt <- eig$vectors %*% diag(sqrt(eig$values)) %*% solve(eig$vectors) sig.sqrt <- (sig.sqrt + t(sig.sqrt)) / 2 y1 <- matrix(rnorm(n * p), nrow = n) %*% sig.sqrt n <- 75 p <- 5 y2 <- matrix(rnorm(n * p), nrow = n) n <- 25 p <- 5 r <- 0 sigma <- r * (matrix(1, p, p) - diag(1, p)) + diag(2, p) y3 <- matrix(rnorm(n * p), nrow = n) %*% sqrt(sigma) ycomplete <- rbind(y1, y2, y3) y1[, 1] <- NA y2[, c(1 ,3)] <- NA y3 [, 2] <- NA ygroup <- rbind(y1, y2, y3) out <- TestMCARNormality(data = ygroup, method = "Hawkins", imputed.data = ycomplete) out # ---- Example 6, real data data("agingdata", package = "MissMech") TestMCARNormality(agingdata, del.lesscases = 1) #----- Example 5.1: An example of using the AndersonDarling function set.seed(50) n1 <- 30 n2 <- 45 n3 <- 60 v1 <- rnorm(n1) v2 <- runif(n2) v3 <- rnorm(n3, 2, 3) AndersonDarling(data = c(v1, v2, v3), number.cases = c(n1, n2, n3)) #----- R code for Table 1 simulation results # To reach the results in Table 1, uncomment the appropriate line and set the distribution parameters set.seed(1010) n <- 300 p <- 5 pctmiss <- 0.2 pval_Hw <- c() pval_Non <- c() pval_HwComp <- c() pval_NonComp <- c() df.t <- 3 shape.g <- 2 rep <- 1000 for (k in 1:rep){ y <- matrix(rnorm(n * p), nrow = n) #y <- matrix(rt(n * p, df.t), nrow = n) #y <- matrix(rgamma(n * p, shape.g, 1) , nrow = n) #y <- matrix(runif(n * p) , nrow = n) ycomp <- y missing <- matrix(runif(n * p), nrow = n) < pctmiss y[missing] <- NA out <- TestMCARNormality(data = y, del.lesscases = 6, imputation.number = 1, method = "Auto", imputation.method = "Dist.Free", nrep = 10000, n.min = 30, seed = NA, alpha = 0.05, imputed.data = NA) ycomp <- ycomp[sort(out$caseorder), ] out.comp <- TestMCARNormality(data = out$analyzed.data, del.lesscases = 6, imputation.number = 1, method = "Auto", imputation.method = "Dist.Free", nrep = 10000, n.min = 30, seed = NA, alpha = 0.05, imputed.data = ycomp) pval_Hw <- c(out$pvalcomb, pval_Hw) pval_Non <- c(out$pnormality, pval_Non) pval_HwComp <- c(out.comp$pvalcomb, pval_HwComp) pval_NonComp <- c(out.comp$pnormality, pval_NonComp) } c(sum(pval_Hw < 0.05) / k, sum(pval_Non < 0.05) / k, sum(pval_HwComp < 0.05) / k, sum(pval_NonComp < 0.05) / k) #-- R code for the simulation result stated in Example 3 regarding the # power of the Hawkins and the nonparametric tests set.seed(1010) n <- 300 p <- 5 r <- 0.3 mu <- rep(0, p) sigma <- r * (matrix(1, p, p) - diag(1, p)) + diag(1, p) eig <- eigen(sigma) sig.sqrt <- eig$vectors %*% diag(sqrt(eig$values)) %*% solve(eig$vectors) sig.sqrt <- (sig.sqrt + t(sig.sqrt)) / 2 pval_Hw <- c() pval_Non <- c() pval_HwComp <- c() pval_NonComp <- c() rep <- 1000 for (k in 1:rep){ y <- matrix(rnorm(n * p), nrow = n) %*% sig.sqrt ycomp <- y for (j in 2:p){ y[ycomp[, j - 1] > 0.8, j] <- NA } out <- TestMCARNormality(data = y, del.lesscases = 6, imputation.number = 1, method = "Auto", imputation.method = "Dist.Free", nrep = 10000, n.min = 30, seed = NA, alpha = 0.05, imputed.data = NA) ycomp <- ycomp[sort(out$caseorder), ] out.comp <- TestMCARNormality(data = out$analyzed.data, del.lesscases = 6, imputation.number = 1, method = "Auto", imputation.method = "Dist.Free", nrep = 10000, n.min = 30, seed = NA, alpha = 0.05, imputed.data = ycomp) pval_Hw <- c(out$pvalcomb, pval_Hw) pval_Non <- c(out$pnormality, pval_Non) pval_HwComp <- c(out.comp$pvalcomb, pval_HwComp) pval_NonComp <- c(out.comp$pnormality, pval_NonComp) } c(sum(pval_Hw < 0.05) / k, sum(pval_Non < 0.05) / k, sum(pval_HwComp < 0.05) / k, sum(pval_NonComp < 0.05) / k)