library("simFrame") # load package set.seed(1234) # set seed for reproducibility #### code from section 2: show methods for generic function 'setNA' showMethods("setNA") #### code from section 3: demonstrate use of accessor and mutator methods nc <- NAControl(NArate = 0.05) getNArate(nc) setNArate(nc, c(0.01, 0.03, 0.05, 0.07, 0.09)) getNArate(nc) #### code from section 4.1: illustrative example for data generation ## control object for generating data from a multivariate normal distribution library("mvtnorm") dc <- DataControl(size = 10, distribution = rmvnorm, dots = list(mean = rep(0, 2), sigma = matrix(c(1, 0.5, 0.5, 1), 2, 2))) ## generate data foo <- generate(dc) foo #### code from section 4.2: illustrative example for sampling ## load population data and set up samples data("eusilcP") set <- setup(eusilcP, size = 10, k = 2) ## examine indices of set up samples summary(set) set ## draw samples from population draw(eusilcP[, c("id", "eqIncome")], set, i = 1) #### code from section 4.3: illustrative example for adding contamination ## define control object cc <- DARContControl(target = "V2", epsilon = 0.2, fun = function(x) x * 100) ## contaminate data generated in section 4.1 bar <- contaminate(foo, cc) bar #### code from section 4.4: illustrative example for inserting missing values ## define control object nc <- NAControl(NArate = 0.3) ## insert missing values into data from section 4.3 setNA(bar, nc) #### code from section 4.5: illustrative example for running simulations ## initializations data("eusilcP") ## control objects for sampling and contamination sc <- SampleControl(size = 500, k = 50) cc <- DARContControl(target = "eqIncome", epsilon = 0.02, fun = function(x) x * 25) ## define function for simulation runs sim <- function(x) { c(mean = mean(x$eqIncome), trimmed = mean(x$eqIncome, trim = 0.02)) } ## set seed and run simulation set.seed(12345) results <- runSimulation(eusilcP, sc, contControl = cc, fun = sim) ## inspect results head(results) aggregate(results) ## compute true values tv <- mean(eusilcP$eqIncome) tv ## plot results plot(results, true = tv) simDensityplot(results, true = tv) #### code from section 6.1: example for design-based simulations ## initializations library("laeken") data("eusilcP") set.seed(12345) ## set up samples set <- setup(eusilcP, design = "region", grouping = "hid", size = c(75, 250, 250, 125, 200, 225, 125, 150, 100), k = 100) ## define contamination cc <- DCARContControl(target = "eqIncome", epsilon = 0.005, grouping = "hid", dots = list(mean = 500000, sd = 10000)) ## define function for simulation runs sim <- function(x, k) { g <- gini(x$eqIncome, x$.weight)$value eqIncHill <- fitPareto(x$eqIncome, k = k, method = "thetaHill", groups = x$hid) gHill <- gini(eqIncHill, x$.weight)$value eqIncPDC <- fitPareto(x$eqIncome, k = k, method = "thetaPDC", groups = x$hid) gPDC <- gini(eqIncPDC, x$.weight)$value c(standard = g, Hill = gHill, PDC = gPDC) } ## run simulation results <- runSimulation(eusilcP, set, contControl = cc, design = "gender", fun = sim, k = 125) ## inspect results head(results) aggregate(results) ## compute true values tv <- simSapply(eusilcP, "gender", function(x) gini(x$eqIncome)$value) ## plot results plot(results, true = tv, xlab = "Gini coefficient") #### code from section 6.2: example for model-based simulations ## initializations library("robCompositions") library("mvtnorm") set.seed(12345) ## define function and control class for generating data crnorm <- function(n, mean, sigma) invilr(rmvnorm(n, mean, sigma)) sigma <- matrix(c(1, -0.5, 1.4, -0.5, 1, -0.6, 1.4, -0.6, 2), 3, 3) dc <- DataControl(size = 150, distribution = crnorm, dots = list(mean = c(0, 2, 3), sigma = sigma)) ## define control class for the insertion of missing values nc <- NAControl(NArate = 0.05) ## define function for simulation runs sim <- function(x, orig) { i <- apply(x, 1, function(x) any(is.na(x))) ni <- length(which(i)) xKNNa <- impKNNa(x)$xImp xLS <- impCoda(x, method = "lm")$xImp c(knn = aDist(xKNNa, orig)/ni, LS = aDist(xLS, orig)/ni) } ## run simulation results <- runSimulation(dc, nrep = 100, NAControl = nc, fun = sim) ## inspect results head(results) aggregate(results) ## plot results plot(results, xlab = "Relative Aitchison distance") simDensityplot(results, alpha = 0.6, xlab = "Relative Aitchison distance") #### code from section 6.3: example for parallel computing ## start snow cluster cl <- makeCluster(4, type="SOCK") ## load required packages on workers clusterEvalQ(cl, { library("simFrame") library("robCompositions") library("mvtnorm") }) ## setup random number stream clusterSetupRNG(cl, seed=12345) ## define function and control class for generating data crnorm <- function(n, mean, sigma) invilr(rmvnorm(n, mean, sigma)) sigma <- matrix(c(1, -0.5, 1.4, -0.5, 1, -0.6, 1.4, -0.6, 2), 3, 3) dc <- DataControl(size = 150, distribution = crnorm, dots = list(mean = c(0, 2, 3), sigma = sigma)) ## define control class for the insertion of missing values nc <- NAControl(NArate = c(0.01, 0.03, 0.05, 0.07, 0.09)) ## define function for simulation runs sim <- function(x, orig) { i <- apply(x, 1, function(x) any(is.na(x))) ni <- length(which(i)) xKNNa <- impKNNa(x)$xImp xLS <- impCoda(x, method = "lm")$xImp c(knn = aDist(xKNNa, orig)/ni, LS = aDist(xLS, orig)/ni) } ## export object to workers clusterExport(cl, c("crnorm", "sigma", "dc", "nc", "sim")) ## run simulation results <- clusterRunSimulation(cl, dc, nrep = 100, NAControl = nc, fun = sim) ## stop snow cluster stopCluster(cl) ## inspect results head(results) aggregate(results) ## plot results plot(results, ylab = "Relative Aitchison distance") simDensityplot(results, NArate=0.07, alpha = 0.6, xlab = "Relative Aitchison distance")