# This file contains all code used in Section 6 of the paper by Speidel, Drechsler, Jolani. # install hmi if (!require("hmi")) { install.packages("hmi") } # load the required packages library("hmi") # We should avoid this: library("broom.mixed") # load the required data sets data("Gcsemv", package = "hmi") data("selfreport", package = "mice") data("nhanes_mod", package = "hmi") data("nhanes_sub", package = "hmi") ## Section 6.1 # summary of the data summary(Gcsemv) # check type of data list_of_types_maker(Gcsemv) # example on how a variable type can be specified by the user modified_list <- list_of_types_maker(Gcsemv) modified_list$coursework <- "ordered_categorical" # set up a model_formula model_formula <- written ~ 1 + gender + coursework + (1 + gender | school) # imputation of the data set.seed(123) dat_imputed <- hmi(data = Gcsemv, model_formula = model_formula) # Note: a warning message that one of the internal models did not converge to # the allowed tolerance of 0.002, but 0.0045, is given. Since this appears # in only one of multiple iterations and with only a rather small deviation # from the allowed tolerance, the author evaluates the issue as neglectable. # generation of Figure 1 plot(dat_imputed, layout = c(2, 2)) # convergence checks and plots. Will generate 12 plots. From those 12 plots, # number 5 and 8 are included in the article (Figure 3 and 2). chaincheck(dat_imputed, thin = NULL) # pooling summary(dat_imputed$pooling) # set up a analysis_function analysis_function <- function(complete_data) { # An empty list which will contain the (temporarily) results is set up. parameters_of_interest <- list() # The analysis model of interest is specified. my_model <- lme4::lmer(written ~ 1 + gender + coursework + (1 + gender | school), data = complete_data) # Specify, which parameters from the model should be returned. # In the example the analyst is interested in all fixed effects estimates # and the random effects covariance matrix. parameters_of_interest[[1]] <- lme4::fixef(my_model) parameters_of_interest[[2]] <- lme4::VarCorr(my_model)[[1]][, ] # The following line is essential if the elements of interest should be labeled. ret <- unlist(parameters_of_interest) # Labeling the output names(ret) <- c("intercept", "gender", "coursework", "sigma0", "sigma01", "sigma10", "sigma1") # Return the results. return(ret) } # pooling with hmi_pool hmi_pool(mids = dat_imputed, analysis_function = analysis_function) ## Section 6.2 # initialization of arrays low <- array(dim = 9971) up <- array(dim = 9971) # filling the lower bounds array low[nhanes_sub$ind310 == 1] <- 0 low[nhanes_sub$ind310 == 2] <- 3001 low[nhanes_sub$ind310 == 3] <- 5001 low[nhanes_sub$ind310 == 4] <- 10001 low[nhanes_sub$ind310 == 5] <- 15001 low[nhanes_sub$ind310 == 6] <- 0 low[nhanes_sub$ind310 == 7] <- 20001 low[nhanes_sub$ind310 == 8] <- 0 # filling the upper bounds array up[nhanes_sub$ind310 == 1] <- 3000 up[nhanes_sub$ind310 == 2] <- 5000 up[nhanes_sub$ind310 == 3] <- 10000 up[nhanes_sub$ind310 == 4] <- 15000 up[nhanes_sub$ind310 == 5] <- 20000 up[nhanes_sub$ind310 == 6] <- 20000 up[nhanes_sub$ind310 == 7] <- Inf up[nhanes_sub$ind310 == 8] <- Inf # generation of an interval object ind310interval <- generate_interval(low, up) # inspecting the interval object head(ind310interval) # imputation of the data set.seed(123) nhanes_imp <- hmi(nhanes_mod, maxit = 20, k = 3) # table of an interval object table(nhanes_mod$ind310) # generation of Figure 4 plot(nhanes_mod$ind310, sort = "mostprecise_increasing") # generating midpoints of an interval object midpoints <- center_interval(nhanes_mod$ind310) # table of the midpoints table(midpoints) # switching from interval to idf format idf <- interval2idf(nhanes_mod$ind310) # switching from idf to interval format intervaldf <- idf2interval(idf) # splitting up an interval object bounds <- split_interval(nhanes_mod$ind310) head(bounds) # calculations for interval objects log_savings_in_euro <- log(nhanes_mod$ind310 * 0.8) ## Section 6.3 # calculation of the shares of observations rounded to the nearest multiple of 5 and 10 sum(selfreport$wr %% 10 == 0)/nrow(selfreport) # 0.1941748 sum(selfreport$wr %% 5 == 0)/nrow(selfreport) # 0.3800971 # imputation of the data set.seed(123) selfreport_imputed <- hmi(selfreport[, c("hr", "wr")], rounding_degrees = list(wr = c(1, 5, 10))) # generation of Figure 5 par(mfrow = c(1, 2)) hist(selfreport$wr, breaks = 100, xlab = "selfreported weight", main = "Original, heaped data", ylim = c(0, 100)) hist(mice::complete(selfreport_imputed, 1)$wr, breaks = 100, xlab = "selfreported weight", main = "Data after first imputation", ylim = c(0, 100))