## Title: Multiple Response Variables Regression Models in R: The mcglm Package ## Author: Wagner Hugo Bonat LEG/IMADA ## Loading extra packages ## library("devtools") ## Install last version from github repository ## install_github("wbonat/mcglm") ## CODE SECTION 5 - IMPLEMENTATION IN R --------------------------------- library("mcglm") args(mcglm) ## Data example data <- data.frame("id" = gl(2, 2), "time" = rep(1:2, 2)) ## Compnents of the matrix linear predictor Z0 <- mc_id(data) Z1 <- mc_ma(id = "id", time = "time", data = data, order = 1) ## Matrix linear predictor matrix_pred <- list(c(Z0, Z1), c(Z0, Z1)) ## Link, variance and covariance functions link <- c("log", "logit") variance <- c("poisson_tweedie", "binomialP") covariance <- c("identity", "inverse") ## Number of trials and offset Ntrial <- list(NULL, data$Ntrial) offset <- list(data$offset, NULL) ## CODE SECTION 6 - EXAMPLES -------------------------------------------- ## EXAMPLE 1 - GAUSSIAN MIXED MODELS ------------------------------------ ## Fit using lme4 package library("lme4") data("sleepstudy", package = "lme4") fit1.lme4 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy, REML = FALSE) ## Linear predictor form_ex1 <- Reaction ~ Days ## Matrix linear predictor Z_ex1 <- mc_mixed(~ 0 + Subject / Days, data = sleepstudy) Z0_ex1 <- mc_id(sleepstudy) ## Fitting fit_ex1 <- mcglm(linear_pred = c(form_ex1), matrix_pred = list(resp = c(Z0_ex1, Z_ex1)), control_algorithm = list(correct = FALSE), data = sleepstudy) ## Summary summary(fit_ex1) ## Comparing log-likelihood mcglm versus lme4 logLik(fit1.lme4) plogLik(fit_ex1) ## Selecting the components of the matrix linear prediction using SIC ## Fitting reference model fit0_ex1 <- mcglm(linear_pred = c(form_ex1), matrix_pred = list(resp = c(Z0_ex1)), control_algorithm = list(correct = FALSE), data = sleepstudy) ## Matrix linear predictor components - step 1 Z.sic <- list("Z1" = Z_ex1[[1]], "Z2" = Z_ex1[[2]]) ## Computing SIC mc_sic_covariance(fit0_ex1, scope = Z.sic, idx = c(1, 2), data = sleepstudy, response = 1) ## Fitting fit1_ex1 <- mcglm(linear_pred = c(form_ex1), matrix_pred = list(resp = c(Z0_ex1, Z.sic)), control_algorithm = list(correct = FALSE), data = sleepstudy) ## Matrix linear predictor components - step 2 Z3_ex1 <- list("Z3" = Z_ex1[[3]]) mc_sic_covariance(fit1_ex1, scope = Z3_ex1, idx = c(1), response = 1) ## END CODE EXAMPLE 1 --------------------------------------------------- ## EXAMPLE 2 - LONGITUDINAL DATA ANALYSIS ------------------------------- data("dietox", package = "geepack") ## Excluding NA observations dietox <- na.exclude(dietox) ## Factors dietox$Pig <- as.factor(dietox$Pig) dietox$Evit <- as.factor(dietox$Evit) dietox$Cu <- as.factor(dietox$Cu) ## Matrix linear predictor components Z0_ex2 <- mc_id(dietox) Z1_ex2 <- mc_mixed(~ 0 + Pig, data = dietox) ## Fitting reference model fit0_ex2 <- mcglm(linear_pred = c(Weight ~ 1), matrix_pred = list(resp = c(Z0_ex2, Z1_ex2)), data = dietox) ## Canditate covariates - step 1 scope <- c("poly(Time, 2)", "Evit", "Cu", "Evit * poly(Time, 2)", "Cu * poly(Time, 2)","Evit * Cu") ## Computing SIC - step 1 N <- dim(dietox)[1] mc_sic(fit0_ex2, scope = scope, data = dietox, response = 1, penalty = log(N)) ## Fitting fit1_ex2 <- mcglm(linear_pred = c(Weight ~ poly(Time, 2)), matrix_pred = list(resp = c(Z0_ex2, Z1_ex2)), data = dietox) ## Updating candidate covariates - step 2 scope <- c("Evit", "Cu", "Evit * poly(Time, 2)", "Cu * poly(Time, 2)","Evit * Cu") ## Computing SIC - step 2 mc_sic(fit1_ex2, scope = scope, data = dietox, response = 1, penalty = log(N)) ## Updating model fit2_ex2 <- mcglm(linear_pred = c(Weight ~ poly(Time, 2)), link = "log", variance = "tweedie", power_fixed = FALSE, control_algorithm = list(tuning = 0.85), matrix_pred = list(resp = c(Z0_ex2, Z1_ex2)), data = dietox) ## Plot Pearson residuals - Figure 1 par(mfrow = c(1, 2), mar = c(2.6, 2.5, 1.2, 0.5), mgp = c(1.6, 0.6, 0)) plot(residuals(fit1_ex2, type = "pearson")[, 1] ~ fitted(fit1_ex2)[, 1], ylab = "Pearson residuals", xlab = "Fitted values", main = "A") plot(residuals(fit2_ex2, type = "pearson")[, 1] ~ fitted(fit2_ex2)[, 1], ylab = "Pearson residuals", xlab = "Fitted values", main = "B") ## GOF's rbind(gof(fit1_ex2), gof(fit2_ex2)) ## Marginal test summary(fit2_ex2, verbose = TRUE, print = "Dispersion") ## Conditional test mc_conditional_test(fit2_ex2, parameters = c("power11", "tau11", "tau12"), test = 2:3, fixed = 1) ## END CODE EXAMPLE 2 --------------------------------------------------- ## EXAMPLE 3 - SPATIAL AREAL DATA ANALYSIS ------------------------------ ## Loading extra packages library("spdep") ## Loading data set data("malaria", package = "gcmr") malaria$Y <- malaria$cases / malaria$size ## Linear predictor form_ex3 <- Y ~ netuse + I(green / 100) + phc ## Matrix linear predictor list_neigh <- tri2nb(coords = malaria[, 1:2]) Z_ex3 <- mc_car(list_neigh = list_neigh) ## Fitting fit1_ex3 <- mcglm(linear_pred = c(form_ex3), matrix_pred = list(Z_ex3), link = "logit", variance = "binomialP", covariance = "inverse", control_algorithm = list(verbose = FALSE, max_iter = 100, tuning = 0.1, method = "rc"), Ntrial = list(malaria$size), data = malaria) ## Summary summary(fit1_ex3) ## Autocorrelation mc_compute_rho(fit1_ex3) ## Model using expm covariance link function Z0_ex3 <- mc_id(malaria) Z1_ex3 <- as.matrix(1 / dist(malaria[, 1:2] / 1000, upper = TRUE, diag = TRUE)) Z1_ex3 <- list("Z1" = Z1_ex3) fit2_ex3 <- mcglm(linear_pred = c(form_ex3), matrix_pred = list(c(Z0_ex3, Z1_ex3)), link = "logit", variance = "binomialP", covariance = "expm", Ntrial = list(malaria$size), data = malaria) ## Comparing fitted models rbind(gof(fit1_ex3), gof(fit2_ex3)) ## END CODE EXAMPLE 3 --------------------------------------------------- ## EXAMPLE 4 - MIXED RESPONSE VARIABLES --------------------------------- ## Loading the data set data("soya", package = "mcglm") ## Linear predictors form.grain <- grain ~ block + water * pot form.seed <- seeds ~ block + water * pot soya$viablepeasP <- soya$viablepeas / soya$totalpeas form.peas <- viablepeasP ~ block + water * pot ## Matrix linear predictor Z0_ex4 <- mc_id(soya) ## Fitting models fit.grain <- mcglm(linear_pred = c(form.grain), matrix_pred = list(Z0_ex4), data = soya) fit.seed <- mcglm(linear_pred = c(form.seed), matrix_pred = list(Z0_ex4), link = c("log"), variance = c("tweedie"), power_fixed = TRUE, data = soya) fit.peas <- mcglm(linear_pred = c(form.peas), matrix_pred = list(Z0_ex4), link = "logit", variance = "binomialP", Ntrial = list(soya$totalpeas), data = soya) ## Multivariate model fit.joint <- mcglm(linear_pred = c(form.grain, form.seed, form.peas), matrix_pred = list(Z0_ex4, Z0_ex4, Z0_ex4), link = c("identity", "log", "logit"), variance = c("constant", "tweedie", "binomialP"), Ntrial = list(NULL, NULL, soya$totalpeas), data = soya) ## Comparing univariate and multivariate models rbind(gof(list(fit.grain, fit.seed, fit.peas)), gof(fit.joint)) ## Correlation between response variables summary(fit.joint, verbose = TRUE, print = "Correlation") ## ANOVA anova(fit.joint) ## END CODE EXAMPLE 4 --------------------------------------------------- ## EXAMPLE 5 - BIVARIATE REPEATED MEASURES MODELS FOR COUNT DATA -------- ## Loading data data("Hunting", package = "mcglm") ## Linear predictors form.OT <- OT ~ METHOD * ALT + SEX + ALT * poly(MONTH, 4) form.BD <- BD ~ METHOD * ALT + SEX + ALT * poly(MONTH, 3) ## Matrix linear predictor Z0_ex5 <- mc_id(Hunting) Z1_ex5 <- mc_mixed(~ 0 + HUNTER.MONTH, data = Hunting) ## Fitting fit1_ex5 <- mcglm(linear_pred = c(form.BD, form.OT), matrix_pred = list(c(Z0_ex5, Z1_ex5), c(Z0_ex5, Z1_ex5)), link = c("log","log"), variance = c("poisson_tweedie","poisson_tweedie"), power_fixed = c(FALSE, FALSE), offset = list(log(Hunting$OFFSET), log(Hunting$OFFSET)), control_algorithm = list(max_iter = 200, verbose = FALSE), data = Hunting) ## ANOVA anova(fit1_ex5) ## Summary summary(fit1_ex5, print = c("power", "Dispersion", "Correlation")) ## END CODE EXAMPLE 5 --------------------------------------------------- ## EXAMPLE 6 - MULTIVARIATE TWEEDIE MODELS FOR SPATIAL AREAL DATA ------- ## Loading data data("soil", package = "mcglm") ## Linear predictors form.ca <- CA ~ COORD.X * COORD.Y + SAND + SILT + CLAY + PHWATER form.mg <- MG ~ COORD.X * COORD.Y + SAND + SILT + CLAY + PHWATER form.k <- K ~ COORD.X * COORD.Y + SAND + SILT + CLAY + PHWATER ## Matrix linear predictor list_neigh <- tri2nb(coords = soil[, 1:2]) Z_ex6 <- mc_car(list_neigh = list_neigh) ## Time consuming !!! ## Univariate models ## Fit CA fit.ca <- mcglm(linear_pred = c(form.ca), matrix_pred = list(Z_ex6), link = "log", variance = "tweedie", covariance = "inverse", power_fixed = FALSE, data = soil, control_algorithm = list(max_iter = 500, tuning = 0.1)) ## Fit MG fit.mg <- mcglm(linear_pred = c(form.mg), matrix_pred = list(Z_ex6), link = "log", variance = "tweedie", covariance = "inverse", power_fixed = FALSE, data = soil, control_algorithm = list(max_iter = 500, tuning = 0.05)) ## Fit K fit.k <- mcglm(linear_pred = c(form.k), matrix_pred = list(Z_ex6), link = "log", variance = "tweedie", covariance = "inverse", power_fixed = FALSE, data = soil, control_algorithm = list(max_iter = 500, tuning = 0.05)) ## Initial values ini <- list() ini$regression <- list("CA" = coef(fit.ca, type = "beta")$Estimates, "MG" = coef(fit.mg, type = "beta")$Estimates, "K" = coef(fit.k, type = "beta")$Estimates) ini$power <- list("CA" = coef(fit.ca, type = "power")$Estimates, "MG" = coef(fit.mg, type = "power")$Estimates, "K" = coef(fit.k, type = "power")$Estimates) ini$tau <- list("CA" = coef(fit.ca, type = "tau")$Estimates, "MG" = coef(fit.mg, type = "tau")$Estimates, "K" = coef(fit.k, type = "tau")$Estimates) ini$rho <- c(0, 0, 0) ## Fitting multivariate model fit_ex6 <- mcglm(linear_pred = c(form.ca, form.mg, form.k), matrix_pred = list(Z_ex6, Z_ex6, Z_ex6), link = c("log","log","log"), variance = c("tweedie","tweedie","tweedie"), covariance = c("inverse","inverse","inverse"), power_fixed = c(FALSE, FALSE, FALSE), control_initial = ini, data = soil, control_algorithm = list(max_iter = 100)) ## END ----------------------------------------------------------------