set.seed(0)

# Loading packages
library("nlme")
library("lme4")
library("varTestnlme")
library("datasets")
library("saemix")
library("mvtnorm")
library("dplyr")
library("tidyr")

# Fitting the models
## Linear model
# Linear model 1, with correlated slope and intercept
lm1.h1.lme4 <- lmer(distance ~ 1 + Sex + age + age * Sex + (1 + scale(age) | Subject),
                    data = Orthodont, REML = FALSE)
lm1.h0.lme4 <- lmer(distance ~ 1 + Sex + age + age * Sex + (1 | Subject),
                    data = Orthodont, REML = FALSE)
lm1.h1.nlme <- lme(distance ~ 1 + Sex + age + age * Sex,
                   random = ~ 1 + age | Subject, data = Orthodont, method = "ML")
lm1.h0.nlme <- lme(distance ~ 1 + Sex + age + age * Sex,
                   random = ~ 1 | Subject, data = Orthodont, method = "ML")

# Linear model 2, with independent slope and intercept
lm2.h1.lme4 <- lmer(distance ~ 1 + Sex + age + age * Sex + (1 + age || Subject),
                    data = Orthodont, REML = FALSE)
lm2.h1.nlme <- lme(distance ~ 1 + Sex + age + age * Sex,
                   random = list(Subject = pdDiag(~ 1 + age)),
                   data = Orthodont, method = "ML")
lm2.h0.lme4 <- lm1.h0.lme4
lm2.h0.nlme <- lm1.h0.nlme

# Linear model 3, testing for randomness in the model
lm3.h1.lme4 <- lm2.h1.lme4
lm3.h1.nlme <- lm2.h1.nlme
lm3.h0 <- lm(distance ~ 1 + Sex + age + age * Sex, data = Orthodont)

# Generalized linear model
data("cbpp", package = "lme4")
glm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
              family = binomial, data = cbpp)
glm0 <- glm(cbind(incidence, size - incidence) ~ period,
            family = binomial, data = cbpp)

# Nonlinear mixed models
data("Loblolly", package = "datasets")
start <- c(Asym = 103, R0 = -8.5, lrc = -3.2)
nlm1.nlme <- nlme(height ~ SSasymp(age, Asym, R0, lrc),
                  fixed = Asym + R0 + lrc ~ 1,
                  random = pdDiag(Asym + R0 + lrc ~ 1),
                  start = start, data = Loblolly)
nlm0.nlme <- nlme(height ~ SSasymp(age, Asym, R0, lrc),
                  fixed = Asym + R0 + lrc ~ 1,
                  random = pdDiag(Asym ~ 1),
                  start = start, data = Loblolly)
nlm1.lme4 <- nlmer(height ~ SSasymp(age, Asym, R0, lrc) ~ (0 + Asym + R0 + lrc || Seed),
                   start = start, data = Loblolly)
nlm0.lme4 <- nlmer(height ~ SSasymp(age, Asym, R0, lrc) ~ (0 + Asym | Seed),
                   start = start, data = Loblolly)

# Structural model for saemix: asymptotic regression, matching SSasymp()
modelSSasymp <- function(psi, id, xidep) {
  Asym <- psi[id, 1]
  R0   <- psi[id, 2]
  lrc  <- psi[id, 3]
  age  <- xidep[, 1]
  ypred <- Asym + (R0 - Asym) * exp(-exp(lrc) * age)
  return(ypred)
}
psi0 <- matrix(start, ncol = 3, dimnames = list(NULL, c("Asym", "R0", "lrc")))
saemix.modelH1 <- saemixModel(model = modelSSasymp,
                              description = "Asymptotic regression",
                              psi0 = psi0,
                              transform.par = c(0, 0, 0),
                              covariance.model = diag(3),
                              omega.init = 10 * diag(3),
                              error.model = "constant")
saemix.modelH0 <- saemixModel(model = modelSSasymp,
                              description = "Asymptotic regression",
                              psi0 = psi0,
                              transform.par = c(0, 0, 0),
                              covariance.model = diag(c(1, 0, 0)),
                              omega.init = 10 * diag(3),
                              error.model = "constant")
saemix.data <- saemixData(name.data = Loblolly,
                          name.group = c("Seed"),
                          name.predictors = c("age"),
                          name.response = c("height"))
ctrl <- saemixControl(print = FALSE, save = FALSE, save.graphs = FALSE)
nlm1.saemix <- saemix(saemix.modelH1, saemix.data, ctrl)
nlm0.saemix <- saemix(saemix.modelH0, saemix.data, ctrl)

# Variance components testing
## Linear model
# Case 1
vt <- varCompTest(lm1.h1.lme4, lm1.h0.lme4)
print(vt)
summary(vt)
library("EnvStats")
print.htestEnvStats(vt)
vt <- varCompTest(lm1.h1.nlme, lm1.h0.nlme, pval.comp = "both")
summary(vt)

# Case 2
vt <- varCompTest(lm2.h1.lme4, lm2.h0.lme4)
summary(vt)

# Case 3
vt <- varCompTest(lm3.h1.nlme, lm3.h0)
summary(vt)
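# The remaining Case 3 calls below exercise the optional arguments of
# varCompTest (see ?varCompTest): pval.comp = "both" reports both the bounds
# on the p-value and the p-value computed from the (Monte Carlo approximated)
# weights of the chi-bar-square distribution, while fim = "compute" estimates
# the Fisher information matrix by parametric bootstrap instead of extracting
# it from the fitted object (the default, fim = "extract").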
vt <- varCompTest(lm3.h1.nlme, lm3.h0, pval.comp = "both")
summary(vt)
vt <- varCompTest(lm3.h1.lme4, lm3.h0, pval.comp = "both")
summary(vt)
vt <- varCompTest(lm3.h1.nlme, lm3.h0, pval.comp = "both", fim = "compute")
summary(vt)
vt <- varCompTest(lm3.h1.lme4, lm3.h0, pval.comp = "both", fim = "compute")
summary(vt)

## Generalized linear mixed model
vt <- varCompTest(glm1, glm0)
summary(vt)

## Nonlinear mixed model
vt <- varCompTest(nlm1.nlme, nlm0.nlme)
summary(vt)
vt <- varCompTest(nlm1.lme4, nlm0.lme4)
summary(vt)
vt <- varCompTest(nlm1.nlme, nlm0.nlme, pval.comp = "both", fim = "compute")
summary(vt)
vt <- varCompTest(nlm1.lme4, nlm0.lme4, pval.comp = "both", fim = "compute")
summary(vt)
vt <- varCompTest(nlm1.saemix, nlm0.saemix, pval.comp = "both", fim = "extract")
summary(vt)

# Functions to perform B bootstrap repetitions
# Summary statistics extraction to be used with nlme
mySumm <- function(m, diagSigma = FALSE) {
  beta <- nlme::fixef(m)
  resStd <- stats::sigma(m)
  Sigma <- varTestnlme::extractVarCov(m)
  if (diagSigma) {
    theta <- c(beta, diag(Sigma), resStd)
  } else {
    theta <- c(beta, Sigma[lower.tri(Sigma, diag = TRUE)], resStd)
  }
  return(theta)
}

# Summary statistics extraction to be used with lme4
mySummlme4 <- function(m, diagSigma = FALSE) {
  beta <- lme4::fixef(m)
  resStd <- stats::sigma(m)
  grpFactor <- names(lme4::getME(m, "cnms"))
  vc <- lme4::VarCorr(m)
  Sigma <- as.matrix(Matrix::bdiag(as.matrix(vc)))
  if (diagSigma) {
    theta <- c(beta, diag(Sigma), resStd)
  } else {
    theta <- c(beta, Sigma[lower.tri(Sigma, diag = TRUE)], resStd)
  }
  return(theta)
}

# Function to perform (repet) repetitions of a bootstrap procedure of size B
boot.rep <- function(B, repet, seed) {
  set.seed(seed)
  dfall <- data.frame()
  for (u in 1:repet) {
    print(u)
    # generate one dataset from the linear mixed model
    phi <- rmvnorm(n, c(a, b), matcov)
    y0 <- phi[, 1] + phi[, 2] %*% t(tj) + matrix(rnorm(n * J, 0, s), nrow = n, ncol = J)
    sub <- 1:n %*% matrix(1, nrow = 1, ncol = J)
    d0 <- data.frame(y = as.vector(t(y0)),
                     tj = rep(tj, n),
                     sj = rep(sj, n),
                     subject = as.vector(t(sub)))
    # fit the model under H1 with nlme and with lme4
    d0_grouped <- groupedData(y ~ sj | subject, data = d0, order.groups = FALSE)
    fity0h1 <- lme(y ~ 1 + tj, data = d0_grouped,
                   random = list(subject = pdSymm(~ 1 + tj)), method = "ML")
    fity0h1lme4 <- lmer(y ~ 1 + tj + (1 + tj | subject), data = d0, REML = FALSE)
    thetanlme <- mySumm(fity0h1, diagSigma = FALSE)
    thetalme4 <- mySummlme4(fity0h1lme4, diagSigma = FALSE)
    # inverse FIM extracted from the fitted objects
    invfimnlme <- suppressWarnings(
      try(varTestnlme::extractFIM.lme(fity0h1, "full"), silent = TRUE))
    if (inherits(invfimnlme, "try-error"))
      invfimnlme <- matrix(NA, ncol = length(thetanlme), nrow = length(thetanlme))
    invfimlme4 <- as.matrix(merDeriv::vcov.lmerMod(fity0h1lme4, full = TRUE))
    # inverse FIM estimated by parametric bootstrap of size B
    invfimbootnlme <- varTestnlme::bootinvFIM.lme(fity0h1, B = B, seed = u)
    invfimbootlme4 <- varTestnlme::bootinvFIM.merMod(fity0h1lme4, B = B, seed = u)
    # 95% Wald confidence intervals based on each version of the inverse FIM
    ci.extractnlme <- cbind(thetanlme - 1.96 * sqrt(diag(invfimnlme)),
                            thetanlme + 1.96 * sqrt(diag(invfimnlme)))
    ci.extractlme4 <- cbind(thetalme4 - 1.96 * sqrt(diag(invfimlme4)),
                            thetalme4 + 1.96 * sqrt(diag(invfimlme4)))
    ci.bootnlme <- cbind(thetanlme - 1.96 * sqrt(diag(invfimbootnlme)),
                         thetanlme + 1.96 * sqrt(diag(invfimbootnlme)))
    ci.bootlme4 <- cbind(thetalme4 - 1.96 * sqrt(diag(invfimbootlme4)),
                         thetalme4 + 1.96 * sqrt(diag(invfimbootlme4)))
    df <- data.frame(rbind(ci.extractnlme, ci.extractlme4, ci.bootnlme, ci.bootlme4))
    df$typeCI <- rep(c("extractnlme", "extractlme4", "bootstrapnlme", "bootstraplme4"),
                     each = length(thetalme4))
    names(df) <- c("ci.low", "ci.upp", "typeCI")
    df$rep <- u
    df$Parameter <- rep(colnames(invfimbootnlme), 4)
    df$trueValue <- trueTheta
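    # The point estimates from nlme and lme4 are stored alongside each CI so
    # that coverage can later be computed from this single long-format data
    # frame; rep(c(thetanlme, thetalme4), 2) matches the typeCI ordering above
    # (extractnlme, extractlme4, bootstrapnlme, bootstraplme4).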
    df$estimates <- rep(c(thetanlme, thetalme4), 2)
    rownames(df) <- NULL
    dfall <- rbind(dfall, df)
  }
  dfall$boot.size <- B
  return(dfall)
}

## FIM estimation
## Note that small discrepancies can be observed in the results obtained with
## the nlme package, between the manuscript and the user's computer, or between
## two different users' computers. This happens when two machines use different
## versions of the linear algebra library relied on by nlme, which may handle
## matrix multiplication slightly differently.
## These differences occur:
## - at the data generation step, through the rmvnorm function, which relies on
##   matrix multiplication when the covariance matrix matcov is non-diagonal
## - inside the lme function, at the fitting stage
## These small differences affect the estimated parameter values and the FIM,
## and therefore the confidence intervals. As a consequence, when counting the
## proportion of confidence intervals containing the true value used to
## generate the data, the results can differ if an interval contains the true
## value on one computer but not on the other. However, results should be
## identical when run on the same computer with the same settings.

# Generate simulated data
n <- 100; J <- 10                                  # sizes
a <- 5; b <- 7; c <- 2                             # mean of random effects
tj <- seq(0, 20, length.out = J); sj <- tj^2       # covariates
s1 <- 0.8; s2 <- 1; s3 <- 0.8; rho12 <- 0.5
matcov <- matrix(c(s1^2, rho12 * s1 * s2,
                   rho12 * s1 * s2, s2^2), nrow = 2)  # covariance of random effects
s <- 1.2                                           # residual standard deviation
trueTheta <- c(a, b, s1^2, rho12 * s1 * s2, s2^2, s)

res_boot_100 <- boot.rep(B = 100, repet = 500, seed = 0)
res_boot_300 <- boot.rep(B = 300, repet = 500, seed = 0)

res_boot_100$inCI <- (res_boot_100$ci.low <= res_boot_100$trueValue &
                        res_boot_100$trueValue <= res_boot_100$ci.upp)
res_boot_300$inCI <- (res_boot_300$ci.low <= res_boot_300$trueValue &
                        res_boot_300$trueValue <= res_boot_300$ci.upp)

# Aggregate results to compute empirical coverage by type of method
comp_B100 <- aggregate(inCI ~ Parameter * typeCI, data = res_boot_100, FUN = mean) %>%
  pivot_wider(names_from = typeCI, values_from = inCI)
names(comp_B100) <- c("Parameter", "boot_100_lme4", "boot_100_nlme",
                      "extract_100_lme4", "extract_100_nlme")
comp_B300 <- aggregate(inCI ~ Parameter * typeCI, data = res_boot_300, FUN = mean) %>%
  pivot_wider(names_from = typeCI, values_from = inCI)
names(comp_B300) <- c("Parameter", "boot_300_lme4", "boot_300_nlme",
                      "extract_300_lme4", "extract_300_nlme")

# Average the results corresponding to the extraction of the FIM over the two
# sets of 500 replicates
table_all <- left_join(comp_B100, comp_B300) %>%
  dplyr::mutate(extract_lme = (extract_100_nlme + extract_300_nlme) / 2,
                extract_lme4 = (extract_100_lme4 + extract_300_lme4) / 2) %>%
  select(!c("extract_100_lme4", "extract_100_nlme",
            "extract_300_lme4", "extract_300_nlme"))
table_all$Parameter <- factor(table_all$Parameter,
                              levels = c("(Intercept)", "tj", "var((Intercept))",
                                         "var(tj)", "cov((Intercept),tj)",
                                         "sd_residual"),
                              labels = c("beta_1", "beta_2", "gamma_1^2",
                                         "gamma_2^2", "gamma_{1,2}", "sigma^2"))

library("xtable")
xt <- xtable(table_all[order(table_all$Parameter), c(1, 6, 7, 3, 5, 2, 4)],
             include.rownames = FALSE, digits = 10)
saveRDS(table_all, "table_all.rds")
saveRDS(res_boot_100, "res_boot_100.rds")
saveRDS(res_boot_300, "res_boot_300.rds")
print(xt)
xtable(xt, digits = 8)
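# Optional reproducibility aid (not part of the original analysis): record the
# session information, which includes the BLAS/LAPACK libraries identified in
# the note above as a source of small numerical discrepancies across machines.
sessionInfo()
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")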