set.seed(0)

# Loading packages
library("nlme")
library("lme4")
library("varTestnlme")
library("datasets")
library("saemix")
library("mvtnorm")
library("dplyr")
library("tidyr")

# Fitting the models
## Linear model
# Linear model 1, with correlated slope and intercept
lm1.h1.lme4 <- lmer(distance ~ 1 + Sex + age + age * Sex + (1 + scale(age) | Subject),
                    data = Orthodont, REML = FALSE)
lm1.h0.lme4 <- lmer(distance ~ 1 + Sex + age + age * Sex + (1 | Subject),
                    data = Orthodont, REML = FALSE)
lm1.h1.nlme <- lme(distance ~ 1 + Sex + age + age * Sex,
                   random = ~ 1 + age | Subject, data = Orthodont, method = "ML")
lm1.h0.nlme <- lme(distance ~ 1 + Sex + age + age * Sex,
                   random = ~ 1 | Subject, data = Orthodont, method = "ML")

# Linear model 2, with independent slope and intercept
lm2.h1.lme4 <- lmer(distance ~ 1 + Sex + age + age * Sex + (1 + age || Subject),
                    data = Orthodont, REML = FALSE)
lm2.h1.nlme <- lme(distance ~ 1 + Sex + age + age * Sex,
                   random = list(Subject = pdDiag(~ 1 + age)),
                   data = Orthodont, method = "ML")
lm2.h0.lme4 <- lm1.h0.lme4
lm2.h0.nlme <- lm1.h0.nlme

# Linear model 3, testing for randomness in the model
lm3.h1.lme4 <- lm2.h1.lme4
lm3.h1.nlme <- lm2.h1.nlme
lm3.h0 <- lm(distance ~ 1 + Sex + age + age * Sex, data = Orthodont)

# Generalized linear model
data("cbpp", package = "lme4")
glm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
              family = binomial, data = cbpp)
glm0 <- glm(cbind(incidence, size - incidence) ~ period,
            family = binomial, data = cbpp)

# Nonlinear mixed models
data("Loblolly", package = "datasets")
start <- c(Asym = 103, R0 = -8.5, lrc = -3.2)
nlm1.nlme <- nlme(height ~ SSasymp(age, Asym, R0, lrc),
                  fixed = Asym + R0 + lrc ~ 1,
                  random = pdDiag(Asym + R0 + lrc ~ 1),
                  start = start, data = Loblolly)
nlm0.nlme <- nlme(height ~ SSasymp(age, Asym, R0, lrc),
                  fixed = Asym + R0 + lrc ~ 1,
                  random = pdDiag(Asym ~ 1),
                  start = start, data = Loblolly)
nlm1.lme4 <- nlmer(height ~ SSasymp(age, Asym, R0, lrc) ~ (0 + Asym + R0 + lrc || Seed),
                   start = start, data = Loblolly)
nlm0.lme4 <- nlmer(height ~ SSasymp(age, Asym, R0, lrc) ~ (0 + Asym | Seed),
                   start = start, data = Loblolly)

# Structural model for saemix: asymptotic regression, matching SSasymp()
modelSSasymp <- function(psi, id, xidep) {
  Asym <- psi[id, 1]
  R0   <- psi[id, 2]
  lrc  <- psi[id, 3]
  age  <- xidep[, 1]
  ypred <- Asym + (R0 - Asym) * exp(-exp(lrc) * age)
  return(ypred)
}
psi0 <- matrix(start, ncol = 3, dimnames = list(NULL, c("Asym", "R0", "lrc")))
saemix.modelH1 <- saemixModel(model = modelSSasymp,
                              description = "Asymptotic regression",
                              psi0 = psi0,
                              transform.par = c(0, 0, 0),
                              covariance.model = diag(3),
                              omega.init = 10 * diag(3),
                              error.model = "constant")
saemix.modelH0 <- saemixModel(model = modelSSasymp,
                              description = "Asymptotic regression",
                              psi0 = psi0,
                              transform.par = c(0, 0, 0),
                              covariance.model = diag(c(1, 0, 0)),
                              omega.init = 10 * diag(3),
                              error.model = "constant")
saemix.data <- saemixData(name.data = Loblolly,
                          name.group = c("Seed"),
                          name.predictors = c("age"),
                          name.response = c("height"))
ctrl <- saemixControl(print = FALSE, save = FALSE, save.graphs = FALSE)
nlm1.saemix <- saemix(saemix.modelH1, saemix.data, ctrl)
nlm0.saemix <- saemix(saemix.modelH0, saemix.data, ctrl)

# Variance components testing
## Linear model
# Case 1
vt <- varCompTest(lm1.h1.lme4, lm1.h0.lme4)
print(vt)
summary(vt)
library("EnvStats")
print.htestEnvStats(vt)
vt <- varCompTest(lm1.h1.nlme, lm1.h0.nlme, pval.comp = "both")
summary(vt)

# Case 2
vt <- varCompTest(lm2.h1.lme4, lm2.h0.lme4)
summary(vt)

# Case 3
vt <- varCompTest(lm3.h1.nlme, lm3.h0)
summary(vt)
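# The remaining Case 3 calls below exercise the optional arguments of
# varCompTest (see ?varCompTest): pval.comp = "both" reports both the bounds
# on the p-value and the p-value computed from the (Monte Carlo approximated)
# weights of the chi-bar-square distribution, while fim = "compute" estimates
# the Fisher information matrix by parametric bootstrap instead of extracting
# it from the fitted object (the default, fim = "extract").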
vt <- varCompTest(lm3.h1.nlme, lm3.h0, pval.comp = "both")
summary(vt)
vt <- varCompTest(lm3.h1.lme4, lm3.h0, pval.comp = "both")
summary(vt)
vt <- varCompTest(lm3.h1.nlme, lm3.h0, pval.comp = "both", fim = "compute")
summary(vt)
vt <- varCompTest(lm3.h1.lme4, lm3.h0, pval.comp = "both", fim = "compute")
summary(vt)

## Generalized linear mixed model
vt <- varCompTest(glm1, glm0)
summary(vt)

## Nonlinear mixed model
vt <- varCompTest(nlm1.nlme, nlm0.nlme)
summary(vt)
vt <- varCompTest(nlm1.lme4, nlm0.lme4)
summary(vt)
vt <- varCompTest(nlm1.nlme, nlm0.nlme, pval.comp = "both", fim = "compute")
summary(vt)
vt <- varCompTest(nlm1.lme4, nlm0.lme4, pval.comp = "both", fim = "compute")
summary(vt)
vt <- varCompTest(nlm1.saemix, nlm0.saemix, pval.comp = "both", fim = "extract")
summary(vt)

# Functions to perform B bootstrap repetitions
# Summary statistics extraction to be used with nlme
mySumm <- function(m, diagSigma = FALSE) {
  beta <- nlme::fixef(m)
  resStd <- stats::sigma(m)
  Sigma <- varTestnlme::extractVarCov(m)
  if (diagSigma) {
    theta <- c(beta, diag(Sigma), resStd)
  } else {
    theta <- c(beta, Sigma[lower.tri(Sigma, diag = TRUE)], resStd)
  }
  return(theta)
}

# Summary statistics extraction to be used with lme4
mySummlme4 <- function(m, diagSigma = FALSE) {
  beta <- lme4::fixef(m)
  resStd <- stats::sigma(m)
  grpFactor <- names(lme4::getME(m, "cnms"))
  vc <- lme4::VarCorr(m)
  Sigma <- as.matrix(Matrix::bdiag(as.matrix(vc)))
  if (diagSigma) {
    theta <- c(beta, diag(Sigma), resStd)
  } else {
    theta <- c(beta, Sigma[lower.tri(Sigma, diag = TRUE)], resStd)
  }
  return(theta)
}

# Function to perform (repet) repetitions of a bootstrap procedure of size B
boot.rep <- function(B, repet, seed) {
  set.seed(seed)
  dfall <- data.frame()
  for (u in 1:repet) {
    print(u)
    # generate one dataset from the linear mixed model
    phi <- rmvnorm(n, c(a, b), matcov)
    y0 <- phi[, 1] + phi[, 2] %*% t(tj) + matrix(rnorm(n * J, 0, s), nrow = n, ncol = J)
    sub <- 1:n %*% matrix(1, nrow = 1, ncol = J)
    d0 <- data.frame(y = as.vector(t(y0)),
                     tj = rep(tj, n),
                     sj = rep(sj, n),
                     subject = as.vector(t(sub)))
    # fit the model under H1 with nlme and with lme4
    d0_grouped <- groupedData(y ~ sj | subject, data = d0, order.groups = FALSE)
    fity0h1 <- lme(y ~ 1 + tj, data = d0_grouped,
                   random = list(subject = pdSymm(~ 1 + tj)), method = "ML")
    fity0h1lme4 <- lmer(y ~ 1 + tj + (1 + tj | subject), data = d0, REML = FALSE)
    thetanlme <- mySumm(fity0h1, diagSigma = FALSE)
    thetalme4 <- mySummlme4(fity0h1lme4, diagSigma = FALSE)
    # inverse FIM extracted from the fitted objects
    invfimnlme <- suppressWarnings(
      try(varTestnlme::extractFIM.lme(fity0h1, "full"), silent = TRUE))
    if (inherits(invfimnlme, "try-error"))
      invfimnlme <- matrix(NA, ncol = length(thetanlme), nrow = length(thetanlme))
    invfimlme4 <- as.matrix(merDeriv::vcov.lmerMod(fity0h1lme4, full = TRUE))
    # inverse FIM estimated by parametric bootstrap of size B
    invfimbootnlme <- varTestnlme::bootinvFIM.lme(fity0h1, B = B, seed = u)
    invfimbootlme4 <- varTestnlme::bootinvFIM.merMod(fity0h1lme4, B = B, seed = u)
    # 95% Wald confidence intervals based on each version of the inverse FIM
    ci.extractnlme <- cbind(thetanlme - 1.96 * sqrt(diag(invfimnlme)),
                            thetanlme + 1.96 * sqrt(diag(invfimnlme)))
    ci.extractlme4 <- cbind(thetalme4 - 1.96 * sqrt(diag(invfimlme4)),
                            thetalme4 + 1.96 * sqrt(diag(invfimlme4)))
    ci.bootnlme <- cbind(thetanlme - 1.96 * sqrt(diag(invfimbootnlme)),
                         thetanlme + 1.96 * sqrt(diag(invfimbootnlme)))
    ci.bootlme4 <- cbind(thetalme4 - 1.96 * sqrt(diag(invfimbootlme4)),
                         thetalme4 + 1.96 * sqrt(diag(invfimbootlme4)))
    df <- data.frame(rbind(ci.extractnlme, ci.extractlme4, ci.bootnlme, ci.bootlme4))
    df$typeCI <- rep(c("extractnlme", "extractlme4", "bootstrapnlme", "bootstraplme4"),
                     each = length(thetalme4))
    names(df) <- c("ci.low", "ci.upp", "typeCI")
    df$rep <- u
    df$Parameter <- rep(colnames(invfimbootnlme), 4)
    df$trueValue <- trueTheta
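    # The point estimates from nlme and lme4 are stored alongside each CI so
    # that coverage can later be computed from this single long-format data
    # frame; rep(c(thetanlme, thetalme4), 2) matches the typeCI ordering above
    # (extractnlme, extractlme4, bootstrapnlme, bootstraplme4).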
    df$estimates <- rep(c(thetanlme, thetalme4), 2)
    rownames(df) <- NULL
    dfall <- rbind(dfall, df)
  }
  dfall$boot.size <- B
  return(dfall)
}

## FIM estimation
## Note that small discrepancies can be observed in the results obtained with
## the nlme package, between the manuscript and the user's computer, or between
## two different users' computers. This happens when two machines use different
## versions of the linear algebra library relied on by nlme, which may handle
## matrix multiplication slightly differently.
## These differences occur:
## - at the data generation step, through the rmvnorm function, which relies on
##   matrix multiplication when the covariance matrix matcov is non-diagonal
## - inside the lme function, at the fitting stage
## These small differences affect the estimated parameter values and the FIM,
## and therefore the confidence intervals. As a consequence, when counting the
## proportion of confidence intervals containing the true value used to
## generate the data, the results can differ if an interval contains the true
## value on one computer but not on the other. However, results should be
## identical when run on the same computer with the same settings.

# Generate simulated data
n <- 100; J <- 10                                  # sizes
a <- 5; b <- 7; c <- 2                             # mean of random effects
tj <- seq(0, 20, length.out = J); sj <- tj^2       # covariates
s1 <- 0.8; s2 <- 1; s3 <- 0.8; rho12 <- 0.5
matcov <- matrix(c(s1^2, rho12 * s1 * s2,
                   rho12 * s1 * s2, s2^2), nrow = 2)  # covariance of random effects
s <- 1.2                                           # residual standard deviation
trueTheta <- c(a, b, s1^2, rho12 * s1 * s2, s2^2, s)

res_boot_100 <- boot.rep(B = 100, repet = 500, seed = 0)
res_boot_300 <- boot.rep(B = 300, repet = 500, seed = 0)

res_boot_100$inCI <- (res_boot_100$ci.low <= res_boot_100$trueValue &
                        res_boot_100$trueValue <= res_boot_100$ci.upp)
res_boot_300$inCI <- (res_boot_300$ci.low <= res_boot_300$trueValue &
                        res_boot_300$trueValue <= res_boot_300$ci.upp)

# Aggregate results to compute empirical coverage by type of method
comp_B100 <- aggregate(inCI ~ Parameter * typeCI, data = res_boot_100, FUN = mean) %>%
  pivot_wider(names_from = typeCI, values_from = inCI)
names(comp_B100) <- c("Parameter", "boot_100_lme4", "boot_100_nlme",
                      "extract_100_lme4", "extract_100_nlme")
comp_B300 <- aggregate(inCI ~ Parameter * typeCI, data = res_boot_300, FUN = mean) %>%
  pivot_wider(names_from = typeCI, values_from = inCI)
names(comp_B300) <- c("Parameter", "boot_300_lme4", "boot_300_nlme",
                      "extract_300_lme4", "extract_300_nlme")

# Average the results corresponding to the extraction of the FIM over the two
# sets of 500 replicates
table_all <- left_join(comp_B100, comp_B300) %>%
  dplyr::mutate(extract_lme = (extract_100_nlme + extract_300_nlme) / 2,
                extract_lme4 = (extract_100_lme4 + extract_300_lme4) / 2) %>%
  select(!c("extract_100_lme4", "extract_100_nlme",
            "extract_300_lme4", "extract_300_nlme"))
table_all$Parameter <- factor(table_all$Parameter,
                              levels = c("(Intercept)", "tj", "var((Intercept))",
                                         "var(tj)", "cov((Intercept),tj)",
                                         "sd_residual"),
                              labels = c("beta_1", "beta_2", "gamma_1^2",
                                         "gamma_2^2", "gamma_{1,2}", "sigma^2"))

library("xtable")
xt <- xtable(table_all[order(table_all$Parameter), c(1, 6, 7, 3, 5, 2, 4)],
             include.rownames = FALSE, digits = 10)
saveRDS(table_all, "table_all.rds")
saveRDS(res_boot_100, "res_boot_100.rds")
saveRDS(res_boot_300, "res_boot_300.rds")
print(xt)
xtable(xt, digits = 8)
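# Optional reproducibility aid (not part of the original analysis): record the
# session information, which includes the BLAS/LAPACK libraries identified in
# the note above as a source of small numerical discrepancies across machines.
sessionInfo()
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")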