## Required libraries are loaded for the analysis
library("ggplot2")
library("tidyverse", quietly = TRUE)
library("nlme", quietly = TRUE)
library("gridExtra", quietly = TRUE)
library("foreign")
library("lattice")

## Loading the skewlmm library
library("skewlmm")

## Example with balanced data: mice weight data

## Load and preprocess miceweight data
data("miceweight")

miceweight <- miceweight %>% 
  mutate(interWeek = week - 4) %>% 
  mutate(interTreat = if_else(interWeek < 0, "C", treat))

## Plot individual trajectories and mean trends for miceweight data
a1<-ggplot(miceweight,aes(x=week,y=weight,group=mouseID)) + geom_line(alpha=.3) +
  stat_summary(aes(group = 1),geom = "line", fun= mean, colour=1,size=1) +
  scale_x_continuous(breaks= pretty_breaks())+
  theme_minimal(base_size=9) + facet_wrap(~treat) + geom_vline(xintercept = 4, linetype = 2)
## Figure 1
a1 

## Fit a linear mixed model using nlme package
fitlme <- lme(weight ~ interWeek*interTreat, data=miceweight,random=~interWeek|mouseID)

## Visualize random effects: histograms and Q-Q plots
g1<-nlme::ranef(fitlme) %>% dplyr::rename(`intercepts`=`(Intercept)`,
                                          `slopes`=interWeek) %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(x=value))+ geom_histogram(bins=7,aes(y=..density..)) +
  theme_minimal(base_size=9)+ facet_wrap(~name,scales = "free")+xlab('')+ylab('density')
g2<-nlme::ranef(fitlme) %>% dplyr::rename(`intercepts`=`(Intercept)`,
                                          `slopes`=interWeek) %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(sample=value))+geom_qq() + geom_qq_line()+
  facet_wrap(~name,scales = 'free')+theme_minimal(base_size=9)
## Figure 2
gridExtra::grid.arrange(g1,g2,ncol=1)

## Fit heavy tails/skewed linear mixed models using package skewlmm
# normal 
fit_norm <- smn.lmm(data = miceweight, 
                    formFixed = weight ~ interWeek * interTreat,
                    formRandom = ~ interWeek, groupVar = "mouseID")
# slash
fit_sl <- update(object = fit_norm, distr = "sl")
# skew-slash
fit_ssl <- smsn.lmm(data = miceweight, 
                    formFixed = weight ~ interWeek * interTreat,
                    formRandom = ~ interWeek, groupVar = "mouseID",
                    distr = "ssl")
## Printing a fitted object
fit_ssl

## Model comparison using likelihood ratio test
lr.test(fit_sl, fit_ssl)

## Compare Healy plots for normal and skew-slash models
## Figure 3
grid.arrange(healy.plot(fit_norm, calcCI = TRUE)+ theme(plot.title = element_text( face="italic", size=9)),
             healy.plot(fit_ssl, calcCI = TRUE)+ theme(plot.title = element_text( face="italic", size=9)), nrow=1)

## Other models with autoregressive and DEC dependency structures
fit_ssl_ar1 <- update(fit_ssl, depStruct = "ARp", pAR = 1)
fit_ssl_ar2 <- update(fit_ssl, depStruct = "ARp", pAR = 2)
fit_ssl_DEC <- update(fit_ssl, depStruct = "DEC", timeVar = "interWeek")

## Example of parallel optimization for phi
fit_ssl_DEC_par <- update(fit_ssl_DEC, 
                         control = lmmControl(parallelphi = TRUE))

fit_ssl_DEC$elapsedTime
fit_ssl_DEC_par$elapsedTime

## Compare models using information criteria
criteria(list(UNC = fit_ssl, AR1 = fit_ssl_ar1,
              AR2 = fit_ssl_ar2, DEC = fit_ssl_DEC))

## Visualize residual autocorrelation
## Figure 4
grid.arrange(plot(acfresid(fit_ssl, calcCI = TRUE, maxLag = 6)) + theme_minimal(base_size=9) + theme(plot.title = element_text( face="italic", size=9)),
             plot(acfresid(fit_ssl_ar1, calcCI = TRUE, maxLag = 6)) + theme_minimal(base_size=9)+ theme(plot.title = element_text( face="italic", size=9)), nrow=1)

## Summary of the autoregressive skew-slash model
summary(fit_ssl_ar1)

## Mahalabolis distance visualizations 
## Figure 5
grid.arrange(plot(mahalDist(fit_ssl_ar1), nlabels = 0),
             weight_plot(fit_ssl_ar1), ncol=2)

## Fitted model plot
## Figure 6
plot(fit_ssl_ar1, type = "normalized") + theme_minimal(base_size=9) + theme(plot.title = element_text( face="italic", size=9))

## Extracting (asymptotic) confidence intervals
confint(fit_ssl_ar1) %>% round(3)

## Calculating bootstrapped intervals - might take a few minutes to compute
boot_ssl_ar1 <- confint(fit_ssl_ar1, method = "bootstrap", B=100) 

boot_ssl_ar1 %>% round(digits = 3)

## Additional option: updating the model to impose diagonal scale matrix for the random effects
fit_ssl_ar1D <- update(fit_ssl_ar1, covRandom = "pdDiag")

## Model comparison using LR test
lr.test(fit_ssl_ar1, fit_ssl_ar1D)

## Additional option: fixing part of lambda as 0 and using the EM algorithm for estimation
fit_ssl1 <- update(fit_ssl, skewind = c(1, 0), 
                   control = lmmControl(algorithm = "EM"))


## Example with data measured on continuous time: Six Cities study

## Read the dataset and filter out IDs with only one observation
ds <- read.dta("https://content.sph.harvard.edu/fitzmaur/ala2e/fev1.dta")
ds <- ds %>% filter(!(id %in% unique(id)[table(id)==1]))

# Transform the dataset to create new variables
ds <- ds %>% transform(agec = age - 12, time = age - baseage,
                       fev1 = exp(logfev1),
                       htp = case_when(ht> 1.5 ~ ht - 1.5,
                                       TRUE ~ 0),
                       agep = case_when(age > 15 ~ age - 15,
                                        TRUE ~ 0))

## Trajectories plot over age and height
g1 <- ds %>% ggplot(aes(x = age, y = fev1, group=id))+
  geom_line(alpha = .2) + theme_bw() +
  geom_smooth(aes(x = age, y = fev1, group = 1), se=FALSE, color=4,
              linewidth = .8, method = 'gam', formula = y ~ s(x, bs = "cs")) +
  ylab("FEV1")
g2 <- ds %>% ggplot(aes(x = ht, y = fev1, group=id))+
  geom_line(alpha = .2)+theme_bw() +
  geom_smooth(aes(x = ht, y = fev1, group = 1), se=FALSE, color=4, linewidth = .8, 
              method = 'gam', formula = y ~ s(x, bs = "cs")) +
  ylab("FEV1")
## Figure 7
grid.arrange(g1, g2, ncol=2)

## Fitting a normal model using the skewlmm package
mod_norm <- smn.lmm(fev1 ~ baseage + age + agep + 
                      baseht + ht + htp + age*ht, data = ds, 
                    groupVar = "id", formRandom = ~ht, 
                    distr = "norm")
mod_norm

## Fitting a skew-contaminated normal model
mod_scn <- smsn.lmm(fev1 ~ baseage + age + agep + 
                      baseht + ht + htp + age*ht, data = ds,
                    groupVar = "id", formRandom = ~ht, 
                    distr = "scn",
                    timeVar = "time", depStruct = "DEC")
mod_scn

## Model comparison using LR test
lr.test(mod_norm, mod_scn)

## Fitting a (symetric) contaminated normal model
mod_cn <- update(mod_norm, distr = "cn", timeVar = "time", depStruct = "DEC")
## Testing skewness parameter using a LR test
lr.test(mod_cn, mod_scn)

## Updating model with different distributions/dependence structures
mod_normCAR1 <- update(mod_cn, distr = "norm", depStruct = "CAR1")
mod_normDEC <- update(mod_cn, distr = "norm")
mod_cnCAR1 <- update(mod_cn, depStruct = "CAR1")
mod_cnUNC <- update(mod_cn, depStruct = "UNC")
## Comparing models using information criteria
criteria(list(`UNC-N-LMM` = mod_norm,
              `CAR-N-LMM` = mod_normCAR1,
              `DEC-N-LMM` = mod_normDEC,
              `UNC-CN-LMM` = mod_cnUNC,
              `CAR-CN-LMM` = mod_cnCAR1,
              `DEC-CN-LMM` = mod_cn))

## Plotting fitted model
## Figure 8
mod_cnCAR1 %>% plot()

## Mahalanobis plot 
## Figure 9
mahalDist(mod_cnCAR1) %>% plot(nlabels = 0)

## Model summary
summary(mod_cnCAR1)

## Example with synthetic data

## First setting: 50 subjects with 5 observations per subject

## Generating a data set with function rsmsn.lmm
nj1 <- 5 
m <- 50
gendatList <- map(.x = rep(nj1, m), 
                  .f = function(nj) rsmsn.lmm(time1 = 1:nj, 
                                              x1 = cbind(1, 1:nj), 
                                              z1 = rep(1, nj), 
                                              beta=c(1, 2), 
                                              sigma2=.25, 
                                              D1=.5*diag(1), 
                                              lambda=2, 
                                              depStruct="ARp", phi=.5, 
                                              distr = "st", nu = 5))
gendat_50 <- bind_rows(gendatList, .id="ind")

## Visualizing simulated data set
## Figure 10
ggplot(gendat_50, aes(x=x, y=y, group=ind)) + geom_line(alpha = .5) +
 stat_summary(aes(group=1), geom="line", 
              fun=mean, col=4, size=1) + theme_bw()

## Fitting AR(1)-ST-LMM
fm1 <- smsn.lmm(y ~ x, data=gendat_50, groupVar="ind", depStruct="ARp",
                pAR=1, distr = "st")

## Extracting asymptotic confidence intervals
confint(fm1, method = "asymptotic") %>% round(3)

## Computing bootstraped confidence intervals
confint(fm1, method = "bootstrap") %>% round(3)

## Second setting: 1000 subjects with 10 observations per subject

## Generating a data set with function rsmsn.lmm
nj1 <- 10
m <- 1000
gendatList <- map(.x = rep(nj1, m), 
                  .f = function(nj) rsmsn.lmm(time1 = 1:nj, 
                                              x1 = cbind(1, 1:nj), 
                                              z1 = rep(1, nj), 
                                              beta=c(1, 2), 
                                              sigma2=.25, 
                                              D1=.5*diag(1), 
                                              lambda=2, 
                                              depStruct="ARp", phi=.5, 
                                              distr = "st", nu = 5))
gendat_1000 <- bind_rows(gendatList, .id="ind")

## Fitting AR(1)-ST-LMM
fm2 <- smsn.lmm(y ~ x, data=gendat_1000, groupVar="ind", depStruct="ARp",
                pAR=1, distr = "st")

## Extracting asymptotic confidence intervals
confint(fm2) %>% round(3)