## Required libraries are loaded for the analysis library("ggplot2") library("tidyverse", quietly = TRUE) library("nlme", quietly = TRUE) library("gridExtra", quietly = TRUE) library("foreign") library("lattice") ## Loading the skewlmm library library("skewlmm") ## Example with balanced data: mice weight data ## Load and preprocess miceweight data data("miceweight") miceweight <- miceweight %>% mutate(interWeek = week - 4) %>% mutate(interTreat = if_else(interWeek < 0, "C", treat)) ## Plot individual trajectories and mean trends for miceweight data a1<-ggplot(miceweight,aes(x=week,y=weight,group=mouseID)) + geom_line(alpha=.3) + stat_summary(aes(group = 1),geom = "line", fun= mean, colour=1,size=1) + scale_x_continuous(breaks= pretty_breaks())+ theme_minimal(base_size=9) + facet_wrap(~treat) + geom_vline(xintercept = 4, linetype = 2) ## Figure 1 a1 ## Fit a linear mixed model using nlme package fitlme <- lme(weight ~ interWeek*interTreat, data=miceweight,random=~interWeek|mouseID) ## Visualize random effects: histograms and Q-Q plots g1<-nlme::ranef(fitlme) %>% dplyr::rename(`intercepts`=`(Intercept)`, `slopes`=interWeek) %>% pivot_longer(cols = everything()) %>% ggplot(aes(x=value))+ geom_histogram(bins=7,aes(y=..density..)) + theme_minimal(base_size=9)+ facet_wrap(~name,scales = "free")+xlab('')+ylab('density') g2<-nlme::ranef(fitlme) %>% dplyr::rename(`intercepts`=`(Intercept)`, `slopes`=interWeek) %>% pivot_longer(cols = everything()) %>% ggplot(aes(sample=value))+geom_qq() + geom_qq_line()+ facet_wrap(~name,scales = 'free')+theme_minimal(base_size=9) ## Figure 2 gridExtra::grid.arrange(g1,g2,ncol=1) ## Fit heavy tails/skewed linear mixed models using package skewlmm # normal fit_norm <- smn.lmm(data = miceweight, formFixed = weight ~ interWeek * interTreat, formRandom = ~ interWeek, groupVar = "mouseID") # slash fit_sl <- update(object = fit_norm, distr = "sl") # skew-slash fit_ssl <- smsn.lmm(data = miceweight, formFixed = weight ~ interWeek * interTreat, formRandom = ~ interWeek, groupVar = "mouseID", distr = "ssl") ## Printing a fitted object fit_ssl ## Model comparison using likelihood ratio test lr.test(fit_sl, fit_ssl) ## Compare Healy plots for normal and skew-slash models ## Figure 3 grid.arrange(healy.plot(fit_norm, calcCI = TRUE)+ theme(plot.title = element_text( face="italic", size=9)), healy.plot(fit_ssl, calcCI = TRUE)+ theme(plot.title = element_text( face="italic", size=9)), nrow=1) ## Other models with autoregressive and DEC dependency structures fit_ssl_ar1 <- update(fit_ssl, depStruct = "ARp", pAR = 1) fit_ssl_ar2 <- update(fit_ssl, depStruct = "ARp", pAR = 2) fit_ssl_DEC <- update(fit_ssl, depStruct = "DEC", timeVar = "interWeek") ## Example of parallel optimization for phi fit_ssl_DEC_par <- update(fit_ssl_DEC, control = lmmControl(parallelphi = TRUE)) fit_ssl_DEC$elapsedTime fit_ssl_DEC_par$elapsedTime ## Compare models using information criteria criteria(list(UNC = fit_ssl, AR1 = fit_ssl_ar1, AR2 = fit_ssl_ar2, DEC = fit_ssl_DEC)) ## Visualize residual autocorrelation ## Figure 4 grid.arrange(plot(acfresid(fit_ssl, calcCI = TRUE, maxLag = 6)) + theme_minimal(base_size=9) + theme(plot.title = element_text( face="italic", size=9)), plot(acfresid(fit_ssl_ar1, calcCI = TRUE, maxLag = 6)) + theme_minimal(base_size=9)+ theme(plot.title = element_text( face="italic", size=9)), nrow=1) ## Summary of the autoregressive skew-slash model summary(fit_ssl_ar1) ## Mahalabolis distance visualizations ## Figure 5 grid.arrange(plot(mahalDist(fit_ssl_ar1), nlabels = 0), weight_plot(fit_ssl_ar1), ncol=2) ## Fitted model plot ## Figure 6 plot(fit_ssl_ar1, type = "normalized") + theme_minimal(base_size=9) + theme(plot.title = element_text( face="italic", size=9)) ## Extracting (asymptotic) confidence intervals confint(fit_ssl_ar1) %>% round(3) ## Calculating bootstrapped intervals - might take a few minutes to compute boot_ssl_ar1 <- confint(fit_ssl_ar1, method = "bootstrap", B=100) boot_ssl_ar1 %>% round(digits = 3) ## Additional option: updating the model to impose diagonal scale matrix for the random effects fit_ssl_ar1D <- update(fit_ssl_ar1, covRandom = "pdDiag") ## Model comparison using LR test lr.test(fit_ssl_ar1, fit_ssl_ar1D) ## Additional option: fixing part of lambda as 0 and using the EM algorithm for estimation fit_ssl1 <- update(fit_ssl, skewind = c(1, 0), control = lmmControl(algorithm = "EM")) ## Example with data measured on continuous time: Six Cities study ## Read the dataset and filter out IDs with only one observation ds <- read.dta("https://content.sph.harvard.edu/fitzmaur/ala2e/fev1.dta") ds <- ds %>% filter(!(id %in% unique(id)[table(id)==1])) # Transform the dataset to create new variables ds <- ds %>% transform(agec = age - 12, time = age - baseage, fev1 = exp(logfev1), htp = case_when(ht> 1.5 ~ ht - 1.5, TRUE ~ 0), agep = case_when(age > 15 ~ age - 15, TRUE ~ 0)) ## Trajectories plot over age and height g1 <- ds %>% ggplot(aes(x = age, y = fev1, group=id))+ geom_line(alpha = .2) + theme_bw() + geom_smooth(aes(x = age, y = fev1, group = 1), se=FALSE, color=4, linewidth = .8, method = 'gam', formula = y ~ s(x, bs = "cs")) + ylab("FEV1") g2 <- ds %>% ggplot(aes(x = ht, y = fev1, group=id))+ geom_line(alpha = .2)+theme_bw() + geom_smooth(aes(x = ht, y = fev1, group = 1), se=FALSE, color=4, linewidth = .8, method = 'gam', formula = y ~ s(x, bs = "cs")) + ylab("FEV1") ## Figure 7 grid.arrange(g1, g2, ncol=2) ## Fitting a normal model using the skewlmm package mod_norm <- smn.lmm(fev1 ~ baseage + age + agep + baseht + ht + htp + age*ht, data = ds, groupVar = "id", formRandom = ~ht, distr = "norm") mod_norm ## Fitting a skew-contaminated normal model mod_scn <- smsn.lmm(fev1 ~ baseage + age + agep + baseht + ht + htp + age*ht, data = ds, groupVar = "id", formRandom = ~ht, distr = "scn", timeVar = "time", depStruct = "DEC") mod_scn ## Model comparison using LR test lr.test(mod_norm, mod_scn) ## Fitting a (symetric) contaminated normal model mod_cn <- update(mod_norm, distr = "cn", timeVar = "time", depStruct = "DEC") ## Testing skewness parameter using a LR test lr.test(mod_cn, mod_scn) ## Updating model with different distributions/dependence structures mod_normCAR1 <- update(mod_cn, distr = "norm", depStruct = "CAR1") mod_normDEC <- update(mod_cn, distr = "norm") mod_cnCAR1 <- update(mod_cn, depStruct = "CAR1") mod_cnUNC <- update(mod_cn, depStruct = "UNC") ## Comparing models using information criteria criteria(list(`UNC-N-LMM` = mod_norm, `CAR-N-LMM` = mod_normCAR1, `DEC-N-LMM` = mod_normDEC, `UNC-CN-LMM` = mod_cnUNC, `CAR-CN-LMM` = mod_cnCAR1, `DEC-CN-LMM` = mod_cn)) ## Plotting fitted model ## Figure 8 mod_cnCAR1 %>% plot() ## Mahalanobis plot ## Figure 9 mahalDist(mod_cnCAR1) %>% plot(nlabels = 0) ## Model summary summary(mod_cnCAR1) ## Example with synthetic data ## First setting: 50 subjects with 5 observations per subject ## Generating a data set with function rsmsn.lmm nj1 <- 5 m <- 50 gendatList <- map(.x = rep(nj1, m), .f = function(nj) rsmsn.lmm(time1 = 1:nj, x1 = cbind(1, 1:nj), z1 = rep(1, nj), beta=c(1, 2), sigma2=.25, D1=.5*diag(1), lambda=2, depStruct="ARp", phi=.5, distr = "st", nu = 5)) gendat_50 <- bind_rows(gendatList, .id="ind") ## Visualizing simulated data set ## Figure 10 ggplot(gendat_50, aes(x=x, y=y, group=ind)) + geom_line(alpha = .5) + stat_summary(aes(group=1), geom="line", fun=mean, col=4, size=1) + theme_bw() ## Fitting AR(1)-ST-LMM fm1 <- smsn.lmm(y ~ x, data=gendat_50, groupVar="ind", depStruct="ARp", pAR=1, distr = "st") ## Extracting asymptotic confidence intervals confint(fm1, method = "asymptotic") %>% round(3) ## Computing bootstraped confidence intervals confint(fm1, method = "bootstrap") %>% round(3) ## Second setting: 1000 subjects with 10 observations per subject ## Generating a data set with function rsmsn.lmm nj1 <- 10 m <- 1000 gendatList <- map(.x = rep(nj1, m), .f = function(nj) rsmsn.lmm(time1 = 1:nj, x1 = cbind(1, 1:nj), z1 = rep(1, nj), beta=c(1, 2), sigma2=.25, D1=.5*diag(1), lambda=2, depStruct="ARp", phi=.5, distr = "st", nu = 5)) gendat_1000 <- bind_rows(gendatList, .id="ind") ## Fitting AR(1)-ST-LMM fm2 <- smsn.lmm(y ~ x, data=gendat_1000, groupVar="ind", depStruct="ARp", pAR=1, distr = "st") ## Extracting asymptotic confidence intervals confint(fm2) %>% round(3)