## --- ## This script reproduces the calculations in ## ## Stef van Buuren: Broken Stick Model for Irregular Longitudinal Data ## To appear in Journal of Statistical Software. ## ## Adapted from: jss4594.Rmd ## Date: Dec 28, 2022 ## ## Run as ## - "Compile Report" in RStudio to produce HTML, PDF or MS Word ## - source file ## - interactive in R console ## ## This code ## - recreates and writes figures into the "../Figures/" directory ## - assumes that the file "../Figures/JAMES_tryout.png" exists ## ## Good luck! ## --- ## Check and install needed packages needed <- c("AGD", "brokenstick", "coda", "dplyr", "ggplot2", "grDevices", "gridExtra", "knitr", "lattice", "lme4", "MASS", "mice", "splines", "tidyr") missing <- needed[!(needed %in% utils::installed.packages()[, "Package"])] if (length(missing)) { if (interactive()) { answer <- askYesNo(paste("Package(s)", paste(missing, collapse = " "), "needed. Install from CRAN?")) if (answer) install.packages(missing, repos = "https://cloud.r-project.org/") } else { stop("Install missing packages: ", paste(missing, collapse = ", ")) } } ## ---- include=FALSE----------------------------------------------------------- options(tinytex.verbose = TRUE) ## ----setup, include=FALSE----------------------------------------------------- if (packageVersion("brokenstick") < "2.4.0") stop("brokenstick 2.4.0 needed") require("brokenstick") require("dplyr") require("tidyr") require("ggplot2") require("lattice") old <- options(digits = 3, width = 72) knitr::opts_chunk$set(echo = TRUE, cache = FALSE, fig.height = 7, fig.width = 7, out.width = "100%", fig.path = "../Figures/") colors <- c("#006CC2B3", "#B61A51B3", "gray50", "#006CC2CC", "#B61A51CC", "gray50") ## ---- visits, fig.height = 4, fig.cap = "Abacus plot of observation times for ## the first 20 children of the SMOCC data. Data collection was done through 10 ## waves. Each dot represents the age at which an observation was made. The ## variability in timing between children rises with age.", echo=FALSE, ## set.theme = TRUE---- data <- smocc_200 data$month <- data$age * 12 schedule <- c(0, 0.92, 1.84, 3, 6, 9, 12, 15, 18, 24) schedule_labels <- c("b", "4w", "8w", "3m", "6m", "9m", "12m", "15m", "18m", "24m") lattice::dotplot( reorder(id, rev(id)) ~ month, data = data[1:200, ], pch = 19, col = colors[4], xlab = "Age (in months)", cex = 0.7, scales = list(x = list(tck = 0, at = schedule, labels = schedule_labels)), panel = function(...) { panel.refline(v = schedule) panel.dotplot(...) } ) ## ----plotfit2echo, echo = TRUE, eval = ## FALSE ## fit2 <- brokenstick(hgt_z ~ age | id, smocc_200, seed = 1, knots = 0:2) ## fit9 <- brokenstick(hgt_z ~ age | id, smocc_200, seed = 1, knots = ## round(c(0:3, 6, 9, 12, 15, 18, 24)/12, 4)) ## ----plotfit2x, echo = FALSE, fig.height = 5, fig.cap = "Data and fitted ## curves for three children (blue = observed data, red = fitted curves). The ## broken stick model with two lines (top) gives a crude approximation of the ## data. The model with nine lines (bottom) follows the data quite closely.", ## set.theme = TRUE---- fit2 <- brokenstick(hgt_z ~ age | id, smocc_200, seed = 1, knots = 0:2) fit9 <- brokenstick(hgt_z ~ age | id, smocc_200, seed = 1, knots = round(c(0:3, 6, 9, 12, 15, 18, 24)/12, 4)) ids <- c(10001, 10005, 10022) m2 <- plot(fit2, smocc_200, group = ids, xlab = "Age (years)", ylab = "Length (SDS)") m9 <- plot(fit9, smocc_200, group = ids, xlab = "Age (years)", ylab = "Length (SDS)") gridExtra::grid.arrange(m2, m9, nrow = 2) ## ----------------------------------------------------------------------------- fit <- brokenstick(hgt_z ~ age | id, data = smocc_200, knots = c(0, 0.5, 1, 2), seed = 12321) summary(fit) ## ----------------------------------------------------------------------------- bse <- predict(fit, x = "knots", shape = "wide") dim(bse) head(bse, 3) ## ----------------------------------------------------------------------------- library("splines") data <- brokenstick::smocc_200 internal <- c(0, 0.5, 1, 2) X <- bs(data$age, knots = internal, Boundary.knots = c(0, 2.68), degree = 1) colnames(X) <- paste("age", c(internal, 2.68), sep = "_") head(X) ## ----include = FALSE, eval = FALSE, echo = FALSE------------------------------ ## # inverse age coding ## X <- bs(-data$age, knots = c(-2.68, -2, -1), Boundary.knots = c(-2.68, 0), ## degree = 1) ## colSums(X) ## rowSums(X) ## ## # inverse age model ## data <- smocc_200 ## data$age <- -data$age ## fitn <- brokenstick(hgt_z ~ age | id, data = data, knots = c(-2, -1, 0)) ## plot(fitn) ## ----mod-lmer, cache=FALSE---------------------------------------------------- data <- cbind(brokenstick::smocc_200[, c("id", "age", "hgt_z")], X) ctl_lmer <- lme4::lmerControl( check.conv.grad = lme4::.makeCC("warning", tol = 0.04)) f <- hgt_z ~ 0 + age_0 + age_0.5 + age_1 + age_2 + age_2.68 + (0 + age_0 + age_0.5 + age_1 + age_2 + age_2.68 | id) mod_lmer <- lme4::lmer(f, data, control = ctl_lmer) class(mod_lmer) ## ----------------------------------------------------------------------------- bse_lmer <- t(t(lme4::ranef(mod_lmer)$id) + lme4::fixef(mod_lmer)) head(round(bse_lmer, 3), 3) ## ----mod-kr, cache=FALSE------------------------------------------------------ ctl_kr <- control_kr() mod_kr <- kr(y = data$hgt_z, x = X, g = data$id, control = ctl_kr) ## ----------------------------------------------------------------------------- library("coda") summary(mod_kr$mod$beta) ## ----------------------------------------------------------------------------- fit_lmer <- brokenstick(hgt_z ~ age | id, data = smocc_200, knots = c(0, 0.5, 1, 2), method = "lmer", control = ctl_lmer) head(predict(fit_lmer, x = "knots", shape = "wide"), 3) ## ----dataprep----------------------------------------------------------------- library("brokenstick") head(smocc_200, 3) ## ----y2z---------------------------------------------------------------------- library("AGD") z <- with(smocc_200, y2z(y = hgt, x = age, sex = ifelse(sex == "male", "M", "F"), ref = nl4.hgt)) identical(z, smocc_200$hgt_z) ## ----zscores, echo = FALSE, fig.height=3, fig.cap = "Distribution of height ## SDS for 200 SMOCC children aged 0-2 years relative to the Dutch 1997 length ## references. The distribution closely follows the normal distribution. A few ## outliers in the left tail are genuine observations from children born ## pre-term.", set.theme = TRUE, warning=FALSE---- ggplot(smocc_200, aes(hgt_z)) + xlim(-6, 6) + xlab("Height SDS") + theme_light() + geom_histogram(binwidth = 0.1, colour = "white", fill = colors[4], size = 0.1) + stat_function(fun = function(x) dnorm(x, 0, 1) * 194, color = colors[5], size = 0.7) ## ----z2y---------------------------------------------------------------------- y <- with(smocc_200, AGD::z2y(z = hgt_z, x = age, sex = ifelse(sex == "male", "M", "F"), ref = nl4.hgt)) all.equal(y, smocc_200$hgt, tol = 0.0001) ## ----plotsds, echo=FALSE, fig.height=4, fig.cap = "Detrended growth chart. ## Length growth of 52 infants 0-2 years expressed in the Z-score scale. Each ## trajectory represents a child. Trajectories are almost flat in this ## representation. More centile crossings occur near the start of the ## trajectory.", set.theme = TRUE, warning=FALSE---- ggplot(smocc_200[1:500, ], aes(x = age, y = hgt_z, group = id, color = as.factor(id))) + geom_line(size = 0.2) + geom_point(size = 0.7) + scale_colour_viridis_d(option = "viridis") + xlab("Age (years)") + ylab("Length SDS") + theme_light() + theme(legend.position = "none") ## ----line1calc---------------------------------------------------------------- fit1 <- brokenstick(hgt_z ~ age | id, smocc_200, knots = c(0, 2)) get_knots(fit1, hide = "none") ## ----line1plot, fig.height=2.5, fig.cap = "Observed data (blue) and fitted ## broken stick model (red) with a straight line between birth and two years. ## While this model picks up the overall trend, it ignores any systematic ## patterns around the line.", set.theme = TRUE, warning=FALSE---- ids <- c(10001, 10005, 10022) plot(fit1, group = ids, xlab = "Age (years)", ylab = "Length (SDS)") ## ----plotfit2z, warning=FALSE, eval=FALSE------------------------------------- ## fit2 <- brokenstick(hgt_z ~ age | id, smocc_200, knots = 0:2) ## plot(fit2, group = ids, ## xlab = "Age (years)", ylab = "Length (SDS)") ## ----------------------------------------------------------------------------- summary(fit2) ## ----fit9, cache = TRUE, warning = FALSE, eval = FALSE------------------------ ## fit9 <- brokenstick(hgt_z ~ age | id, smocc_200, seed = 1, ## knots = round(c(0:3, 6, 9, 12, 15, 18, 24)/12, 4)) ## ----echo = FALSE------------------------------------------------------------- fit9 <- brokenstick::fit_200 ## ----plotfit9, echo = FALSE, warning=FALSE, eval=FALSE------------------------ ## plot(fit9, group = ids, xlab = "Age (years)", ylab = "Length (SDS)") ## ----------------------------------------------------------------------------- p1 <- predict(fit9, shape = "vector") head(p1) ## ----------------------------------------------------------------------------- p2 <- predict(fit9, x = "knots", shape = "wide") head(p2, 3) ## ----------------------------------------------------------------------------- p3 <- predict(fit9, x = "knots") head(p3, 3) ## ----------------------------------------------------------------------------- table(p3$.source) ## ----predx-------------------------------------------------------------------- head(predict(fit9, x = c(0.42, 1.33, 4), shape = "wide", include_data = FALSE), 3) ## ----pred10001---------------------------------------------------------------- predict(fit9, group = 10001, shape = "vector") ## ----pred10001a--------------------------------------------------------------- predict(fit9, x = c(0.42, 1.33), group = 10001, shape = "vector") ## ----pred10001x--------------------------------------------------------------- predict(fit9, x = c(0.42, 1.33), y = c(-0.5, -1), group = c(10001, 10001)) ## ----------------------------------------------------------------------------- data <- data.frame( age = c(0, 0.12, 0.32, 0.62, 1.1, 0.25, 0.46), hgt_z = c(-1.2, -1.8, -1.7, -1.9, -2.1, -1.9, -1.5), id = c(rep("Fred", 5), rep("Alice", 2))) p <- predict(fit9, newdata = data, x = "knots") ## ----echo = TRUE, fig.height=2.5, fig.width=5.2, out.width = "67%", ## fig.cap="Broken stick model prediction for sparse data. Alice and Fred have ## few observations (blue points). The broken stick model borrows information ## from other children to calculate fitted trajectories over the full age range ## (red points).", set.theme=TRUE---- plot(fit9, newdata = data, ylim = c(-2.5, 0), xlab = "Age (years)", ylab = "Length (SDS)") ## ----obspred, echo = FALSE, fig.height=4, fig.cap="Comparison between ## predicted (vertical) and observed (horizontal) height in the SDS scale (left) ## and CM scale (right) in the SMOCC data. The model almost perfectly recreates ## the observed data."---- pred <- predict(fit_200, shape = "vector") oldpar <- par(mfrow = c(1, 2)) MASS::eqscplot(x = smocc_200$hgt_z, xlab = "Height (SDS)", y = pred, ylab = "Predicted Z-score", pch = ".", xlim = c(-4, 4), ylim = c(-4, 4)) abline(0, 1, col = "grey") y_cm <- with(smocc_200, z2y(z = hgt_z, x = age, sex = ifelse(sex == "male", "M", "F"), ref = nl4.hgt)) yhat_cm <- with(smocc_200, z2y(z = pred, x = age, sex = ifelse(sex == "male", "M", "F"), ref = nl4.hgt)) MASS::eqscplot(x = y_cm, xlab = "Height (cm)", y = yhat_cm, ylab = "Predicted (cm)", pch = ".") abline(0, 1, col = "grey") par <- par(oldpar) ## ----outofsample, echo = FALSE, eval=FALSE------------------------------------ ## # quick and dirty out-of-sample simulation ## r2a <- rep(NA, 100) ## r2b <- rep(NA, 100) ## for (i in 1:100) { ## set.seed(i) ## ids <- unique(smocc_200$id) ## in_train <- sample(ids, size = round(2 / 3 * length(ids))) ## in_test <- setdiff(ids, in_train) ## train <- smocc_200 %>% ## filter(id %in% in_train) ## test <- smocc_200 %>% ## filter(id %in% in_test) ## ## fit_train <- brokenstick(hgt_z ~ age | id, train, ## knots = round(c(0:3, 6, 9, 12, 15, 18, 24) / 12, 4)) ## pred <- predict(fit_train, newdata = test, shape = "vector") ## r2a[i] <- get_r2(fit_train, test) ## ## y_cm <- with(test, ## z2y(z = hgt_z, ## x = age, ## sex = ifelse(sex == "male", "M", "F"), ## ref = nl4.hgt)) ## yhat_cm <- with(test, ## z2y(z = pred, ## x = age, ## sex = ifelse(sex == "male", "M", "F"), ## ref = nl4.hgt)) ## r2b[i] <- cor(y_cm, yhat_cm, use = "complete.obs")^2 ## } ## oldpar <- par(mfrow = c(1, 2)) ## hist(r2a) ## hist(r2b) ## par <- par(oldpar) ## ## summary(r2a) ## summary(r2b) ## ----------------------------------------------------------------------------- train <- smocc_200[smocc_200$age < 2.1, ] fit1 <- brokenstick(hgt_z ~ age | id, data = train, knots = c(0, 1, 2)) fit2 <- brokenstick(hgt_z ~ age | id, data = train, knots = c(0, 1, 2, 3)) p1 <- predict(fit1, newdata = smocc_200, shape = "vector") p2 <- predict(fit2, newdata = smocc_200, shape = "vector") table(is.na(p1)) table(is.na(p2)) ## ----reverse, echo=TRUE, eval = FALSE----------------------------------------- ## data <- smocc_200 ## data$age <- -data$age ## fit2_mirror <- brokenstick(hgt_z ~ age | id, data = data, ## knots = c(-2, -1, 0), hide = "left") ## plot(fit2_mirror, groups = c(10001, 10005, 10022)) ## ----------------------------------------------------------------------------- knots <- round(c(0, 1/3, 1, 2, 4, 6, 10, 14, 24, 29), 3) labels <- c("birth", "4m", "1y", "2y", "4y", "6y", "10y", "14y", "24y", "") ## ----tbc1, echo=FALSE, fig.height=4, fig.cap = "Scatterplot of BMI SDS and ## log(age + 0.2) for the Terneuzen cohort data. The plot shows differential ## amounts of clustering of measurements around the specified break ages. ## Clustering is tighter at some ages (birth, 1y, 14y) than at others (4m, 2y, ## 24y).", set.theme = TRUE, warning=FALSE---- ggplot(mice::tbc, aes(age + 0.2, bmi.z)) + theme_light() + scale_x_log10(name = "log(age + 0.2)", breaks = knots + 0.2, labels = labels, minor_breaks = NULL) + scale_y_continuous(name = "BMI SDS", breaks = -4:4, limits = c(-4, 4)) + geom_point(size = 0.5, col = colors[1], shape = 20, na.rm = TRUE) ## ----tbc_lmer, cache=TRUE----------------------------------------------------- ctl <- lme4::lmerControl( check.conv.grad = lme4::.makeCC("warning", 0.02, NULL), check.conv.singular = lme4::.makeCC("ignore", 0.001)) fit_lmer <- brokenstick(bmi.z ~ age | id, data = mice::tbc, knots = knots, boundary = c(0, 29), method = "lmer", control = ctl) ## ----tbc_kr, cache=TRUE------------------------------------------------------- fit_kr <- brokenstick(bmi.z ~ age | id, data = mice::tbc, knots = knots, boundary = c(0, 29), seed = 41441, cormodel = "argyle") ## ----tbc-trajectories, fig.height=4, fig.cap = "Observed and fitted BMI SDS ## trajectories of six subjects as predicted by the lmer (red) and Argyle ## (green) models. The figure illustrates that fitted trajectories from the ## Argyle model are stabler in areas with sparse data.", set.theme = TRUE, ## warning=FALSE, echo=FALSE---- ids <- c(8, 1259, 2447, 7019, 7460, 7646) g <- plot(fit_lmer, group = ids, ylab = "BMI SDS", xlab = "Age (years)", ylim = c(-2.5, 2.5)) data <- predict( object = fit_kr, newdata = fit_kr$data, x = knots[-length(knots)], group = ids, include_data = FALSE) g <- g + ggplot2::geom_line(ggplot2::aes_string(x = "age", y = ".pred"), data = data, color = grDevices::hcl(120, 100, 40, 0.8)) + ggplot2::geom_point(ggplot2::aes_string(x = "age", y = ".pred"), data = data, color = grDevices::hcl(120, 100, 40, 0.7), size = 2) print(g) ## ----tbc_data---------------------------------------------------------------- tbc1 <- mice::tbc %>% filter(!is.na(ao) & first) %>% select(id, nocc, sex) tbc2 <- mice::tbc.target %>% filter(id %in% tbc1$id) prd <- predict(fit_kr, mice::tbc, x = "knots", shape = "wide", group = tbc1$id) data <- bind_cols(prd, select(tbc1, -id), select(tbc2, -id)) head(data, 3) ## ----tbc-trajectories-all, echo=FALSE, fig.height=3.5, fig.cap = "Fitted BMI ## SDS trajectories for 92 subjects, of which 18 persons have adult overweight ## (red = observed BMI at adult age > 1.3 SDS), while 74 individuals (grey) do ## not have adult overweight. The figure shows that the BMI distribution before ## the age of 2y is similar in both groups, whereas BMI at ages 10y and 14y is ## highly predictive of adult overweight.", set.theme = TRUE, warning=FALSE---- pd <- data %>% select(id, ao, `0`:`24`) %>% tidyr::pivot_longer(`0`:`24`, names_to = "age", values_to = "bmi") %>% mutate(age = as.numeric(age), logage = log10(age + 0.2)) ggplot(pd, aes(x = age + 0.2, y = bmi, group = id, colour = factor(ao))) + theme_light() + theme(legend.position = "none") + geom_hline(yintercept = 1.3, colour = "black", lwd = 1, lty = 2) + geom_line(data = subset(pd, ao == 0)) + geom_line(data = subset(pd, ao == 1)) + scale_x_log10(name = "Age (log10(year + 0.2))", breaks = knots + 0.2, labels = labels, minor_breaks = NULL) + scale_y_continuous(name = "BMI SDS", breaks = -4:4, limits = c(-4, 3)) + scale_colour_manual(values = c("grey", colors[5])) ## ----------------------------------------------------------------------------- m1 <- lm(bmi.z.jv ~ `6`, data) m2 <- lm(bmi.z.jv ~ `6` + I(`6`-`4`), data) anova(m1, m2) ## ----t2t, cache=TRUE---------------------------------------------------------- fit <- brokenstick(hgt_z ~ age | id, data = smocc_200, knots = 1:4/2) omega <- get_omega(fit) t2t <- omega + diag(fit$sigma2, ncol(omega)) round(cov2cor(t2t), 2) ## ----t2tm, cache=TRUE--------------------------------------------------------- fit <- brokenstick(hgt_z ~ age | id, data = smocc_200, knots = seq(0, 2, 0.1), cormodel = "argyle") omega <- get_omega(fit) t2t <- omega + diag(fit$sigma2, ncol(omega)) dim(t2t) ## ----weightloss-data, fig.cap="Daily body weight (KG) for 12 subjects followed ## for 63 days under one of three conditions. The graph illustrates the ## irregular nature of the data. Some trajectories are almost complete and ## regular, whereas others miss many daily measurements or display surprising ## jumps around their average weight level.", fig.height=4, warning=FALSE, ## set.theme=TRUE---- data <- brokenstick::weightloss ggplot(data, aes(day, body_weight, group = subject, colour = condition)) + scale_x_continuous(name = "Day", breaks = c(0, 21, 42, 63), minor_breaks = c(7, 14, 28, 35, 49, 56)) + ylab("Body weight (KG)") + geom_line() + geom_point(size = 0.7) + theme_light() + theme(legend.position = "bottom") ## ----fig5, echo=FALSE, eval=FALSE--------------------------------------------- ## ggplot(data, aes(factor(week), body_weight)) + ## geom_boxplot() + ## facet_wrap(vars(subject), scales = "free") + ## theme_light() ## ----weightloss-constant, fig.height=4.5, fig.cap = "Constant model. Observed ## and fitted trajectories for a model that summarises each experimental period ## by a constant. The estimated level is approximately the mean of the ## measurements made during the period. The model produces sudden jumps at ages ## when the condition changes and thus fails to describe the data well.", ## set.theme = TRUE, warning=FALSE---- data <- brokenstick::weightloss fit0 <- brokenstick(body_weight ~ day | subject, data, knots = c(0, 21, 42, 63), degree = 0, method = "lmer", hide = "none") plot(fit0, size_y = 0, color_y = rep("grey", 2), scales = "free_y", xlab = "Day", ylab = "Body weight (KG)", n_plot = 12, ncol = 4) ## ----------------------------------------------------------------------------- prd <- data.frame(predict(fit0, data, x = "knots", shape = "wide")) control <- prd[, 2] diet <- prd[, 3] diet[c(4, 12)] <- prd[c(4, 12), 4] activity <- prd[, 4] activity[c(4, 12)] <- prd[c(4, 12), 3] effects <- 1000 * data.frame( diet_control = diet - control, activity_control = activity - control, activity_diet = activity - diet) round(effects) round(colMeans(effects)) ## ----weightloss-slope, fig.height=4.5, fig.cap = "Broken stick model. Observed ## and fitted trajectories for a model that summarises each experimental period ## by a line. The slope indicates the change during the period. The model ## connects lines from adjacent periods and provides an informative description ## of the data.", set.theme = TRUE, warning=FALSE---- ctl_lmer <- lme4::lmerControl( check.conv.grad = lme4::.makeCC("warning", tol = 0.01)) fit1 <- brokenstick(body_weight ~ day | subject, data, knots = c(0, 21, 42, 63), method = "lmer", control = ctl_lmer, hide = "none") plot(fit1, size_y = 0, color_y = rep("grey", 2), size_yhat = 1.5, scales = "free_y", , xlab = "Day", ylab = "Body weight (KG)", n_plot = 12, ncol = 4) ## ----------------------------------------------------------------------------- prd <- data.frame(predict(fit1, data, x = "knots", shape = "wide")) control <- prd[, 3] - prd[, 2] diet <- prd[, 4] - prd[, 3] diet[c(4, 12)] <- prd[c(4, 12), 5] - prd[c(4, 12), 4] activity <- prd[, 5] - prd[, 4] activity[c(4, 12)] <- prd[c(4, 12), 4] - prd[c(4, 12), 3] effects <- 1000 * data.frame( control = control, diet = diet, activity = activity) round(effects) round(colMeans(effects)) ## ----------------------------------------------------------------------------- df <- data.frame(y = 1000 * c(diet, activity), activity = rep(c(0, 1), each = 12), activity2 = rep(c(rep(0, 3), 1, rep(0, 7), 1), 2)) coef(lm(y ~ 1, data = df)) coef(lm(y ~ activity, data = df)) coef(lm(y ~ activity + activity2, data = df)) ## ----zscores1, echo=TRUE------------------------------------------------------ boy <- data.frame(x = c(1, 14), y = c(52.6, 81.7)) ref <- AGD::nl4.hgt boy$z <- AGD::y2z(y = boy$y, x = boy$x/12, sex = "M", ref = ref) boy$z ## ----interpolationplots, echo = FALSE, fig.height = 3, fig.cap = "The effect ## of three interpolation methods (linear in cm (blue), linear in SDS (red), and ## by the broken stick model (green)). For each method, the vertical line ## indicates the age at which the interpolated curve crosses the mean of the ## age-conditional length distribution. Both linear methods results in ## unrealistic trajectories at intermediate ages. For example, all points on the ## blue lines are too low. Interpolation by the broken stick method is the only ## alternative that correctly describes faster growth during the first months.", ## set.palette = TRUE---- sds <- c(-2, -1, 0, 1, 2) age <- seq(1, 14, 0.5) z <- rep(sds, times = length(age)) x <- rep(age, each = length(sds)) w <- AGD::z2y(z = z, x = x/12, sex = 'M', ref = ref) w <- matrix(w, ncol = length(sds), byrow = TRUE) dimnames(w) <- list(age, sds) mycolors <- c(grDevices::hcl(120, 100, 40, 0.8), grDevices::hcl(240, 100, 40, 0.8), grDevices::hcl(0, 100, 40, 0.8)) oldpar <- par(mfrow = c(1, 2), mar = c(4, 4, 1, 2)) lwd <- 2 # raw scale plot matplot(x = age, y = w, type = "l", lty = 1, col = "grey", lwd = c(1, 1, 2, 1, 1), xlab = "Age (months)", ylab = "Length (cm)") abline(v = 0.95*12, col = mycolors[2], lty = 3) abline(v = 7.5, col = mycolors[3], lty = 3) abline(v = 0.38*12, col = mycolors[1], lty = 3) matpoints(x = boy$x, y = boy$y, pch = 20, cex = 1.5, col = mycolors[2], type = "p") # A: linear in cm matpoints(x = boy$x, y = boy$y, pch = 20, lwd = lwd, col = mycolors[2], type = "l") # B: linear in Z zout <- approx(x = boy$x, y = boy$z, xout = as.numeric(rownames(w)))$y yout <- AGD::z2y(x = age/12, z = zout, ref = ref) matpoints(x = age, y = yout, pch = 20, lwd = lwd, col = mycolors[3], type = "l") # C: brokenstick in Z z <- rep(NA, length(age)) z[1] <- boy$z[1]; z[length(z)] <- boy$z[2] zout <- predict(fit_200, x = age/12, y = z, shape = "vector") yout <- AGD::z2y(x = age/12, z = zout, ref = ref) matpoints(x = age, y = yout, pch = 20, lwd = lwd, col = mycolors[1], type = "l") # Z-scale plot - linear in cm v <- matrix(c(c(1, 14), rep(-2:2, each = 2)), ncol = 6, byrow = FALSE) matplot(x = v[,1], y = v[,2:6], type = "l", lty = 1, col = "grey", lwd = c(1, 1, 1.5, 1, 1), ylim = c(-2.5, 2.5), xlab = "Age (months)", ylab = "Length (SDS)") abline(v = 0.95*12, col = mycolors[2], lty = 3) abline(v = 7.5, col = mycolors[3], lty = 3) abline(v = 0.38*12, col = mycolors[1], lty = 3) # A: linear in cm yout <- approx(x = boy$x, y = boy$y, xout = age)$y zout <- AGD::y2z(x = age/12, y = yout, ref = ref) matpoints(x = boy$x, y = boy$z, pch = 20, cex = 1.5, col = mycolors[2], type = "p") matpoints(x = age, y = zout, pch = 20, lwd = lwd, col = mycolors[2], type = "l") # B: linear in Z zout <- approx(x = boy$x, y = boy$z, xout = age)$y yout <- AGD::z2y(x = age/12, z = zout, ref = ref) matpoints(x = age, y = zout, lwd = lwd, col = mycolors[3], type = "l") # C: brokenstick z <- rep(NA, length(age)) z[1] <- boy$z[1]; z[length(z)] <- boy$z[2] zout <- predict(fit_200, x = age/12, y = z, shape = "vector") yout <- AGD::z2y(x = age/12, z = zout, ref = ref) matpoints(x = age, y = zout, col = mycolors[1], lwd = lwd, type = "l") par(oldpar) ## ----brokenstickinterpolation, echo = TRUE------------------------------------ age <- round(seq(1, 14, 0.5), 3) z <- rep(NA, length(age)) z[1] <- boy$z[1]; z[length(z)] <- boy$z[2] zout <- predict(fit_200, x = age/12, y = z, shape = "vector") yout <- AGD::z2y(x = age/12, z = zout, ref = ref) ## ----mi-code, cache=TRUE, fig.height=3, fig.cap = "Observed data (blue) and 20 ## imputed trajectories (grey) for three subjects from the SMOCC data. Imputed ## trajectories are possible realizations of the observed data in the ## hypothetical case that the observations are made at the specified break ages. ## Note that the pattern of child 10001 is more certain than of child 10022.", ## set.theme = TRUE, warning=FALSE---- knots <- round(c(0, 1, 2, 3, 6, 9, 12, 15, 18, 24)/12, 4) data <- bind_rows(smocc_200[!is.na(smocc_200$hgt_z), ], expand.grid(id = unique(smocc_200$id), age = knots)) fit_kr <- brokenstick(hgt_z ~ age | id, data = data, knots = knots, nimp = 20, seed = 15244) plot(fit_kr, show = c(TRUE, FALSE, TRUE), group = c(10001, 10005, 10022), xlab = "Age (years)", ylab = "Length (SDS)") ## ----t2t-imp------------------------------------------------------------------ cormat <- cor(matrix(t(fit_kr$imp), ncol = length(knots))) dimnames(cormat) <- list(knots, knots) round(cormat, 2) ## ----james, echo=FALSE, fig.height=4, fig.cap = "Curve matching. Predict ## infant length at 14 months given the length data up to 6 months using 10 ## matches. The red curve is the observed data for the target child. The bundle ## of grey curves are observed length curves from other children that match the ## trajectory of the target child up to month 6. The blue line represents the ## most likely future trajectory for the target child towards month 14. The ## variation between the grey curves represents the amount of uncertainty of the ## trajectory prediction.", set.theme = TRUE, warning=FALSE---- knitr::include_graphics("../Figures/JAMES_tryout.png") ## ----cm1, cache=TRUE---------------------------------------------------------- donor_data <- smocc_200 %>% filter(id != "10001") target_data <- smocc_200 %>% filter(id == "10001" & age < 0.51) knots <- round(c(0, 1, 2, 3, 6, 9, 12, 15, 18, 24)/12, 4) fit <- brokenstick(hgt_z ~ age | id, data = donor_data, knots = knots, seed = 15244) ## ----cm2---------------------------------------------------------------------- covariates <- donor_data %>% group_by(id) %>% slice(1) bse <- predict(fit, donor_data, x = "knots", shape = "wide") donors <- bind_cols(covariates, select(bse, -id)) model <- lm(`1.25` ~ `0` + `0.0833` + `0.1667` + `0.25` + `0.5` + sex + ga + bw, data = donors) summary(model) ## ----cm3---------------------------------------------------------------------- donors_pred <- predict(model) names(donors_pred) <- donors$id target <- bind_cols( slice(target_data, 1), select(predict(fit, target_data, x = "knots", shape = "wide"), -id)) target_pred <- predict(model, newdata = target) matches <- sort(abs(donors_pred - target_pred))[1:10] matches ## ----cm4, fig.height=4, fig.cap = "Curve matching. Observed (blue) and fitted ## (red) trajectories of the 10 closest matches for subject 10001. Fitted values ## at the outcome age (1.25y) are close to the predicted value for the target ## child (0.101 SD).", set.theme = TRUE, warning=FALSE---- ids <- as.numeric(names(matches)) plot(fit, group = ids, xlim = c(0, 1.4), size_y = 1, size_yhat = 0, xlab = "Age (years)", ylab = "Length (SDS)", ncol = 5)