# Section 5 Package description
## Getting Started
library("FamEvent")
## Section 5.1 Penetrance curves
penplot(base.parms = c(0.01, 3), vbeta = c(-1.3, 2.35), base.dist = "Weibull",
frailty.dist = "gamma", variation = "frailty", depend = 1, agemin = 20)
## Section 5.2 Family data generation
# Simulating 200 families from population-based design with Weibull baseline and familial correlation generated due to shared gamma frailty
set.seed(4321)
fam <- simfam(N.fam = 200, design = "pop+", variation = "frailty", base.dist = "Weibull",
frailty.dist = "gamma", depend = 1, base.parms = c(0.01, 3),
vbeta = c(-1.13, 2.35), allelefreq = 0.02, agemin = 20)
#Printing the simulated data only for family id = 1
head(fam[fam$famID == 1, ], n = 7)
# Summary of the simulated families
summary(fam)
# Pedigree plot for family id = 1
plot(fam, famid = 1)
# Saving the pedigree plot as a pdf file
plot(fam, famid = 1, pdf = TRUE, file = "pedigreeplot.pdf")
## Section 5.3 Penetrance model estimation
# Fitting a penetrance model accounting for the family design “pop+”
fit <- penmodel(Surv(time, status) ~ gender + mgene, cluster = "famID", gvar = "mgene",
parms = c(0.01, 3, -1.13, 2.35), data = fam, design = "pop+",
base.dist = "Weibull", robust = TRUE)
# Summary of the fitted penetrance model
summary(fit)
# Generating penetrance curves with CIs based on the model fit along with Kaplan-Meier plot from data excluding probands
plot(fit, agemax = 80, conf.int = FALSE, add.KM = TRUE)
## Section 5.4 Age-specific penetrance estimatioin with CI and SE
# Penetrance estimates at given ages and given covariate values and their CIs based on 100 Monte Carlo (MC) simulations
penetrance(fit, fixed = c(1,1), age = c(50, 60, 70), CI = TRUE, MC = 100)
### Section 6 Illustrating examples
## Section 6.1 Penetrance estimation in presence of missing genotype data
#Simulating family data including 30% missing genotypes
set.seed(4321)
fam <- simfam(N.fam = 300, design = "pop+", base.dist = "Weibull", allelefreq = 0.02,
base.parms = c(0.01, 3), vbeta = c(0.5, 2), probandage = c(45, 2.5),
agemin = 15, mrate = 0.3)
# True penetrance values for the assumed model
penplot(base.parms = c(0.01,3), vbeta = c(0.5, 2), base.dist = "Weibull", agemin = 15)
# Fitting penetrance model for complete data
fam0 <- fam[ ! is.na(fam$mgene), ]
fit0 <- penmodel(Surv(time, status) ~ gender + mgene, cluster = "famID", gvar = "mgene",
parms = c(0.01, 1, 0.5, 2), data = fam0, design = "pop+", base.dist = "Weibull")
# Summary of the fitted penetrance model for complete data
summary(fit0)
#Penetrance estimates by age 70 for male carrier (fixed = c(1,1)) and female carriers (fixed = c(0,1)) and their CIs based on 100 MC simulations
penetrance(fit0, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
penetrance(fit0, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)
# EM algorithm for analyzing family data with missing genotypes
fitEM <- penmodelEM(Surv(time, status) ~ gender + mgene, cluster = "famID", gvar = "mgene",
parms = c(0.01, 1, 0.5, 2), data = fam, design = "pop+",
base.dist = "Weibull", method = "mendelian")
# Summary of the model fit
summary(fitEM)
# Penetrance estimates by age 70 for male and female carriers and their CIs
penetrance(fitEM, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
penetrance(fitEM, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)
## Section 6.2 Sample size and power calculation
# Simulating 50 POP+ families
set.seed(5432)
fam50 <- simfam(50, design = "pop+", variation = "none", base.dist = "Weibull",
base.parms = c(0.01, 3), vbeta = c(1.0, 1.0), allelefreq = 0.02,
dominant.m = TRUE, mrate = 0, probandage = c(45, 2.5), agemin = 20)
# Fitting a penetrance model for the simulated data
fit50 <- penmodel(Surv(time, status) ~ gender + mgene, cluster = "famID", gvar = "mgene",
parms = c(0.01, 3, 1, 1), data = fam50, design = "pop+",
base.dist = "Weibull", robust = TRUE)
summary(fit50)
# Simulation-based power calculation based on samples of 50 POP+ families for testing $H_0: \beta_{gene} = 0 \quad vs. \quad H_1: \beta_{gene} > 0$ when the true effect size $\beta_{gene}=1$.
fampower(N.fam = 50, N.sim = 100, effectsize = 1, beta.sex = 1, side = 1,
base.dist = "Weibull", design = "pop+", base.parms = c(0.01, 3),
probandage = c(45, 2.5), agemin = 20)
# Penetrance estimates by age 70 for male and female carriers based on the model fit with the 50 simulated families
penetrance(fit50, fixed=c(1,1), age=70, CI=TRUE, MC=100)
penetrance(fit50, fixed=c(0,1), age=70, CI=TRUE, MC=100)
# Simulating 200 POP+ families
set.seed(5432)
fam200 <- simfam(200, design = "pop+", variation = "none", base.dist = "Weibull",
base.parms = c(0.01, 3), vbeta = c(1.0, 1.0), allelefreq = 0.02,
dominant.m = TRUE, mrate = 0, probandage = c(45,2.5), agemin = 20)
# Fitting a penetrance model for the simulated data
fit200 <- penmodel(Surv(time, status) ~ gender + mgene, cluster = "famID", gvar = "mgene",
parms = c(0.01, 3, 1, 1), data = fam200, design = "pop+",
base.dist = "Weibull", robust = TRUE)
summary(fit200)
# Penetrance estimates by age 70 for male and female carriers based on the model fit with the 200 simulated families
penetrance(fit200, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
penetrance(fit200, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)
## Section 6.3 Optimal designs
# Simulating three sets of family data: high, intermediate and low riks families
set.seed(5432)
fam.high <- simfam(150, design = "cli+", variation = "none", base.dist = "Weibull",
base.parms = c(0.01, 3), vbeta = c(1.0, 1.5), allelefreq = 0.02,
dominant.m = TRUE, mrate = 0, probandage = c(45,2.5), agemin = 20)
fam.med <- simfam(150, design = "pop+", variation = "none", base.dist = "Weibull",
base.parms = c(0.01, 3), vbeta = c(1, 0.8), allelefreq = 0.02,
dominant.m = TRUE, mrate = 0, probandage = c(45, 2.5), agemin = 20)
fam.low <- simfam(150, design = "pop", variation = "none", base.dist = "Weibull",
base.parms = c(0.01, 3), vbeta = c(1, 0.5), allelefreq = 0.02,
dominant.m = TRUE, mrate = 0, probandage = c(45, 2.5), agemin = 20)
# Fitting penetrance models for each dataset
fit.high <- penmodel(Surv(time, status) ~ gender + mgene, cluster = "famID",
gvar = "mgene", parms = c(0.01, 3, 1.0, 1.5), data = fam.high,
design = "cli+", base.dist = "Weibull")
fit.med <- penmodel(Surv(time, status) ~ gender + mgene, cluster = "famID",
gvar = "mgene", parms = c(0.01, 3, 1.0, 0.8), data = fam.med,
design = "pop+", base.dist = "Weibull")
fit.low <- penmodel(Surv(time, status) ~ gender + mgene, cluster = "famID",
gvar = "mgene", parms = c(0.01, 3, 1.0, 0.5), data = fam.low,
design = "pop", base.dist = "Weibull")
# Summary of the model fits
summary(fit.high)
summary(fit.med)
summary(fit.low)
# Optimal designs to minimize the variance for estimating the log hazard ratio between mutation carriers and noncarriers.
se.rr <- c(summary(fit.high)$estimates[4, 2], summary(fit.med)$estimates[4, 2],
summary(fit.low)$estimates[4, 2])
se.rr
var.rr <- se.rr^2
wt <- 1/var.rr
300*wt/sum(wt)
# Optimal design based on variances for male penetrance
set.seed(5432)
phigh.m <- penetrance(fit.high, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
pmed.m <- penetrance(fit.med, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
plow.m <- penetrance(fit.low, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
se.male <- c(phigh.m$se, pmed.m$se, plow.m$se)
se.male
var.male <- se.male^2
wt <- 1/var.male
300*wt/sum(wt)
# Optimal design based on variances for female penetrance
set.seed(5432)
phigh.w <- penetrance(fit.high, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)
pmed.w <- penetrance(fit.med, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)
plow.w <- penetrance(fit.low, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)
se.female <- c(phigh.w$se, pmed.w$se, plow.w$se)
se.female
var.female <- se.female^2
wt <- 1/var.female
300*wt/sum(wt)
## Section 6.4 Real data analysis
# Loading the LSfam data and assigning the minimum age
data("LSfam", package = "FamEvent")
attr(LSfam, "agemin") <- 18
# Penetrance model with Weibull baseline hazard
fitLS.Weibull <- penmodelEM(Surv(time, status) ~ gender + mgene, cluster = "famID",
gvar = "mgene", base.dist = "Weibull", design = "pop+",
parms = c(0.05, 2, 1, 3), method = "mendelian",
data = LSfam[ ! is.na(LSfam$time), ])
summary(fitLS.Weibull)
# AIC value
fitLS.Weibull$AIC
# Plotting the penetrance curves based on the fitted model
plot(fitLS.Weibull, add.KM = FALSE, conf.int = TRUE, print = FALSE)
# Penetrance estimates by age 70 for male and female carriers based on the Weibull model
penetrance(fitLS.Weibull, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
penetrance(fitLS.Weibull, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)
# Penetrance model with a piecewise constant baseline
fitLS.piece <- penmodelEM(Surv(time, status) ~ gender + mgene, cluster = "famID",
gvar = "mgene", base.dist = "piecewise", design = "pop+",
parms = c(rep(0.01, 5), 1, 1.5),
cuts = c(30, 40, 50, 60), method = "mendelian",
data = LSfam[ ! is.na(LSfam$time), ])
summary(fitLS.piece)
# AIC value
fitLS.piece$AIC
# Plotting the penetrance curves based on the piecewise constant model
plot(fitLS.piece, add.KM = FALSE, conf.int = TRUE, ylim = c(0,1), print = FALSE)
# Penetrance estimates by age 70 for male and female carriers based on the piecewise constant model
penetrance(fitLS.piece, fixed = c(1,1), age = 70, CI = TRUE, MC = 100)
penetrance(fitLS.piece, fixed = c(0,1), age = 70, CI = TRUE, MC = 100)