R version 2.15.1 (2012-06-22) -- "Roasted Marshmallows" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. [Previously saved workspace restored] > # -------------------------------------------------------------------- > # > # Commentary: > # > # All results presented in the paper were obtained on > # a GNU/Linux 2.6.4 machine, using 48 kernels in parallel (doMC). > # > # On different computers, deviations may occur. We observe differences > # after the 4th decimal for the results involving either of the methods > # random survival forest, cforest. > # > # -------------------------------------------------------------------- > > library("pec") Loading required package: prodlim Loading required package: KernSmooth KernSmooth 2.23 loaded Copyright M. P. Wand 1997-2009 Loading required package: survival Loading required package: splines Loading required package: rms Loading required package: Hmisc Attaching package: ‘Hmisc’ The following object(s) are masked from ‘package:survival’: untangle.specials The following object(s) are masked from ‘package:base’: format.pval, round.POSIXt, trunc.POSIXt, units Attaching package: ‘rms’ The following object(s) are masked from ‘package:survival’: Surv > library("survival") > library("rms") > library("randomSurvivalForest") randomSurvivalForest 3.6.3 Type rsf.news() to see new features, changes, and bug fixes. > library("party") Loading required package: grid Loading required package: modeltools Loading required package: stats4 Attaching package: ‘modeltools’ The following object(s) are masked from ‘package:rms’: Predict Loading required package: coin Loading required package: mvtnorm Loading required package: zoo Attaching package: ‘zoo’ The following object(s) are masked from ‘package:base’: as.Date Loading required package: sandwich Loading required package: strucchange Loading required package: vcd Loading required package: MASS Loading required package: colorspace > library("doMC") Loading required package: foreach Loading required package: iterators Loading required package: codetools Loading required package: multicore > data("cost") > extends <- function(...) TRUE > print(version) _ platform x86_64-unknown-linux-gnu arch x86_64 os linux-gnu system x86_64, linux-gnu status major 2 minor 15.1 year 2012 month 06 day 22 svn rev 59600 language R version.string R version 2.15.1 (2012-06-22) nickname Roasted Marshmallows > print(sessionInfo()) R version 2.15.1 (2012-06-22) Platform: x86_64-unknown-linux-gnu (64-bit) locale: [1] LC_CTYPE=en_US.UTF8 LC_NUMERIC=C LC_TIME=en_US.UTF8 [4] LC_COLLATE=en_US.UTF8 LC_MONETARY=en_US.UTF8 LC_MESSAGES=en_US.UTF8 [7] LC_PAPER=C LC_NAME=C LC_ADDRESS=C [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF8 LC_IDENTIFICATION=C attached base packages: [1] stats4 grid splines stats graphics grDevices utils datasets methods [10] base other attached packages: [1] doMC_1.2.5 multicore_0.1-7 foreach_1.3.2 [4] codetools_0.2-8 iterators_1.0.5 party_1.0-2 [7] vcd_1.2-12 colorspace_1.1-0 MASS_7.3-18 [10] strucchange_1.4-6 sandwich_2.2-8 zoo_1.7-4 [13] coin_1.0-20 mvtnorm_0.9-9991 modeltools_0.2-19 [16] randomSurvivalForest_3.6.3 pec_2.1.9 rms_3.5-0 [19] Hmisc_3.8-3 prodlim_1.2.9 survival_2.36-14 [22] KernSmooth_2.23-7 loaded via a namespace (and not attached): [1] cluster_1.14.2 lattice_0.20-6 lava_0.9-42 RegisterData_1.7 tools_2.15.1 > > # -------------------------------------------------------------------- > # Section 3.3 > # -------------------------------------------------------------------- > # 'New' data: > newData <- data.frame(age=as.integer(c(28,74,95)), + sex=factor("female",levels=c("female", "male")), + hypTen=factor("no",levels=c("no","yes")), + ihd=factor("no",levels=c("no","yes")), + prevStroke=factor("no",levels=c("no","yes")), + othDisease=factor("no",levels=c("no","yes")), + alcohol=factor("no",levels=c("no","yes")), + diabetes=factor("no",levels=c("no","yes")), + smoke=factor("no",levels=c("no","yes")), + atrialFib=factor("no",levels=c("no","yes")), + hemor=factor("no",levels=c("no","yes")), + strokeScore=as.integer(38), + cholest=6) > print(newData) age sex hypTen ihd prevStroke othDisease alcohol diabetes smoke atrialFib hemor strokeScore 1 28 female no no no no no no no no no 38 2 74 female no no no no no no no no no 38 3 95 female no no no no no no no no no 38 cholest 1 6 2 6 3 6 > > # -------------------------------------------------------------------- > # Fitting the modelling strategies > # -------------------------------------------------------------------- > > fitform <- Surv(time,status) ~ age + sex + hypTen + ihd + prevStroke + + othDisease + alcohol + diabetes + smoke + atrialFib + hemor + + strokeScore + cholest > fitcox <- selectCox(fitform, data = cost, rule = "aic") > set.seed(13) > fitrsf <- rsf(fitform, data = cost, forest = TRUE, ntree = 1000) > set.seed(13) > fitcforest <- pecCforest(fitform, data = cost, controls = cforest_classical(ntree = 1000)) > > # -------------------------------------------------------------------- > # Table 2: Survival predictions > # -------------------------------------------------------------------- > > pcox <- predictSurvProb(fitcox, newdata = newData, times = 10 * 365.25) > prsf <- predictSurvProb(fitrsf, newdata = newData, times = 10 * 365.25) > pcf <- predictSurvProb(fitcforest, newdata = newData, times = 10 * 365.25) > psp <- round(cbind(pcox,prsf, pcf)*100,2) > newNames <- paste("\\code{newData}", 1:3) > mat <- cbind(newNames, newData$age, psp) > colnames(mat) <- c("Id", "Age", "selected Cox regression", "randomSurvivalForest", "cforest") > print(mat) Id Age selected Cox regression randomSurvivalForest cforest [1,] "\\code{newData} 1" "28" "86.05" "54.43" "47.73" [2,] "\\code{newData} 2" "74" "24.17" "35.98" "20.66" [3,] "\\code{newData} 3" "95" "1.91" "13.03" "9.74" > > # -------------------------------------------------------------------- > # Figure 1 > # -------------------------------------------------------------------- > png("Figure1.png",width=7,height=7/2.5,units= 'in',res=300) > par(mfrow=c(1,3),mar=c(5,4,4,2),las=1,cex.axis=1.1, cex.lab=1.1,oma=c(2,0,0,0), xpd=NA) > silent <- lapply(1:3, function(x){ + plotPredictSurvProb(fitcox, + newdata=newData[x,], + xlim=c(0,10*365.25), + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2), + xlab="Years", + lty=1, + ylab="", + lwd=2, + legend=FALSE, + percent=TRUE) + plotPredictSurvProb(fitrsf, + newdata=newData[c(x,x),], + xlim=c(0,10*365.25), + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2), + add=TRUE, + col=1, + lty=2, + ylab="", + lwd=2, + legend=FALSE, + percent=TRUE) + plotPredictSurvProb(fitcforest, + newdata=newData[x,], + xlim=c(0,10*365.25), + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2), + add=TRUE, + lty=3, + ylab="", + lwd=2, + legend=FALSE, + percent=TRUE) + text(0, 1.1,"Probability", xpd=NA, adj=1) + }) > width <- 7 > leg <- legend(0,0, legend=c("Selected Cox regression", + "randomSurvivalForest", + "cforest"), lty=1:3, lwd=2, plot=FALSE, horiz=TRUE) > xcoord <- grconvertX(width/2, "inches", "user")-leg$rect$w/2 > ycoord <- grconvertY(0, "inches", "user")+leg$rect$h > legend(xcoord,ycoord, legend=c("selected Cox regression", + "randomSurvivalForest", + "cforest"), lty=1:3, lwd=2,bty="n", cex=1.1, horiz=TRUE) > dev.off() null device 1 > > # -------------------------------------------------------------------- > # Section 5.1: > # -------------------------------------------------------------------- > > # Bootstrap cross-validation > # WARNING: long run times can be expected (in sequential mode) > # > run <- TRUE > if (run){ + set.seed(13) + registerDoMC() + print(system.time(fitpec <- pec(list("selectcox" = fitcox, + "rsf" = fitrsf, + "cforest" = fitcforest), + data = cost, + formula = Surv(time, status) ~ 1, + splitMethod = "Boot632plus", + B = 1000, + M = 350, + keep.index = TRUE, + keep.matrix = TRUE))) + save(fitpec,file="fitpec.rda") + } else { + load(file="fitpec-31August2012.rda") + } No covariates specified: Kaplan-Meier for censoring times used for weighting. Computing noinformation error using all permutations Split sample loop (B=1000) 100 200 300 400 500 600 700 800 900 1000 user system elapsed 24224.226 37.693 1246.278 > print(fitpec) Prediction error curves Prediction models: $KaplanMeier prodlim(formula = Surv(time, status) ~ 1) $selectcox selectCox(formula = fitform, data = cost, rule = "aic") $rsf rsf(formula = fitform, data = cost, ntree = 1000, forest = TRUE) $cforest pecCforest(formula = fitform, data = cost, controls = cforest_classical(ntree = 1000)) rightCensored response of a survival model No.Observations: 518 Pattern: Freq event 404 right.censored 114 IPCW: marginal model Method for estimating the prediction error: Bootstrap cross-validation Type: subsampling 350 out of 518 No. bootstrap samples: 1000 Sample size: 518 __________________________________________________ Cumulative prediction error, aka Integrated Brier score (IBS) aka Cumulative rank probability score Range of integration: 0 and time=4215 : Integrated Brier score (crps): IBS[0;time=4215] KaplanMeier 0.201 selectcox 0.169 rsf 0.160 cforest 0.171 > > # -------------------------------------------------------------------- > # Figure 2: > # -------------------------------------------------------------------- > png("Figure2.png", width=7.2,height=5,units = "in",res=300) > par(mar=c(5,5,2,2),las=1,cex.axis=1.1, cex.lab=1.1,oma=c(0,0,0,10), xpd=NA) > plot(fitpec, + what="Boot632plusErr", + xlim=c(0,10*365.25), + xlab="Years", + ylab="", + col=c("gray","black","black","black"), + lty=c(1,1,2,3), + lwd=2.5, + legend=FALSE, + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2)) > text(x=0.1,y=0.31,"Prediction error",xpd=NA,adj=c(1,-1)) > width <- 6.1 > height <- 5 > leg <- legend(0,0, text.width=4.9,legend=c("KaplanMeier", + "Selected Cox\nregression", + "randomSurvivalForest", + "cforest"),lty=c(1,1,2,3), lwd=2.5, plot=FALSE) > xcoord <- grconvertX(width-1.1, "inches", "user")-leg$rect$w/2 > ycoord <- grconvertY(height-2.5, "inches", "user")+leg$rect$h > legend(xcoord, + ycoord, + text.width=4.9, + legend=c("Kaplan-Meier","Selected Cox\nregression", "randomSurvivalForest", "cforest"), + lty=c(1,1,2,3), + lwd=2.5, + bty="n", + cex=1.1, + col=c("gray","black","black","black")) > dev.off() null device 1 > # -------------------------------------------------------------------- > # Figure 3: > # -------------------------------------------------------------------- > png("Figure3.png",width=7.2,height=5,units = "in",res=300) > par(mar=c(5,5,2,2),las=1,cex.axis=1.1, cex.lab=1.1,oma=c(0,0,0,10), xpd=NA) > plot(fitpec, + what="BootCvErr", + xlim=c(0,10*365.25), + xlab="Years", + ylab="", + col=c("gray","black","black","black"), + lty=c(1,1,2,3), + lwd=2.5, + legend=FALSE, + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2)) > text(x=0.1,y=0.31,"Prediction error",xpd=NA,adj=c(1,-1)) > width <- 6.1 > height <- 5 > leg <- legend(0,0,text.width=4.9,legend=c("KaplanMeier", + "Selected Cox\nregression", + "randomSurvivalForest", + "cforest"),lty=c(1,1,2,3),lwd=2.5,plot=FALSE) > xcoord <- grconvertX(width-1.1, "inches", "user")-leg$rect$w/2 > ycoord <- grconvertY(height-2.5, "inches", "user")+leg$rect$h > legend(xcoord, + ycoord, + text.width=4.9, + legend=c("Kaplan-Meier","Selected Cox\nregression","randomSurvivalForest","cforest"), + lty=c(1,1,2,3), + lwd=2.5, + bty="n", + cex=1.1, + col=c("gray","black","black","black")) > dev.off() null device 1 > > # -------------------------------------------------------------------- > # Figure 4: > # -------------------------------------------------------------------- > png("Figure4.png",width=5,height=8,units= "in",res=300) > par(mfrow=c(3,1),mar=c(5,5,4,2),las=1,cex.axis=1.1, cex.lab=1.1,oma=c(0,0,0,10), xpd=NA) > MB=100 > plot(fitpec, + what="Boot632plusErr", + models=2, + xlim=c(0,10*365.25), + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2), + special=TRUE, + special.maxboot=MB, + special.addprederr=c("AppErr", "BootCvErr","NoInfErr"), + legend=FALSE, + ylab="", + xlab="Years", + lwd=2.5) > title("Selected Cox regression") > text(x=0.1,y=0.31,"Prediction error",xpd=NA,adj=c(1,-1)) > plot(fitpec, + what="Boot632plusErr", + models=3, + xlim=c(0,10*365.25), + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2), + special=TRUE, + special.maxboot=MB, + special.addprederr=c("AppErr", "BootCvErr","NoInfErr"), + legend=FALSE, + ylab="", + xlab="Years", + lwd=2.5) > title("randomSurvivalForest") > text(x=0.1,y=0.31,"Prediction error",xpd=NA,adj=c(1,-1)) > plot(fitpec, + what="Boot632plusErr", + models=4, + xlim=c(0,10*365.25), + axis1.at=seq(0,10*365.25, 2*365.25), + axis1.label=seq(0,10,2), + special=TRUE, + special.maxboot=MB, + special.addprederr=c("AppErr", "BootCvErr","NoInfErr"), + legend=FALSE, + ylab="", + xlab="Years", + lwd=2.5) > title("cforest") > text(x=0.1,y=0.31,"Prediction error",xpd=NA,adj=c(1,-1)) > width <- 5 > height <- 7 > leg <- legend(0,0, legend=c("Boot632plusErr", "AppErr","BootCvErr", "NoInfErr"), lty=1:4, col=1,lwd=2.5, plot=FALSE) > xcoord <- grconvertX(width-0.8, "inches", "user")-leg$rect$w/2 > ycoord <- grconvertY(height-2, "inches", "user")+leg$rect$h > legend(xcoord,ycoord, legend=c("Boot632plusErr", "AppErr","BootCvErr", "NoInfErr"), lty=1:4, col=1,lwd=2,bty="n", cex=1.1) > dev.off() null device 1 > > proc.time() user system elapsed 24290.314 39.138 1324.198