## This function generates an nxp matrix of n(0,1) X's with pairwise correlation rho (as described in section 3 of the paper) gen.x = function(n,p,rho){ if(abs(rho)<1){ beta=sqrt(rho/(1-rho)) x0=matrix(rnorm(n*p),ncol=p) z=rnorm(n) x=beta*matrix(z,nrow=n,ncol=p,byrow=F)+x0 } if(abs(rho)==1){ x=matrix(z,nrow=n,ncol=p,byrow=F)} return(x) } ## This function creates true survival times as described in section 3 of the paper. In all simulations we set snr (signal to noise ratio) to 3. gen.times = function(x,snr){ n=nrow(x) p=ncol(x) b=((-1)^(1:p))*exp(-2*( (1:p)-1)/20) f=x%*%b e=rnorm(n) k=sqrt(var(f)/(snr*var(e))) y=exp(f+k*e) return(y) } ## This function creates censor times as described in section 3 of the paper. In all simulations we set snr (signal to noise ratio) to 3. gen.times.censor = function(x,snr){ n=nrow(x) p=ncol(x) b=((-1)^(1:p))*exp(-2*( (1:p)-1)/20) f=x%*%b e=rnorm(n) k=sqrt(var(f)/(snr*var(e))) y=exp(k*e) return(y) } library("glmpath") library("survival") library("glmnet") library("penalized") ##### CALCULATIONS FOR PAPER ##### ## Calculations for Table 1 ## set.seed(100) times.compare <- array(0, c(3,3,6,5)) n <- c(100, 100, 200, 500, 100) p <- c(2000, 5000, 1000, 50, 10000) rho <- c(0, 0.1, 0.2, 0.5, 0.9, 0.95) for(k in 1:3){ for(i in 1:5){ for(j in 1:6){ # Generating Data # x <- gen.x(n[i], p[i], rho[j]) c <- gen.times.censor(x, 3) t <- gen.times(x, 3) status <- t < c y <- apply(cbind(c,t),1,min) # Putting it in proper format # z = Surv(y, status) data <- list(x = x, time = y, status = status) # Fitting coxnet first to figure out the minimum lambda value to use on our path # fit <- glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, maxit = 1000) min.lam <- min(fit$lambda)*n[i] #We divide here, because coxpath uses a different scale for lambda times.compare[k,2,j,i] <- system.time(fit.path<-coxpath(data, standardize = FALSE, min.lambda = min.lam))[1] #calculating the timings for coxpath times.compare[k,1,j,i] <- system.time(fit.net<-glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, lambda = fit.path$lambda/n[i], maxit = 1000))[1] #calculating the timings for coxnet using the path calculated by coxpath times.compare[k,3,j,i]<-system.time(fit.pen<-penalized(z, x, lambda1 = min.lam, lambda2 = 0, model = "cox", steps = length(fit.path$lambda), trace = FALSE))[1] } } } avg.times.compare <- apply(times.compare, c(2,3,4), mean) # Averaging the timings over trials ### Calculations for the first large problem for Coxnet (Second to last row of the table) ### set.seed(100) solo.time <- matrix(0,ncol=6,nrow=3) for(j in 1:6){ for(i in 1:3){ ## Generating data x <- gen.x(200, 40000, rho[j]) c <- gen.times.censor(x, 3) t <- gen.times(x, 3) status <- c < t y <- apply(cbind(c,t),1,min) ## Puting data in proper format z = Surv(y, status) solo.time[i,j] <- system.time(fit <- glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, maxit = 1000, lambda.min = 0.05))[1] ## Calculating the timings for coxnet } } round(apply(solo.time, 2, mean), digits = 3) ## Averaging times over trials ### Calculations for the second large problem for Coxnet (Last row of the table) ### set.seed(100) solo.time.n <- matrix(0,ncol=6,nrow=3) for(j in 1:6){ for(i in 1:3){ ## Generating data x <- gen.x(100000, 500, rho[j]) c <- gen.times.censor(x, 3) t <- gen.times(x, 3) status <- c < t y <- apply(cbind(c,t),1,min) ## Putting data in proper format z = Surv(y, status) solo.time.n[i,j] <- system.time(fit <- glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, maxit = 1000, lambda.min = 0.05))[1] ## Calculating the timings for coxnet } } round(apply(solo.time.n, 2, mean), digits = 3) ## Averaging times over trials