## This function generates an nxp matrix of n(0,1) X's with pairwise correlation rho (as described in section 3 of the paper) gen.x = function(n,p,rho){ if(abs(rho)<1){ beta=sqrt(rho/(1-rho)) x0=matrix(rnorm(n*p),ncol=p) z=rnorm(n) x=beta*matrix(z,nrow=n,ncol=p,byrow=F)+x0 } if(abs(rho)==1){ x=matrix(z,nrow=n,ncol=p,byrow=F)} return(x) } ## This function creates true survival times as described in section 3 of the paper. In all simulations we set snr (signal to noise ratio) to 3. gen.times = function(x,snr){ n=nrow(x) p=ncol(x) b=((-1)^(1:p))*exp(-2*( (1:p)-1)/20) f=x%*%b e=rnorm(n) k=sqrt(var(f)/(snr*var(e))) y=exp(f+k*e) return(y) } ## This function creates true survival times as described in section 3 of the paper. In all simulations we set snr (signal to noise ratio) to 3. gen.times.censor = function(x,snr){ n=nrow(x) p=ncol(x) b=((-1)^(1:p))*exp(-2*( (1:p)-1)/20) f=x%*%b e=rnorm(n) k=sqrt(var(f)/(snr*var(e))) y=exp(k*e) return(y) } library("glmpath") library("survival") library("glmnet") ### TIMINGS WITH VARYING N (calculations for left part of plot 1) ### set.seed(150) timings.varying.n <- array(dim = c(4,5,8)) #Alpha, Trial, cor p, n alpha <- 1 cor <- 0.5 p <- c(50, 100, 200, 500, 1000) n <- c(1000, 2000, 5000, 7000, 10000, 13000, 15000, 20000) for(i in 1:4){ for(k in 1:5){ for(l in 1:8){ # Simulating data x <- gen.x(n[l], p[k], cor) c <- gen.times.censor(x, 3) t <- gen.times(x, 3) status <- t < c y <- apply(cbind(c,t),1,min) # Putting data in proper format z <- Surv(y, status) timings.varying.n[i,k,l] <- system.time(net.sol <- glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, maxit = 1000, lambda.min = 0.05))[1] #Calculating timing } } } mean.n <- apply(timings.varying.n, c(2,3), mean) # Averaging the trials ## Plotting the results ## pdf("TimingsVaryingN.pdf") par(cex=1.6) plot(n, mean.n[1,], pch=".", xlab = "Number of Samples (n)", ylab = "Time (seconds)", main = "Time vs. n", col = 1, ylim=c(0,55)) for(i in 1:5){ lines(n, mean.n[i,], col=i) } legend("topleft", c("p = 1000", "p = 500", "p = 200", "p = 100", "p = 50"), pch=rep("-",5),col = c(5,4,3,2,1), cex=(1.4/1.6)) dev.off() ### Timings Varying P (Calculations for the right part of plot 1) ### set.seed(200) timings.varying.p <- array(dim = c(4,7,4)) #Alpha, Trial, cor p, n alpha <- 1 cor <- 0.5 p <- c(1000, 2000, 5000, 10000, 15000, 20000, 30000) n <- c(50, 100, 200, 500) for(i in 1:4){ for(k in 1:7){ for(l in 1:4){ # Simulating data x <- gen.x(n[l], p[k], cor) c <- gen.times.censor(x, 3) t <- gen.times(x, 3) status <- t < c y <- apply(cbind(c,t),1,min) # Putting data in proper format z <- Surv(y, status) timings.varying.p[i,k,l] <- system.time(net.sol <- glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, maxit = 1000, lambda.min = 0.05))[1] #Calculating timing } } } mean.p <- apply(timings.varying.p, c(2,3), mean) # Averaging the trials ## Plotting the results ## pdf("TimingsVaryingP.pdf") par(cex=1.6) plot(p, mean.p[,1], pch=".", xlab = "Number of Covariates (p)", ylab = "Time (seconds)", main = "Time vs. p", col = 1, ylim=c(0,140)) for(i in 1:4){ lines(p, mean.p[,i], col=i) } legend("topleft", c("n = 500", "n = 200", "n = 100", "n = 50"), pch=rep("-",4),col = c(4,3,2,1), cex=1.4/1.6) dev.off()