## This function generates an nxp matrix of n(0,1) X's with pairwise correlation rho (as described in section 3 of the paper)

gen.x = function(n,p,rho){
 if(abs(rho)<1){
  beta=sqrt(rho/(1-rho))
 x0=matrix(rnorm(n*p),ncol=p)
 z=rnorm(n)
 x=beta*matrix(z,nrow=n,ncol=p,byrow=F)+x0
 }
 if(abs(rho)==1){ x=matrix(z,nrow=n,ncol=p,byrow=F)}

return(x)
}

## This function creates true survival times as described in section 3 of the paper. In all simulations we set snr (signal to noise ratio) to 3.

gen.times = function(x,snr){
n=nrow(x)
p=ncol(x)
b=((-1)^(1:p))*exp(-2*( (1:p)-1)/20)
f=x%*%b
e=rnorm(n)
k=sqrt(var(f)/(snr*var(e)))
y=exp(f+k*e)
return(y)
}

## This function creates true survival times as described in section 3 of the paper. In all simulations we set snr (signal to noise ratio) to 3.

gen.times.censor = function(x,snr){
n=nrow(x)
p=ncol(x)
b=((-1)^(1:p))*exp(-2*( (1:p)-1)/20)
f=x%*%b
e=rnorm(n)
k=sqrt(var(f)/(snr*var(e)))
y=exp(k*e)
return(y)
}

library("glmpath")
library("survival")
library("glmnet")

### TIMINGS WITH VARYING N (calculations for left part of plot 1) ###
set.seed(150)

timings.varying.n <- array(dim = c(4,5,8)) #Alpha, Trial, cor p, n
alpha <- 1
cor <- 0.5
p <- c(50, 100, 200, 500, 1000)
n <- c(1000, 2000, 5000, 7000, 10000, 13000, 15000, 20000)

for(i in 1:4){
    for(k in 1:5){
      for(l in 1:8){

        # Simulating data
        
        x <- gen.x(n[l], p[k], cor)
        c <- gen.times.censor(x, 3)
        t <- gen.times(x, 3)
        status <- t < c
        y <- apply(cbind(c,t),1,min)

        # Putting data in proper format

        z <- Surv(y, status)
        
          timings.varying.n[i,k,l] <- system.time(net.sol <-  glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, maxit = 1000, lambda.min = 0.05))[1] #Calculating timing
    }
  }
}

mean.n <- apply(timings.varying.n, c(2,3), mean) # Averaging the trials


## Plotting the results ##
pdf("TimingsVaryingN.pdf")
par(cex=1.6)
plot(n, mean.n[1,], pch=".", xlab = "Number of Samples (n)", ylab = "Time (seconds)", main = "Time vs. n", col = 1, ylim=c(0,55))
for(i in 1:5){
  lines(n, mean.n[i,], col=i)
}
legend("topleft", c("p = 1000", "p = 500", "p = 200", "p = 100", "p = 50"), pch=rep("-",5),col = c(5,4,3,2,1), cex=(1.4/1.6))
dev.off()

### Timings Varying P (Calculations for the right part of plot 1) ###

set.seed(200)

timings.varying.p <- array(dim = c(4,7,4)) #Alpha, Trial, cor p, n
alpha <- 1
cor <- 0.5
p <- c(1000, 2000, 5000, 10000, 15000, 20000, 30000)
n <- c(50, 100, 200, 500)

for(i in 1:4){
    for(k in 1:7){
      for(l in 1:4){

        # Simulating data
        
        x <- gen.x(n[l], p[k], cor)
        c <- gen.times.censor(x, 3)
        t <- gen.times(x, 3)
        status <- t < c
        y <- apply(cbind(c,t),1,min)

        # Putting data in proper format
        
        z <- Surv(y, status)
        
          timings.varying.p[i,k,l] <- system.time(net.sol <- glmnet(x,z,family="cox", alpha = 1, standardize = FALSE, maxit = 1000, lambda.min = 0.05))[1] #Calculating timing
      }
    }
  }

mean.p <- apply(timings.varying.p, c(2,3), mean) # Averaging the trials

## Plotting the results ##
pdf("TimingsVaryingP.pdf")
par(cex=1.6)
plot(p, mean.p[,1], pch=".", xlab = "Number of Covariates (p)", ylab = "Time (seconds)", main = "Time vs. p", col = 1, ylim=c(0,140))
for(i in 1:4){
  lines(p, mean.p[,i], col=i)
}
legend("topleft", c("n = 500", "n = 200", "n = 100", "n = 50"), pch=rep("-",4),col = c(4,3,2,1), cex=1.4/1.6)
dev.off()