#########################################################################################
# Replication file for Cattaneo, Feng, Palomba, and Titiunik - scpi
#########################################################################################

# load required packages
library("scpi")
library("ggplot2")
library("latex2exp")

# specs for ggplot
img.width  <- 6
img.height <- 4.5
dpi        <- "retina"

sims <- 1000 # number of replications

#########################################################################################
# One feature (gdp)
#########################################################################################

#########################################################################################
# Load data
data <- scpi_germany

## Set parameters for data preparation
id.var      <- "country"                     # ID variable
time.var    <- "year"                        # Time variable
period.pre  <- (1960:1990)                   # Pre-treatment period
period.post <- (1991:2003)                   # Post-treatment period
unit.tr     <- "West Germany"                # Treated unit
unit.co     <- setdiff(unique(data$country), unit.tr) # Donors pool
outcome.var <- "gdp"                         # Outcome variable
constant    <- TRUE                          # Include constant term
cointegrated.data <- TRUE                    # Cointegrated data

## Data preparation
df  <-   scdata(df = data, id.var = id.var, time.var = time.var, 
  outcome.var = outcome.var, period.pre = period.pre, 
  period.post = period.post, unit.tr = unit.tr, 
  constant = constant, 
  unit.co = unit.co, cointegrated.data = cointegrated.data)

## Estimate Synthetic Control with a simplex-type constraint (default)
res.est <- scest(data = df, w.constr = list(name = "simplex"))
summary(res.est)

## Quantify uncertainty
sims     <- sims         # Number of simulations
u.order  <- 1            # Degree of polynomial in B and C when modelling u
u.lags   <- 0            # Lags of B to be used when modelling u
u.sigma  <- "HC1"        # Estimator for the variance-covariance of u
u.missp  <- TRUE         # If TRUE then the model is treated as misspecified
e.order  <- 1            # Degree of polynomial in B and C when modelling e
e.lags   <- 0            # Lags of B to be used when modelling e
e.method <- "gaussian"   # Estimation method for out-of-sample uncertainty
lgapp    <- "linear"     # Local geometry approximation
cores    <- 1            # Number of cores to be used by scpi

set.seed(8894)
res.pi  <- scpi(data = df, sims = sims, e.method = e.method, e.order = e.order, e.lags = e.lags, 
  u.order = u.order, u.lags = u.lags, u.sigma = u.sigma, u.missp = u.missp, 
  cores = cores, w.constr = list(name = "simplex"), lgapp = lgapp) 


# visualize results
plot <- scplot(result = res.pi, plot.range = (1960:2003), 
  label.xy = list(x.lab = "Year", y.lab = "GDP per capita (thousand US dollars)"), 
  x.ticks = c(1960, 1970, 1980, 1990, 2000, 2003), e.out = TRUE)

plot$plot_out + ggtitle("")


methods <- c("lasso", "ols", "ridge", "L1-L2")
for (method in methods) {
  
  if (method %in%  c("ridge", "L1-L2")) lgapp <- "generalized"
  
  set.seed(8894)
  res.pi  <- scpi(data = df, sims = sims, e.method = e.method, e.order = e.order, 
    e.lags = e.lags, u.order = u.order, u.lags = u.lags, 
    u.sigma = u.sigma, u.missp = u.missp, lgapp = lgapp, 
    cores = cores, w.constr = list(name = method))
  
  # visualize results
  plot <- scplot(result = res.pi, plot.range = (1960:2003), 
    label.xy = list(x.lab = "Year", y.lab = "GDP per capita (thousand US dollars)"), 
    x.ticks = c(1960, 1970, 1980, 1990, 2000, 2003), e.out = TRUE)
  
  print(plot$plot_out + ggtitle(""))
}

########################################################
# Sensitivity Analysis for 1997 using subgaussian bounds
########################################################
set.seed(8894)
res.si  <- scpi(data = df, sims = sims, e.method = "gaussian", e.order = e.order, e.lags = e.lags, 
  u.order = u.order, u.lags = u.lags, u.sigma = u.sigma, u.missp = u.missp, 
  cores = cores, w.constr = list(name = "simplex"), lgapp = "linear") 

e.alpha <- 0.05  # default level in scpi
sens <- c(0.25, 0.5, 1, 1.5, 2)
time <- c(1997)
emean <- res.si$inference.results$e.mean
esig <- sqrt(res.si$inference.results$e.var)
sc.l.0 <- res.si$inference.results$CI.in.sample[, 1, drop = FALSE]
sc.r.0 <- res.si$inference.results$CI.in.sample[, 2, drop = FALSE]
y <- res.si$data$Y.post

for (l in 1:length(time)) {
  ssc.l.1 <- ssc.r.1 <- c()
  e.mean <- emean[time[l] - 1990]
  sig <- esig[time[l] - 1990]
  sig.seq <- sens*sig
  
  for (s in 1:length(sig.seq)) {
    eps  <- sqrt(-log(e.alpha / 2) * 2 * (sig.seq[s]^2))
    ssc.l.1[s] <- sc.l.0[time[l] - 1990] + e.mean - eps
    ssc.r.1[s] <- sc.r.0[time[l] - 1990] + e.mean + eps
  }
  
  sen.dat <- data.frame(t = c(1:5), lb1 = ssc.l.1, ub1 = ssc.r.1, 
    lb = rep(sc.l.0[time[l] - 1990], 5), 
    ub = rep(sc.r.0[time[l] - 1990], 5), 
    lab = as.factor(sens))
  plot <- ggplot() + theme_bw() +
    theme(panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank()) +
    labs(x = "sd. of e", y = "GDP per capita (thousand US dollars)")
  plot <- plot + geom_errorbar(data = sen.dat, aes(x = lab, ymin = lb1, ymax = ub1), 
    col = "maroon", width = 0.2, linetype = 5) +
    geom_errorbar(data = sen.dat, aes(x = lab, ymin = lb, ymax = ub), 
      col = "blue", width = 0.2, linetype = 1) +
    geom_hline(yintercept = y[time[l] - 1990], linetype = 1, size = 0.3, alpha = 0.8) +
    annotate("text", x = 5.4, y = y[time[l] - 1990]-.1, label = "Y(1)", size = 3.5) +
    scale_x_discrete(labels = c(parse(text = TeX("$0.25\\hat{\\sigma}$")), 
      parse(text = TeX("$0.5\\hat{\\sigma}$")), 
      parse(text = TeX("$\\hat{\\sigma}$")), 
      parse(text = TeX("$1.5\\hat{\\sigma}$")), 
      parse(text = TeX("$2\\hat{\\sigma}$"))))
  print(plot)
}

#########################################################################################
# Multiple features (gdp, trade)
#########################################################################################

#########################################################################################
# Load data
data <- scpi_germany

## Set parameters for data preparation
id.var      <- "country"                     # ID variable
time.var    <- "year"                        # Time variable
period.pre  <- (1960:1990)                   # Pre-treatment period
period.post <- (1991:2003)                   # Post-treatment period
unit.tr     <- "West Germany"                # Treated unit
unit.co     <- unique(data$country)[-7]      # Donors pool
outcome.var <- "gdp"                         # Outcome variable
cointegrated.data <- TRUE                    # Cointegrated data
features    <- c("gdp", "trade")              # Features to match
cov.adj     <- list(c("constant"))

## Data preparation
df  <-   scdata(df = data, id.var = id.var, time.var = time.var, 
  outcome.var = outcome.var, period.pre = period.pre, 
  period.post = period.post, unit.tr = unit.tr, 
  features = features, unit.co = unit.co, 
  cointegrated.data = cointegrated.data, cov.adj = cov.adj)

## Estimate Synthetic Control with a simplex-type constraint (default)
res.est <- scest(data = df, w.constr = list(name = "simplex"))
summary(res.est)

## Quantify uncertainty
sims     <- sims         # Number of simulations
u.order  <- 1            # Degree of polynomial in B and C when modelling u
u.lags   <- 0            # Lags of B to be used when modelling u
u.sigma  <- "HC1"        # Estimator for the variance-covariance of u
u.missp  <- TRUE         # If TRUE then the model is treated as misspecified
e.order  <- 1            # Degree of polynomial in B and C when modelling e
e.lags   <- 0            # Lags of B to be used when modelling e
e.method <- "gaussian"   # Estimation method for out-of-sample uncertainty
lgapp    <- "linear"     # Local geometry approximation

set.seed(8894)
res.pi <- scpi(data = df, sims = sims, e.method = e.method, e.order = e.order, e.lags = e.lags, 
  u.order = u.order, u.lags = u.lags, u.sigma = u.sigma, u.missp = u.missp, 
  cores = cores, w.constr = list(name = "simplex"), lgapp = "linear") 


# visualize results
plot <- scplot(result = res.pi, plot.range = (1960:2003), 
  label.xy = list(x.lab = "Year", y.lab = "GDP per capita (thousand US dollars)"), 
  x.ticks = c(1960, 1970, 1980, 1990, 2000, 2003), joint = TRUE)

plot$plot_out + ggtitle("") 


methods <- c("lasso", "ols", "L1-L2", "ridge")

for (method in methods) {
  if (method %in%  c("ridge", "L1-L2")) lgapp <- "generalized"
  
  set.seed(8894)
  res.pi  <- scpi(data = df, sims = sims, e.method = e.method, e.order = e.order, 
    e.lags = e.lags, u.order = u.order, u.lags = u.lags, lgapp = lgapp, 
    u.sigma = u.sigma, u.missp = u.missp, 
    cores = cores, w.constr = list(name = method)) 
  
  # visualize results
  plot <- scplot(result = res.pi, plot.range = (1960:2003), 
    label.xy = list(x.lab = "Year", y.lab = "GDP per capita (thousand US dollars)"), 
    x.ticks = c(1960, 1970, 1980, 1990, 2000, 2003), e.out = TRUE, joint = TRUE)
  
  print(plot$plot_out + ggtitle(""))
}

rm(list = ls(all = TRUE))
pacman::p_load(CVXR, nloptr, ECOSolveR, haven, doSNOW, parallel, doRNG, scpi)

## code to reproduce Table 4
source("code-table.R")

set.seed(8894)

simuls <- 100
cores <- 1
type.constr <- "simplex"

adh2015 <- scpi_germany

df <- scdata(df = adh2015, id.var = "country", time.var = "year", 
  outcome.var = "gdp", period.pre = (1960:1990), 
  period.post = (1991:2003), unit.tr = "West Germany", 
  constant = TRUE, unit.co = unique(adh2015$country)[-7],
  cointegrated.data = TRUE)

aux <- solverGetData(df, list("name" = type.constr))
store <- simul(aux, simuls, cores)
print(colMeans(store)*1000)
print(apply(store, 2, median, na.rm = TRUE) * 1000)
print(apply(store, 2, quantile, probs = c(0.5, 0.25, 0.75), na.rm = TRUE) * 1000)

