# Rscript for the paper "bootUR: An R Package for Bootstrap Unit Root Tests"

#### Packages ####
library("bootUR")
library("zoo")
library("CADFtest")
library("tseries")
library("fUnitRoots")
library("urca")

#### Two Auxiliary Functions needed for Section 5 ####
order_integration_other_packages <- function(data, max_order = 2, level = 0.05,
                                             method = "adf.test",
                                             plot_orders = FALSE) {
  # Function to determine the order of integration with
  # the unit root functions of the other packages
  N <- NCOL(data)
  if (!is.null(colnames(data))) {
    var_names <- colnames(data)
  } else {
    var_names <- paste0("Variable ", 1:NCOL(data))
  }
  d <- rep(NA, N)
  names(d) <- var_names
  datad <- data
  i_in_datad <- 1:N
  for (d_i in (max_order - 1):0) {
    datad <- diff_mult(data[, i_in_datad], rep(d_i, length(i_in_datad)),
                       keep_NAs = FALSE)
    if (method == "adf.test") {
      out <- apply(datad, 2, level = level, function(x, level){
        pval = adf.test(x[!is.na(x)])$p.value
        rej_H0 = pval < level
        return(rej_H0)
      })
    } else if (method == "CADFtest") {
      out <- apply(datad, 2, level = level, function(x, level){
        n <- length(x)
        p_max <- floor(12*(n/100)^{1/4})
        pval = CADFtest(x, type = "trend", criterion = "MAIC",
                        max.lag.y = p_max)$p.value
        rej_H0 = pval < level
        return(rej_H0)
      })
    } else if (method == "unitrootTest") {
      out <- apply(datad, 2, level = level, function(x, level){
        pval = unitrootTest(x, type = "ct")@test$p.value[1]
        rej_H0 = pval < level
        return(rej_H0)
      })
    } else if (method =="ur.ers"){
      out <- apply(datad, 2, level = level, function(x, level){
        fit = ur.ers(x[!is.na(x)], type = "DF-GLS", model = "trend")
        rej_H0 <- fit@teststat <
          fit@cval[which.min(abs(c(0.01, 0.05, 0.1) - level))]
        return(rej_H0)
      })
    } else if (method == "ur.df") {
      out <- apply(datad, 2, level = level, function(x, level){
        n <- length(x)
        p_max <- floor(12*(n/100)^{1/4})
        fit = ur.df(x[!is.na(x)], type = "trend", selectlags = "AIC", lags = p_max)
        rej_H0 <- fit@teststat[1] <
          fit@cval[1, which.min(abs(c(0.01, 0.05, 0.1) - level))]
        return(rej_H0)
      })
    } else {

      stop("Invalid test argument.")
    }
    d[i_in_datad[!out]] <- d_i + 1
    if (any(out)) {
      if (d_i == 0) {
        d[i_in_datad[out]] <- 0
      } else {
        i_in_datad <- i_in_datad[out]
      }
    } else {
      break
    }
  }
  if (plot_orders) {
    if (!requireNamespace("ggplot2", quietly = TRUE)) {
      warning("Cannot plot orders of integration as package ggplot2 not installed.")
    } else {
      g <- plot_order_integration(d)
      print(g)
    }
  }
  return(list(diff_data = diff_mult(data, d), order_int = d))
}

plot_multiple_orders <- function(d, show_names = TRUE, show_legend = TRUE,
                                 cols = c("#1B9E77", "#7570B3", "#D95F02"),
                                 size.legend.title = 10, size.legend.text = 10,
                                 size.axis.text.x = 8, size.axis.text.y = 8,
                                 size.axis.title = 10) {
  # Function to create Figures 3 and 4 : classification of series into
  # I(0), I(1) and I(2) for the different unit root tests
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Cannot plot orders of integration as package ggplot2 not installed.")
  } else {
    df <- data.frame(
      var_names = factor(rep(rownames(d), ncol(d)), levels = rev(rownames(d))),
      method = factor(rep(colnames(d), each = nrow(d)), levels = colnames(d)),
      order = paste0("I(", c(d), ")"))

    if (show_names) {
      g <- ggplot2::ggplot(data = df,
             ggplot2::aes(x = method, y = var_names, fill = order)) +
        ggplot2::geom_tile(height = 0.9, width = 0.8, show.legend = show_legend) +
        ggplot2::labs(x = "Method", y = "Variables", fill = "Order of Integration") +
        ggplot2::scale_fill_manual(values = cols) +
        ggplot2::theme_minimal() +
        ggplot2::theme(legend.title = ggplot2::element_text(size = size.legend.title),
                       legend.text = ggplot2::element_text(size = size.legend.text),
                       axis.text.x = ggplot2::element_text(size = size.axis.text.x),
                       axis.text.y = ggplot2::element_text(size = size.axis.text.y),
                       panel.border = ggplot2::element_blank(),
                       panel.grid.major = ggplot2::element_blank(),
                       panel.grid.minor = ggplot2::element_blank(),
                       axis.title = ggplot2::element_text(size = size.axis.title))
    } else {
      g <- ggplot2::ggplot(data = df,
             ggplot2::aes(x = method, y = var_names, fill = order)) +
        ggplot2::geom_tile(height = 0.9, width = 0.8, show.legend = show_legend) +
        ggplot2::labs(x = "Method", y = "Variables", fill = "Order of Integration") +
        ggplot2::scale_fill_manual(values = cols) +
        ggplot2::theme_minimal() +
        ggplot2::theme(legend.title = ggplot2::element_text(size.legend.title),
          legend.text = ggplot2::element_text(size = size.legend.text),
          axis.text.x = ggplot2::element_text(size = size.axis.text.x),
          axis.text.y = element_blank(),
          panel.border = ggplot2::element_blank(),
          panel.grid.major = ggplot2::element_blank(),
          panel.grid.minor = ggplot2::element_blank(),
          axis.title = ggplot2::element_text(size = size.axis.title))
    }
    return(g)
  }
}


##### Set seed for reproducibility ####
set.seed(155776)

##### Section 4: An introduction to the bootUR package ####
data("MacroTS")

# Section 4.1
check_missing_insample_values(MacroTS)
sample_check <- find_nonmissing_subsample(MacroTS)
sample_check
# Figure 1
plot_missing_values(MacroTS, show_names = TRUE)
ggplot2::ggsave("MacroTS_missing.pdf", width = 40, height = 20, units = "cm")

# Section 4.2
GDP_NL <- MacroTS[, 4]
adf_out1 <- adf(data = GDP_NL, deterministics  = "trend", two_step = FALSE)
print(adf_out1)
adf_out2 <- adf(data = GDP_NL, deterministics  = "trend", two_step = TRUE)
print(adf_out2)

adf_out1$estimate
adf_out1$statistic
adf_out1$p.value

boot_adf(data = GDP_NL, bootstrap = "SB", deterministics  = "trend",
         detrend = "OLS")

boot_union(data = GDP_NL, bootstrap = "SWB")

# Section 4.3
boot_ur_out <- boot_ur(data = MacroTS[, 1:5], bootstrap = "MBB")
boot_ur_out
boot_ur_out$statistic
boot_ur_out$p.value

boot_panel(data = MacroTS[, 1:5], bootstrap = "DWB")
boot_sqt(data = MacroTS[, 1:5])
boot_fdr(data = MacroTS[, 1:5], bootstrap = "BWB")


##### Section 5: Applications ####

#### PSID ####
# Bootstrap unit root tests on Smeekes (2015) application for Table 3
load("PSID.RData")
j <- 1:6
q <- c((j - 1)/6, 1)
PSID_boot_sqt <- boot_sqt(PSID, steps = q, bootstrap = "MBB", B = 4999,
                          block_length = 5, max_lag = 3, show_progress = FALSE)
PSID_boot_fdr <- boot_fdr(PSID, bootstrap = "MBB", B = 4999, block_length = 5,
                          max_lag = 3, show_progress = FALSE)
PSID_boot_ur <- boot_ur(PSID, bootstrap = "MBB", B = 4999,
                        block_length = 5, max_lag = 3, show_progress = FALSE)

Table_PSID <- matrix(NA, 4, 3)
colnames(Table_PSID) <- c("boot_ur", "boot_sqt", "boot_fdr")
rownames(Table_PSID) <- c("Total rejections (N=181)", "CLG (N=58)",
                          "HSG (N=87)", "HSD (N=36)")
Table_PSID[1,1] <- sum(PSID_boot_ur$p.value<0.05)
Table_PSID[1,2] <- sum(PSID_boot_sqt$rejections == TRUE)
Table_PSID[1,3] <- sum(PSID_boot_fdr$rejections == TRUE)

Table_PSID[2,1] <- sum(PSID_boot_ur$p.value[1:58]<0.05)
Table_PSID[2,2] <- sum(PSID_boot_sqt$rejections[1:58] == TRUE)
Table_PSID[2,3] <- sum(PSID_boot_fdr$rejections[1:58] == TRUE)

Table_PSID[3,1] <- sum(PSID_boot_ur$p.value[(58+1):(58+87)]<0.05)
Table_PSID[3,2] <- sum(PSID_boot_sqt$rejections[(58+1):(58+87)] == TRUE)
Table_PSID[3,3] <- sum(PSID_boot_fdr$rejections[(58+1):(58+87)] == TRUE)

Table_PSID[4,1] <- sum(PSID_boot_ur$p.value[(58+87+1):(181)]<0.05)
Table_PSID[4,2] <- sum(PSID_boot_sqt$rejections[(58+87+1):(181)] == TRUE)
Table_PSID[4,3] <- sum(PSID_boot_fdr$rejections[(58+87+1):(181)] == TRUE)

totals <- matrix(c(rep(181, 3), rep(58, 3),
                   rep(87, 3), rep(36, 3)), 4, 3, byrow = T)
Table_prop <- round(Table_PSID/totals, 2)
rownames(Table_prop) <- c("Proportions of rejections", rep("Proportion", 3))
Table3 <- rbind(Table_PSID[1,], Table_prop[1,],
                Table_PSID[2,], Table_prop[2,],
                Table_PSID[3,], Table_prop[3,],
                Table_PSID[4,], Table_prop[4,])
rownames(Table3) <- c("Total rejections (N=181)", "Proportion of rejections",
                          "CLG (N=58)", "Proportion",
                          "HSG (N=87)", "Proportion",
                          "HSD (N=36)", "Proportion")
print(Table3)

#### MacroTS ####
# bootUR: panel unit root test for Table 4
MacroTS_panel_diffs <- boot_panel(data = diff(MacroTS))
MacroTS_panel_levels <- boot_panel(data = MacroTS)
Table4 <- matrix(NA, 2, 2)
colnames(Table4) <- c("MacroTS", "FRED-QD")
rownames(Table4) <- c("in first differences", "in levels")
Table4[, 1] <- round(c(MacroTS_panel_diffs$p.value,
                       MacroTS_panel_levels$p.value), 3)

# bootUR: unit root tests on individual and multiple series
MacroTS_adf <- order_integration(data = MacroTS, method = "adf")
MacroTS_boot_ur <- order_integration(data = MacroTS, method = "boot_ur")
MacroTS_boot_sqt_default <- order_integration(data = MacroTS, method = "boot_sqt")
MacroTS_boot_sqt_10step <- order_integration(data = MacroTS,
                                             method = "boot_sqt", steps = 0:10/10)
MacroTS_boot_fdr <- order_integration(data = MacroTS, method = "boot_fdr")

# Other packages: unit root tests
MacroTS_CADFtest <- order_integration_other_packages(data = MacroTS,
                                                     method = "CADFtest")
MacroTS_fUnitRoots <- order_integration_other_packages(data = MacroTS,
                                                       method = "unitrootTest")
MacroTS_tseries <- order_integration_other_packages(data = MacroTS,
                                                    method = "adf.test")
MacroTS_urca_urdf <- order_integration_other_packages(data = MacroTS,
                                                      method = "ur.df")
MacroTS_urca_urers <- order_integration_other_packages(data = MacroTS,
                                                       method = "ur.ers")

# Figure 3
MacroTS_d_tests <- cbind(MacroTS_adf$order_int, MacroTS_boot_ur$order_int,
                         MacroTS_boot_sqt_default$order_int,
                         MacroTS_boot_sqt_10step$order_int,
                         MacroTS_boot_fdr$order_int,
                         MacroTS_CADFtest$order_int,
                         MacroTS_fUnitRoots$order_int,
                         MacroTS_tseries$order_int,
                         MacroTS_urca_urdf$order_int,
                         MacroTS_urca_urers$order_int)
colnames(MacroTS_d_tests) <- c("adf","boot_ur", "boot_sqt: StepM",
                               "boot_sqt: 10-step", "boot_fdr", "CADFtest",
                               "unitrootTest","adf.test", "ur.df", "ur.ers")
plot_multiple_orders(MacroTS_d_tests)
ggplot2::ggsave("MacroTS_order_integration.pdf", width = 30, height = 10,
                units = "cm")

#### FRED-QD ####
# Download the FRED_QD dataset directly from the website
FRED_url <- url("https://files.stlouisfed.org/files/htdocs/fred-md/quarterly/2020-06.csv")
FRED_QD <- read.csv(FRED_url)
# Alternatively, to make the replication script standalone, the FRED-QD data can
# also be directly accessed via the provided csv file. Uncomment the line below
# in that case.
# FRED_QD <- read.csv("2020-06.csv")

trans_code_FRED_QD <- FRED_QD[2, -1] # Transformation codes (1 to 7) in FRED-QD dataset
order_int_trans_code <- lapply(trans_code_FRED_QD, function(x){(x==1|x==4)*0 +
    (x==2|x==5|x==7)*1 + (x==3|x==6)*2})
data_FRED_QD <- FRED_QD[-c(1:2, nrow(FRED_QD) - 2:0), -1]
N_FRED_QD <- ncol(data_FRED_QD)
date_FRED_QD <- as.yearmon(FRED_QD[-c(1:2, nrow(FRED_QD) - 2:0), 1], "%m/%d/%Y")
for (i in 1:N_FRED_QD) {
  if (trans_code_FRED_QD[i] %in% 4:6) {
    data_FRED_QD[, i] <- log(data_FRED_QD[, i])
  }
}

# Figure 2
plot_missing_values(data_FRED_QD, legend_size  = 12, axis_text_size =12)
# 1 internal NA: 3rd obs in column 188
ggplot2::ggsave("FRED_QD_internalNA.pdf", width = 40, height = 20, units = "cm")
data_FRED_QD[2, 188] <- NA # Fix internal NA
data_FRED_QD <- zoo(data_FRED_QD, date_FRED_QD)

# bootUR: panel unit root test for Table 3
FRED_QD_panel_diffs <- boot_panel(data = diff(data_FRED_QD))
FRED_QD_panel_levels <- boot_panel(data = data_FRED_QD)
Table4[, 2] <- round(c(FRED_QD_panel_diffs$p.value, FRED_QD_panel_levels$p.value),
                     3)
print(Table4)

# bootUR: unit root tests on individual and multiple series
ptm <- proc.time()
FRED_QD_adf <- order_integration(data = data_FRED_QD, method = "adf")
time_adf <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_boot_ur <- order_integration(data = data_FRED_QD, method = "boot_ur")
time_boot_ur <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_boot_sqt_default <- order_integration(data = data_FRED_QD,
                                              method = "boot_sqt")
time_boot_sqt <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_boot_sqt_20step <- order_integration(data = data_FRED_QD,
                                             method = "boot_sqt",
                                             steps = 0:20/20)
time_boot_sqt_20step <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_boot_fdr <- order_integration(data = data_FRED_QD, method = "boot_fdr")
time_boot_fdr <- (proc.time() - ptm)[3]

# Other packages: unit root tests
ptm <- proc.time()
FRED_QD_CADFtest <- order_integration_other_packages(data = data_FRED_QD,
                                                     method = "CADFtest")
time_CADFtest <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_fUnitRoots <- order_integration_other_packages(data = data_FRED_QD,
                                                       method = "unitrootTest")
time_fUnitRoots <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_tseries <- order_integration_other_packages(data = data_FRED_QD,
                                                    method = "adf.test")
time_tseries <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_urca_urdf <- order_integration_other_packages(data = data_FRED_QD,
                                                      method = "ur.df")
time_urca_urdf <- (proc.time() - ptm)[3]
ptm <- proc.time()
FRED_QD_urca_urers <- order_integration_other_packages(data = data_FRED_QD,
                                                       method = "ur.ers")
time_urca_urers <- (proc.time() - ptm)[3]

# For our inspection now, remove ptm and timing when uploading
time_nonboot <- c(time_adf, time_CADFtest, time_fUnitRoots, time_tseries,
                  time_urca_urdf, time_urca_urers)
time_boot <- c(time_boot_ur, time_boot_sqt,
               time_boot_sqt_20step, time_boot_fdr)
print(time_nonboot)
print(time_boot)

# Figures 4-5
FRED_QD_d_tests <- cbind(order_int_trans_code, FRED_QD_adf$order_int,
                         FRED_QD_boot_ur$order_int,
                         FRED_QD_boot_sqt_default$order_int,
                         FRED_QD_boot_sqt_20step$order_int,
                         FRED_QD_boot_fdr$order_int,
                         FRED_QD_CADFtest$order_int,
                         FRED_QD_fUnitRoots$order_int,
                         FRED_QD_tseries$order_int,
                         FRED_QD_urca_urdf$order_int,
                         FRED_QD_urca_urers$order_int)
colnames(FRED_QD_d_tests) <- c("FRED","adf","boot_ur", "boot_sqt: StepM",
                               "boot_sqt: 20-step","boot_fdr", "CADFtest",
                               "unitrootTest","adf.test", "ur.df", "ur.ers")
plot_multiple_orders(FRED_QD_d_tests[1:(N_FRED_QD/2), ],
                     size.legend.title = 30, size.legend.text = 30,
                     size.axis.text.x = 20, size.axis.text.y = 25,
                     size.axis.title = 30)
ggplot2::ggsave("FRED_QD_order_integration_1to124.pdf", width = 90,
                height = 100, units = "cm")
plot_multiple_orders(FRED_QD_d_tests[-c(1:(N_FRED_QD/2)), ],
                     size.legend.title = 30, size.legend.text = 30,
                     size.axis.text.x = 20, size.axis.text.y = 25,
                     size.axis.title = 30)
ggplot2::ggsave("FRED_QD_order_integration_125to248.pdf", width = 90,
                height = 100, units = "cm")
