## R package 'markets' (Version 1.1.4)
## ==============================================================================
##
## Estimation Methods for Markets in Equilibrium and Disequilibrium
##
## Pantelis Karapanagiotis
##
## The structure of the replication script follows the structure of the main
## text. Every section of the text has a dedicated block in the replication
## script.
##
## The replication code uses previously stored benchmark data to create the
## benchmark figures and does NOT directly depend on benchmark executions.
## The stored data were obtained from executing the benchmark code in Goethe
## university's cluster. See the README for replicating the benchmarks.
benchmark_data <- "time_benchmarks.rds"

## Required packages for replication
library("knitr")
library("markets")
library("ggplot2")
library("ggthemes")




## Section 1: Introduction
## ==============================================================================





## Section 2: Econometric background
## ==============================================================================





## Section 3: Scope, design, and alternatives
## ==============================================================================

## Figure 1: Design overview.
## -----------------------------------------------------------------------------
# The design figure is created using https://app.diagrams.net/
# Source: https://github.com/pi-kappa-devel/markets/blob/master/rsc/design.drawio
# Figure: https://github.com/pi-kappa-devel/markets/blob/master/man/figures/design.png


## Figure 2: Market class implementation.
## -----------------------------------------------------------------------------
# The implementation figure is created using https://app.diagrams.net/
# Source: https://github.com/pi-kappa-devel/markets/blob/master/rsc/implementation.drawio
# Figure: https://github.com/pi-kappa-devel/markets/blob/master/man/figures/implementation.png




## Section 4: Functionality via an empirical example
## ==============================================================================

# Loading the example's data
house_data <- fair_houses()


## Listing 2: Estimating the equilibrium model.
## -----------------------------------------------------------------------------
# An example of estimating the equilibrium model
eq <- equilibrium_model(
  HS | RM | ID | TREND ~ RM + TREND + W + CSHS + L1RM + L2RM + MONTH |
    RM + TREND + W + L1RM + MA6DSF + MA3DHF + MONTH,
  house_data, estimation_options = list(control = list(maxit = 5000))
)


## Listing 3: Estimating the deterministic adjustment model.
## -----------------------------------------------------------------------------
# An example of estimating the deterministic adjustment model
da <- diseq_deterministic_adjustment(
  HS | RM | ID | TREND ~ RM + TREND + W + CSHS + L1RM + L2RM + MONTH |
    RM + TREND + W + L1RM + MA6DSF + MA3DHF + MONTH,
  house_data,
  verbose = 2,
  estimation_options = list(control = list(maxit = 5000))
)


## Listing 4: Estimating the directional model.
## -----------------------------------------------------------------------------
# An example of estimating the directional model
dr <- diseq_directional(
  HS | RM | ID | TREND ~ TREND + W + CSHS + L1RM + L2RM |
    RM + TREND + W + MA6DSF + MA3DHF + MONTH,
  house_data, estimation_options = list(
    method = "Nelder-Mead", control = list(maxit = 5000)
  )
)


## Listing 5: Estimating the basic model.
## -----------------------------------------------------------------------------
# An example of estimating the basic model
start <- coef(eq)
start <- start[names(start) != "RHO"]
bs <- diseq_basic(
  HS | RM | ID | TREND ~ RM + TREND + W + CSHS + L1RM + L2RM + MONTH |
    RM + TREND + W + L1RM + MA6DSF + MA3DHF + MONTH,
  house_data, verbose = 2, correlated_shocks = FALSE,
  estimation_options = list(
    start = start,
    control = list(maxit = 5000)
  )
)


## Listing 6: Estimating the stochastic adjustment model.
## -----------------------------------------------------------------------------
# An example of estimating the stochastic adjustment model
sa <- diseq_stochastic_adjustment(
  HS | RM | ID | TREND ~ RM + TREND + W + CSHS + MONTH |
    RM + TREND + W + L1RM + L2RM + MA6DSF + MA3DHF + MONTH |
    TREND + L2RM + L3RM,
  house_data |> dplyr::mutate(L3RM = dplyr::lag(RM, 3)),
  correlated_shocks = FALSE,
  estimation_options = list(
    control = list(maxit = 5000), standard_errors = c("W")
  )
)


## Table 2: Estimation results.
## -----------------------------------------------------------------------------
## Create a table summarizing the estimation results

#' Model coefficients tibble
#'
#' Format the results of a fitted model using the mask "estimate (p-value)" and
#' store the formatted output into a tibble.
#'
#' @param fit Fitted model
#' @return A tibble with formatted model coefficients
coef_tibble <- function(fit) {
  coefs <- markets:::market_fit_coefficients(fit, summary = TRUE)
  name <- name(fit)
  name <- paste0(substr(name, 1, 1), tolower(substr(name, 2, nchar(name))))
  tibble::tibble(
    Coefficient = rownames(coefs),
    !!str2lang(paste0("`", name, "`")) :=
        sprintf("%.4f (%.2f)", coefs[, 1], coefs[, 4])
  )
}

# Merge the formatted results based on the names of the estimated coefficient
# to make them easily comparable
coefs <- coef_tibble(da) |>
  dplyr::full_join(coef_tibble(eq), by = "Coefficient") |>
  dplyr::full_join(coef_tibble(bs), by = "Coefficient") |>
  dplyr::full_join(coef_tibble(dr), by = "Coefficient") |>
  dplyr::full_join(coef_tibble(sa), by = "Coefficient") |>
  dplyr::filter(!grepl("MONTH", Coefficient)) |>
  dplyr::relocate(2, .after = 5) |>
  dplyr::slice(1:14, 19, 15, 20:23, 16, 17, 24, 18)

# Render the merged results as a table
knitr::kable(coefs, "pipe")

## Listing 7: Market model fits' summaries.
## -----------------------------------------------------------------------------
# An example of summarizing model fits
summary(da)


## Listing 7: Market model fits' summaries.
## -----------------------------------------------------------------------------
# Remove some lines from output for brevity
out <- capture.output(summary(da))
cat(c(out[1:22], "...", out[31:39], "...", out[75:length(out)]), sep = "\n")


## Figure 3: Market fits' visualizations.
## -----------------------------------------------------------------------------
# An example of plotting the equilibrium model
plot(eq)


# An example of plotting the basic model
plot(bs)


# An example of plotting the directional model
plot(dr)


# An example of plotting the deterministic adjustment model
plot(da)


# An example of plotting the stochastic adjustment model
plot(sa)


## Listing 8: Fitted demanded and supplied quantities.
## -----------------------------------------------------------------------------
# An example of retrieving demanded and supplied fitted values
demanded <- demanded_quantities(sa)
supplied <- supplied_quantities(sa)


## Figure 4: Fitted demanded and supplied quantities.
## -----------------------------------------------------------------------------
## Plot fitted demanded and supplied quantities

# Get the dates for the data points included in the stochastic adjustment
# model estimation.
dates <- fair_houses()$DATE[sa@model@data$TREND]

# Create a data tibble with dates and fitted demanded and supplied quantities.
pdt <- tibble::tibble(
  Date = c(dates, dates),
  Quantity = c(demanded, supplied),
  Side = c(rep("Demand", length(demanded)), rep("Supply", length(supplied)))
)

# Create and format the plot.
ggplot2::ggplot(pdt, ggplot2::aes(x = Date)) +
  ggplot2::geom_line(ggplot2::aes(y = Quantity, linetype = Side, color = Side)) +
  ggplot2::scale_x_date(date_breaks = "12 month", date_labels = "%b %Y") +
  ggplot2::theme(
    panel.background = ggplot2::element_rect(fill = "transparent"),
    plot.background = ggplot2::element_rect(
      fill = "transparent", color = NA
    ),
    legend.background = ggplot2::element_rect(fill = "transparent"),
    legend.box.background = ggplot2::element_rect(
      fill = "transparent", color = NA
    ),
    axis.text.x = ggplot2::element_text(angle = 60, vjust = 1.0, hjust = 1),
    legend.position = c(0.8, 0.8)
  )


## Listing 9: Aggregate fitted quantities.
## -----------------------------------------------------------------------------
# An example of retrieving aggregate demanded and supplied fitted values
c(demand = aggregate_demand(eq), supply = aggregate_supply(eq))
c(demand = aggregate_demand(sa), supply = aggregate_supply(sa))


## Figure 5: Fitted shortages.
## -----------------------------------------------------------------------------
## Plot fitted shortages

# Create a data tibble with dates (from Figure 4) and fitted shortages.
pdt <- tibble::tibble(Date = dates, Shortage = shortages(sa))

# Create and format the plot.
ggplot2::ggplot(pdt, ggplot2::aes(x = Date)) +
  ggplot2::geom_line(ggplot2::aes(y = Shortage)) +
  ggplot2::scale_y_continuous(breaks = c(0)) +
  ggplot2::scale_x_date(date_breaks = "12 month", date_labels = "%b %Y") +
  ggplot2::theme(
    panel.background = ggplot2::element_rect(fill = "transparent"),
    plot.background = ggplot2::element_rect(
      fill = "transparent", color = NA
    ),
    panel.grid.major.y = ggplot2::element_line(
      colour = "black", linetype = "dashed"
    ),
    axis.text.x = ggplot2::element_text(angle = 60, vjust = 1.0, hjust = 1)
  )


## Listing 10: Shortage indicators.
## -----------------------------------------------------------------------------
c(no_shortages = sum(shortage_indicators(sa)),
  no_surpluses = sum(!shortage_indicators(sa)))


## Listing 11: Shortage standard deviation.
## -----------------------------------------------------------------------------
shortage_standard_deviation(da)


## Figure 6: Normalized shortages, relative shortages, and shortage probabilities.
## -----------------------------------------------------------------------------
hist(normalized_shortages(da), main = NULL)


hist(relative_shortages(da), main = NULL)


hist(shortage_probabilities(da), main = NULL)


## Listing 12: Marginal effects.
## -----------------------------------------------------------------------------
fits <- c(sa = sa, da = da)
sapply(fits, function(m) shortage_marginal(m, "RM"))


## Listing 13: Marginal effects.
## -----------------------------------------------------------------------------
sapply(fits, function(m) shortage_probability_marginal(m, "MA3DHF"))
sapply(fits, function(m) {
    shortage_probability_marginal(m, "CSHS", aggregate = "at_the_mean")
})




## Section 5: Estimation benchmarks
## ==============================================================================

## Create the benchmark figures.
##
## The benchmark data are produced by executing `benchmark.sh full`, which
## executes the R script `benchmark.R` multiple times
## by passing different arguments. Replicating the complete data sample takes
## some time. To perform a small-scale replication, one can instead use
## `benchmark.sh test` (this will also be time-consuming in
## most machines, but it will not run for days). The resulting time
## measurements should reveal similar patterns in all machines and processors,
## but the absolute time measurements will vary depending on the processor
## speed.
##
## The replication code included here loads the previously generated
## benchmark data from `time_benchmarks.rds` to replicate the
## figures of the main text.

# Load the benchmark data for a varying number of observations.
obs_benchmarks <- readRDS(benchmark_data) |>
  dplyr::filter(addpars == 0)

# Load the benchmark data for a varying number of parameters.
params_benchmarks <- readRDS(benchmark_data) |>
  dplyr::filter(addpars > 0)

#' BFGS parameter benchmark ratio.
#'
#' Calculate the ratio of average execution times of BFGS with analytically
#' calculated and numerically approximated gradients for a given model. This is
#' only used to avoid hard-typing the numbers on pp. 29 and 30 of the main text.
#'
#' @param model_class Model class string.
#' @return A rounded (with two significant digits) numerical value.
bfgs_params_benchmark_ratio <- function(model_class) {
  means <- params_benchmarks |>
    dplyr::filter(expr != "nm" & model == model_class) |>
    dplyr::group_by(expr) |>
    dplyr::summarize(`mean` = mean(`mean`))

  round(
  (means |> dplyr::filter(expr == "bfgs_numerical") |> dplyr::pull(mean)) / (
      means |> dplyr::filter(expr == "bfgs_calculated") |> dplyr::pull(mean)), 2)
}

#' Model data subset.
#'
#' Get the subset of the passed data for a particular model.
#'
#' @param data Benchmark data.
#' @param model_string Model class string.
#' @return A tibble with the data corresponding to the passed model string.
get_model_data <- function(data, model_string) {
  data |>
    dplyr::filter(model == as.symbol(model_string)) |>
    dplyr::arrange(model, expr, nobs)
}

#' Methods names.
#'
#' Replace the tibble column names with more readable names for the figures.
#'
#' @param method_names A vector with method names as stored in the benchmark data.
#' @return A vector with more human friendly names.
better_method_names <- function(method_names) {
  method_names[method_names == "bfgs_calculated"] <- "BFGS with calculated gradient"
  method_names[method_names == "bfgs_numerical"] <- "BFGS with numerical gradient"
  method_names[method_names == "nm"] <- "Nelder Mead"
  method_names
}

#' Parameter counts.
#'
#' Adjust the parameter count for each model. Each model uses a different number
#' of starting parameters (see section 5 and appendix B), and this function
#' is used to adjust the labels of the parameter benchmark figures appropriately.
#'
#' @param model Model type string.
#' @param addpars Number of additional parameters in the benchmark.
#' @return The total number of parameters of the benchmark.
better_parameter_count <- function(model, addpars) {
  params <- 14 + addpars
  indices <- model == "diseq_deterministic_adjustment"
  params[indices] <- params[indices] + 1
  indices <- model == "diseq_stochastic_adjustment"
  params[indices] <- params[indices] + 6
  params
}

#' Observation benchmark figure.
#'
#' Create and format a benchmark figure for a model with a varying number of
#' observations.
#'
#' @param model Model type string.
#' @return A benchmark figure of ggplot2 type.
figure_for_obs_benchmarck <- function(model) {
  data <- get_model_data(obs_benchmarks, model) |>
    dplyr::mutate(
      uci = mean + sd, lci = mean - sd, obs = nobs * tobs,
      Method = better_method_names(expr)
    )

  ggplot2::ggplot(data, ggplot2::aes(x = obs, y = mean, color = Method)) +
    ggplot2::xlab("Number of observations (log2 scale)") +
    ggplot2::scale_x_continuous(breaks = unique(data$obs), trans = "log2") +
    ggplot2::ylab("Estimation time in seconds (log2 scale)") +
    ggplot2::scale_y_continuous(trans = "log2") +
    ggplot2::geom_line(ggplot2::aes(x = obs, y = uci), linetype = "dotted") +
    ggplot2::geom_line(ggplot2::aes(x = obs, y = lci), linetype = "dotted") +
    ggplot2::geom_line() +
    ggthemes::theme_tufte() +
    ggplot2::theme(
      legend.position = c(0.35, 0.85),
      legend.title = ggplot2::element_text(size = 18),
      legend.text = ggplot2::element_text(size = 16),
      axis.text.x = ggplot2::element_text(angle = 45, size = 14),
      axis.text.y = ggplot2::element_text(size = 14),
      axis.title.x = ggplot2::element_text(size = 16),
      axis.title.y = ggplot2::element_text(size = 16)
    )
}


#' Parameter benchmark figure.
#'
#' Create and format a benchmark figure for a model with a varying number of
#' estimated parameter.
#'
#' @param model Model type string.
#' @return A benchmark figure of ggplot2 type.
figure_for_params_benchmarck <- function(model) {
  data <- get_model_data(params_benchmarks, model) |>
    dplyr::mutate(
      uci = mean + sd, lci = mean - sd,
      params = better_parameter_count(model, addpars),
      Method = better_method_names(expr)
    )

  ggplot2::ggplot(data, ggplot2::aes(x = params, y = mean, color = Method)) +
    ggplot2::xlab("Number of parameters") +
    ggplot2::scale_x_continuous(breaks = unique(data$params)) +
    ggplot2::ylab("Estimation time in seconds") +
    ggplot2::ylim(NA, max(data$mean + data$sd) * 1.2) +
    ggplot2::geom_line(ggplot2::aes(x = params, y = uci), linetype = "dotted") +
    ggplot2::geom_line(ggplot2::aes(x = params, y = lci), linetype = "dotted") +
    ggplot2::geom_line() +
    ggthemes::theme_tufte() +
    ggplot2::theme(
      legend.position = c(0.35, 0.88),
      legend.title = ggplot2::element_text(size = 18),
      legend.text = ggplot2::element_text(size = 16),
      axis.text.x = ggplot2::element_text(size = 14),
      axis.text.y = ggplot2::element_text(size = 14),
      axis.title.x = ggplot2::element_text(size = 16),
      axis.title.y = ggplot2::element_text(size = 16)
    )
}


## Figure 7: Equilibrium model estimation time benchmarks.
## -----------------------------------------------------------------------------
figure_for_obs_benchmarck("equilibrium_model")


figure_for_params_benchmarck("equilibrium_model")


## Figure 8: Basic model estimation time benchmarks.
## -----------------------------------------------------------------------------
figure_for_obs_benchmarck("diseq_basic")


figure_for_params_benchmarck("diseq_basic")


## Figure 9: Directional model estimation time benchmarks.
## -----------------------------------------------------------------------------
figure_for_obs_benchmarck("diseq_directional")


figure_for_params_benchmarck("diseq_directional")


## Figure 10: Deterministic adjustment model estimation time benchmarks.
## -----------------------------------------------------------------------------
figure_for_obs_benchmarck("diseq_deterministic_adjustment")


figure_for_params_benchmarck("diseq_deterministic_adjustment")


## Figure 11: Stochastic adjustment model estimation time benchmarks.
## -----------------------------------------------------------------------------
figure_for_obs_benchmarck("diseq_stochastic_adjustment")


figure_for_params_benchmarck("diseq_stochastic_adjustment")




## Section 6: Conclusion
## ==============================================================================





##  Acknowledgments
## ==============================================================================





## Appendix A: Installation
## ==============================================================================





## Appendix B: Simulation details
## ==============================================================================





## Appendix C: Comparison of equilibrium estimates
## ==============================================================================

## Listing 14: Equilibrium model simulation.
## -----------------------------------------------------------------------------
# Simulate data for the equilibrium model's estimation comparisons
seed <- 25
parameters <- list(
  nobs = 4000, tobs = 10,
  alpha_d = -1.7, beta_d0 = 14.9, beta_d = c(2.3, -1.2),
    eta_d = c(-1.3, -1.1),
  alpha_s = 1.6, beta_s0 = 10.2, beta_s = c(-1.3), eta_s = c(2.5, 2.2),
  sigma_d = 2.1, sigma_s = 2.5, rho_ds = -0.1
)
mdl <- simulate_model("equilibrium_model", parameters, seed, verbose = 2)


## Listing 15: Equilibrium model estimation.
## -----------------------------------------------------------------------------
# Estimate using maximum likelihood implemented with optim
optim_fit <- estimate(mdl)
# Estimate using maximum likelihood implemented with gsl
gsl_fit <- estimate(
  mdl, optimizer = "gsl", control = list(
    step = 1e-0, maxit = 1e+4,
    objective_tolerance = 1e-2, gradient_tolerance = 1e-2
  )
)
# Estimate using 2-stage least squares
ls_fit <- estimate(mdl, method = "2SLS")


## Listing 15: Equilibrium model estimation.
## -----------------------------------------------------------------------------
## Create a table summarizing the estimation results

#' Model coefficients tibble
#'
# Create an intermediate tibble for a single model. The tibble has two columns.
# One column records the coefficient name and the other one its estimated value.
#'
#' @param fit Fitted model
#' @return A tibble with estimated model coefficients
coef_tibble <- function(fit) {
  tibble::tibble(
    coef = names(coef(fit)),
    !!str2lang(paste0("`", name(fit), "`")) := coef(fit)
  )
}

# Reorder simulated parameters to match the order of estimated coefficients.
sim_coef <- c(
  unlist(parameters[c(
    "alpha_d", "beta_d0", "beta_d", "eta_d", "alpha_s",
    "beta_s0", "beta_s", "eta_s"
  )]),
  unlist(parameters[c("sigma_d", "sigma_s")])**2,
  unlist(parameters["rho_ds"])
)

# Merge tibble for the three estimated objects and calculate the absolute
# error of each estimated parameter (absolute value of difference from simulated
# parameter).
coefs <- tibble::tibble(coef = names(coef(ls_fit)), sim = sim_coef) |>
  dplyr::full_join(coef_tibble(gsl_fit), by = "coef") |>
  dplyr::full_join(coef_tibble(ls_fit), by = "coef") |>
  dplyr::full_join(coef_tibble(optim_fit), by = "coef") |>
  dplyr::rename(Coefficient = coef, Simulated = sim,
                gsl = Equilibrium.x, ls = Equilibrium.y, optim = Equilibrium) |>
  dplyr::mutate(gsl_err = abs(gsl - Simulated), ls_err = abs(ls - Simulated),
                optim_err = abs(optim - Simulated))

# Calculate the average absolute value for each of the three estimated objects.
mean_errors <- list(gsl = mean(coefs$gsl_err), ls = mean(coefs$ls_err),
                    optim = mean(coefs$optim_err))

# Format the results using the mask "estimate (absolute error)" and append
# row with mean absolute errors.
coefs <- coefs |>
  dplyr::mutate(gsl = sprintf("%.4f (%.4f)", gsl, gsl_err),
                ls = sprintf("%.4f (%.4f)", ls, ls_err),
                optim = sprintf("%.4f (%.4f)", optim, optim_err)) |>
  dplyr::select(Coefficient, Simulated, ls, gsl, optim) |>
  tibble::add_row(
    Coefficient = "Mean abs. error",
    gsl = sprintf("%.4f", mean_errors$gsl),
    ls = sprintf("%.4f", mean_errors$ls),
    optim = sprintf("%.4f", mean_errors$optim))

# Render the merged results as a table
knitr::kable(coefs, "pipe")



## Appendix D: Model initialization
## ==============================================================================

## Listing 16: Model initialization.
## -----------------------------------------------------------------------------
## Model initialization and estimation decoupling.
##
## The following lines exemplify the alternative initiation functionality
## provided by the package. The models are initialized with options
## matching the corresponding initialization options of the empirical
## example (see section 4.3).
eq <- new("equilibrium_model",
  quantity = HS, price = RM, subject = ID, time = TREND,
  demand = RM + TREND + W + CSHS + L1RM + L2RM + MONTH,
  supply = RM + TREND + W + L1RM + MA6DSF + MA3DHF + MONTH,
  house_data
)
da <- new("diseq_deterministic_adjustment",
  quantity = HS, price = RM, subject = ID, time = TREND,
  demand = RM + TREND + W + CSHS + L1RM + L2RM + MONTH,
  supply = RM + TREND + W + L1RM + MA6DSF + MA3DHF + MONTH,
  house_data, verbose = 2
)
dr <- new("diseq_directional",
  quantity = HS, price = RM, subject = ID, time = TREND,
  demand = TREND + W + CSHS + L1RM + L2RM,
  supply = RM + TREND + W + MA6DSF + MA3DHF + MONTH,
  house_data
)
bs <- new("diseq_basic",
  quantity = HS, price = RM, subject = ID, time = TREND,
  demand = RM + TREND + W + CSHS + L1RM + L2RM + MONTH,
  supply = RM + TREND + W + L1RM + MA6DSF + MA3DHF + MONTH,
  house_data, verbose = 2, correlated_shocks = FALSE
)
sa <- new("diseq_stochastic_adjustment",
  quantity = HS, price = RM, subject = ID, time = TREND,
  demand = RM + TREND + W + CSHS + MONTH,
  supply = RM + TREND + W + L1RM + L2RM + MA6DSF + MA3DHF + MONTH,
  price_dynamics = TREND + L2RM + L3RM,
  house_data |> dplyr::mutate(L3RM = dplyr::lag(RM, 3)),
  correlated_shocks = FALSE
)


## Listing 17: Model objects' output operations
## -----------------------------------------------------------------------------
show(dr)
summary(sa)


## Listing 18: Model estimation.
## -----------------------------------------------------------------------------
## Model estimation.
##
## The following lines estimate the previously initialized models.
eq <- estimate(eq, control = list(maxit = 5e3))

da <- estimate(da, control = list(maxit = 5e3))

dr <- estimate(dr, method = "Nelder-Mead", control = list(maxit = 5e3))

start <- coef(eq)
start <- start[names(start) != "RHO"]
bs <- estimate(bs, start = start, control = list(maxit = 5e3))

sa <- estimate(sa, control = list(maxit = 5e3), standard_errors = c("W"))



## Session Information
## ---
sessionInfo()
