# This file contains all of the code necessary to replicate the code examples as
# well as the figures and tables used in the JSS submission titled "logitr: Fast
# Estimation of Multinomial and Mixed Logit Models with Preference Space and
# Willingness to Pay Space Utility Parameterizations".

# ---- Install and Load Packages ----

# Packages must be installed once. To do so, uncomment the lines below. Once
# installed, these lines should be re-commented out.

# Install packages for creating figures
# install.packages(c(
#     "tidyverse", "cowplot", "ggrepel", "randtoolbox", "RColorBrewer"
# ))

# Install packages for benchmarking
# install.packages(c("logitr", "mlogit", "gmnl"))

# Load libraries for making figures
library("tidyverse")
library("cowplot")
library("ggrepel")
library("randtoolbox")
library("RColorBrewer")

# Load libraries for estimating models
library("logitr")
library("mlogit")
library("gmnl")


# This replication file is structured as follows:
#
# 1) Code to replicate the example models used in manuscript (Section 5). Since
# Section 5 is the first section manuscript that contains code, it is included
# first in this replication file even though Figure 1, Figure 2, and Table 1
# appear earlier in the manuscript. All other code after this section is not
# included in the manuscript but rather is used to replicate Figure 1, Figure 2,
# and Table 1.
#
# 2) Code to replicate Figure 1 (Section 3.1), which involves first estimating
# several models and then using the estimates from those models to create the
# figure itself.
#
# 3) Code to replicate Figure 2 (Section 4.2). This requires first estimating
# several models using different packages and then creating the figure based on
# the estimation times of each model. The exact estimation times will *always*
# vary for every run of each model, even when run on the same machine due to
# variations in background processes. As a result, the only way to perfectly
# reproduce Figure 2 in the manuscript is to use a saved data set of the run
# times exported from a specific run on a specific machine. To facilitate this,
# the code to estimate the models used to create Figure 2 was first run on the
# following publicly accessible Google colab notebook:
#
#    https://colab.research.google.com/drive/1vYlBdJd4xCV43UwJ33XXpO3Ys8xWkuxx?usp=sharing
#
#    The run times were then exported as a csv and are included in the logitr
#    package as a dataframe called "runtimes" that is loaded with the package.
#    This data set is used in the code in this section to replicate Figure 2 in
#    the manuscript.
#
# 4) Code to replicate Table 1 (Section 4.2). This is computed from the
# "runtimes" data frame used to create Figure 2.
#
#
# Note that logitr uses parallel processing to simultaneously estimate models
# that use a multi-start via the "numMultiStarts" argument. To ensure perfect
# reproducibility with the results in the manuscript, numCores is set to 1 for
# all models that use a multi-start so that all multi-start iterations are run
# in series rather than in parallel. This will result in slower estimation, but
# is necessary for precise reproduction of the manuscript results. In practice,
# it is recommended to use more cores for faster performance.


# 1) Replication of example models used in manuscript (Section 5) ----

# Section 5.2 - Data format ----

head(yogurt)

# Section 5.3 - Model specification interface ----

# Estimate a multinomial logit model in the preference space with fixed
# parameters
mnl_pref <- logitr(
  data    = yogurt,
  outcome = "choice",
  obsID   = "obsID",
  pars    = c("price", "feat", "brand")
)

# Estimate a multinomial logit model in the WTP space with fixed parameters
mnl_wtp <- logitr(
  data     = yogurt,
  outcome  = "choice",
  obsID    = "obsID",
  pars     = c("feat", "brand"),
  scalePar = "price"
)

# Comparing WTP space specification with the {gmnl} package
data_gmnl <- mlogit.data(
    data     = yogurt,
    shape    = "long",
    choice   = "choice",
    id.var   = "id",
    alt.var  = "alt",
    chid.var = "obsID",
    opposite = "price"
)

mnl_wtp <- gmnl(
  data = data_gmnl,
  formula = choice ~ price + feat + brand | 0 | 0 | 0 | 1,
  fixed = c(TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE),
  model = "smnl", method = "bhhh",
  start = c(1, 0, 0, 0, 0, 0, 0)
)

# Section 5.4 - Continuous and discrete variable coding ----

# Manually setting factor levels to set the default level for "brand"
yogurt2 <- logitr::yogurt
yogurt2$brand <- factor(
    x = yogurt2$brand,
    levels = c("weight", "hiland", "yoplait", "dannon")
)
levels(yogurt2$brand)

# Section 5.5 - Estimating multinomial logit models ----

# Preference space model
mnl_pref <- logitr(
  data    = yogurt,
  outcome = "choice",
  obsID   = "obsID",
  pars    = c("price", "feat", "brand")
)

# View summary of results
summary(mnl_pref)

# Extract coefficients using coef()
coef(mnl_pref)

# Extract standard errors using se()
se(mnl_pref)

# Extract the log-likelihood using coef()
logLik(mnl_pref)

# Extract variance-covariance matrix using vcov()
vcov(mnl_pref)

# Section 5.6 - Estimating willingness to pay ----

# Compute willingness to pay (WTP)
wtp(mnl_pref, scalePar = "price")

# Estimate a multinomial logit model in the WTP space with fixed parameters
set.seed(123)

mnl_wtp <- logitr(
  data     = yogurt,
  outcome  = "choice",
  obsID    = "obsID",
  pars     = c("feat", "brand"),
  scalePar = "price",
  numMultiStarts = 10,
  numCores = 1
)

# View summary of results
summary(mnl_wtp)

# Compare computed WTP versus directly estimated WTP
wtpCompare(
  model_pref = mnl_pref,
  model_wtp  = mnl_wtp,
  scalePar   = "price"
)

# Section 5.7 - Estimating mixed logit models ----

# Estimate a preference space mixed logit model with a normally-distributed
# brand parameter and a fixed price parameter
set.seed(456)

mxl_pref <- logitr(
  data     = yogurt,
  outcome  = "choice",
  obsID    = "obsID",
  panelID  = "id",
  pars     = c("price", "feat", "brand"),
  randPars = c(feat = "n", brand = "n"),
  numMultiStarts = 10,
  numCores = 1
)

# View summary of results
summary(mxl_pref)

# Section 5.8 - Estimating WTP space mixed logit models ----

# Estimate a WTP space mixed logit model with a normally-distributed brand WTP
# parameter and feat parameter, and a fixed price / scale parameter
set.seed(6789)

mxl_wtp <- logitr(
  data     = yogurt,
  outcome  = "choice",
  obsID    = "obsID",
  panelID  = "id",
  pars     = c("feat", "brand"),
  scalePar = "price",
  randPars = c(feat = "n", brand = "n"),
  numMultiStarts = 10,
  numCores = 1
)

# View summary of results
summary(mxl_wtp)

# Compare computed WTP versus directly estimated WTP
wtpCompare(
  model_pref = mxl_pref,
  model_wtp  = mxl_wtp,
  scalePar   = "price"
)

# Section 5.9 - Weighted models ----

# Estimate an *unweighted* WTP space multinomial logit model with fixed
# parameters using a multi-start loop
set.seed(5678)

mnl_wtp_unweighted <- logitr(
  data    = cars_us,
  outcome = "choice",
  obsID   = "obsnum",
  pars = c(
    "hev", "phev10", "phev20", "phev40", "bev75", "bev100", "bev150",
    "american", "japanese", "chinese", "skorean", "phevFastcharge",
    "bevFastcharge","opCost", "accelTime"),
  scalePar = "price",
  robust   = TRUE,
  numMultiStarts = 10,
  numCores = 1
)

# View summary of results
summary(mnl_wtp_unweighted)

# Estimate a *weighted* WTP space multinomial logit model with fixed
# parameters using a multi-start loop
set.seed(5678)

mnl_wtp_weighted <- logitr(
  data    = cars_us,
  outcome = "choice",
  obsID   = "obsnum",
  pars = c(
    "hev", "phev10", "phev20", "phev40", "bev75", "bev100", "bev150",
    "american", "japanese", "chinese", "skorean", "phevFastcharge",
    "bevFastcharge","opCost", "accelTime"),
  scalePar = "price",
  weights  = "weights",
  robust   = TRUE,
  numMultiStarts = 10,
  numCores = 1
)

# View summary of results
summary(mnl_wtp_weighted)

# Compare weighted versus unweighted coefficients
data.frame(
  Unweighted = coef(mnl_wtp_unweighted),
  Weighted   = coef(mnl_wtp_weighted)
)

# Section 5.10 - Predicting probabilities ----

# Predict probabilities and outcomes using estimated models.

# Predicted probabilities using the preferences space MNL model
probs <- predict(mnl_pref)
head(probs)

# Same predictions, but also returning the data
probs <- predict(mnl_pref, returnData = TRUE)
head(probs)

# Predict probabilities for a subset of alternatives
data <- subset(
  yogurt, obsID %in% c(42, 13),
  select = c("obsID", "alt", "price", "feat", "brand"))

probs_mnl_pref <- predict(
  mnl_pref,
  newdata = data,
  obsID = "obsID",
)

probs_mnl_pref

# Include a 95% confidence interval
set.seed(5678)

probs_mnl_pref <- predict(
  mnl_pref,
  newdata = data,
  obsID = "obsID",
  ci = 0.95
)

probs_mnl_pref

# Predict probabilities using WTP space model
set.seed(5678)

probs_mnl_wtp <- predict(
  mnl_wtp,
  newdata = data,
  obsID = "obsID",
  ci = 0.95
)

probs_mnl_wtp

# Section 5.11 - Predicting outcomes ----

# Predict outcomes using the estimated preference space model
set.seed(5678)

outcomes_pref <- predict(
  mnl_pref,
  type = "outcome",
  returnData = TRUE
)

head(outcomes_pref)

# Predict outcomes using the estimated WTP space model
set.seed(5678)

outcomes_wtp <- predict(
  mnl_wtp,
  type = "outcome",
  returnData = TRUE
)

head(outcomes_wtp)

# Compute accuracy of each model
chosen_pref <- subset(outcomes_pref, choice == 1)
chosen_pref$correct <- chosen_pref$choice == chosen_pref$predicted_outcome
accuracy_pref <- sum(chosen_pref$correct) / nrow(chosen_pref)
accuracy_pref

chosen_wtp <- subset(outcomes_wtp, choice == 1)
chosen_wtp$correct <- chosen_wtp$choice == chosen_wtp$predicted_outcome
accuracy_wtp <- sum(chosen_wtp$correct) / nrow(chosen_wtp)
accuracy_wtp

# Section 5.11 - Additional options ----

# Use the WTP values computed from preference space model as starting points in
# a WTP space model.

# First compute the WTP values from the mnl_pref model:
wtp_est <- wtp(mnl_pref, scalePar = "price")$Estimate

# Now use wtp_est as the starting values for the first iteration of the
# multi-start loop:
set.seed(5678)

mnl_wtp2 <- logitr(
  data      = yogurt,
  outcome   = "choice",
  obsID     = "obsID",
  pars      = c("feat", "brand"),
  scalePar  = "price",
  startVals = wtp_est,
  numMultiStarts = 10,
  numCores = 1
)


# 2) Replication of Figure 1 ----

# Figure 1 plots the coefficients from the next 6 models. Each model can
# take several minutes to estimate as each one involves a 30-iteration
# multi-start, which is used because some of the models (e.g., where
# price is log-normally distributed) can have trouble converging depending on
# the random starting points.

# Set up the data used to create the figure (copy of the yogurt data exported)
# from {logitr}
fig1_data <- logitr::yogurt

# Set the reference brand to "weight" for the fig1_data data frame
# This is done simply for stability purposes as some of the models
# converge better when "weight" is the reference level
brands <- c("weight", "hiland", "yoplait", "dannon")
fig1_data$brand <- factor(fig1_data$brand, levels = brands)

# Preference space mixed logit model with a normally-distributed brand
# parameter and a fixed price / scale parameter:
set.seed(1234)

mxl_pref_f <- logitr(
  data    = fig1_data,
  outcome = "choice",
  obsID   = "obsID",
  pars    = c("price", "brand"),
  randPars = c(brand = "n"),
  numMultiStarts = 30
)

# Preference space mixed logit model with a normally-distributed brand
# parameter and a normally-distributed price / scale parameter:
set.seed(1234)

mxl_pref_n <- logitr(
  data    = fig1_data,
  outcome = "choice",
  obsID   = "obsID",
  pars    = c("price", "brand"),
  randPars = c(price = "n", brand = "n"),
  numMultiStarts = 30
)

# Preference space mixed logit model with a normally-distributed brand
# parameter and a log-normally-distributed price / scale parameter.
# For this model, use negative of price since log-normal forces positivity:
fig1_data_neg_price <- fig1_data
fig1_data_neg_price$price <- -1*fig1_data$price

set.seed(1234)

mxl_pref_ln <- logitr(
  data    = fig1_data_neg_price,
  outcome = "choice",
  obsID   = "obsID",
  pars    = c("price", "brand"),
  randPars = c(price = "ln", brand = "n"),
  numMultiStarts = 30
)

# WTP space mixed logit model with a normally-distributed brand WTP
# parameter and a fixed price / scale parameter:
set.seed(1234)

mxl_wtp_f <- logitr(
  data     = fig1_data,
  outcome  = "choice",
  obsID    = "obsID",
  pars     = "brand",
  scalePar = "price",
  randPars = c(brand = "n"),
  numMultiStarts = 30
)

# WTP space mixed logit model with a normally-distributed brand WTP
# parameter and normally-distributed price / scale parameter:
set.seed(1234)

mxl_wtp_n <- logitr(
  data      = fig1_data,
  outcome   = "choice",
  obsID     = "obsID",
  pars      = "brand",
  scalePar  = "price",
  randPars  = c(brand = "n"),
  randScale = "n",
  numMultiStarts = 30
)

# WTP space mixed logit model with a normally-distributed brand WTP
# parameter and log-normally-distributed price / scale parameter:
set.seed(1234)

mxl_wtp_ln <- logitr(
  data      = fig1_data,
  outcome   = "choice",
  obsID     = "obsID",
  pars      = "brand",
  scalePar  = "price",
  randPars  = c(brand = "n"),
  randScale = "ln",
  numMultiStarts = 30
)

# Compare log-likelihood of each model to check that they all reached
# the same solution:
data.frame(
  pref_f  = logLik(mxl_pref_f),
  pref_n  = logLik(mxl_pref_n),
  pref_ln = logLik(mxl_pref_ln),
  wtp_f   = logLik(mxl_wtp_f),
  wtp_n   = logLik(mxl_wtp_n),
  wtp_ln  = logLik(mxl_wtp_ln)
)

# Generate simulated data from the estimated parameters using halton draws
pars_pref_f <- coef(mxl_pref_f)
pars_pref_n <- coef(mxl_pref_n)
pars_pref_ln <- coef(mxl_pref_ln)
pars_wtp_f <- coef(mxl_wtp_f)
pars_wtp_n <- coef(mxl_wtp_n)
pars_wtp_ln <- coef(mxl_wtp_ln)
N <- 10^4
draws <- randtoolbox::halton(N, normal = TRUE)
mu <- "brandyoplait"
sd <- "sd_brandyoplait"
pars <- tibble(
  alpha_f  = rep(pars_pref_f["price"], N),
  alpha_n  = pars_pref_n["price"] + draws*pars_pref_n["sd_price"],
  alpha_ln = exp(pars_pref_ln["price"] + draws*pars_pref_ln["sd_price"]),
  beta_f   = pars_pref_f[mu]  + draws*pars_pref_f[sd],
  beta_n   = pars_pref_n[mu]  + draws*pars_pref_n[sd],
  beta_ln  = pars_pref_ln[mu] + draws*pars_pref_ln[sd],
  omega_f  = pars_wtp_f[mu]   + draws*pars_wtp_f[sd],
  omega_n  = pars_wtp_n[mu]   + draws*pars_wtp_n[sd],
  omega_ln = pars_wtp_ln[mu]  + draws*pars_wtp_ln[sd]) %>%
  # Reshape simulated draws for plotting
  mutate(
    pref_f = beta_f / (-1*alpha_f),
    pref_n = beta_n / (-1*alpha_n),
    pref_ln = beta_ln / alpha_ln
  ) %>%
  select(starts_with(c("pref", "omega"))) %>%
  gather(key = "model", value = "wtp", everything()) %>%
  mutate(model = fct_recode(model,
    "pref_f" = "pref_f",
    "pref_n" = "pref_n",
    "pref_ln" = "pref_ln",
    "wtp_f" = "omega_f",
    "wtp_n" = "omega_n",
    "wtp_ln" = "omega_ln"
  )) %>%
  separate(model, into = c("space", "model"), sep = "_") %>%
  mutate(space = fct_recode(space,
    "Preference" = "pref",
    "WTP" = "wtp"
  )) %>%
  # Remove extreme values by dropping 0.1% and 99.9% quantiles
  group_by(space, model) %>%
  mutate(
    lower = quantile(wtp, 0.01),
    upper = quantile(wtp, 0.99)) %>%
  filter(wtp > lower, wtp < upper) %>%
  select(-lower, -upper)

# Create data frame for Figure 1 from estimated models
fig1data <- pars %>%
  # Add labels for facets
  left_join(data.frame(
    model = c("f", "n", "ln"),
    title = c(
      "A) Price Fixed",
      "B) Price Normally Distributed",
      "C) Price Log-normally Distributed"
    )
  )) %>%
  # Compute means and standard deviations
  group_by(space, model) %>%
  mutate(
    mean = mean(wtp),
    sd = sd(wtp),
    label = paste0(
      "Mean: ", round(mean, 2), "\nSD:     ", round(sd, 2))) %>%
  ungroup() %>%
  mutate(space = fct_relevel(space, c("Preference", "WTP")))

# Make plot
font <- "Fira Sans Condensed"
plotColors <- c("red", "grey42")
fig1 <- ggplot(fig1data) +
  geom_density(
    aes(x = wtp, y = ..density.., fill = space),
    color = "black", size = 0.1, alpha = 0.42) +
  geom_vline(
    mapping = aes(xintercept = mean, color = space),
    linetype = "dashed") +
  facet_wrap(vars(title), scales = "free") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  coord_cartesian(xlim = c(2, 8), ylim = c(0, 8)) +
  scale_fill_manual(
    values = plotColors,
    guide = guide_legend(reverse = TRUE)
  ) +
  scale_color_manual(values = plotColors, guide = "none") +
  theme_minimal_hgrid(font_family = font, font_size = 16) +
  panel_border() +
  labs(
    x = "WTP for Yoplait brand",
    y = "Density",
    fill = "Model space:") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 13),
    axis.title.y = element_text(size = 13)) +
  geom_text(
    data = fig1data %>%
      group_by(space, model) %>%
      slice(1),
    mapping = aes(
      label = label,
      x = c(4.6, 5, 5.1, 2.8, 2.7, 2.7),
      y = 4,
      color = space),
    size = 5, family = font, hjust = 0.5)

if (!dir.exists("Figures"))
  dir.create("Figures")

# Save chart
ggsave("Figures/fig1.png", fig1, width = 13, height = 4.8)


# 2) Replication of Figure 2 (benchmarking) ----

# The code below compares the relative computation time to estimate
# a preference space mixed logit models using the
# {logitr}, {mixl}, {mlogit}, {gmnl}, and {apollo} packages
#
# The exact estimation times obtained will *always* vary for every run
# of each model, even when run on the same machine due to variations in
# background processes. As a result, the only way to perfectly reproduce
# Figure 2 in the manuscript is to use a data set of the run times from a
# specific run on a specific machine. To facilitate this, the code to estimate
# the models used to create Figure 2 was first run on the following publicly
# accessible Google colab notebook:
#
#    https://colab.research.google.com/drive/1vYlBdJd4xCV43UwJ33XXpO3Ys8xWkuxx?usp=sharing
#
# The run times were then exported as a csv and are included in the
# logitr package as a dataframe called "runtimes" that is loaded with the
# package. This data set is used in the code in this section to
# replicate Figure 2 in the manuscript.

# Visualize estimation times using "runtimes" data frame from Google
# colab notebook (Figure 2 in manuscript)

plotColors <- c("black", RColorBrewer::brewer.pal(n = 5, name = "Set1"), "gold")
fig2 <- logitr::runtimes %>%
  ggplot(aes(x = numDraws, y = time_sec, color = package)) +
  geom_line() +
  geom_point() +
  geom_text_repel(
    data = . %>% filter(numDraws == max(numDraws)),
    aes(label = package),
    hjust = 0, nudge_x = 40, direction = "y",
    size = 4.5, segment.size = 0
  ) +
  scale_x_continuous(
    limits = c(0, 1200),
    breaks = unique(logitr::runtimes$numDraws),
    labels = scales::comma) +
  scale_y_continuous(limits = c(0, 300), breaks = seq(0, 300, 100)) +
  scale_color_manual(values = plotColors) +
  guides(
    point = guide_legend(override.aes = list(label = "")),
    color = guide_legend(override.aes = list(label = ""))) +
  theme_bw(base_size = 18) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none",
    axis.line.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  labs(
    x = "Number of random draws",
    y = "Computation time (seconds)"
  )

# Save chart
ggsave("Figures/fig2.png", fig2, width = 8, height = 5.5)


# 3) Replication of Table 1 ----

# Compute speed comparison tables using the "runtimes" data frame from
# the Google colab benchmark

logitr_time <- logitr::runtimes %>%
    filter(package == "logitr") %>%
    rename(time_logitr = time_sec)
temp <- logitr::runtimes %>%
    left_join(select(logitr_time, -package), by = "numDraws") %>%
    mutate(mult = round(time_sec/ time_logitr, 1)) %>%
    select(-time_logitr)
table_time <- temp %>%
    select(-mult) %>%
    pivot_wider(names_from = numDraws, values_from = time_sec)
table_mult <- temp %>%
    select(-time_sec) %>%
    pivot_wider(names_from = numDraws, values_from = mult)

# Print the tables
table_time
table_mult


# Fine

sessionInfo()