# Timing figure -----------------------------------------------------------

library("sdmTMB")
library("dplyr")
library("ggplot2")
library("INLA")
library("inlabru")
library("mgcv")
library("spaMM")
# install.packages("RSpectra") # for faster spaMM

dir.create("figs", showWarnings = FALSE)
dir.create("analysis", showWarnings = FALSE)

# using a non-optimized BLAS library will be considerably slower
is_optimized <- function() {
  m <- 1e4
  n <- 1e3
  k <- 3e2
  X <- matrix(rnorm(m * k), nrow = m)
  Y <- matrix(rnorm(n * k), ncol = n)
  s <- system.time(X %*% Y)
  s[[3]] < 0.5
}
(optimized_blas <- is_optimized())
if (!optimized_blas) {
  warning("Detected a non-optimized BLAS library. We recommend installing an optimized BLAS library for considerably faster fitting of TMB/sdmTMB models. There are additional details in the 'Installation' section of the sdmTMB README.md file. https://github.com/sdmTMB/sdmtmb?tab=readme-ov-file#installation")
}

# Set INLA to use 1 thread to match other package defaults:
INLA::inla.setOption(num.threads = "1:1")

# from remotes::install_github('seananderson/ggsidekick')
theme_sleek <- function(base_size = 11, base_family = "") {
  half_line <- base_size / 2
  theme_light(base_size = base_size, base_family = base_family) +
    theme(
      panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
      axis.ticks.length = grid::unit(half_line / 2.2, "pt"), strip.background = element_rect(
        fill = NA,
        colour = NA
      ), strip.text.x = element_text(colour = "grey30"),
      strip.text.y = element_text(colour = "grey30"), axis.text = element_text(colour = "grey30"),
      axis.title = element_text(colour = "grey30"), legend.title = element_text(
        colour = "grey30",
        size = rel(0.9)
      ), panel.border = element_rect(
        fill = NA,
        colour = "grey70", linewidth = 1
      ), legend.key.size = grid::unit(
        0.9,
        "lines"
      ), legend.text = element_text(
        size = rel(0.7),
        colour = "grey30"
      ), legend.key = element_rect(
        colour = NA,
        fill = NA
      ), legend.background = element_rect(
        colour = NA,
        fill = NA
      ), plot.title = element_text(
        colour = "grey30",
        size = rel(1)
      ), plot.subtitle = element_text(
        colour = "grey30",
        size = rel(0.85)
      )
    )
}
theme_set(theme_sleek())

# -------------------------------------------------------------------------
# Code in this chunk is from:
#
# Miller, D.L., Glennie, R. & Seaton, A.E. Understanding the Stochastic Partial
# Differential Equation Approach to Smoothing. JABES 25, 1–16 (2020).
# https://doi.org/10.1007/s13253-019-00377-z
#
# Re-used here under a Creative Commons Attribution 4.0 International License
# http://creativecommons.org/licenses/by/4.0/
smooth.construct.spde.smooth.spec <- function(object, data, knots) {
  dim <- length(object$term)
  if (dim > 2 | dim < 1) stop("SPDE Matern can only be fit in 1D or 2D.")
  if (dim == 1) {
    x <- data[[object$term]]
  } else {
    x <- matrix(0, nr = length(data[[1]]), nc = 2)
    x[, 1] <- data[[object$term[1]]]
    x[, 2] <- data[[object$term[2]]]
  }
  if (is.null(object$xt)) {
    if (dim == 1) {
      t <- seq(min(x), max(x), len = object$bs.dim)
      mesh <- INLA::inla.mesh.1d(loc = t, degree = 2, boundary = "free")
    } else {
      stop("For 2D, mesh must be supplied as argument xt$mesh in s(...,xt = )")
    }
  } else {
    if (!inherits(object$xt$mesh, "inla.mesh")) stop("xt must be NULL or an inla.mesh object")
    mesh <- object$xt$mesh
  }
  object$X <- as.matrix(INLA::inla.spde.make.A(mesh, loc = x))
  inlamats <- INLA::inla.mesh.fem(mesh)
  object$S <- list()
  object$S[[1]] <- as.matrix(inlamats$c1)
  object$S[[2]] <- 2 * as.matrix(inlamats$g1)
  object$S[[3]] <- as.matrix(inlamats$g2)
  object$L <- matrix(c(2, 2, 2, 4, 2, 0), ncol = 2)
  object$rank <- rep(object$bs.dim, 3)
  object$null.space.dim <- 0
  object$mesh <- mesh
  object$df <- ncol(object$X)
  class(object) <- "spde.smooth"
  return(object)
}
Predict.matrix.spde.smooth <- function(object, data) {
  dim <- length(object$term)
  if (dim > 2 | dim < 1) stop("SPDE Matern can only be fit in 1D or 2D.")
  if (dim == 1) {
    x <- data[[object$term]]
  } else {
    x <- matrix(0, nr = length(data[[1]]), nc = 2)
    x[, 1] <- data[[object$term[1]]]
    x[, 2] <- data[[object$term[2]]]
  }
  Xp <- INLA::inla.spde.make.A(object$mesh, x)
  return(as.matrix(Xp))
}
# End of code from
# Miller, D.L., Glennie, R. & Seaton, A.E. Understanding the Stochastic Partial
# Differential Equation Approach to Smoothing. JABES 25, 1–16 (2020).
# https://doi.org/10.1007/s13253-019-00377-z
#
# Re-used here under a Creative Commons Attribution 4.0 International License
# http://creativecommons.org/licenses/by/4.0/
# -------------------------------------------------------------------------

simulate_dat <- function(n_obs = 100,
                         cutoff = 0.1,
                         range = 0.5,
                         phi = 0.1,
                         tweedie_p = 1.5,
                         max.edge = 0.1,
                         family = gaussian(),
                         sigma_O = 0.2,
                         iter = 1,
                         plot = FALSE,
                         seed = sample.int(.Machine$integer.max, 1L)) {
  set.seed(seed)
  predictor_dat <- data.frame(
    X = runif(n_obs), Y = runif(n_obs),
    a1 = rnorm(n_obs)
  )

  loc.bnd <- matrix(c(0, 0, 1, 0, 1, 1, 0, 1), 4, 2, byrow = TRUE)
  segm.bnd <- inla.mesh.segment(loc.bnd)

  me <- INLA::inla.mesh.2d(
    boundary = segm.bnd,
    max.edge = c(max.edge, 0.2),
    offset = c(0.1, 0.05)
  )

  mesh <- make_mesh(predictor_dat, xy_cols = c("X", "Y"), mesh = me)
  sim_dat <- sdmTMB_simulate(
    formula = ~ 1 + a1,
    data = predictor_dat,
    mesh = mesh,
    family = family,
    range = range,
    phi = phi,
    tweedie_p = tweedie_p,
    sigma_O = sigma_O,
    seed = seed,
    B = c(0.2, -0.4) # B0 = intercept, B1 = a1 slope
  )
  if (plot) {
    g <- ggplot(sim_dat, aes(X, Y, colour = observed)) +
      geom_point() +
      scale_color_gradient2()
    print(g)
  }
  list(mesh = mesh, dat = sim_dat)
}

sim_fit_time <- function(n_obs = 1000, cutoff = 0.1, iter = 1, phi = 0.3, tweedie_p = 1.5,
                         seed = sample.int(.Machine$integer.max, 1), family = gaussian(), sigma_O = 0.2,
                         max.edge = 0.1) {
  cat("max.edge:", max.edge, "\n")
  cat("n_obs:", n_obs, "\n")
  cat("iter:", iter, "\n")
  cat("simulating...\n")

  s <- simulate_dat(
    n_obs = n_obs, cutoff = cutoff, seed = seed,
    iter = iter, family = family, max.edge = max.edge, tweedie_p = tweedie_p, phi = phi,
    sigma_O = sigma_O
  )
  sim_dat <- s$dat
  mesh <- s$mesh
  times <- list()
  cat("fitting...\n")

  cat("sdmTMB\n")
  out <- system.time({
    fit <- sdmTMB(
      observed ~ a1,
      data = sim_dat,
      mesh = mesh,
      family = family,
      control = sdmTMBcontrol(newton_loops = 0L, multiphase = FALSE),
      priors = sdmTMBpriors(matern_s = pc_matern(range_gt = 0.05, sigma_lt = 2))
    )
  })
  times$sdmTMB <- out[["elapsed"]]

  cat("inlabru EB\n")

  dat <- s$dat
  dat_sp <- sp::SpatialPointsDataFrame(cbind(dat$X, dat$Y),
    proj4string = sp::CRS("+proj=aea +lat_0=45 +lon_0=-126 +lat_1=50 +lat_2=58.5 +x_0=1000000
  +y_0=0 +datum=NAD83 +units=km +no_defs"), data = dat
  )
  loc.bnd <- matrix(c(0, 0, 1, 0, 1, 1, 0, 1), 4, 2, byrow = TRUE)
  segm.bnd <- INLA::inla.mesh.segment(loc.bnd)
  mesh <- INLA::inla.mesh.2d(
    boundary = segm.bnd,
    max.edge = c(max.edge, 0.2),
    offset = c(0.1, 0.05)
  )

  spde <- INLA::inla.spde2.pcmatern(
    mesh = mesh,
    prior.range = c(0.05, 0.05),
    prior.sigma = c(2, 0.05)
  )

  components <- observed ~ -1 + Intercept(1) + a1 +
    spatrf(main = coordinates, model = spde)

  dat_sp$observed <- dat_sp$observed * 10 # help convergence
  if (family$family == "gaussian") {
    like <- like(observed ~ Intercept + a1 + spatrf, family = "gaussian", data = dat_sp)
  } else if (family$family == "tweedie") {
    like <- like(observed ~ Intercept + a1 + spatrf, family = "tweedie", data = dat_sp)
  } else {
    stop("Family not found")
  }

  out <- system.time({
    tryCatch(
      {
        fit_bru <- bru(
          like,
          components = components,
          options = bru_options(
            control.inla = list(int.strategy = "eb", strategy = "gaussian"),
            bru_max_iter = 1, num.threads = "1:1"
          )
        )
      },
      error = function(e) NA
    )
  })
  if (!is.null(fit_bru$error)) {
    inla_eb_error <- TRUE
  } else {
    inla_eb_error <- FALSE
  }
  times$inlabru_eb <- out[["elapsed"]]

  cat("spaMM\n")

  loc.bnd <- matrix(c(0, 0, 1, 0, 1, 1, 0, 1), 4, 2, byrow = TRUE)
  segm.bnd <- INLA::inla.mesh.segment(loc.bnd)
  mesh2 <- INLA::inla.mesh.2d(
    boundary = segm.bnd,
    max.edge = c(max.edge, 0.2),
    offset = c(0.1, 0.05)
  )
  # note `<<-` below:
  # Avoids: `Hmmm... it looks like you should put some object(s) in control.HLfit$formula_env`
  spde_spaMM <<- INLA::inla.spde2.pcmatern(
    mesh = mesh2,
    prior.range = c(0.05, 0.05),
    prior.sigma = c(2, 0.05)
  )
  out <- system.time({
    fit_spaMM <- tryCatch(
      {
        fitme(observed ~ a1 + IMRF(1 | X + Y, model = spde_spaMM),
          family = family, data = sim_dat
        )
      },
      error = function(e) NA
    )
  })
  times$spaMM <- if (all(is.na(fit_spaMM))) NA else out[["elapsed"]]

  cat("mgcv disc.\n")
  loc.bnd <- matrix(c(0, 0, 1, 0, 1, 1, 0, 1), 4, 2, byrow = TRUE)
  segm.bnd <- inla.mesh.segment(loc.bnd)
  mesh2 <- INLA::inla.mesh.2d(
    boundary = segm.bnd,
    max.edge = c(max.edge, 0.2),
    offset = c(0.1, 0.05)
  )
  out <- system.time({
    fit <- tryCatch(
      {
        bam(
          observed ~ a1 + s(X, Y,
            bs = "spde", k = mesh2$n,
            xt = list(mesh = mesh2)
          ),
          data = sim_dat,
          family = family,
          method = "fREML",
          control = gam.control(scalePenalty = FALSE), discrete = TRUE
        )
      },
      error = function(e) NA
    )
  })
  times$mgcv_disc <- if (all(is.na(fit))) NA else out[["elapsed"]]

  out <- as_tibble(times)
  out$cutoff <- cutoff
  out$n_obs <- as.integer(n_obs)
  out$iter <- as.integer(iter)
  out$seed <- seed
  out$mesh_n <- as.integer(mesh$n)
  out$max.edge <- max.edge
  out$inla_eb_error <- inla_eb_error
  # out$inla_error <- inla_error
  out
}

# fast run as an example:
to_run <- expand.grid(
  n_obs = c(1e3),
  max.edge = c(0.075, 0.1, 0.15, 0.2),
  iter = seq_len(2L)
)

# full (slow) run to match paper:
if (FALSE) {
  iter <- c(25, 25, 25)

  to_run <- expand.grid(
    n_obs = c(1e3),
    max.edge = c(0.06, 0.075, 0.1, 0.15, 0.2),
    iter = seq_len(iter[1])
  )
  to_run_bigger_data <- expand.grid(
    n_obs = c(1e4),
    max.edge = c(0.06, 0.075, 0.1, 0.15, 0.2),
    iter = seq_len(iter[2])
  )
  to_run_biggish_data <- expand.grid(
    n_obs = c(1e5),
    max.edge = c(0.06, 0.075, 0.1, 0.15, 0.2),
    iter = seq_len(iter[3])
  )
  to_run <- bind_rows(to_run, to_run_bigger_data, to_run_biggish_data)
  # randomize for more accurate progress bar:
  set.seed(1)
  to_run <- to_run[sample(seq_len(nrow(to_run)), size = nrow(to_run), replace = FALSE), ]
}

to_run$seed <- to_run$iter * 29212
nrow(to_run)

# don't run in parallel to ensure all timing is comparable:
system.time({
  out <- purrr::pmap_dfr(
    to_run,
    sim_fit_time,
    .progress = "timing"
  )
})

out_long <- tidyr::pivot_longer(
  out,
  sdmTMB:mgcv_disc,
  names_to = "model",
  values_to = "time"
)
clean_names <- tribble(
  ~model, ~model_clean,
  "sdmTMB", "sdmTMB",
  "inlabru_eb", "inlabru EB",
  "spaMM", "spaMM",
  "mgcv_disc", "mgcv::bam\n(discretize = TRUE) SPDE"
)
clean_names$model_clean <- factor(clean_names$model_clean, levels = clean_names$model_clean)
out_long <- left_join(out_long, clean_names)
out_long <- out_long |>
  group_by(model_clean, cutoff) |>
  mutate(mean_mesh_n = paste0("Mesh n = ", round(mean(mesh_n))))
out_long$mean_mesh_n <- as.factor(out_long$mean_mesh_n)
out_long$mean_mesh_n <- forcats::fct_reorder(out_long$mean_mesh_n, -out_long$cutoff)
out_long_sum <- group_by(out_long, model_clean, n_obs, cutoff, max.edge) |>
  summarise(lwr = quantile(time, probs = 0.1, na.rm = TRUE), upr = quantile(time, probs = 0.9, na.rm = TRUE), time = mean(time, na.rm = TRUE), mean_mesh_n = mean(mesh_n, na.rm = TRUE), .groups = "drop")

cols <- RColorBrewer::brewer.pal(6, "Dark2")
colour_vect <- c(
  "sdmTMB" = cols[1],
  "inlabru EB" = cols[3],
  "spaMM" = cols[4],
  "mgcv::bam\n(discretize = TRUE) SPDE" = cols[2]
)

gg <- out_long_sum |>
  mutate(n_obs = paste0(formatC(n_obs, format = "d", big.mark = ","), " rows of data")) |>
  mutate(n_obs = gsub("^1,0", "(a) 1,0", n_obs)) |>
  mutate(n_obs = gsub("^10,0", "(b) 10,0", n_obs)) |>
  mutate(n_obs = gsub("^100,0", "(c) 100,0", n_obs)) |>
  ggplot(aes(mean_mesh_n, time, colour = model_clean)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = model_clean),
    alpha = 0.2,
    colour = NA
  ) +
  geom_line(lwd = 0.7, na.rm = TRUE) +
  scale_x_log10(breaks = c(250, 500, 1000)) +
  theme_sleek() +
  theme(panel.grid.major = element_line(colour = "grey90")) +
  theme(legend.position.inside = c(0.4, 0.85)) +
  scale_fill_manual(values = colour_vect) +
  scale_colour_manual(values = colour_vect) +
  labs(y = "Time (s)", x = "Mesh nodes", colour = "Model", fill = "Model") +
  coord_cartesian(expand = FALSE, ylim = c(0.08, 14)) +
  theme(panel.spacing.x = unit(20, "pt"), legend.position = "top") +
  facet_grid(~n_obs)
print(gg)
# ggsave("timing-spatial-multipanel.pdf", width = 7, height = 3)

gg <- gg + scale_y_log10() +
  scale_x_log10() +
  coord_cartesian(expand = FALSE, ylim = c(0.08, 17))
print(gg)
# ggsave("timing-spatial-multipanel-log.pdf", width = 7, height = 3)

sessionInfo()