## ----setup, echo = TRUE, message = FALSE-----------------------------------
library("sensobol")
library("tidyverse")
library("data.table")
library("ggplot2")

theme_AP <- function() {
  theme_bw() +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          legend.background = element_rect(fill = "transparent", color = NA),
          legend.key = element_rect(fill = "transparent", color = NA),
          strip.background = element_rect(fill = "white"), 
          legend.position = "top")
}

## ----binned_mean, cache=TRUE, echo = TRUE, fig.height=2, fig.cap = "Scatterplot of $y$ against $x_i$, $i=1,2,3$. The red dots show the mean $y$ value in each bin (we have set the number of bins arbitrarely at 30), and $N=2^{10}$. The model is the polynomial function in \\cite{Becker2014}, where $y=3 x_1 ^ 2 + 2 x_1 x_2 - 2 x_3$, $x_i \\sim \\mathcal{U}(0,1)$."----

poli <- function(X1, X2, X3) 3 * X1^2 + 2 * X1 * X2 - 2 * X3
poli_fun <- function(X) return(mapply(poli, X[, 1], X[, 2], X[, 3]))

set.seed(2)
N <- 2^10
params <- paste("$x_", 1:3, "$", sep = "")
mat <- sobol_matrices(N = N, params = params)
Y <- poli_fun(mat)

data.table(cbind(mat, Y)) %>%
  .[1:N] %>%
  melt(., measure.vars = params) %>%
  ggplot(., aes(value, Y)) + geom_point(size = 0.2) + stat_summary_bin(fun = "mean",
  geom = "point", colour = "red", size = 0.7) + facet_wrap(~variable) + labs(x = "$x$",
  y = "$y$") + theme_AP()


## ----ishi_plot, cache=TRUE, dependson="binned_mean", echo = TRUE, fig.height=2, fig.cap = "Scatterplot of $y$ against $x_i$, $i=1,2,3$. The red dots show the mean $y$ value in each bin (we have set the number of bins arbitrarely at 30), and $N=2^{10}$. The model is the \\cite{Ishigami1990} function."----

# PLOT ISHIGAMI FUNCTION TO ILLUSTRATE Ti ----------------------------------------

Y <- ishigami_Fun(mat)

data.table(cbind(mat, Y)) %>%
  .[1:N] %>%
  melt(., measure.vars = params) %>%
  ggplot(., aes(value, Y)) + geom_point(size = 0.2) + stat_summary_bin(fun = "mean",
  geom = "point", colour = "red", size = 0.7) + facet_wrap(~variable) + labs(x = "$x$",
  y = "$y$") + theme_AP()


## ----visualization_matrices, echo = TRUE, dependson=c("theme", "settings"), fig=TRUE, fig.height=2, cache=TRUE, fig.cap = "Sampling methods. Each dot is a sampling point. $N=2^{10}$."----

N <- 2^10
params <- paste("X", 1:3, sep = "")
type <- c("QRN", "LHS", "R")
set.seed(2)
prove <- lapply(type, function(type) sobol_matrices(N = N, params = params, type = type))
names(prove) <- type
lapply(prove, data.table) %>%
  lapply(., function(x) x[1:N]) %>%
  rbindlist(., idcol = "Method") %>%
  ggplot(., aes(X1, X2)) + geom_point(size = 0.2) + facet_wrap(~Method) + labs(x = "$x_1$",
  y = "$x_2$") + theme_AP()


## ----settings_sobolg, cache=TRUE-------------------------------------------
N <- 2^10
k <- 8
params <- paste("$x_", 1:k, "$", sep = "")
R <- 10^3
type <- "norm"
conf <- 0.95


## ----matrix_sobolg, cache=TRUE, dependson='settings_sobolg'----------------
set.seed(2)
mat <- sobol_matrices(N = N, params = params)


## ----model_sobolg, cache=TRUE, dependson='matrix_sobolg'-------------------
y <- sobol_Fun(mat)


## ----unc_sobolg, cache=TRUE, dependson="model_sobolg", fig.height=2, fig.cap="Empirical distribution of the Sobol' G model output.", message = FALSE----
plot_uncertainty(Y = y, N = N) + labs(y = "Counts", x = "$y$")


## ----scatter_sobolg, cache=TRUE, dependson=c("model_sobolg", "matrix_sobolg", "settings_sobolg"), fig.height=5, fig.cap="Scatter plots of model inputs against the model output for the Sobol' G function."----
plot_scatter(data = mat, N = N, Y = y, params = params)


## ----multiscatter_sobolg, cache=TRUE, dependson=c("model_sobolg", "matrix_sobolg", "settings_sobolg"), fig.height=5, fig.cap="Scatter plot matrix of pairs of model inputs for the Sobol' G function. The topmost and bottommost label facets refer to the $x$ and the $y$ axis respectively."----

plot_multiscatter(data = mat, N = N, Y = y, params = paste("$x_", 1:4, "$", sep = ""))


## ----indices_sobolg, cache=TRUE, dependson='model_sobolg'------------------

set.seed(2)
ind <- sobol_indices(Y = y, N = N, params = params, boot = TRUE, R = R, type = type,
  conf = conf)


## ----print_sobolg, cache=TRUE, dependson='indices_sobolg'------------------

cols <- colnames(ind$results)[1:5]
ind$results[, `:=`((cols), round(.SD, 3)), .SDcols = (cols)]
ind


## ----dummy_sobolg, cache=TRUE, dependson='indices_sobolg'------------------
set.seed(2)
ind.dummy <- sobol_dummy(Y = y, N = N, params = params, boot = TRUE, R = R)


## ----plot_indices_sobolg, cache=TRUE, dependson="indices_sobolg", fig.height=2.5, fig.cap="Sobol' indices of the Sobol' G function."----
plot(ind, dummy = ind.dummy)


## ----dynamics_population, cache=TRUE, echo = TRUE, fig.cap="Dynamics of the logistic population growth model for $N_0=3$, $r = 0.6$ and $K=100$.", fig.height=2.5----
X <- 3
r <- 0.6
K <- 100
for (i in 1:20) X[i + 1] <- X[i] + r * X[i] * (1 - X[i]/K)

dt <- data.table(X)[, `:=`(t, 0:20)]

ggplot(dt, aes(t, X)) + geom_line() + labs(x = "$t$", y = "$N$") + theme_bw() + theme(panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(), legend.background = element_rect(fill = "transparent",
    color = NA), legend.key = element_rect(fill = "transparent", color = NA))


## ----settings_population, cache=TRUE---------------------------------------
N <- 2^13
params <- c("$r$", "$K$", "$N_0$")
matrices <- c("A", "B", "AB", "BA")
first <- total <- "azzini"
order <- "second"
R <- 10^3
type <- "percent"
conf <- 0.95


## ----model_population, cache=TRUE------------------------------------------
population_growth <- function(r, K, X0) {
  X <- X0
  for (i in 0:20) X <- X + r * X * (1 - X/K)
  return(X)
}


## ----model_population_mapply, cache=TRUE, dependson='model:population'-----
population_growth_run <- function(dt) {
  return(mapply(population_growth, dt[, 1], dt[, 2], dt[, 3]))
}


## ----matrix_population, cache=TRUE, dependson='settings_population'--------
set.seed(2)
mat <- sobol_matrices(matrices = matrices, N = N, params = params, order = order,
  type = "LHS")


## ----transform_matrix_population, cache=TRUE, dependson="matrix_population"----
mat[, "$r$"] <- qnorm(mat[, "$r$"], 1.7, 0.3)
mat[, "$K$"] <- qnorm(mat[, "$K$"], 40, 1)
mat[, "$N_0$"] <- qunif(mat[, "$N_0$"], 10, 50)


## ----run_model_population, cache=TRUE, dependson=c("transform_matrix_population", "model_population_mapply", "model_population")----
y <- population_growth_run(mat)


## ----unc_population, cache=TRUE, dependson="run_model_population", fig.height=2, fig.cap="Empirical distribution of the logistic population growth model output.", message=FALSE----
plot_uncertainty(Y = y, N = N) + labs(y = "Counts", x = "$y$")


## ----quantiles, cache=TRUE, dependson='run_model_population'---------------
quantile(y, probs = c(0.01, 0.025, 0.5, 0.975, 0.99, 1))


## ----scatter_population, cache=TRUE, dependson=c("model_population", "matrix_population", "settings_population"), fig.height=3, fig.cap="Hexbin plot of model inputs against the model output for the population growth model."----
plot_scatter(data = mat, N = N, Y = y, params = params, method = "bin")


## ----multiscatter_population, cache=TRUE, dependson=c("model_population", "matrix_population", "settings_population"), fig.height=3, fig.cap="Scatterplot matrix of pairs of model inputs for the population growth model."----
plot_multiscatter(data = mat, N = N, Y = y, params = params, smpl = 2^11)


## ----indices_population, cache=TRUE, dependson=c("run_model_population", "settings_population")----
set.seed(2)
ind <- sobol_indices(matrices = matrices, Y = y, N = N, params = params, first = first,
  total = total, order = order, boot = TRUE, R = R, parallel = "no", type = type,
  conf = conf)


## ----print_population, cache=TRUE, dependson='indices_population'----------
cols <- colnames(ind$results)[1:5]
ind$results[, (cols):= round(.SD, 3), .SDcols = (cols)]
ind


## ----dummy_population, cache=TRUE, dependson=c("run_model_population", "indices_population")----
set.seed(2)
ind.dummy <- sobol_dummy(Y = y, N = N, params = params, boot = TRUE, R = R)


## ----plot_indices_population, cache=TRUE, dependson="indices_population", fig.height=3, fig.cap = "First and total-order Sobol' indices of the population growth model."----
plot(ind, dummy = ind.dummy)


## ----plot_indices2_population, cache=TRUE, dependson="indices_population", fig.height=2, fig.cap = "Second-order Sobol' indices."----
plot(ind, order = "second")


## ----settings_budworm, cache=TRUE------------------------------------------
N <- 2^9
params <- c("r_b", "K", "beta", "alpha", "r_s", "K_s", "K_e", "r_e", "P", "T_e")
order <- "first"
R <- 10^3
type <- "norm"
conf <- 0.95
times <- seq(0, 150, 1)
timeOutput <- seq(25, 150, 25)


## ----model_budworm, cache=TRUE---------------------------------------------
budworm_fun <- function(t, state, parameters) {
  with(as.list(c(state, parameters)), {
    dB <- r_b * B * (1 - B/(K * S) * (T_e^2 + E^2)/E^2) - beta * B^2/((alpha^S)^2 +
      B^2)
    dS <- r_s * S * (1 - (S * K_e)/(E * K_s))
    dE <- r_e * E * (1 - E/K_e) - P * (B/S) * E^2/(T_e^2 + E^2)
    list(c(dB, dS, dE))
  })
}


## ----matrix_budworm, cache=TRUE, dependson='settings_budworm'--------------
set.seed(2)
mat <- sobol_matrices(N = N, params = params, order = order)


## ----transform_matrix_budworm, cache=TRUE, dependson='matrix_budworm'------
mat[, "r_b"] <- qunif(mat[, "r_b"], 1.52, 1.6)
mat[, "K"] <- qunif(mat[, "K"], 100, 355)
mat[, "beta"] <- qunif(mat[, "beta"], 20000, 43200)
mat[, "alpha"] <- qunif(mat[, "alpha"], 1, 2)
mat[, "r_s"] <- qunif(mat[, "r_s"], 0.095, 0.15)
mat[, "K_s"] <- qunif(mat[, "K_s"], 24000, 25440)
mat[, "K_e"] <- qunif(mat[, "K_e"], 1, 1.2)
mat[, "r_e"] <- qunif(mat[, "r_e"], 0.92, 1)
mat[, "P"] <- qunif(mat[, "P"], 0.0015, 0.00195)
mat[, "T_e"] <- qunif(mat[, "T_e"], 0.7, 0.9)


## ----plot_dynamics_budworm, cache=TRUE, echo = TRUE, fig=TRUE, dependson="transform_matrix_budworm", fig.height=2, fig.cap = "Dynamics of the spruce budworm and forest model. The vertical, dashed lines mark the times at which we will conduct the sensitivity analysis. Initial state values: $B=1,S=0.07,E=1$. The parameter values are the mean values of the distributions shown in Table 6."----
y.diff <- data.table(deSolve::ode(y = c(B = 0.1, S = 0.07, E = 1), times = seq(0,
  200, 1), func = budworm_fun, parms = colMeans(mat)))

melt(y.diff, measure.vars = c("B", "S", "E")) %>%
  ggplot(., aes(time, value)) + geom_line(size = 1) + geom_vline(xintercept = timeOutput,
  lty = 2) + labs(x = expression(italic(t)), y = "Value") + facet_wrap(~variable,
  scales = "free_y") + theme_AP()


## ----parallel_budworm, cache=TRUE------------------------------------------
library("foreach")
library("parallel")
library("doParallel")


## ----run_model_budworm, cache=TRUE, dependson=c("transform_matrix_budworm", "model_budworm"), message = FALSE----
n.cores <- makeCluster(floor(detectCores() * 0.75))
registerDoParallel(n.cores)
y <- foreach(i = 1:nrow(mat), .combine = "rbind", .packages = "sensobol") %dopar%
  {
    sobol_ode(d = mat[i, ], times = times, timeOutput = timeOutput, state = c(B = 0.1,
      S = 7, E = 1), func = budworm_fun)
  }
stopCluster(n.cores)


## ----arrange_output_budworm, cache=TRUE, dependson='run_model_budworm'-----
full.dt <- data.table(y)
print(full.dt)


## ----melt_budworm, cache=TRUE, dependson='arrange_output_budworm'----------
indices.dt <- melt(full.dt, measure.vars = c("B", "S", "E"))
print(indices.dt)


## ----new_params_budworm, cache=TRUE, echo = TRUE----------------------------
params <- c("$r_B$", "$K$", "$\\beta$", "$\\alpha$", "$r_S$", "$K_S$", "$K_E$", "$r_E$",
  "$P$", "$T_E$")


## ----sobol_budworm, cache=TRUE, dependson=c('melt_budworm',
## 'new_params_budworm')----
ncpus <- floor(detectCores() * 0.75)
set.seed(2)
indices <- indices.dt[, sobol_indices(Y = value, N = N, params = params, order = order,
  boot = TRUE, first = "jansen", R = R, parallel = "multicore", ncpus = ncpus)$results,
  .(variable, time)]


## ----dummy_sensobol, cache=TRUE, dependson='sobol_budworm'-----------------
indices.dummy <- indices.dt[, sobol_dummy(Y = value, N = N, params = params), .(variable,
  time)]


## ----plot_sobol_budworm_t, cache=TRUE, dependson='sobol_budworm',fig.height=7, fig.cap='Evolution of Sobol' indices through time in the spruce budworm and forest model. The dashed, horizontal blue line shows the $T_i$ of the dummy parameter.'----
ggplot(indices, aes(time, original, fill = sensitivity, color = sensitivity, group = sensitivity)) +
  geom_line() +
  geom_ribbon(aes(ymin = indices[sensitivity %in% c("Si", "Ti")]$low.ci,
    ymax = indices[sensitivity %in% c("Si", "Ti")]$high.ci,
    color = sensitivity), alpha = 0.1, linetype = 0) +
  geom_hline(data = indices.dummy[, parameters:= NULL][sensitivity == "Ti"],
    aes(yintercept = original, color = sensitivity, group = time),
    lty = 2, size = 0.1) +
  guides(linetype = FALSE, color = FALSE) +
  facet_grid(parameters ~ variable) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 3)) +
  labs(x = expression(italic(t)), y = "Sobol' indices") +
  theme_AP() + theme(legend.position = "top")


## ----benchmark_packages, cache=TRUE----------------------------------------
library("microbenchmark")


## ----benchmark_settings, cache=TRUE----------------------------------------
N <- 2^11
parameters <- c("N", "k")
R <- 10^2


## ----benchmark_matrix, cache=TRUE, dependson='benchmark_settings'----------
set.seed(2)
dt <- sobol_matrices(matrices = "A", N = N, params = parameters)
dt[, 1] <- floor(qunif(dt[, 1], 10, 10^2 + 1))
dt[, 2] <- floor(qunif(dt[, 2], 3, 100))


## ----benchmark_model, cache=TRUE, dependson='benchmark_matrix'-------------
n.cores <- makeCluster(floor(detectCores() * 0.75))
registerDoParallel(n.cores)
set.seed(2)
y <- foreach(i = 1:nrow(dt), .packages = c("sensobol", "sensitivity")
  ) %dopar% {
    params <- paste("x", 1:dt[i, "k"], sep = "")
    N <- dt[i, "N"]
    out <- microbenchmark::microbenchmark(
       "sensobol" = {
         params <- paste("X", 1:length(params), sep = "")
         mat <- sensobol::sobol_matrices(N = N, params = params, type = "R")
         y <- sensobol::metafunction(mat)
         ind <- sensobol::sobol_indices(Y = y, N = N, params = params,
           first = "jansen", total = "jansen", boot = TRUE, R = R)$results},
      "sensitivity" = {
        X1 <- data.frame(matrix(runif(length(params) * N), nrow = N))
        X2 <- data.frame(matrix(runif(length(params) * N), nrow = N))
        x <- sensitivity::soboljansen(model = sensobol::metafunction, 
          X1, X2, nboot = R)},
      times = 1)
  }
stopCluster(n.cores)


## ----benchmark_arrange, cache=TRUE, dependson='benchmark_model'------------
out <- rbindlist(y)[, time := time / 1e+06]


## ----plot_benchmark, cache=TRUE, dependson="benchmark", echo = TRUE, fig=TRUE, fig.height=2, fig.cap="Benchmark of the sensitivity and sensobol packages. The comparison has been done with the Jansen estimators."----
ggplot(out, aes(time, expr)) + geom_violin() + labs(x = "Time (Milliseconds)", y = "") +
  theme_AP()


## ----benchmark_time, cache=TRUE, dependson='benchmark_arrange'-------------
out[, median(time), expr]


## ----vars_settings, cache=TRUE---------------------------------------------
star.centers <- 100
h <- 0.1
params <- paste("X", 1:8, sep = "")


## ----vars_matrix, cache=TRUE, dependson = 'vars_settings'------------------
set.seed(2)
mat <- vars_matrices(star.centers = star.centers, h = h, params = params)


## ----vars_model, cache=TRUE, dependson='vars_matrix'-----------------------
y <- sobol_Fun(mat)


## ----vars_to, cache=TRUE, dependson='vars_model'---------------------------
ind <- vars_to(Y = y, star.centers = star.centers, params = params, h = h)
ind