## Note that in particular the simulation results may differ with
## linear algebra packages used.
## The sessionInfo() output for the results reported in the manuscript
## is provided commented out at the end of the script.

## load evgam
library("evgam")

## COprcp data
# The data can be loaded and conjoined with the metadata using
data("COprcp", package = "evgam")
COprcp <- cbind(COprcp, COprcp_meta[COprcp$meta_row, ])

# A plot of gridded elevations can be obtained with
brks <- pretty(COelev$z, 50)
cols <- hcl.colors(length(brks) - 1, "YlOrRd", rev = TRUE)
image(COelev, breaks = brks, col = cols)
# and station elevations can be superimposed 
# using 'colplot()' with
colplot(COprcp_meta$lon, COprcp_meta$lat, COprcp_meta$elev, 
        breaks = brks, add = TRUE)

# Before fitting any models, a data.frame for plotting
# called COprcp_plot is created using
COprcp_plot <- expand.grid(lon = COelev$x, lat = COelev$y)
COprcp_plot$elev <- as.vector(COelev$z)

## GEV model
# Create a data.frame comprising annual maxima at each 
# station, which can be done with the following since
# date is of class "Date"
COprcp$year <- format(COprcp$date, "%Y")
COprcp_gev <- aggregate(prcp ~ year + meta_row, COprcp, max)

# Then add the metadata
COprcp_gev <- cbind(COprcp_gev, COprcp_meta[COprcp_gev$meta_row, ])

# Set the formulae for the smooths for all GEV 
# parameters with
fmla_gev <- list(prcp ~ s(lon, lat, k = 30) + s(elev, bs = "cr"), 
  ~ s(lon, lat, k = 20), ~ 1)
# and then the model can be fitted with
m_gev <- evgam(fmla_gev, COprcp_gev, family = "gev")
# summarised with
summary(m_gev)
# and plotted with
plot(m_gev)

# Predictions based on the gridded elevation data
# can be obtained and the first few and viewed with
gev_pred <- predict(m_gev, COprcp_plot, type = "response")
head(gev_pred)

# Each estimated GEV parameter can be plotted with
# image() using
for (i in 1:2) {
  plot.list <- COelev
  plot.list$z[] <- gev_pred[, i]
  image(plot.list, asp = 1)
  title(paste("GEV", names(gev_pred)[i]))
}

# Then 100-year return level estimates can be
# obtained with
gev_rl100 <- predict(m_gev, COprcp_plot, prob = 0.99)
head(gev_rl100)

# Finally a map of the 100-year return level
# estimate can be obtained with
rl100 <- COelev
rl100$z[] <- gev_rl100[, 1]
image(rl100, asp = 1)
title("100-year return level")

## GPD model
# Set the threshold that defines an exceedance
threshold <- 11.4
# and then calculate the threshold excesses
COprcp$excess <- COprcp$prcp - threshold
is.na(COprcp$excess[COprcp$excess <= 0]) <- TRUE

# Set the formulae for the smooths of the two GPD
# parameters with
fmla_gpd <- list(excess ~ s(lon, lat, k = 20) + s(elev, bs = "cr"), 
  ~ s(lon, lat, k = 15))
# and then the model can be fitted with
m_gpd <- evgam(fmla_gpd, COprcp, family = "gpd")

## Poisson-GPD model
# Specify the point process model arguments for the
# numerical integration
pp_args <- list(id = "id", ny = 30, r = 45)
# and then the model can be fitted with
m_pp <- evgam(fmla_gev, COprcp, family = "pp", pp.args = pp_args)

## Censored data
# Set the coarseness of the data
delta <- 2.5
# and the set the censoring interval's lower limit
COprcp$lo <- pmax(COprcp$excess - delta, 1e-6)
# and upper limit
COprcp$hi <- COprcp$excess + delta

# Specify the GPD model's smooths, in particular with
# a tensor product to capture spatial variation
fmla_gpd_cens <- list(cens(lo, hi) ~ te(lon, lat, k = c(6, 8)) +
  s(elev, bs = "cr"), ~ te(lon, lat, k = c(6, 8)))
# and then fit the model
m_gpd_cens <- evgam(fmla_gpd_cens, COprcp, family = "gpd")
# and then plot it
plot(m_gpd_cens)

## Nonstationary modeling with monthly maxima

# Load the Fort Collins daily maximum tempereatures
data("FCtmax", package = "evgam")
# then add years and months of data to the data.frame
FCtmax$year <- format(FCtmax$date, "%Y")
FCtmax$month <- format(FCtmax$date, "%m")
# and then calculate monthly maxima
FCtmax_mnmax <- aggregate(tmax ~ year + month, FCtmax, max)

# Split the data.frame into separate monthly
# data.frames
FCtmax_mn <- split(FCtmax_mnmax, FCtmax_mnmax$month)
# and the fit a homogenous GEV model, specified with
fmla_simple <- list(tmax ~ 1, ~ 1, ~ 1)
# to each data.frame with
gev_fits <- lapply(FCtmax_mn, evgam, formula = fmla_simple, 
  family = "gev")
# Each model's coefficients can be extracted with
gev_pars <- sapply(gev_fits, coef)

# Set the weights for each model, according to days per month
weights <- (1/365.25) * c(31, 28.25, 30)[c(1, 2, 1, 3, 1, 3, 1, 1, 3, 1, 
  3, 1)]

# Then calculate the 100-year return level, using 
# formula (3) through qev()
rl_100_gev <- qev(0.99, gev_pars[1, ], exp(gev_pars[2, ]), gev_pars[3, ], 
  m = 12, alpha = weights, family = "gev")
rl_100_gev

## Nonstationary modeling with threshold exceedances
# Set the threshold exceedance rate
zeta <- 0.01
# and add a cyclic component to the data.frame
FCtmax$cyc <- as.integer(FCtmax$date) %% 365.25

# Specify cyclic smooths for the asymmetric Laplace
# distribution's (ALD) location and scale parameters
FC_fmla_ald <- list(tmax ~ s(cyc, bs = "cc", k = 15),  ~ s(cyc, bs = "cc"))
# and then fit the model with
FC_ald <- evgam(FC_fmla_ald, FCtmax, family = "ald", 
  ald.args = list(tau = 1 - zeta))

# Then use the ALD model to predict the varying
# threshold
FCtmax$threshold <- predict(FC_ald)$location
# and use it to define excesses
FCtmax$excess <- FCtmax$tmax - FCtmax$threshold
is.na(FCtmax$excess[FCtmax$excess <= 0]) <- TRUE

# Here's a little aside that plots the data for 2018
# and 2019 and superimposes the threshold estimate
use <- FCtmax$year %in% c("2018", "2019")
plot(FCtmax[use, c("date", "tmax")])
lines(FCtmax[use, c("date", "threshold")], col = "red")

# Then fit a GPD to the excesses with a cyclic smooth
# for the scale parameter and a constant shape parameter
FC_fmla_gpd <- list(excess ~ s(cyc, bs = "cc", k = 15), ~ 1)
FC_gpd <- evgam(FC_fmla_gpd, FCtmax, family = "gpd")

# Estimate the extremal index, based on the Ferro &
# Segers' moment-based estimator
theta <- extremal(!is.na(FCtmax$excess), FCtmax$date)

# Then put together a 50-point data.frame in order
# to estimate the distribution of the annual maximum
rl_df <- data.frame(cyc = seq(0, 365.25, l = 51)[-1])
rl_df$threshold <- predict(FC_ald, rl_df, type = "response")$location
rl_df[, c("psi", "xi")] <- predict(FC_gpd, rl_df, type = "response")
# and use this to estimate the 100-year return level
rl_100_gpd <- qev(0.99, rl_df$threshold, rl_df$psi, rl_df$xi, m = 365.25, 
  theta = theta, family = "gpd", tau = 1 - zeta)
rl_100_gpd

# 100-January return level
rl_df_jan <- data.frame(cyc = 1:31)
rl_df_jan$threshold <- predict(FC_ald, rl_df_jan, type = "response")$location
rl_df_jan[, c("psi", "xi")] <- predict(FC_gpd, rl_df_jan, type = "response")
# and use this to estimate the 100-January return level
rl_100_gpd_jan <- qev(0.99, rl_df_jan$threshold, rl_df_jan$psi, rl_df_jan$xi, m = 31, 
  theta = theta, family = "gpd", tau = 1 - zeta)
rl_100_gpd_jan

## Uncertainty estimation
# Fitted values and standard errors for GEV parameters for 
# the plotting data.frame, COprcp_plot
gev_pred <- predict(m_gev, COprcp_plot, type = "response", se.fit = TRUE)
head(gev_pred$se.fit)

# Fitted values and standard errors for the 0.95th and 0.99th
# quantile of the GEV distribution, i.e., 20- and 100-year
# return levels
gev_rl100_pred <- predict(m_gev, COprcp_plot, prob = c(0.95, 0.99), 
  se.fit = TRUE)
head(gev_rl100_pred$se.fit)

## Simulations of parameters from sampling distributions
## of stationary GEV model
# Simulate 5 sets of each GEV parameter for each row of COprcp_plot
set.seed(1)
gev_sim <- simulate(m_gev, nsim = 5, newdata = COprcp_plot, 
  type = "response")
lapply(gev_sim, head)

# Simulate 5 sets of the 0.99th quantile of the GEV
# distribution for each row of COprcp_plot
set.seed(2)
gev_rl_sim <- simulate(m_gev, nsim = 5, newdata = COprcp_plot, 
  prob = 0.99)
head(gev_rl_sim)

# Simulate 1e4 samples of 0.99th quantile of the 
# GEV distribution for Boulder, CO
set.seed(3)
gev_rl_boulder_sim <- simulate(m_gev, nsim = 1e4, 
  newdata = COprcp_meta[3, ], prob = 0.99)
# and then obtain and approximate 95% confidence interval
# based on the 0.025th and 0.975th quantiles of the sample
quantile(gev_rl_boulder_sim, c(0.025, 0.975))

## Simulations of parameters from sampling distributions
## of nonstationary GPD model
# Simulate 1e3 ALD and GPD parameters
set.seed(4)
FC_sim_ald <- simulate(FC_ald, newdata = rl_df, nsim = 1e3, 
  type = "response")
set.seed(5)
FC_sim_gpd <- simulate(FC_gpd, newdata = rl_df, nsim = 1e3, 
  type = "response")
# and then calculate 100-year return level estimate for
# each sample
rl_sim <- qev(0.99, FC_sim_ald[[1]], FC_sim_gpd[[1]], FC_sim_gpd[[2]], 
  m = 365.25, theta = theta, family = "gpd", tau = 1 - zeta)
# and then obtain and approximate 95% confidence interval
# based on the 0.025th and 0.975th quantiles of the sample
quantile(rl_sim, c(0.025, 0.975))

# > sessionInfo()
# R version 4.1.3 (2022-03-10)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Ubuntu 20.04.4 LTS
#
# Matrix products: default
# BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
# LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
# 
# locale:
#  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
#  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
#  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
#  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
#  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
# [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
#
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
# [1] evgam_0.1.5
#
# loaded via a namespace (and not attached):
# [1] compiler_4.1.3 Rcpp_1.0.7