## Note that in particular the simulation results may differ with ## linear algebra packages used. ## The sessionInfo() output for the results reported in the manuscript ## is provided commented out at the end of the script. ## load evgam library("evgam") ## COprcp data # The data can be loaded and conjoined with the metadata using data("COprcp", package = "evgam") COprcp <- cbind(COprcp, COprcp_meta[COprcp$meta_row, ]) # A plot of gridded elevations can be obtained with brks <- pretty(COelev$z, 50) cols <- hcl.colors(length(brks) - 1, "YlOrRd", rev = TRUE) image(COelev, breaks = brks, col = cols) # and station elevations can be superimposed # using 'colplot()' with colplot(COprcp_meta$lon, COprcp_meta$lat, COprcp_meta$elev, breaks = brks, add = TRUE) # Before fitting any models, a data.frame for plotting # called COprcp_plot is created using COprcp_plot <- expand.grid(lon = COelev$x, lat = COelev$y) COprcp_plot$elev <- as.vector(COelev$z) ## GEV model # Create a data.frame comprising annual maxima at each # station, which can be done with the following since # date is of class "Date" COprcp$year <- format(COprcp$date, "%Y") COprcp_gev <- aggregate(prcp ~ year + meta_row, COprcp, max) # Then add the metadata COprcp_gev <- cbind(COprcp_gev, COprcp_meta[COprcp_gev$meta_row, ]) # Set the formulae for the smooths for all GEV # parameters with fmla_gev <- list(prcp ~ s(lon, lat, k = 30) + s(elev, bs = "cr"), ~ s(lon, lat, k = 20), ~ 1) # and then the model can be fitted with m_gev <- evgam(fmla_gev, COprcp_gev, family = "gev") # summarised with summary(m_gev) # and plotted with plot(m_gev) # Predictions based on the gridded elevation data # can be obtained and the first few and viewed with gev_pred <- predict(m_gev, COprcp_plot, type = "response") head(gev_pred) # Each estimated GEV parameter can be plotted with # image() using for (i in 1:2) { plot.list <- COelev plot.list$z[] <- gev_pred[, i] image(plot.list, asp = 1) title(paste("GEV", names(gev_pred)[i])) } # Then 100-year return level estimates can be # obtained with gev_rl100 <- predict(m_gev, COprcp_plot, prob = 0.99) head(gev_rl100) # Finally a map of the 100-year return level # estimate can be obtained with rl100 <- COelev rl100$z[] <- gev_rl100[, 1] image(rl100, asp = 1) title("100-year return level") ## GPD model # Set the threshold that defines an exceedance threshold <- 11.4 # and then calculate the threshold excesses COprcp$excess <- COprcp$prcp - threshold is.na(COprcp$excess[COprcp$excess <= 0]) <- TRUE # Set the formulae for the smooths of the two GPD # parameters with fmla_gpd <- list(excess ~ s(lon, lat, k = 20) + s(elev, bs = "cr"), ~ s(lon, lat, k = 15)) # and then the model can be fitted with m_gpd <- evgam(fmla_gpd, COprcp, family = "gpd") ## Poisson-GPD model # Specify the point process model arguments for the # numerical integration pp_args <- list(id = "id", ny = 30, r = 45) # and then the model can be fitted with m_pp <- evgam(fmla_gev, COprcp, family = "pp", pp.args = pp_args) ## Censored data # Set the coarseness of the data delta <- 2.5 # and the set the censoring interval's lower limit COprcp$lo <- pmax(COprcp$excess - delta, 1e-6) # and upper limit COprcp$hi <- COprcp$excess + delta # Specify the GPD model's smooths, in particular with # a tensor product to capture spatial variation fmla_gpd_cens <- list(cens(lo, hi) ~ te(lon, lat, k = c(6, 8)) + s(elev, bs = "cr"), ~ te(lon, lat, k = c(6, 8))) # and then fit the model m_gpd_cens <- evgam(fmla_gpd_cens, COprcp, family = "gpd") # and then plot it plot(m_gpd_cens) ## Nonstationary modeling with monthly maxima # Load the Fort Collins daily maximum tempereatures data("FCtmax", package = "evgam") # then add years and months of data to the data.frame FCtmax$year <- format(FCtmax$date, "%Y") FCtmax$month <- format(FCtmax$date, "%m") # and then calculate monthly maxima FCtmax_mnmax <- aggregate(tmax ~ year + month, FCtmax, max) # Split the data.frame into separate monthly # data.frames FCtmax_mn <- split(FCtmax_mnmax, FCtmax_mnmax$month) # and the fit a homogenous GEV model, specified with fmla_simple <- list(tmax ~ 1, ~ 1, ~ 1) # to each data.frame with gev_fits <- lapply(FCtmax_mn, evgam, formula = fmla_simple, family = "gev") # Each model's coefficients can be extracted with gev_pars <- sapply(gev_fits, coef) # Set the weights for each model, according to days per month weights <- (1/365.25) * c(31, 28.25, 30)[c(1, 2, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1)] # Then calculate the 100-year return level, using # formula (3) through qev() rl_100_gev <- qev(0.99, gev_pars[1, ], exp(gev_pars[2, ]), gev_pars[3, ], m = 12, alpha = weights, family = "gev") rl_100_gev ## Nonstationary modeling with threshold exceedances # Set the threshold exceedance rate zeta <- 0.01 # and add a cyclic component to the data.frame FCtmax$cyc <- as.integer(FCtmax$date) %% 365.25 # Specify cyclic smooths for the asymmetric Laplace # distribution's (ALD) location and scale parameters FC_fmla_ald <- list(tmax ~ s(cyc, bs = "cc", k = 15), ~ s(cyc, bs = "cc")) # and then fit the model with FC_ald <- evgam(FC_fmla_ald, FCtmax, family = "ald", ald.args = list(tau = 1 - zeta)) # Then use the ALD model to predict the varying # threshold FCtmax$threshold <- predict(FC_ald)$location # and use it to define excesses FCtmax$excess <- FCtmax$tmax - FCtmax$threshold is.na(FCtmax$excess[FCtmax$excess <= 0]) <- TRUE # Here's a little aside that plots the data for 2018 # and 2019 and superimposes the threshold estimate use <- FCtmax$year %in% c("2018", "2019") plot(FCtmax[use, c("date", "tmax")]) lines(FCtmax[use, c("date", "threshold")], col = "red") # Then fit a GPD to the excesses with a cyclic smooth # for the scale parameter and a constant shape parameter FC_fmla_gpd <- list(excess ~ s(cyc, bs = "cc", k = 15), ~ 1) FC_gpd <- evgam(FC_fmla_gpd, FCtmax, family = "gpd") # Estimate the extremal index, based on the Ferro & # Segers' moment-based estimator theta <- extremal(!is.na(FCtmax$excess), FCtmax$date) # Then put together a 50-point data.frame in order # to estimate the distribution of the annual maximum rl_df <- data.frame(cyc = seq(0, 365.25, l = 51)[-1]) rl_df$threshold <- predict(FC_ald, rl_df, type = "response")$location rl_df[, c("psi", "xi")] <- predict(FC_gpd, rl_df, type = "response") # and use this to estimate the 100-year return level rl_100_gpd <- qev(0.99, rl_df$threshold, rl_df$psi, rl_df$xi, m = 365.25, theta = theta, family = "gpd", tau = 1 - zeta) rl_100_gpd # 100-January return level rl_df_jan <- data.frame(cyc = 1:31) rl_df_jan$threshold <- predict(FC_ald, rl_df_jan, type = "response")$location rl_df_jan[, c("psi", "xi")] <- predict(FC_gpd, rl_df_jan, type = "response") # and use this to estimate the 100-January return level rl_100_gpd_jan <- qev(0.99, rl_df_jan$threshold, rl_df_jan$psi, rl_df_jan$xi, m = 31, theta = theta, family = "gpd", tau = 1 - zeta) rl_100_gpd_jan ## Uncertainty estimation # Fitted values and standard errors for GEV parameters for # the plotting data.frame, COprcp_plot gev_pred <- predict(m_gev, COprcp_plot, type = "response", se.fit = TRUE) head(gev_pred$se.fit) # Fitted values and standard errors for the 0.95th and 0.99th # quantile of the GEV distribution, i.e., 20- and 100-year # return levels gev_rl100_pred <- predict(m_gev, COprcp_plot, prob = c(0.95, 0.99), se.fit = TRUE) head(gev_rl100_pred$se.fit) ## Simulations of parameters from sampling distributions ## of stationary GEV model # Simulate 5 sets of each GEV parameter for each row of COprcp_plot set.seed(1) gev_sim <- simulate(m_gev, nsim = 5, newdata = COprcp_plot, type = "response") lapply(gev_sim, head) # Simulate 5 sets of the 0.99th quantile of the GEV # distribution for each row of COprcp_plot set.seed(2) gev_rl_sim <- simulate(m_gev, nsim = 5, newdata = COprcp_plot, prob = 0.99) head(gev_rl_sim) # Simulate 1e4 samples of 0.99th quantile of the # GEV distribution for Boulder, CO set.seed(3) gev_rl_boulder_sim <- simulate(m_gev, nsim = 1e4, newdata = COprcp_meta[3, ], prob = 0.99) # and then obtain and approximate 95% confidence interval # based on the 0.025th and 0.975th quantiles of the sample quantile(gev_rl_boulder_sim, c(0.025, 0.975)) ## Simulations of parameters from sampling distributions ## of nonstationary GPD model # Simulate 1e3 ALD and GPD parameters set.seed(4) FC_sim_ald <- simulate(FC_ald, newdata = rl_df, nsim = 1e3, type = "response") set.seed(5) FC_sim_gpd <- simulate(FC_gpd, newdata = rl_df, nsim = 1e3, type = "response") # and then calculate 100-year return level estimate for # each sample rl_sim <- qev(0.99, FC_sim_ald[[1]], FC_sim_gpd[[1]], FC_sim_gpd[[2]], m = 365.25, theta = theta, family = "gpd", tau = 1 - zeta) # and then obtain and approximate 95% confidence interval # based on the 0.025th and 0.975th quantiles of the sample quantile(rl_sim, c(0.025, 0.975)) # > sessionInfo() # R version 4.1.3 (2022-03-10) # Platform: x86_64-pc-linux-gnu (64-bit) # Running under: Ubuntu 20.04.4 LTS # # Matrix products: default # BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 # LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3 # # locale: # [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C # [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8 # [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8 # [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C # [9] LC_ADDRESS=C LC_TELEPHONE=C # [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C # # attached base packages: # [1] stats graphics grDevices utils datasets methods base # # other attached packages: # [1] evgam_0.1.5 # # loaded via a namespace (and not attached): # [1] compiler_4.1.3 Rcpp_1.0.7