########################################################### ## Section 3.2 ########################################################### ## Table 1 library("extremefit") set.seed(3110) n <- 1000 # Can be changed to a higher number to improve the value of the critical value NMC <- 5000 ## Biweight CriticalValue(NMC, n, Biweight.kernel, prob = 0.99) ## Epanechnikov CriticalValue(NMC, n, Epa.kernel, prob = 0.99) ## Rectangular CriticalValue(NMC, n, Rectangular.kernel, prob = 0.99) ## Triangular CriticalValue(NMC, n, Triang.kernel, prob = 0.99) ## Truncated gaussian sigma = 1/3 CriticalValue(NMC, n, TruncGauss.kernel, kpar = 1/3, prob = 0.99) ## Truncated gaussian sigma = 1 CriticalValue(NMC, n, TruncGauss.kernel, kpar = 1, prob = 0.99) ########################################################### library("extremefit") set.seed(3110) n <- 1000 NMC <- 5000 CriticalValue(NMC, n, TruncGauss.kernel, prob = 0.99, plot = TRUE) ########################################################### ## Section 4 set.seed(5) X <- abs(rcauchy(200)) n <- 100 HA <- hill.adapt(X) predict(HA, newdata = c(3, 5, 7), type = "survival")$p predict(HA, newdata = c(0.9, 0.99, 0.999, 0.9999), type = "quantile")$y set.seed(5) n <- 50000; tau <- 3 theta <- function(t) { 0.5 + 0.25 * sin(2 * pi * t) } Ti <- 1:n / n; Theta <- theta(Ti) X <- rparetoCP(n, a0 = 1 / (Theta * 2), a1 = 1 / Theta, x1 = tau) a <- 0.005; b <- 0.05; Mh <- 20 hl <- bandwidth.grid(a, b, Mh, type = "geometric") Tgrid <- seq(0, 1, 0.02) Hcv <- bandwidth.CV(X, Ti, Tgrid, hl, pcv = 0.99, kernel = TruncGauss.kernel, CritVal = 3.4, plot = FALSE) Hcv$h.cv Tgrid <- seq(0, 1, 0.01) hillTs <- hill.ts(X, Ti, Tgrid, h = Hcv$h.cv, kernel = TruncGauss.kernel, CritVal = 3.4) plot(Tgrid, hillTs$Theta) lines(Ti, Theta, col = "red") p <- c(0.99, 0.999) PredQuant <- predict(hillTs, newdata = p, type = "quantile") TrueQuant <- matrix(0, ncol = 2, nrow = n) for (i in 1:length(p)) { TrueQuant[, i] <- qparetoCP(p[i], a0 = 1 / (Theta * 2), a1 = 1 / Theta, x1 = tau) } plot(Tgrid, log(as.numeric(PredQuant$y[1, ])), ylim = c(1.5, 6.3), ylab = "estimated log-quantiles") points(Tgrid, log(as.numeric(PredQuant$y[2, ])), pch = "+") lines(Ti, log(TrueQuant[, 1])) lines(Ti, log(TrueQuant[, 2]), col = "red") x <- c(20, 30) PredSurv <- predict(hillTs, newdata = x, type = "survival") TrueSurv <- matrix(0, ncol = 2, nrow = n) for (i in 1:length(x)) { TrueSurv[, i] <- 1 - pparetoCP(x[i], a0 = 1 / (Theta * 2), a1 = 1 / Theta, x1 = tau) } plot(Tgrid, as.numeric(PredSurv$p[1, ]), ylab = "estimated survival probabilities") points(Tgrid, as.numeric(PredSurv$p[2, ]), pch = "+") lines(Ti, TrueSurv[, 1]) lines(Ti, TrueSurv[, 2], col = "red") ########################################################### ## Figure 5 (Careful of the computation time. We fixed the value of the bandwidth h as it would take too much time) NMC <- 1000 Tgrid <- seq(0, 1, 0.05) PredQuant <- matrix(0, ncol = NMC, nrow = length(Tgrid)) for (nmc in 1:NMC) { n <- 50000; tau <- 3 theta <- function(t) { 0.5 + 0.25 * sin(2 * pi * t) } Ti <- 1:n / n; Theta <- theta(Ti) X <- rparetoCP(n, a0 = 1 / (Theta * 2), a1 = 1 / Theta, x1 = tau) a <- 0.005; b <- 0.05; Mh <- 20 hl <- bandwidth.grid(a, b, Mh, type = "geometric") hillTs <- hill.ts(X, Ti, Tgrid, h = 0.02727797, kernel = TruncGauss.kernel, CritVal = 3.4) p <- 0.99 PredQuant[, nmc] <- as.numeric(predict(hillTs, newdata = p, type = "quantile")$y) } TrueQuant <- matrix(0, ncol = 2, nrow = n) for (i in 1:length(p)) { TrueQuant[, i] <- qparetoCP(p[i], a0 = 1 / (Theta * 2), a1 = 1 / Theta, x1 = tau) } boxplot(log(t(PredQuant)), ylim = c(1.5, 4.5), ylab = "log(0.99-quantiles)", names = Tgrid, xlab = "t") lines((Ti*20)+1, log(TrueQuant[, 1]), col = "red") ########################################################### ## Section 5.1 data("dataWind", package = "extremefit") attach(dataWind) pred <- vector("numeric", 12) for (m in 1:12) { indices <- which(Month == m) X <- Speed[indices] * 60 * 60 / 1000 H <- hill.adapt(X) pred[m] <- predict(H, newdata = 100, type = "survival")$p } plot(pred, ylab = "Estimated survival probability", xlab = "Month") ########################################################### ## Section 5.2 data("dataOyster", package = "extremefit") Velocity <- dataOyster$data[, 3] time <- dataOyster$data[, 1] plot(time, Velocity, type = "l", xlab = "time (hour)", ylab = "Velocity (mm/s)") new.Tgrid <- dataOyster$Tgrid X <- Velocity + (-min(Velocity)) ## Finding the cross validation value of the bandwidth (can differ ## from the fixed value) hgrid <- bandwidth.grid(0.05, 0.5, 50, type = "geometric") H <- bandwidth.CV(X, time, new.Tgrid, hgrid, TruncGauss.kernel, kpar = c(sigma = 1), pcv = 0.99, CritVal = 3.4, plot = FALSE) H$h.cv hcv <- 0.3 TS.Oyster <- hill.ts(X, t = time, new.Tgrid, h = hcv, TruncGauss.kernel, CritVal = 3.4) pred.quant.Oyster <- predict(TS.Oyster, newdata = 0.999, type = "quantile") plot(time, Velocity, type = "l", ylim = c(-0.5, 1), xlab ="Time (hour)", ylab = "Velocity (mm/s)") quant0.999 <- rep(0, length(seq(0, 24, 0.05))) quant0.999[match(new.Tgrid, seq(0, 24, 0.05))] <- as.numeric(pred.quant.Oyster$y) - (-min(Velocity)) lines(seq(0, 24, 0.05), quant0.999, col = "red") ########################################################### ## Section 5.3 data("LoadCurve", package = "extremefit") X <- LoadCurve$data$Value Date <- as.POSIXct(LoadCurve$data$Time * 86400, origin = "1970-01-01") days <- LoadCurve$data$Time plot(Date, LoadCurve$data$Value / 1000, type = "l", ylab = "Electric consumption (kVA)", xlab = "Date") Tgrid <- seq(min(days), max(days), length = 400) new.Tgrid <- LoadCurve$Tgrid ## Finding the value of the bandwidth h by cross validation (can ## differ from the fixed value) hgrid <- bandwidth.grid(0.8, 5, 60) hcv <- bandwidth.CV(X, days, new.Tgrid, hgrid, pcv = 0.99, kernel = Gaussian.kernel, CritVal = 3.4, plot = FALSE) hcv$h.cv HH <- hill.ts(LoadCurve$data$Value, LoadCurve$data$Time, new.Tgrid, h = 3.78, kernel = TruncGauss.kernel, CritVal = 3.4) Quant <- rep(NA, length(Tgrid)) Quant[match(new.Tgrid, Tgrid)] <- as.numeric(predict(HH, newdata = 0.99, type = "quantile")$y) plot(Date, LoadCurve$data$Value/1000, ylim = c(0, 8), type = "l", ylab = "Electric consumption (kVA)", xlab = "Time") lines(as.POSIXlt((Tgrid) * 86400, origin = "1970-01-01", tz = "Europe/Paris"), Quant / 1000, col = "red") ########################################################### ## Figure 11 transform <- function(x) { as.double((as.POSIXct(x, format = "%Y-%m-%d", tx = "Europe/Paris"))) / 86400 } ## Finding the point on the grid corresponding to the time of reducted power ec1 <- max(which(trunc(Tgrid) == trunc(transform("2016-01-11")) + 1)) ec2 <- max(which(trunc(Tgrid) == trunc(transform("2016-01-13")) + 1)) ec3 <- max(which(trunc(Tgrid) == trunc(transform("2016-01-18")) + 1)) ec4 <- max(which(trunc(Tgrid) == trunc(transform("2016-02-25")) + 1)) ec5 <- max(which(trunc(Tgrid) == trunc(transform("2016-03-01")) + 1)) ec6 <- max(which(trunc(Tgrid) == trunc(transform("2016-03-07")) + 1)) ec7 <- min(which(trunc(Tgrid) == trunc(transform("2016-03-18")) + 1)) ## Finding the point on the new grid corresponding to the time of reducted power ecn1 <- max(which(trunc(new.Tgrid) == trunc(transform("2016-01-11")) + 1)) ecn2 <- max(which(trunc(new.Tgrid) == trunc(transform("2016-01-13")) + 1)) ecn3 <- max(which(trunc(new.Tgrid) == trunc(transform("2016-01-18")) + 1)) ecn4 <- max(which(trunc(new.Tgrid) == trunc(transform("2016-02-25")) + 1)) ecn5 <- max(which(trunc(new.Tgrid) == trunc(transform("2016-03-01")) + 1)) ecn6 <- max(which(trunc(new.Tgrid) == trunc(transform("2016-03-07")) + 1)) ecn7 <- min(which(trunc(new.Tgrid) == trunc(transform("2016-03-18")) + 1)) Surv <- rep(0, length(Tgrid)) Surv <- as.numeric(predict(HH, newdata = 9*1000, type = "survival")$p) Surv[ec1] <- as.numeric(predict(HH, newdata = 6.3*1000, type = "survival")$p)[ecn1] Surv[c(ec2, ec3, ec4)] <- as.numeric(predict(HH, newdata = 4.5*1000, type = "survival")$p)[c(ecn2, ecn3, ecn4)] Surv[ec5] <- as.numeric(predict(HH, newdata = 3.6*1000, type = "survival")$p)[ecn5] Surv[c(ec6, ec7)] <- as.numeric(predict(HH, newdata = 2.7*1000, type = "survival")$p)[c(ecn6, ecn7)] plot(as.POSIXlt((LoadCurve$Tgrid)*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv, type = "l", ylab = "Survival probability", xlab = "Time") points(as.POSIXlt((LoadCurve$Tgrid[ec1])*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv[ec1], pch = "+", col = "blue") points(as.POSIXlt((LoadCurve$Tgrid[ec2])*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv[ec2], pch = "+", col = "blue") points(as.POSIXlt((LoadCurve$Tgrid[ec3])*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv[ec3], pch = "+", col = "blue") points(as.POSIXlt((LoadCurve$Tgrid[ec4])*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv[ec4], pch = "+", col = "blue") points(as.POSIXlt((LoadCurve$Tgrid[ec5])*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv[ec5], pch = "+", col = "blue") points(as.POSIXlt((LoadCurve$Tgrid[ec6])*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv[ec6], pch = "+", col = "blue") points(as.POSIXlt((LoadCurve$Tgrid[ec7])*86400, origin = "1970-01-01", tz = "Europe/Paris"), Surv[ec7], pch = "+", col = "blue")