## Multivariate Normal Variance Mixtures in R: The R Package 'nvmix' ## Erik Hintz, Marius Hofert, Christiane Lemieux ## R package 'nvmix' (Version 0.0-6) if (!require("nvmix")) { install.packages("nvmix") library("nvmix") } library("RColorBrewer") set.seed(1) # reproducibility doPDF <- TRUE ### 1. Introduction ############################################################ ## Example calls 'rnvmix()', 'pnvmix()', 'dnvmix()', 'fitnvmix()' d <- 3 # dimension of the distribution scale <- diag(d) # scale matrix loc <- rep(0, d) # location vector n <- 200 # sample size for sampling df <- 4.1 # dof parameter x <- 1:d # evaluation point for the density ## Specify 'qmix' as the quantile function of an IG(df/2, df/2) distribution ## => Resulting mixture is multivariate t qmix <- function(u, df) 1 / qgamma(1-u, shape = df/2, rate = df/2) ## Sample 'n' realizations from the multivariate t rt <- rnvmix(n, qmix = qmix, loc = loc, scale = scale, df = df) ## Evaluate distribution- and density function at 'x' pt <- pnvmix(x, qmix = qmix, loc = loc, scale = scale, df = df) dt <- dnvmix(x, qmix = qmix, loc = loc, scale = scale, df = df) ## Given sample 'rt', estimate the parameters ( ~30sec) system.time(fit_t <- fitnvmix(rt, qmix = qmix, mix.param.bounds = c(0.5, 10))) ## Aside: ## The function 'fitStudent()' which uses the closed formula for the ## t density gives a similar result: fit_t2 <- fitStudent(rt) stopifnot(all.equal(fit_t\$nu, fit_t2\$df, tol = 1e-2), all.equal(fit_t\$scale, fit_t2\$scale, tol = 1e-2)) ### 3. The underlying algorithms ############################################ ## The following code reproduces Figure 1, which displays the integrand h ## for various Mahalanobis distances df <- 2.2 qmix <- function(u, df) 1 / qgamma(1-u, shape = df/2, rate = df/2) d <- 10 h <- function(u, mahasq){ mixings <- qmix(u, df = df) (2*pi*mixings)^(-d/2)*exp(-mahasq/(2*mixings)) } h. <- function(u) h(u, mahasq = 12) if (doPDF) pdf(file = "fig_h_maha12.pdf", width = 6, height = 6) plot(h., from = 0, to = 1, xlab = expression(u), ylab = expression(h(u))) mtext(expression(paste((bold(x)-bold(mu))^T, Sigma^-1, (bold(x)-bold(mu)), " = 12"))) if (doPDF) dev.off() h. <- function(u) h(u, mahasq = 120) if (doPDF) pdf(file = "fig_h_maha120.pdf", width = 6, height = 6) plot(h., from = 0, to = 1, xlab = expression(u), ylab = expression(h(u))) mtext(expression(paste((bold(x)-bold(mu))^T, Sigma^-1, (bold(x)-bold(mu)), " = 120"))) if (doPDF) dev.off() h. <- function(u) h(u, mahasq = 1200) if (doPDF) pdf(file = "fig_h_maha1200.pdf", width = 6, height = 6) plot(h., from = 0, to = 1, xlab = expression(u), ylab = expression(h(u))) mtext(expression(paste((bold(x)-bold(mu))^T, Sigma^-1, (bold(x)-bold(mu)), " = 1200"))) if (doPDF) dev.off() ### 4. Usage of the R package nvmix ############################################ ## 4.1. Sampling normal variance mixtures using rnvmix() ####################### ## Example 1: Sample from a normal variance mixture where W ~ Exp(1) rate <- 1 n <- 500 set.seed(42) ## Method 1: Specify 'rmix' (RVG for W) as a list r.exp.1 <- rnvmix(n, rmix = list("exp", rate = rate)) set.seed(42) ## Method 2: Specify 'rmix' (RVG for W) as a function r.exp.2 <- rnvmix(n, rmix = function(n) rexp(n, rate = rate)) stopifnot(all.equal(r.exp.1, r.exp.2)) ## Example 2: Sample from a normal variance mixture where W ~ Stable(alpha, 1) library("copula") # for 'rstable1()'(= random variate generator for the stable) scale <- matrix(c(1, 0.5, 0.5, 1), ncol = 2) set.seed(42) r.stable.heavy <- rnvmix(n, rmix = rstable1, scale = scale, alpha = 0.5, beta = 1) r.stable.light <- rnvmix(n, rmix = rstable1, scale = scale, alpha = 0.9, beta = 1) ## Plot if (doPDF) pdf(file = "fig_rstable_heavy.pdf", width = 6, height = 6) plot(r.stable.heavy, xlab = expression(X[1]), ylab = expression(X[2]), pch = "*") if (doPDF) dev.off() if (doPDF) pdf(file = "fig_rstable_light.pdf", width = 6, height = 6) plot(r.stable.light, xlab = expression(X[1]), ylab = expression(X[2]), pch = "*") if (doPDF) dev.off() ## Example 3: 3-point-mixture x <- c(1, 3, 5) # support p <- c(0.2, 0.3, 0.5) # probability mass function at 'x' qmix <- function(u) (u <= p[1]) * x[1] + (u > p[1] & u <= p[1]+p[2]) * x[2] + (u > p[1]+p[2]) * x[3] r.3pm.pseudo <- rnvmix(n, qmix = qmix) r.3pm.ghalton <- rnvmix(n, qmix = qmix, method = "ghalton") # quasi-random sample r.3pm.sobol <- rnvmix(n, qmix = qmix, method = "sobol") # quasi-random sample ## Plot (reproduces Figure 3) ran <- range(r.3pm.pseudo, r.3pm.ghalton, r.3pm.sobol) if (doPDF) pdf(file = "fig_r3pm_pseudo.pdf", width = 5, height = 5) plot(r.3pm.pseudo, xlab = expression(X[1]), ylab = expression(X[2]), xlim = ran, ylim = ran, pch = "*") if (doPDF) dev.off() if (doPDF) pdf(file = "fig_r3pm_ghalton.pdf", width = 5, height = 5) plot(r.3pm.ghalton, xlab = expression(X[1]), ylab = expression(X[2]), xlim = ran, ylim = ran, pch = "*") if (doPDF) dev.off() if (doPDF) pdf(file = "fig_r3pm_sobol.pdf", width = 5, height = 5) plot(r.3pm.sobol, xlab = expression(X[1]), ylab = expression(X[2]), xlim = ran, ylim = ran, pch = "*") if (doPDF) dev.off() ## 4.2. Estimating probabilities with pnvmix() ################################# ## Example 1: W ~ Exp(1) ## Evaluate P(a <= X <= b) a <- -(1:3) b <- 3:1 scale <- matrix(c(2, 0.5, 0, 0.5, 2, 1, 0, 1, 2), ncol = 3) rate <- 1 set.seed(42) ## Method 1: Specify 'qmix' (quantile function of W) as list p1 <- pnvmix(b, lower = a, qmix = list("exp", rate = rate), scale = scale) set.seed(42) ## Method 2: Specify 'qmix' as a function p2 <- pnvmix(b, lower = a, qmix = function(u, lambda) -log(1-u)/lambda, scale = scale , lambda = rate) stopifnot(all.equal(p1, p2)) str(p1) ## Default absolute error tolerance is 1e-3; can be changed by the user: pnvmix(b, lower = a, qmix = function(u, lambda) -log(1-u)/lambda, lambda = rate, scale = scale, control = list(pnvmix.abstol = 1e-5)) ## Example 2: W constant, so X multivariate normal ## For the important cases of a multivariate normal or t distribution, one can ## use the wrappers pNorm() and pStudent() A <- matrix(c(1, 0, 0, 0, 2, 1, 0, 0, 3, 0, 0, 0, 4, 1, 0, 1), ncol = 4, byrow = TRUE) scale <- A %*% t(A) # *singular* scale! upper <- 2:5 df <- 1.5 pn <- pNorm(upper, scale = scale) ## 4.3. Estimating the (log-)density with dnvmix() ############################# ## Example: Evaluate density of 20-dimensional t distribution ## Note that the density is *known* in this case so that the estimated and ## true density values can be compared set.seed(271) d <- 20 df <- 3.9 n <- 2000 x <- rnvmix(n, qmix = "inverse.gamma", df = df/3, scale = diag(d)) dt.1 <- dnvmix(x, qmix = "inverse.gamma", df = df, log = TRUE) # true density dt.2 <- dnvmix(x, qmix = function(u, df) 1/qgamma(1-u, shape = df/2, rate = df/2), df = df, log = TRUE) # estimated density, adaptive ## Estimate density without adaptive procedure: dt.3 <- dnvmix(x, qmix = function(u, df) 1/qgamma(1-u, shape = df/2, rate = df/2), df = df, control = list(dnvmix.doAdapt = FALSE), log = TRUE) ## Prepare plot m <- sqrt(mahalanobis(x, center = rep(0, d), cov = diag(d))) order.m <- order(m) m <- m[order.m] # sorted for plotting dt.1 <- dt.1[order.m] dt.2 <- dt.2[order.m] dt.3 <- dt.3[order.m] pal <- colorRampPalette(c("#000000", brewer.pal(8, name = "Dark2")[c(7, 3, 5)])) cols <- pal(3) lwds <- c(1, 1.4, 1.8, 1.4, 1.4) ## Plot (reproduces Figure 4) if (doPDF) pdf(file = "fig_dnvmix.pdf", width = 6, height = 6) plot(m, dt.1, type = 'p', pch = 3, col = cols[1], xlab = expression(paste("Mahalanobis distance ", D, "(", bold(x), "; ", bold(mu), ", ", Sigma, ")")), ylab = "log-density") lines(m, dt.2, col = cols[2], lty = 2, lwd = lwds[2]) lines(m, dt.3, col = cols[3], lty = 3, lwd = lwds[3]) legend("topright", c("True log-density", "Estimated log-density (adaptive)", "Estimated log-density (non-adaptive)"), lty = c(NA, 2, 3), pch = c(3, NA, NA), lwd = c(NA, lwds[2:3]), col = cols, bty = 'n') if (doPDF) dev.off() ## 4.4. Fitting normal variance mixtures with fitnvmix() ####################### ## Example 1: W ~ Pareto(1.5, 1) set.seed(42) d <- 4 n <- 100 nu. <- 1.5 scale <- cov2cor(tcrossprod(matrix(runif(d * d), ncol = d))) x <- rnvmix(n, qmix = "pareto", alpha = nu., scale = scale) m.p.b <- c(0.1, 50) ## Estimate parameters using *analytical* densities/weights (known here) system.time(fit.par1 <- fitnvmix(x, qmix = "pareto", mix.param.bounds = m.p.b)) ## Estimate parameters using *estimated* densities/weights (known here) system.time(fit.par2 <- fitnvmix(x, qmix = function(u, nu) (1-u)^(-1/nu), mix.param.bounds = m.p.b)) fit.par1 fit.par2 ## Create qq-plot of empirical versus theoretical quantiles of the maha-distances set.seed(1) qq.par <- qqplot_maha(x, qmix = "pareto", alpha = fit.par1\$nu, loc = fit.par1\$loc, scale = fit.par1\$scale, plot = FALSE) ## Plot (reproduces Figure 5) if (doPDF) pdf(file = "fig_qq_parmix.pdf", width = 6, height = 6) plot(qq.par) # use method 'plot()' of the class 'qqplot_maha' if (doPDF) dev.off() ## Now on log-log-scale if (doPDF) pdf(file = "fig_qq_parmix_log.pdf", width = 6, height = 6) plot(qq.par, plot.pars = list(log = "xy")) # log-log scale if (doPDF) dev.off() qq.par ## Example 2: W ~ InverseBurr(2, 5) (~3 min) ## Note: The distribution of W has two parameters here qmix <- function(u, nu) (u^(-1/nu[2])-1)^(-1/nu[1]) # quantile function set.seed(274) nu <- c(2, 5) d <- 5 n <- 500 scale <-cov2cor(tcrossprod(matrix(runif(d*d), ncol = d))) m.p.b <- matrix(c(0.1, 0.1, 8, 8), ncol = 2) x <- rnvmix(n, qmix = qmix, nu = nu, scale = scale) system.time(fit.burr <- fitnvmix(x, qmix = qmix, mix.param.bounds = m.p.b)) fit.burr ## Create qq-plot (reproduces Figure 6) set.seed(1) system.time(qq.burr <- qqplot_maha(fitnvmix_object = fit.burr, plot = FALSE)) if (doPDF) pdf(file = "fig_qq_burrmix.pdf", width = 6, height = 6) plot(qq.burr) if (doPDF) dev.off() ## Now on log-log-scale if (doPDF) pdf(file = "fig_qq_burrmix_log.pdf", width = 6, height = 6) plot(qq.burr, plot.pars = list(log = "xy")) if (doPDF) dev.off() ### 5. Example application ##################################################### library("qrmtools") # for returns() library("rugarch") # for fit.ARMA.GARCH() set.seed(123) # reproducibility ## Load negative return data of the REITs in the SP500 index data("SP500_const", package = "qrmdata") time <- c("2010-01-01", "2012-12-31") x <- SP500_const[paste0(time, collapse = "/"), SP500_const_info\$Subsector == "REITs"] X <- -returns(x) ## deGARCHing uspec <- rep(list(ugarchspec(distribution.model = "std")), ncol(X)) fit.ARMA.GARCH <- fit_ARMA_GARCH(X, ugarchspec.list = uspec, verbose = FALSE) fits <- fit.ARMA.GARCH\$fit resi <- lapply(fits, residuals, standardize = TRUE) X <- as.matrix(do.call(merge, resi)) colnames(X) <- colnames(x) n <- nrow(X) # sample size ## Specify various normal variance mixture distributions along with bounds ## on the mixing parameter(s) qmix_ <- list(constant = "constant", inverse.gamma = "inverse.gamma", inverse.burr = function(u, nu) (u^(-1/nu[2])-1)^(-1/nu[1]), pareto = "pareto") m.p.b_ <- list(constant = c(0, 1e8), # irrelevant inverse.gamma = c(1, 8), inverse.burr = matrix(c(0.1, 0.1, 8, 8), ncol = 2), pareto = c(1, 8)) ## Call 'fitnvmix()' to estimate 'loc', 'scale' and the mixing parameter(s) ## for each mixture separately (~6 min) system.time(fit.results <- lapply(1:4, function(i) fitnvmix(X, qmix = qmix_[[i]], mix.param.bounds = m.p.b_[[i]]))) ## Evaluate the density of the fitted normal variance mixtures at the (fitted) ## Mahalanobis distances l.dens <- matrix(NA_real_, ncol = 4, nrow = n) mahas <- matrix(NA_real_, ncol = 4, nrow = n) for(i in 1:4){ mahas[, i] <- sqrt(mahalanobis(X, center = fit.results[[i]]\$loc, cov = fit.results[[i]]\$scale)) order.maha <- order(mahas[, i]) mahas[, i] <- mahas[order.maha, i] # sorted for plotting l.dens[, i] <- dnvmix(X[order.maha, ], qmix = qmix_[[i]], loc = fit.results[[i]]\$loc, scale = fit.results[[i]]\$scale, nu = fit.results[[i]]\$nu, log = TRUE) } ## Plot names <- c("Multiv. normal", "Inverse-gamma mixture", "Inverse-Burr mixture", "Pareto mixture") pal <- colorRampPalette(c("#000000", brewer.pal(8, name = "Dark2")[c(7, 3, 5)])) cols <- pal(4) # colors ## Reproduce Figure 7 if (doPDF) pdf(file = "fig_REIT_logdens.pdf", width = 6, height = 6) plot(NA, xlim = range(mahas), ylim = range(l.dens), xlab = expression(paste("Mahalanobis distance ", D, "(", bold(x), "; ", bold(mu), ", ", Sigma, ")")), ylab = 'log-density') for(i in 1:4){ lines(mahas[, i], l.dens[, i], lty = i, type = 'b', pch = i, col = cols[i]) } legend("topright", names, lty = 1:4, pch = 1:4, col = cols, bty = 'n') if (doPDF) dev.off() ## Create qq-plots for the different models (~ 8min) system.time(qq.results <- lapply(1:4, function(i) qqplot_maha(X, qmix = qmix_[[i]], loc = fit.results[[i]]\$loc, scale = fit.results[[i]]\$scale, nu = fit.results[[i]]\$nu))) ## Plot (reproduces Figure 8) for(i in 1:4){ if (doPDF) pdf(file = paste0("fig_REIT_qq_", i, ".pdf"), width = 6, height = 6) plot(qq.results[[i]]) if (doPDF) dev.off() } ## Compute tail probabilites P(X_1 > F_1^-(u),..., X_d > F_d^-(u)) ## for the different models n. <- 50 u. <- seq(0.95, to = 0.999, length.out = n.) u.matrix <- matrix(u., nrow = n., ncol = ncol(X)) set.seed(2) system.time(tailprobs <- sapply(1:4, function(i) pnvmixcopula( 1 - u.matrix, qmix = qmix_[[i]], scale = cov2cor(fit.results[[i]]\$scale), nu = fit.results[[i]]\$nu, control = list(pnvmix.abstol = 1e-5)))) # ~ 20sec ## Plot (reproduces Figure 9) if (doPDF) pdf(file = "fig_REIT_tailprobs.pdf", width = 6, height = 6) plot(NA, xlim = range(u.), ylim = range(tailprobs), xlab = "u", ylab = expression(P(X[1]>q[u],...,X[15]>q[u])), log = "y") for(i in 1:4){ lines(u., tailprobs[, i], lty = i, col = cols[i], lwd = lwds[i]) } legend("bottomleft", names, lty = 1:4, lwd = lwds[1:4], col = cols, bty = 'n') if (doPDF) dev.off() ## Plot tail probabilities standardized by Gaussian copula to see differences if (doPDF) pdf(file = "fig_REIT_tailprobs_standardized.pdf", width = 6, height = 6) plot(NA, xlim = range(u.), ylim = range(tailprobs/tailprobs[, 1]), xlab = "u", ylab = expression(P(X[1]>q[u],...,X[15]>q[u])~"standardized by Gauss case"), log = "y") for(i in 1:4){ lines(u., tailprobs[, i]/tailprobs[, 1], lty = i, lwd = lwds[i], col = cols[i]) } legend("topleft", names, lty = 1:4, lwd = lwds[1:4], col = cols, bty = 'n') if (doPDF) dev.off() ## Estimate value-at-risk and expected shortfall for an equal weight portfolio ## for the different fitted models alpha <- 1 - 1/10^seq(0.5, 3.5, by = 0.05) VaRs <- matrix(NA_real_, ncol = 5, nrow = length(alpha)) ESs <- matrix(NA_real_, ncol = 5, nrow = length(alpha)) for(i in 1:4){ VaRs[, i] <- VaR_nvmix(alpha, qmix = qmix_[[i]], loc = sum(fit.results[[i]]\$loc), scale = sum(fit.results[[i]]\$scale), nu = fit.results[[i]]\$nu) ESs[, i] <- ES_nvmix(alpha, qmix = qmix_[[i]], loc = sum(fit.results[[i]]\$loc), scale = sum(fit.results[[i]]\$scale), nu = fit.results[[i]]\$nu) } ## Estimate VaR and ES non-parametrically sum.obs <- rowSums(X) VaRs[, 5] <- quantile(sum.obs, probs = alpha) ESs[, 5] <- sapply(seq_along(alpha), function(i) mean(sum.obs[sum.obs > VaRs[i, 5]])) ## Plot (reproduces Figure 9) cols <- pal(5) # colors if (doPDF) pdf(file = "fig_REIT_VaR.pdf", width = 6, height = 6) plot(1-alpha, VaRs[, 1], type = "l", ylim = range(VaRs), log = "xy", xlab = expression(1-alpha), ylab = expression(VaR[alpha])) for(i in 2:5) lines(1-alpha, VaRs[, i], col = cols[i], lty = i, lwd = lwds[i]) legend("topright", c(names, "empirical"), col = cols, lty = 1:5, lwd = lwds, bty = 'n') if (doPDF) dev.off() if (doPDF) pdf(file = "fig_REIT_ES.pdf", width = 6, height = 6) plot(1-alpha, ESs[, 1], type = "l", ylim = range(ESs), log = "xy", xlab = expression(1-alpha), ylab = expression(ES[alpha])) for(i in 2:5) lines(1-alpha, ESs[, i], col = cols[i], lty = i, lwd = lwds[i]) legend("topright", c(names, "empirical"), col = cols, lty = 1:5, lwd = lwds, bty = 'n') if (doPDF) dev.off() ### 6. Grouped normal variance mixtures ######################################## ## Example 1: Sample realizations from a grouped mixture d <- 5 df <- 2 n <- 1e3 set.seed(157) A <- matrix(runif(d * d), ncol = d) scale <- cov2cor(A %*% t(A)) # scale matrix ## Group structure (here): Components 1+5 in the same group, all others individual groupings <- c(1, 2, 3, 4, 1) ## Specify mixing distribution for each group qmix <- list(function(u, df) 1 / qgamma(1-u, shape = df/2, rate = df/2), function(u) rep(1, length(u)), list("exp", rate = 1), function(u) (1-u)^(-1/2)) ## Sample r.gnvm <- rgnvmix(n, groupings = groupings, qmix = qmix, scale = scale, df = df) ## Plot (reproduces Figure 11) if (doPDF) pdf(file = "fig_rgnvm.pdf", width = 6, height = 6) pairs(r.gnvm, gap = 0, pch = "*", labels = as.expression( sapply(1:5, function(j) bquote(italic(X[.(j)]))))) if (doPDF) dev.off() ## Example 2: Evaluate distribution function of a grouped mixture ## ... same mixture as above b <- 3 * runif(d) * sqrt(d) # random upper limit (pg <- pgnvmix(b, groupings = groupings, qmix = qmix, scale = scale, df = df)) (dg <- dgnvmix(b, groupings = groupings, qmix = qmix, scale = scale, df = df)) ## Example 3: Bivariate grouped t copula d <- 2 set.seed(123) rho <- 0.7 scale <- matrix(c(1, rho, rho, 1), ncol = 2) df <- c(0.5, 35) # df for components 1 and 2 r.gStdcop <- rgStudentcopula(1e4, df = df, scale = scale) # sample ## Evaluate copula and its density n.grid <- 33 u <- seq(0, 1, length.out = n.grid) grid <- expand.grid("u[1]" = u, "u[2]" = u) dC <- dgStudentcopula(as.matrix(grid), df = df, scale = scale) pC <- pgStudentcopula(as.matrix(grid), df = df, scale = scale) val.pC <- cbind(grid, "C(u[1],u[2])" = pC) val.dC <- cbind(grid, "c(u[1],u[2])" = dC) ## Plot (reproduces Figure 12) opar <- par(pty = "s") if (doPDF) pdf(file = "fig_rgstd.pdf", width = 6, height = 6) plot(r.gStdcop, xlab = expression(U[1]), ylab = expression(U[2]), pch = "*", pty = "s") if (doPDF) dev.off() par(opar) if (doPDF) pdf(file = "fig_rgstd_dens.pdf", width = 6, height = 6) contourplot2(val.dC, xlim = 0:1, ylim = 0:1) if (doPDF) dev.off() if (doPDF) pdf(file = "fig_rgstd_df.pdf", width = 6, height = 6) contourplot2(val.pC, xlim = 0:1, ylim = 0:1) if (doPDF) dev.off()