## install the required packages
# install.packages("BiocManager")
# BiocManager::install("survcomp")
# BiocManager::install("EmpiricalBrownsMethod")
# BiocManager::install("multtest")
# install.packages("poolr")
# install.packages("genetics")
# install.packages("metap")
# install.packages("aggregation")
# install.packages("gap")
# install.packages("CombinePValue")
# install.packages("TFisher")
# install.packages("harmonicmeanp")
# install.packages("RcppEigen")
# install.packages("xtable")

### page 9

## correlation between p-values from two- and one-sided t-tests
set.seed(2468)
n <- 20
Rmat <- matrix(0.8, nrow = 2, ncol = 2)
diag(Rmat) <- 1
P <- replicate(1000, {
  X <- mvtnorm::rmvnorm(n, mean = c(0, 0), sigma = Rmat)
  p.two.sided.1 <- t.test(X[, 1], alternative = "two.sided")$p.value
  p.two.sided.2 <- t.test(X[, 2], alternative = "two.sided")$p.value
  p.one.sided.1 <- t.test(X[, 1], alternative = "greater")$p.value
  p.one.sided.2 <- t.test(X[, 2], alternative = "greater")$p.value
  c(p.two.sided.1, p.two.sided.2, p.one.sided.1, p.one.sided.2)
})

Rmat[1, 2]^3
cor(P[1, ], P[2, ])
Rmat[1, 2]
cor(P[3, ], P[4, ])

## creating Figure 1
rhos <- seq(-0.99, 0.99, by = 0.02)
if (F) {
  ## note: code for the simulation, but not run because it takes too much time
  n <- 20
  R <- sapply(rhos, function(rho) {
    Rmat <- matrix(rho, nrow = 2, ncol = 2)
    diag(Rmat) <- 1
    P <- replicate(1e+06, {
      X <- mvtnorm::rmvnorm(n, mean = c(0, 0), sigma = Rmat)
      p.one.sided.1 <- t.test(X[, 1], alternative = "greater")$p.value
      p.one.sided.2 <- t.test(X[, 2], alternative = "greater")$p.value
      p.two.sided.1 <- t.test(X[, 1], alternative = "two.sided")$p.value
      p.two.sided.2 <- t.test(X[, 2], alternative = "two.sided")$p.value
      c(p.one.sided.1, p.one.sided.2, p.two.sided.1, p.two.sided.2)
    })
    c(cor(P[1, ], P[2, ]), cor(P[3, ], P[4, ]))
  })
  ## using the output from this for the generation of R below
  dput(round(R, 4))
}

R <- structure(c(-0.9886, 0.9593, -0.9658, 0.8886, -0.9435, 0.8275, -0.9211, 0.7719,
  -0.899, 0.7202, -0.8773, 0.6732, -0.8563, 0.6313, -0.8343, 0.5897, -0.8125, 0.553,
  -0.792, 0.5175, -0.771, 0.4853, -0.7495, 0.4516, -0.7279, 0.4232, -0.7084, 0.3959,
  -0.6869, 0.368, -0.6679, 0.3463, -0.6469, 0.3207, -0.6266, 0.3011, -0.6071, 0.2795,
  -0.5863, 0.2583, -0.568, 0.2387, -0.547, 0.2206, -0.5263, 0.2028, -0.5075, 0.1874,
  -0.4877, 0.1726, -0.4687, 0.1599, -0.447, 0.1443, -0.4314, 0.1334, -0.4089, 0.1198,
  -0.3902, 0.1083, -0.3699, 0.097, -0.3518, 0.0865, -0.3314, 0.0767, -0.3112, 0.0696,
  -0.2938, 0.0605, -0.2743, 0.0525, -0.2545, 0.0448, -0.2355, 0.0384, -0.2189,
  0.0323, -0.1976, 0.0279, -0.1799, 0.0221, -0.1609, 0.0184, -0.1405, 0.0147, -0.1238,
  0.0094, -0.1032, 0.0062, -0.084, 0.0047, -0.0664, 0.0033, -0.0499, 0.0011, -0.0288,
  9e-04, -0.0079, 1e-04, 0.0098, -9e-04, 0.0278, 5e-04, 0.0466, 0.0038, 0.0642,
  0.0041, 0.0835, 0.0049, 0.1055, 0.0101, 0.1222, 0.0106, 0.1418, 0.0136, 0.1596,
  0.0166, 0.1798, 0.0223, 0.1987, 0.0268, 0.2168, 0.033, 0.2368, 0.0398, 0.2568,
  0.0446, 0.2749, 0.0523, 0.294, 0.0586, 0.3129, 0.0702, 0.3327, 0.0764, 0.3525,
  0.0882, 0.3685, 0.0973, 0.3896, 0.1063, 0.409, 0.1201, 0.4288, 0.1316, 0.4465,
  0.1443, 0.4674, 0.1581, 0.4871, 0.1713, 0.5073, 0.1888, 0.5277, 0.2036, 0.5457,
  0.2193, 0.5667, 0.239, 0.5846, 0.2571, 0.6071, 0.277, 0.6262, 0.2984, 0.6471,
  0.3211, 0.6674, 0.3457, 0.6876, 0.3686, 0.7085, 0.3967, 0.7292, 0.4239, 0.7496,
  0.4528, 0.7701, 0.4834, 0.792, 0.5185, 0.8137, 0.5538, 0.834, 0.5894, 0.8555,
  0.6309, 0.8773, 0.6738, 0.8994, 0.7207, 0.9213, 0.7715, 0.9436, 0.8274, 0.9658,
  0.8888, 0.9886, 0.9592), .Dim = c(2L, 100L))
# pdf("../Figures/poolr_rev1_oc-figCorP.pdf", height = 5, width = 9.5)
par(mfrow = c(1, 2))
plot(rhos, R[1, ], xlab = expression(rho), ylab = "Estimated Correlation", type = "o",
  pch = 19, cex = 0.4, main = "(a) One-Sided Tests")
lines(rhos, rhos, lwd = 5, col = "gray70")
points(rhos, R[1, ], type = "o", pch = 19, cex = 0.4)

plot(rhos, R[2, ], xlab = expression(rho), ylab = "Estimated Correlation", type = "o",
  pch = 19, cex = 0.4, main = "(a) Two-Sided Tests")
lines(rhos, abs(rhos)^3, lwd = 5, col = "gray70")
points(rhos, R[2, ], type = "o", pch = 19, cex = 0.4)
# dev.off()

### page 15

## install and load the "poolr" package
# install.packages("poolr")
library("poolr")

### page 16

## arguments of one of the base functions
args(fisher)

## illustrate the use of the fisher() function for combining independent
## p-values
pvals <- c(0.02, 0.03, 0.08, 0.2)
(res <- fisher(pvals))

## combine the p-values with all base methods
fun <- c("fisher", "stouffer", "invchisq", "binomtest", "bonferroni", "tippett")
round(sapply(fun, function(f) do.call(f, list(p = pvals))$p), digits = 5)

### page 17

## power comparison of the base methods when there is only one false null
## hypothesis
k <- 10
iters <- 10000
set.seed(8780)
Z <- replicate(iters, rnorm(k, mean = c(3, rep(0, k - 1)), sd = 1))
P <- 2 * pnorm(abs(Z), lower.tail = FALSE)
sapply(fun, function(f)
  mean(apply(P, 2, function(pvals) do.call(f, list(p = pvals))$p) <= .05))

## power comparison of the base methods when all null hypotheses are false
Z <- replicate(iters, rnorm(k, mean = rep(1, k), sd = 1))
P <- 2 * pnorm(abs(Z), lower.tail = FALSE)
sapply(fun, function(f)
  mean(apply(P, 2, function(pvals) do.call(f, list(p = pvals))$p) <= .05))


### page 18

## generate the correlation matrix for 5 test statistics
Rmat <- matrix(0.7, nrow = 5, ncol = 5)
diag(Rmat) <- 1
Rmat

## generate the test statistics based on the given correlation matrix
ti <- c(mvtnorm::rmvnorm(1, mean = rep(0, 5), sigma = Rmat))
round(ti, digits = 5)

## convert the test statistics into two-sided p-values
pvals <- 2 * pnorm(abs(ti), lower.tail = FALSE)
round(pvals, digits = 5)

## combine the p-values with the base functions
round(sapply(fun, function(f) do.call(f, list(p = pvals))$p), digits = 5)

## adjust Fisher's method with the Li & Ji adjustment
fisher(pvals, adjust = "liji", R = Rmat)

### page 19

## obtain the estimate of the effective number of tests with all methods
methods <- c("nyholt", "liji", "gao", "galwey")
sapply(methods, function(method) meff(Rmat, method = method))

## convert the correlation matrix of the test statistics into the correlation
## matrix of the (two-sided) p-values based on multivariate theory
(Rpmat <- mvnconv(Rmat, target = "p", cov2cor = TRUE))


### page 20

## adjust Fisher's method with the Li & Ji adjustment using this correlation
## matrix
fisher(pvals, adjust = "liji", R = Rpmat)

## illustrate the use of the "m" argument
fisher(pvals, adjust = "liji", R = Rmat)
fisher(pvals, m = meff(Rmat, method = "liji"))

## adjust Fisher's method based on an empirically-derived null distribution
## using pseudo replicates (with the default settings)
fisher(pvals, adjust = "empirical", R = Rmat)

### page 21

## illustrate the use of the stepwise algorithm
fisher(pvals, adjust = "empirical", R = Rmat,
  size = c(1000, 10000, 100000), threshold = c(.10, .01))

## obtain the running time when repeatedly using the adjustment based on an
## empirically-derived null distribution (with the default settings)

## Note: To reduce the amount of time it takes to "compile" the paper using
## Sweave() and to provide results that correspond to the CPU indicated in the
## paper (an Intel Xeon E5-2630v4 processor), the timing results shown in the
## paper are not generated "on the fly" but are hard-coded. However, if the code
## below is run, the random number seed changes, making some of the results that
## follow further down non-reproducible. Therefore, we store the current status
## of the random seed before running the following simulations and restore it
## afterwards.

rand_seed <- .Random.seed

Z <- mvtnorm::rmvnorm(1000, mean = rep(0, 5), sigma = Rmat)
P <- 2 * pnorm(abs(Z), lower.tail = FALSE)
system.time(p <- apply(P, 1, function(pvals) fisher(pvals, adjust = "empirical",
  R = Rmat)$p))

## obtain the running time when repeatedly using the adjustment based on an
## empirically-derived null distribution (with the stepwise algorithm)
system.time(p <- apply(P, 1, function(pvals) fisher(pvals, adjust = "empirical",
  R = Rmat, size = c(1000, 10000, 1e+05), threshold = c(0.1, 0.01))$p))

### page 22

## obtain the running time when repeatedly using the adjustment based on an
## empirically-derived null distribution (generated in batches of size 100)
system.time(p <- apply(P, 1, function(pvals) fisher(pvals, adjust = "empirical",
  R = Rmat, batchsize = 100)$p))

## restore the random number seed
.Random.seed <- rand_seed

## show first six lines of the lookup tables
head(mvnlookup)

### page 23

## convert the correlation matrix of the test statistics into the matrix with
## the correlations among the -2*log(p) values
mvnconv(Rmat, target = "m2lp")

## adjust the Fisher, Stouffer, and inverse chi-square methods based on their
## generalizations under the assumption that the test statistics follow a
## multivariate normal distribution (i.e., Brown's method, Strube's method, and
## the generalized inverse chi-square method, respectively)
fisher(pvals, adjust = "generalized", R = mvnconv(Rmat))
stouffer(pvals, adjust = "generalized", R = mvnconv(Rmat))
invchisq(pvals, adjust = "generalized", R = mvnconv(Rmat))

### page 24

## convert the test statistics into one-sided p-values
pvals <- pnorm(ti, lower.tail = FALSE)
round(pvals, digits = 5)

## combine the one-sided p-values with Brown's method
fisher(pvals, adjust = "generalized", R = mvnconv(Rmat, side = 1))

## combine the one-sided p-values with Fisher's method using the adjustment
## based on an empirically-derived null distribution
fisher(pvals, adjust = "empirical", R = Rmat, side = 1)

### page 25

## genotypes of the first six subjects for the first five SNPs
grid2ip.geno[1:6, 1:5]

## (log-transforemd) CES-D values of the first six subjects
head(grid2ip.pheno)

## convert genotypes into the number of minor alleles
G <- as.data.frame(lapply(grid2ip.geno, function(snp) genetics::genotype(snp)))
X <- as.data.frame(lapply(G, function(snp) genetics::allele.count(snp)[, 2]))
X[1:6, 1:5]

## using the number of minor alleles as predictors, fit an additive model to
## each SNP, with the (log-transformed) CES-D values as response variable
pvals <- sapply(X, function(x) coef(summary(lm(grid2ip.pheno ~ x)))[2, 4])
pvals[1:5]

### page 26

## construct the 23x23 LD correlation matrix based on the genotypes
LD <- genetics::LD(G)$r
LD[lower.tri(LD)] <- t(LD)[lower.tri(LD)]
diag(LD) <- 1
LD[1:5, 1:5]

## creating the Figure 2
# pdf("../Figures/poolr_rev1_oc-figLDmat.pdf", width = 7, height = 4.8)
cols <- hcl.colors(101)

par(mar = c(1, 6, 1, 5))

image(1:nrow(LD), 1:ncol(LD), LD, axes = FALSE, xlab = "", ylab = "", col = cols,
  zlim = c(-1, 1))
axis(2, 1:23, labels = rownames(LD), las = 1, tick = FALSE, cex.axis = 0.8)

UT0 <- LD
UT0[upper.tri(UT0, diag = TRUE)] <- NA
UT0[lower.tri(UT0)] <- 0
image(1:nrow(UT0), 1:ncol(UT0), UT0, axes = FALSE, xlab = "", ylab = "", col = "white",
  zlim = c(-1, 1), add = TRUE)

abline(h = seq(0.5, 23.5, by = 1), col = "white")
abline(v = seq(0.5, 23.5, by = 1), col = "white")

par(xpd = TRUE)
nc <- length(cols)
y.lo <- 3
y.hi <- 20
r.height <- (y.hi - y.lo)/(nc - 1)
rect(24, seq(y.lo - r.height/2, y.hi - r.height/2, length = nc), 25, seq(y.lo + r.height/2,
  y.hi + r.height/2, length = nc), col = cols, border = cols)
l.pos <- seq(-1, 1, by = 0.1)
y.pos <- (l.pos + 1)/2
text(25, y.lo + y.pos * (y.hi - y.lo), formatC(l.pos, format = "f", digits = 1, flag = "+"),
  pos = 4, cex = 0.7)
par(xpd = FALSE)
# dev.off()

## check that pvals is identical to grid2ip.p and LD to grid2ip.ld
c(all.equal(pvals, grid2ip.p), all.equal(LD, grid2ip.ld))

### page 27

## combine the p-values with Fisher's method adjusted with a variety of
## techniques
c(fisher = fisher(pvals)$p, liji = fisher(pvals, adjust = "liji", R = LD)$p, emp = fisher(pvals,
  adjust = "emp", R = LD)$p, brown = fisher(pvals, adjust = "gen", R = mvnconv(LD))$p)

## creating Table 1

## For the reasons described earlier, the code below is not run "on the fly"
## when compiling the paper with Sweave(). Hence, the random number seed is
## again stored and restored afterwards. Note that running the permutation tests
## can take hours (depending on the CPU).

rand_seed <- .Random.seed

perm_iter <- 1e+06
res <- matrix(0, nrow = 6, ncol = 8)

set.seed(1234)

res[1, 1:7] <- c(fisher(grid2ip.p)$p, fisher(grid2ip.p, adjust = "nyholt", R = grid2ip.ld)$p,
  fisher(grid2ip.p, adjust = "liji", R = grid2ip.ld)$p, fisher(grid2ip.p, adjust = "gao",
    R = grid2ip.ld)$p, fisher(grid2ip.p, adjust = "galwey", R = grid2ip.ld)$p,
  fisher(grid2ip.p, adjust = "empirical", R = grid2ip.ld, size = perm_iter)$p,
  fisher(grid2ip.p, adjust = "gen", R = mvnconv(grid2ip.ld))$p)

set.seed(1234)

res[2, 1:7] <- c(stouffer(grid2ip.p)$p, stouffer(grid2ip.p, adjust = "nyholt", R = grid2ip.ld)$p,
  stouffer(grid2ip.p, adjust = "liji", R = grid2ip.ld)$p, stouffer(grid2ip.p, adjust = "gao",
    R = grid2ip.ld)$p, stouffer(grid2ip.p, adjust = "galwey", R = grid2ip.ld)$p,
  stouffer(grid2ip.p, adjust = "empirical", R = grid2ip.ld, size = perm_iter)$p,
  stouffer(grid2ip.p, adjust = "gen", R = mvnconv(grid2ip.ld))$p)

set.seed(1234)

res[3, 1:7] <- c(invchisq(grid2ip.p)$p, invchisq(grid2ip.p, adjust = "nyholt", R = grid2ip.ld)$p,
  invchisq(grid2ip.p, adjust = "liji", R = grid2ip.ld)$p, invchisq(grid2ip.p, adjust = "gao",
    R = grid2ip.ld)$p, invchisq(grid2ip.p, adjust = "galwey", R = grid2ip.ld)$p,
  invchisq(grid2ip.p, adjust = "empirical", R = grid2ip.ld, size = perm_iter)$p,
  invchisq(grid2ip.p, adjust = "gen", R = mvnconv(grid2ip.ld))$p)

set.seed(1234)

res[4, 1:6] <- c(binomtest(grid2ip.p)$p, binomtest(grid2ip.p, adjust = "nyholt",
  R = grid2ip.ld)$p, binomtest(grid2ip.p, adjust = "liji", R = grid2ip.ld)$p, binomtest(grid2ip.p,
  adjust = "gao", R = grid2ip.ld)$p, binomtest(grid2ip.p, adjust = "galwey", R = grid2ip.ld)$p,
  binomtest(grid2ip.p, adjust = "empirical", R = grid2ip.ld, size = perm_iter)$p)

set.seed(1234)

res[5, 1:6] <- c(bonferroni(grid2ip.p)$p, bonferroni(grid2ip.p, adjust = "nyholt",
  R = grid2ip.ld)$p, bonferroni(grid2ip.p, adjust = "liji", R = grid2ip.ld)$p,
  bonferroni(grid2ip.p, adjust = "gao", R = grid2ip.ld)$p, bonferroni(grid2ip.p,
    adjust = "galwey", R = grid2ip.ld)$p, bonferroni(grid2ip.p, adjust = "empirical",
    R = grid2ip.ld, size = perm_iter)$p)

set.seed(1234)

res[6, 1:6] <- c(tippett(grid2ip.p)$p, tippett(grid2ip.p, adjust = "nyholt", R = grid2ip.ld)$p,
  tippett(grid2ip.p, adjust = "liji", R = grid2ip.ld)$p, tippett(grid2ip.p, adjust = "gao",
    R = grid2ip.ld)$p, tippett(grid2ip.p, adjust = "galwey", R = grid2ip.ld)$p,
  tippett(grid2ip.p, adjust = "empirical", R = grid2ip.ld, size = perm_iter)$p)

## conduct "proper" permutation tests

## obtain the SNP-level p-values when repeatedly permuting the outcome variable
set.seed(1234)
P <- t(replicate(perm_iter, {
  yperm <- sample(grid2ip.pheno)
  sapply(X, function(snp) coef(summary(lm(yperm ~ snp)))[2, 4])
}))

## obtain the permutation distributions using the various base methods
fis_perm <- apply(P, 1, function(snp) fisher(snp)$p)
sto_perm <- apply(P, 1, function(snp) stouffer(snp)$p)
ics_perm <- apply(P, 1, function(snp) invchisq(snp)$p)
bin_perm <- apply(P, 1, function(snp) binomtest(snp)$p)
bon_perm <- apply(P, 1, function(snp) bonferroni(snp)$p)
tip_perm <- apply(P, 1, function(snp) tippett(snp)$p)

## compute the combined p-values based on the permutation distributions
perm_res <- c((sum(fis_perm <= fisher(grid2ip.p)$p) + 1) / (perm_iter + 1),
  (sum(sto_perm <= stouffer(grid2ip.p)$p) + 1) / (perm_iter + 1),
  (sum(ics_perm <= invchisq(grid2ip.p)$p) + 1) / (perm_iter + 1),
  (sum(bin_perm <= binomtest(grid2ip.p)$p) + 1) / (perm_iter + 1),
  (sum(bon_perm <= bonferroni(grid2ip.p)$p) + 1) / (perm_iter + 1),
  (sum(tip_perm <= tippett(grid2ip.p)$p) + 1) / (perm_iter + 1))

## put together the table
names(perm_res) <- c("fisher", "stouffer", "invchisq", "binomtest", "bonferroni",
  "tippett")
perm_res

res[, 8] <- perm_res
res[4:6, 7] <- NA

rownames(res) <- c("fisher", "stouffer", "invchisq", "binomtest", "bonferroni", "tippett")
colnames(res) <- c("base", "nyholt", "liji", "gao", "galwey", "empirical", "generalized",
  "permutation")

res
round(-log10(res), 3)

## restore the random number seed
.Random.seed <- rand_seed

### page 28

## compare the Bonferroni method with Holm's procedure (using the p.adjust()
## function)
c(p.adjust_bonferroni = min(p.adjust(pvals, method = "bonferroni")),
  p.adjust_holm = min(p.adjust(pvals, method = "holm")),
  poolr_bonferroni = bonferroni(pvals)$p)

## combine the p-values with a variety of packages / functions to combine
## independent p-values available in R
k <- length(pvals)
invisible(capture.output(pkgs_ind <- matrix(c(
  metap::sumlog(pvals)$p,
  metap::sumz(pvals)$p,
  metap::invchisq(pvals, 1)$p,
  metap::minimump(pvals)$p,
  metap::logitp(pvals)$p,
  survcomp::combine.test(pvals, method = "fisher"),
  survcomp::combine.test(pvals, method = "z.transform"), NA, NA,
  survcomp::combine.test(pvals, method = "logit"),
  aggregation::fisher(pvals), NA,
  aggregation::lancaster(pvals, rep(1, k)),
  aggregation::sidak(pvals), NA,
  gap::metap(data.frame(p = rbind(unname(pvals)), n = rbind(rep(1, k))),
    N = k, prefixp = "p.", prefixn = "n.")$p,
  gap::metap(data.frame(p = rbind(2 * unname(pvals)), n = rbind(rep(1, k))),
    N = k, prefixp = "p.", prefixn = "n.")$p1,
  NA, NA, NA,
  poolr::fisher(pvals)$p,
  poolr::stouffer(pvals)$p,
  poolr::invchisq(pvals)$p,
  poolr::tippett(pvals)$p, NA), nrow = 5, byrow = TRUE,
  dimnames = list(c("metap", "survcomp", "aggregation", "gap", "poolr"),
    c("Fisher", "Stouffer", "Invchisq", "Tippett", "Logit")))))
-log10(pkgs_ind)

### page 29

## comparison with the CombinePValue package
P <- replicate(200, {
  yperm <- sample(grid2ip.pheno)
  sapply(X, function(x) coef(summary(lm(yperm ~ x)))[2, 4])
})
CombinePValue::selfcontained.test(pvals, p_permu = P)[[1]]
fisher(pvals, adjust = "gen", R = cov(t(-2 * log(P))))$p

### page 30

## comparison with the EmpiricalBrownsMethod package
EmpiricalBrownsMethod::empiricalBrownsMethod(t(X), p_values = pvals)
V <- EmpiricalBrownsMethod:::calculateCovariances(t(X))
diag(V) <- 4
fisher(pvals, adjust = "gen", R = V)$p

### page 31

S <- mvtnorm::rmvnorm(886, mean = rep(0, k), sigma = LD)
EmpiricalBrownsMethod::empiricalBrownsMethod(t(S), p_values = pvals)
V <- EmpiricalBrownsMethod:::calculateCovariances(t(S))
diag(V) <- 4
fisher(pvals, adjust = "gen", R = V)$p

## compare the covariance matrix of the -2*log(p) values obtained above with the
## one we obtain using multivariate theory (assuming one-sided tests)
round(unname(V), digits = 4)[1:4, 1:8]
mvnconv(LD, target = "m2lp", side = 1)[1:4, 1:8]

## obtain the covariance matrix using multivariate theory (assuming two-sided
## tests)
mvnconv(LD, target = "m2lp")[1:4, 1:8]

## apply Brown's method
fisher(pvals, adjust = "gen", R = mvnconv(LD))$p

### page 32

## comparison with the TFisher package
1 - TFisher::p.tpm(sum(-2 * log(pvals)), n = k, tau1 = 1, M = LD)
fisher(pvals, adjust = "gen", R = 4 * LD^2)$p

## combine the p-values with the harmonicmeanp package
harmonicmeanp::p.hmp(pvals, L = k)

### page 33

## creating Figure 3
# pdf("poolr_rev1_oc-figTFdiff", height = 5, width = 7)
plot(mvnlookup$rhos, mvnlookup$m2lp_2 - 4 * mvnlookup$rhos^2, xlab = expression(rho),
  ylab = "Difference", type = "o", pch = 19, cex = 0.4)
# dev.off()

### page 34

## show that the correlations among the test statistics are quite similar to the
## correlations among the interchanging elements for one particular scenario

## correlation (phi coefficient) among the interchanging (binary) elements
p11 <- 0.4
p10 <- 0.2
p01 <- 0.1
p00 <- 0.3
(p11 * p00 - p10 * p01)/sqrt((p11 + p10) * (p01 + p00) * (p11 + p01) * (p10 + p00))

## obtain the correlation among the test statistics by simulation
n <- 100
Z <- replicate(10000, {
  X <- rmultinom(n, 1, c(p11, p10, p01, p00))
  x1 <- X[1, ] + X[2, ]
  x2 <- X[1, ] + X[3, ]
  y <- rnorm(n)
  res1 <- RcppEigen::fastLmPure(cbind(1, x1), y)
  res2 <- RcppEigen::fastLmPure(cbind(1, x2), y)
  c(res1$coefficients[2]/res1$se[2], res2$coefficients[2]/res2$se[2])
})
cor(Z[1, ], Z[2, ])

### page 35

## creating Figure 4 (left panel)
# png("../Figures/poolr-figContour.png", units = "in", width = 6.5, height = 5.8)
res <- MASS::kde2d(Z[1, ], Z[2, ], n = 50)
filled.contour(res, xlab = "Variable 1", ylab = "Variable 2", color = hcl.colors,
  nlevels = 50, xlim = c(-3, 3), ylim = c(-3, 3))
## dev.off()

## creating Figure 4 (right panel)
# png("../Figures/poolr-figQQ.png", units = "in", width = 6.5, height = 5.8)
V <- var(t(Z))
Vi <- solve(V)
z2 <- apply(Z, 2, function(z) t(z) %*% Vi %*% z)
sav <- qqplot(qchisq(ppoints(10000), df = 2), z2, plot.it = FALSE)
par(mar = c(5, 5, 4, 4))
plot(NA, xlim = c(0, max(c(sav$x, sav$y))), ylim = c(0, max(c(sav$x, sav$y))),
  xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(z2, distribution = function(p) qchisq(p, df = 2), lwd = 3, col = "gray")
points(sav$x, sav$y, pch = 19, cex = 0.5)
# dev.off()