##########################################
#### R code to run the examples in
#### Spatio-Temporal Areal Unit Modeling in R with Conditional
#### Autoregressive Priors Using the CARBayesST Package
##########################################
set.seed(1234)


################################################
#### Section 4.1 Generating and simulating data
################################################
#### Create the regular spatial grid and time domain
n.space <- 20
N <- 20
x.easting <- 1:n.space
x.northing <- 1:n.space
Grid <- expand.grid(x.easting, x.northing)
K <- nrow(Grid)
N.all <- N * K

#### Create the binary spatial adjacency (rook) matrix W
distance <- as.matrix(dist(Grid))
W <- array(0, c(K, K))
W[distance == 1] <- 1

#### Create the binary temporal adjacency matrix D
distance <- as.matrix(dist(1:N))
D <- array(0, c(N, N))
D[distance == 1] <- 1

#### Create the spatial precision matrix based on the CAR prior
#### proposed by Leroux et al. (2000)
Q.W <- 0.8 * (diag(apply(W, 2, sum)) - W) + 0.2 * diag(rep(1, K))

#### Create the spatial covariance matrix and simulate a set of
#### spatial random effects
Q.W.inv <- solve(Q.W)
library("mvtnorm")
phi <- rmvnorm(n = 1, mean = rep(0, K), sigma = (0.01 * Q.W.inv))
phi.long <- rep(phi, N)

#### Create the temporal covariance matrix and simulate a set of
#### temporal random effects
Q.D <- 0.8 * (diag(apply(D, 2, sum)) - D) + 0.2 * diag(rep(1, N))
Q.D.inv <- solve(Q.D)
delta <- rmvnorm(n = 1, mean = rep(0, N), sigma = (0.01 * Q.D.inv))
delta.long <- kronecker(delta, rep(1, K))

#### Generate a single covariate and a set of interactions
x <- rnorm(n = N.all, mean = 0, sd = 1)
gamma <- rnorm(n = N.all, mean = 0, sd = 0.1)

#### Fix the remaining parameters in the simulation and generate a data set
beta1 <- 0
beta2 <- 0.1
n <- rep(50, N.all)
LP <- beta1 + beta2 * x + phi.long + delta.long + gamma
theta.true <- exp(LP) / (1 + exp(LP))
Y <- rbinom(n = N.all, size = n, prob = theta.true)

#### Fit the ST.CARanova() model to the simulated data
library("CARBayesST")
model <- ST.CARanova(formula = Y ~ x, family = "binomial", trials = n,
                     W = W, burnin = 20000, n.sample = 120000, thin = 10)

#### Plot the posterior distributions of the regression parameters
colnames(model$samples$beta) <- c("beta1", "beta2")
plot(model$samples$beta)

#### View a model summary
model
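
#### Optional check (not part of the original script): CARBayesST stores its
#### posterior samples as coda mcmc objects, so standard convergence
#### diagnostics can be applied to them directly. A minimal sketch using the
#### Geweke diagnostic and the effective sample size for the regression
#### parameters:
library("coda")
geweke.diag(model$samples$beta)
effectiveSize(model$samples$beta)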


###############################################
#### Section 4.2 Small simulation study
###############################################
#### This section repeats the simulation set-up from above, generates 100
#### data sets, and computes the bias of the point estimates and the coverage
#### probabilities of the 95% uncertainty intervals.

#### Specify fixed parameters
tau2 <- 0.01
rho <- 0.8

#### Matrices to save results
n.sim <- 100
results.beta <- array(NA, c(n.sim, 4))
results.rho <- array(NA, c(n.sim, 4))
results.tau2 <- array(NA, c(n.sim, 6))
results.re <- array(NA, c(n.sim, 6))
results.fitted <- array(NA, c(n.sim, 2))

#######################
#### Run the simulation
#######################
for (i in 1:n.sim) {
    ##################
    #### Generate data
    ##################
    #### Random effects
    phi <- rmvnorm(n = 1, mean = rep(0, K), sigma = (tau2 * Q.W.inv))
    phi <- phi - mean(phi)
    phi.long <- rep(phi, N)
    delta <- rmvnorm(n = 1, mean = rep(0, N), sigma = (tau2 * Q.D.inv))
    delta <- delta - mean(delta)
    delta.long <- kronecker(delta, rep(1, K))
    gamma <- rnorm(n = N.all, mean = 0, sd = sqrt(tau2))
    gamma <- gamma - mean(gamma)

    #### Linear predictor and data
    x <- rnorm(n = N.all, mean = 0, sd = 1)
    LP <- beta1 + beta2 * x + phi.long + delta.long + gamma
    theta.true <- exp(LP) / (1 + exp(LP))
    Y <- rbinom(n = N.all, size = n, prob = theta.true)

    #### Fit the model
    model <- ST.CARanova(formula = Y ~ x, family = "binomial", trials = n,
                         W = W, burnin = 20000, n.sample = 120000, thin = 10,
                         verbose = FALSE)

    #### Store the results
    results.beta[i, 1:2] <- as.numeric(apply(model$samples$beta, 2, median))
    results.beta[i, 3] <- as.numeric(quantile(model$samples$beta[, 1], 0.025) < beta1 &
                                     quantile(model$samples$beta[, 1], 0.975) > beta1)
    results.beta[i, 4] <- as.numeric(quantile(model$samples$beta[, 2], 0.025) < beta2 &
                                     quantile(model$samples$beta[, 2], 0.975) > beta2)

    results.rho[i, 1:2] <- as.numeric(apply(model$samples$rho, 2, median))
    results.rho[i, 3] <- as.numeric(quantile(model$samples$rho[, 1], 0.025) < rho &
                                    quantile(model$samples$rho[, 1], 0.975) > rho)
    results.rho[i, 4] <- as.numeric(quantile(model$samples$rho[, 2], 0.025) < rho &
                                    quantile(model$samples$rho[, 2], 0.975) > rho)

    results.tau2[i, 1:3] <- as.numeric(apply(model$samples$tau2, 2, median))
    results.tau2[i, 4] <- as.numeric(quantile(model$samples$tau2[, 1], 0.025) < tau2 &
                                     quantile(model$samples$tau2[, 1], 0.975) > tau2)
    results.tau2[i, 5] <- as.numeric(quantile(model$samples$tau2[, 2], 0.025) < tau2 &
                                     quantile(model$samples$tau2[, 2], 0.975) > tau2)
    results.tau2[i, 6] <- as.numeric(quantile(model$samples$tau2[, 3], 0.025) < tau2 &
                                     quantile(model$samples$tau2[, 3], 0.975) > tau2)

    results.re[i, 1] <- mean(apply(model$samples$phi, 2, median) - phi)
    quants <- t(apply(model$samples$phi, 2, quantile, c(0.025, 0.975)))
    results.re[i, 4] <- sum(as.numeric(quants[, 1] < phi & quants[, 2] > phi))
    results.re[i, 2] <- mean(apply(model$samples$delta, 2, median) - delta)
    quants <- t(apply(model$samples$delta, 2, quantile, c(0.025, 0.975)))
    results.re[i, 5] <- sum(as.numeric(quants[, 1] < delta & quants[, 2] > delta))
    results.re[i, 3] <- mean(apply(model$samples$gamma, 2, median) - gamma)
    quants <- t(apply(model$samples$gamma, 2, quantile, c(0.025, 0.975)))
    results.re[i, 6] <- sum(as.numeric(quants[, 1] < gamma & quants[, 2] > gamma))

    results.fitted[i, 1] <- mean(model$fitted.values - theta.true * n)
    quants <- t(apply(model$samples$fitted, 2, quantile, c(0.025, 0.975)))
    results.fitted[i, 2] <- sum(as.numeric(quants[, 1] < theta.true * n &
                                           quants[, 2] > theta.true * n))

    #### Remove the model object
    rm(model)
}

##########################
#### Summarise the results
##########################
#### beta
mean(results.beta[, 1]) - beta1
mean(results.beta[, 2]) - beta2
mean(results.beta[, 3])
mean(results.beta[, 4])

#### rho
mean(results.rho[, 1]) - rho
mean(results.rho[, 2]) - rho
mean(results.rho[, 3])
mean(results.rho[, 4])

#### tau2
mean(results.tau2[, 1]) - tau2
mean(results.tau2[, 2]) - tau2
mean(results.tau2[, 3]) - tau2
mean(results.tau2[, 4])
mean(results.tau2[, 5])
mean(results.tau2[, 6])

#### Random effects
mean(results.re[, 1])
mean(results.re[, 2])
mean(results.re[, 3])
mean(results.re[, 4]) / K
mean(results.re[, 5]) / N
mean(results.re[, 6]) / N.all

#### Fitted values
mean(results.fitted[, 1])
mean(results.fitted[, 2]) / N.all
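
#### Optional extra (not part of the original script): with n.sim = 100
#### replicates the empirical coverage probabilities carry Monte Carlo error.
#### A minimal sketch of the binomial standard error attached to the estimated
#### coverage of the beta2 interval:
p.hat <- mean(results.beta[, 4])
mc.se <- sqrt(p.hat * (1 - p.hat) / n.sim)
c(coverage = p.hat, MC.se = mc.se)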


#####################################################
#### Section 4.3 Timing based on different data sizes
#####################################################
#### First create a function to simulate data of various sizes
dat.sim <- function(n.space, N, rho, tau2, beta1, beta2, trials) {
    #### Create the regular spatial grid and time domain
    x.easting <- 1:n.space
    x.northing <- 1:n.space
    Grid <- expand.grid(x.easting, x.northing)
    K <- nrow(Grid)
    N.all <- N * K

    #### Create the binary spatial adjacency (rook) matrix W
    distance <- as.matrix(dist(Grid))
    W <- array(0, c(K, K))
    W[distance == 1] <- 1

    #### Create the binary temporal adjacency matrix D
    distance <- as.matrix(dist(1:N))
    D <- array(0, c(N, N))
    D[distance == 1] <- 1

    #### Create the spatial precision matrix based on the CAR prior proposed
    #### by Leroux et al. (2000) and simulate the spatial random effects
    Q.W <- rho * (diag(apply(W, 2, sum)) - W) + (1 - rho) * diag(rep(1, K))
    Q.W.inv <- solve(Q.W)
    phi <- rmvnorm(n = 1, mean = rep(0, K), sigma = (tau2 * Q.W.inv))
    phi.long <- rep(phi, N)

    #### Create the temporal covariance matrix and simulate a set of
    #### temporal random effects
    Q.D <- rho * (diag(apply(D, 2, sum)) - D) + (1 - rho) * diag(rep(1, N))
    Q.D.inv <- solve(Q.D)
    delta <- rmvnorm(n = 1, mean = rep(0, N), sigma = (tau2 * Q.D.inv))
    delta.long <- kronecker(delta, rep(1, K))

    #### Generate a single covariate and a set of interactions
    x <- rnorm(n = N.all, mean = 0, sd = 1)
    gamma <- rnorm(n = N.all, mean = 0, sd = 0.1)

    #### Fix the remaining parameters in the simulation and generate a data set
    n <- rep(trials, N.all)
    LP <- beta1 + beta2 * x + phi.long + delta.long + gamma
    theta.true <- exp(LP) / (1 + exp(LP))
    Y <- rbinom(n = N.all, size = n, prob = theta.true)

    #### Return the results
    results <- list(Y = Y, n = n, x = x, W = W)
    return(results)
}

#### 10 by 10 grid and 10 time periods
dat <- dat.sim(n.space = 10, N = 10, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 10 by 10 grid and 20 time periods
dat <- dat.sim(n.space = 10, N = 20, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 20 by 20 grid and 10 time periods
dat <- dat.sim(n.space = 20, N = 10, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 20 by 20 grid and 20 time periods
dat <- dat.sim(n.space = 20, N = 20, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 30 by 30 grid and 20 time periods
dat <- dat.sim(n.space = 30, N = 20, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 30 by 30 grid and 30 time periods
dat <- dat.sim(n.space = 30, N = 30, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 40 by 40 grid and 30 time periods
dat <- dat.sim(n.space = 40, N = 30, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 40 by 40 grid and 40 time periods
dat <- dat.sim(n.space = 40, N = 40, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)

#### 50 by 50 grid and 40 time periods
dat <- dat.sim(n.space = 50, N = 40, rho = 0.8, tau2 = 0.01, beta1 = 0,
               beta2 = 1, trials = 50)
system.time(
    model <- ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                         trials = dat$n, W = dat$W, burnin = 20000,
                         n.sample = 120000, thin = 10, verbose = FALSE)
)
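
#### Optional refactor (not part of the original script): the repeated timing
#### calls above can equivalently be wrapped in a loop over the grid and time
#### configurations, storing the elapsed times for later tabulation or
#### plotting. A minimal sketch:
sizes <- data.frame(n.space = c(10, 10, 20, 20, 30, 30, 40, 40, 50),
                    N = c(10, 20, 10, 20, 20, 30, 30, 40, 40))
run.times <- rep(NA, nrow(sizes))
for (j in 1:nrow(sizes)) {
    dat <- dat.sim(n.space = sizes$n.space[j], N = sizes$N[j], rho = 0.8,
                   tau2 = 0.01, beta1 = 0, beta2 = 1, trials = 50)
    run.times[j] <- system.time(
        ST.CARanova(formula = Y ~ x, data = dat, family = "binomial",
                    trials = dat$n, W = dat$W, burnin = 20000,
                    n.sample = 120000, thin = 10, verbose = FALSE)
    )["elapsed"]
}
cbind(sizes, seconds = run.times)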


##############################################################
#### Section 5 of the paper - air pollution and health example
##############################################################
set.seed(1234)

#### Load the data
library("CARBayesdata")
library("sp")
data("GGHB.IG", package = "CARBayesdata")
data("pollutionhealthdata", package = "CARBayesdata")

#### View the structure of the data
head(pollutionhealthdata)

#### Pairs plot of the data
pollutionhealthdata$SMR <- with(pollutionhealthdata, observed / expected)
pollutionhealthdata$logSMR <- log(pollutionhealthdata$SMR)
par(pty = "s", cex.axis = 1.5)
pairs(pollutionhealthdata[, c(9, 5:7)], pch = 19, cex = 0.5,
      lower.panel = NULL, panel = panel.smooth,
      labels = c("ln(SMR)", "PM10", "JSA", "Price (*100,000)"))

#### Aggregate the data to produce a map and add to the GGHB.IG object
library("dplyr")
SMR.av <- summarise(group_by(pollutionhealthdata, IG), SMR.mean = mean(SMR))
GGHB.IG@data$SMR <- SMR.av$SMR.mean

#### Plot the map of the average SMR
l1 <- list("SpatialPolygonsRescale", layout.north.arrow(),
           offset = c(220000, 647000), scale = 4000)
l2 <- list("SpatialPolygonsRescale", layout.scale.bar(),
           offset = c(225000, 647000), scale = 10000,
           fill = c("transparent", "black"))
l3 <- list("sp.text", c(225000, 649000), "0")
l4 <- list("sp.text", c(230000, 649000), "5000 m")
breakpoints <- seq(min(SMR.av$SMR.mean) - 0.1, max(SMR.av$SMR.mean) + 0.1,
                   length.out = 11)
spplot(GGHB.IG, "SMR", sp.layout = list(l1, l2, l3, l4),
       xlab = "Easting", ylab = "Northing", scales = list(draw = TRUE),
       at = breakpoints,
       col.regions = terrain.colors(n = length(breakpoints) - 1),
       par.settings = list(fontsize = list(text = 20)))

#### Create the spatial information
library("spdep")
W.nb <- poly2nb(GGHB.IG, row.names = SMR.av$IG)
W.list <- nb2listw(W.nb, style = "B")
W <- nb2mat(W.nb, style = "B")

#### Run a simple Poisson model
formula <- observed ~ offset(log(expected)) + jsa + price + pm10
model1 <- glm(formula = formula, family = "quasipoisson",
              data = pollutionhealthdata)
resid.glm <- residuals(model1)
coef(summary(model1))
summary(model1)$dispersion

#### Conduct a Moran's I test on the residuals from the first year
moran.mc(x = resid.glm[1:271], listw = W.list, nsim = 10000)

#### Run the CARBayesST model
library("CARBayesST")
model2 <- ST.CARar(formula = formula, family = "poisson",
                   data = pollutionhealthdata, W = W, burnin = 20000,
                   n.sample = 220000, thin = 10)

#### Print the summary of the model
model2

#### Show how the samples are stored
summary(model2$samples)

#### Plot the relative risk distributions
par(pty = "m")
colnames(model2$samples$beta) <- c("Intercept", "JSA", "Price", "PM10")
plot(exp(model2$samples$beta[, -1]))

#### Summarise the relative risk distributions
library("CARBayes")
parameter.summary <- summarise.samples(exp(model2$samples$beta[, -1]),
                                       quantiles = c(0.5, 0.025, 0.975))
round(parameter.summary$quantiles, 3)
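
#### Optional summary (not part of the original script): the posterior samples
#### can also be used to compute exceedance probabilities, for example the
#### posterior probability that the relative risk for a one-unit increase in
#### PM10 exceeds 1. A minimal sketch using the fourth (PM10) column of the
#### beta samples labelled above:
mean(exp(model2$samples$beta[, 4]) > 1)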


####################################################
#### Section 6 of the paper - housing market example
####################################################
set.seed(1234)

#### Load the data
library("CARBayesdata")
library("sp")
data("GGHB.IG", package = "CARBayesdata")
data("salesdata", package = "CARBayesdata")

#### Draw boxplots of the data
salesdata$salesprop <- with(salesdata, sales / stock)
boxplot(salesprop ~ year, data = salesdata, range = 0, xlab = "Year",
        ylab = "Property sales rate", col = "darkseagreen", border = "navy")

#### Draw a map of the average spatial pattern
library("dplyr")
salesprop.av <- summarise(group_by(salesdata, IG),
                          salesprop.mean = mean(salesprop))
GGHB.IG@data$sales <- salesprop.av$salesprop.mean
l1 <- list("SpatialPolygonsRescale", layout.north.arrow(),
           offset = c(220000, 647000), scale = 4000)
l2 <- list("SpatialPolygonsRescale", layout.scale.bar(),
           offset = c(225000, 647000), scale = 10000,
           fill = c("transparent", "black"))
l3 <- list("sp.text", c(225000, 649000), "0")
l4 <- list("sp.text", c(230000, 649000), "5000 m")
breakpoints <- c(0, quantile(salesprop.av$salesprop.mean,
                             seq(0.1, 0.9, 0.1)), 0.1)
spplot(GGHB.IG, "sales", sp.layout = list(l1, l2, l3, l4),
       xlab = "Easting", ylab = "Northing", scales = list(draw = TRUE),
       at = breakpoints,
       col.regions = terrain.colors(n = length(breakpoints) - 1),
       par.settings = list(fontsize = list(text = 20)))

#### Create the neighbourhood matrix
library("spdep")
W.nb <- poly2nb(GGHB.IG, row.names = salesprop.av$IG)
W <- nb2mat(W.nb, style = "B")

#### Fit the model
library("CARBayesST")
formula <- sales ~ offset(log(stock))
model1 <- ST.CARsepspatial(formula = formula, family = "poisson",
                           data = salesdata, W = W, burnin = 20000,
                           n.sample = 220000, thin = 10)

#### Calculate the temporal trends
trend.mean <- array(NA, c(11, 3))
trend.sd <- array(NA, c(11, 3))
for (i in 1:11) {
    posterior <- exp(model1$samples$phi[, ((i - 1) * 271 + 1):(i * 271)] +
                     matrix(rep(model1$samples$beta + model1$samples$delta[, i],
                                271), ncol = 271, byrow = FALSE))
    trend.mean[i, ] <- quantile(apply(posterior, 1, mean),
                                c(0.5, 0.025, 0.975))
    trend.sd[i, ] <- quantile(apply(posterior, 1, sd), c(0.5, 0.025, 0.975))
}

#### Plot the temporal trends
par(mfrow = c(2, 1))
plot(jitter(salesdata$year), salesdata$salesprop, pch = 19, cex = 0.2,
     col = "blue", xlab = "Year", main = "(a)", ylab = "Average sales rate",
     ylim = c(0, 0.11), cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)
lines(2003:2013, trend.mean[, 1], col = "red", type = "l")
lines(2003:2013, trend.mean[, 2])
lines(2003:2013, trend.mean[, 3])
plot(2003:2013, trend.sd[, 1], col = "red", type = "l", xlab = "Year",
     main = "(b)", ylab = "Spatial standard deviation", ylim = c(0, 0.06),
     cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)
lines(2003:2013, trend.sd[, 2])
lines(2003:2013, trend.sd[, 3])

#### Map the fitted proportions for the odd numbered years
rate.est <- matrix(model1$fitted.values / salesdata$stock, nrow = nrow(W),
                   byrow = FALSE)
rate.est <- as.data.frame(rate.est)
colnames(rate.est) <- c("r2003", "r2004", "r2005", "r2006", "r2007", "r2008",
                        "r2009", "r2010", "r2011", "r2012", "r2013")
GGHB.IG@data <- data.frame(GGHB.IG@data, rate.est)
spplot(GGHB.IG, c("r2011", "r2013", "r2007", "r2009", "r2003", "r2005"),
       names.attr = c("Rate 2011", "Rate 2013", "Rate 2007", "Rate 2009",
                      "Rate 2003", "Rate 2005"),
       sp.layout = list(l1, l2, l3, l4), xlab = "Easting", ylab = "Northing",
       scales = list(draw = TRUE), at = breakpoints,
       col.regions = terrain.colors(n = length(breakpoints) - 1),
       par.settings = list(fontsize = list(text = 13)))
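
#### Optional extra (not part of the original script): posterior uncertainty
#### in the fitted sales rates can be summarised from the stored samples. A
#### minimal sketch of 95% credible intervals for the 2013 rates (the final
#### block of 271 areas), assuming the fitted-value samples are stored in
#### model1$samples$fitted with space nested within time, as used for
#### model1$samples$phi above:
index.2013 <- (10 * 271 + 1):(11 * 271)
rate.samples <- sweep(model1$samples$fitted[, index.2013], 2,
                      salesdata$stock[index.2013], "/")
rate.ci <- t(apply(rate.samples, 2, quantile, c(0.025, 0.975)))
head(rate.ci)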