# Replication code for the paper
# "Getting Started with Particle Metropolis-Hastings
#  for Inference in Nonlinear Dynamical Models"
#
# by Johan Dahlin and Thomas B. Schön

library("pmhtutorial")
library("Quandl")
library("MASS")
library("mvtnorm")

# Set long = TRUE to obtain the results in the tutorial; set long = FALSE
# for a quicker runtime, which reduces the number of iterations.
long <- TRUE

##############################################################################
# Section 3, Figure 2
# Generating data from a linear Gaussian state space model
##############################################################################

# Set the random seed to replicate results in tutorial
set.seed(10)

phi <- 0.75
sigmav <- 1.00
sigmae <- 0.10
T <- 250
initialState <- 0

data <- generateData(c(phi, sigmav, sigmae), T, initialState)
x <- data$x
y <- data$y
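# For reference, a minimal sketch of the linear Gaussian state space model
# that generateData() from pmhtutorial is assumed to simulate (cf. Section 3
# of the paper): x[t+1] = phi * x[t] + sigmav * v[t] and
# y[t] = x[t] + sigmae * e[t], with v[t] and e[t] independent standard
# Gaussians. generateDataSketch() is a name introduced here for illustration
# only; the packaged generateData() is used throughout this script.
generateDataSketch <- function(theta, noObservations, initialState) {
  x <- rep(initialState, noObservations + 1)
  y <- rep(0, noObservations + 1)  # the first element is unused downstream
  for (t in 2:(noObservations + 1)) {
    x[t] <- theta[1] * x[t - 1] + theta[2] * rnorm(1)
    y[t] <- x[t] + theta[3] * rnorm(1)
  }
  list(x = x, y = y)
}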
# Plot the latent state and observations
grid <- seq(0, T)

cairo_pdf("lgss-data.pdf", height = 3, width = 8)
layout(matrix(1:3, 1, 3, byrow = TRUE))
par(mar = c(4, 5, 0, 0))

plot(grid, x, col = "#D95F02", lwd = 1, type = "l", xlab = "time",
     ylab = expression("latent state " * x[t]), bty = "n", ylim = c(-4, 6))
polygon(c(grid, rev(grid)), c(x, rep(-4, T + 1)), border = NA,
        col = rgb(t(col2rgb("#D95F02")) / 256, alpha = 0.25))

plot(grid[-1], y[-1], col = "#1B9E77", lwd = 1, type = "l", xlab = "time",
     ylab = expression("observation " * y[t]), bty = "n", ylim = c(-4, 6))
polygon(c(grid[-1], rev(grid[-1])), c(y[-1], rep(-4, T)), border = NA,
        col = rgb(t(col2rgb("#1B9E77")) / 256, alpha = 0.25))

foo <- acf(y[-1], plot = FALSE, lag.max = 25)
plot(foo$lag, foo$acf, col = "#66A61E", lwd = 1.5, type = "l", xlab = "time",
     ylab = expression("ACF of " * y[t]), bty = "n", ylim = c(-0.2, 1),
     xlim = c(0, 25))
polygon(c(foo$lag, rev(foo$lag)), c(foo$acf, rep(0.0, length(foo$lag))),
        border = NA, col = rgb(t(col2rgb("#66A61E")) / 256, alpha = 0.25))
abline(h = -1.96 / sqrt(T), lty = "dotted")
abline(h = 1.96 / sqrt(T), lty = "dotted")

dev.off()

##############################################################################
# Section 3.2 (Figure 3 and Table 1)
# State inference in the linear Gaussian state space model
##############################################################################

cairo_pdf("example1-lgss.pdf", height = 10, width = 8)
res_ex1 <- example1_lgss()
print(res_ex1$logBiasMSE)
dev.off()

##############################################################################
# Section 4.2 (Figure 4)
# Parameter inference in the linear Gaussian state space model
##############################################################################

cairo_pdf("example2-lgss.pdf", height = 10, width = 8)
res_ex2 <- example2_lgss(noBurnInIterations = ifelse(long, 1000, 100),
                         noIterations = ifelse(long, 5000, 1000))
dev.off()

##############################################################################
# Section 4.4 (Table 2)
# Parameter inference in the linear Gaussian state space model
##############################################################################

# Set the random seed to replicate results in tutorial
set.seed(10)

initialPhi <- 0.50
noParticles <- 100
noBurnInIterations <- ifelse(long, 1000, 100)
noIterations <- ifelse(long, 5000, 1000)
stepSize <- 0.10

# Define the model
phi <- 0.75
sigmav <- 1.00
sigmae <- 0.10
T <- 250
initialState <- 0

TT <- c(10, 20, 50, 100, 200, 500)
Tmean <- matrix(0, nrow = length(TT), ncol = 1)
Tvar <- matrix(0, nrow = length(TT), ncol = 1)

# Estimate the posterior for different data lengths
for (i in 1:length(TT)) {
  set.seed(10)
  data <- generateData(c(phi, sigmav, sigmae), TT[i], initialState)
  res <- particleMetropolisHastings(data$y, initialPhi, sigmav, sigmae,
                                    noParticles, initialState, noIterations,
                                    stepSize)
  Tmean[i] <- mean(res[noBurnInIterations:noIterations])
  Tvar[i] <- var(res[noBurnInIterations:noIterations])
}

# Table with (T, estimate of posterior mean, estimate of posterior variance)
cbind(TT, Tmean, Tvar)

##############################################################################
# Section 5 (Figure 5)
# Parameter inference in the stochastic volatility model
##############################################################################

cairo_pdf("example3-sv.pdf", height = 10, width = 8)
res_ex3 <- example3_sv(noBurnInIterations = ifelse(long, 2500, 250),
                       noIterations = ifelse(long, 7500, 750))
dev.off()

# Write out the estimates of the posterior mean and standard deviation
res_ex3$thhat
res_ex3$thhatSD

# Write out the estimate of the IACT for each parameter
res_ex3$iact

##############################################################################
# Section 6.3.1 (Figure 6)
# Parameter inference in the stochastic volatility model
# with the proposal tailored to the estimate of the posterior covariance
##############################################################################

# The covariance matrices for the unadapted and adapted proposals
stepSize1 <- diag(c(0.10, 0.01, 0.05)^2)
stepSize2 <- 0.8 * res_ex3$estCov

# Estimate the posterior mean and covariance
estThe <- colMeans(res_ex3$theta)
estCov <- var(res_ex3$theta)

# Create a grid for each pair of parameters
gridth1 <- seq(-1, 1, 0.01)
gridth2 <- seq(0.90, 1.05, 0.01)
gridth3 <- seq(0.01, 0.35, 0.01)

grid1 <- matrix(0, length(gridth1) * length(gridth2), 2)
grid2 <- matrix(0, length(gridth1) * length(gridth3), 2)
grid3 <- matrix(0, length(gridth2) * length(gridth3), 2)

kk <- 1
for (ii in 1:length(gridth1)) {
  for (jj in 1:length(gridth2)) {
    grid1[kk, ] <- c(gridth1[ii], gridth2[jj])
    kk <- kk + 1
  }
}

kk <- 1
for (ii in 1:length(gridth1)) {
  for (jj in 1:length(gridth3)) {
    grid2[kk, ] <- c(gridth1[ii], gridth3[jj])
    kk <- kk + 1
  }
}

kk <- 1
for (ii in 1:length(gridth2)) {
  for (jj in 1:length(gridth3)) {
    grid3[kk, ] <- c(gridth2[ii], gridth3[jj])
    kk <- kk + 1
  }
}
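# As an aside, the same grids can be built without explicit loops using
# expand.grid() from base R. The loops above vary the second parameter
# fastest, so the factor order is reversed and the columns swapped back;
# grid1_alt is a name introduced here for illustration only.
grid1_alt <- unname(as.matrix(expand.grid(gridth2, gridth1)[, 2:1]))
stopifnot(isTRUE(all.equal(grid1, grid1_alt)))  # sanity check: same layout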
# Evaluate the proposal distributions over the grids, centered at the
# estimate of the posterior mean
dgrid1 <- matrix(dmvnorm(grid1, mean = estThe[-3], sigma = stepSize1[-3, -3]),
                 length(gridth1), length(gridth2), byrow = TRUE)
dgrid2 <- matrix(dmvnorm(grid2, mean = estThe[-2], sigma = stepSize1[-2, -2]),
                 length(gridth1), length(gridth3), byrow = TRUE)
dgrid3 <- matrix(dmvnorm(grid3, mean = estThe[-1], sigma = stepSize1[-1, -1]),
                 length(gridth2), length(gridth3), byrow = TRUE)
dgrid4 <- matrix(dmvnorm(grid1, mean = estThe[-3], sigma = stepSize2[-3, -3]),
                 length(gridth1), length(gridth2), byrow = TRUE)
dgrid5 <- matrix(dmvnorm(grid2, mean = estThe[-2], sigma = stepSize2[-2, -2]),
                 length(gridth1), length(gridth3), byrow = TRUE)
dgrid6 <- matrix(dmvnorm(grid3, mean = estThe[-1], sigma = stepSize2[-1, -1]),
                 length(gridth2), length(gridth3), byrow = TRUE)

# Compute the two-dimensional kernel density estimates of the posterior
post_muphi <- kde2d(res_ex3$theta[, 1], res_ex3$theta[, 2], n = 50)
post_musigma <- kde2d(res_ex3$theta[, 1], res_ex3$theta[, 3], n = 50)
post_phisigma <- kde2d(res_ex3$theta[, 2], res_ex3$theta[, 3], n = 50)

cairo_pdf("example4-sv-plotProposals.pdf", height = 6, width = 8)
layout(matrix(1:6, 2, 3, byrow = TRUE))
par(mar = c(4, 5, 0, 0))

# Mu versus phi (old proposal)
contour(post_muphi, xlim = c(-1, 1), ylim = c(0.88, 1.05), labels = NULL,
        bty = "n", col = "#7570B3", lwd = 1.5, labcex = 0.001,
        xlab = expression(mu), ylab = expression(phi))
contour(gridth1, gridth2, dgrid1, labels = NULL, nlevels = 5, add = TRUE,
        col = "grey20", labcex = 0.001, lwd = 2)

# Mu versus sigma_v (old proposal)
contour(post_musigma, xlim = c(-1, 1), ylim = c(0.00, 0.35), labels = NULL,
        bty = "n", col = "#E7298A", lwd = 1.5, labcex = 0.001,
        xlab = expression(mu), ylab = expression(sigma[v]))
contour(gridth1, gridth3, dgrid2, labels = NULL, nlevels = 5, add = TRUE,
        col = "grey20", labcex = 0.001, lwd = 2)

# Phi versus sigma_v (old proposal)
contour(post_phisigma, xlim = c(0.88, 1.05), ylim = c(0.00, 0.35),
        labels = NULL, bty = "n", col = "#66A61E", lwd = 1.5, labcex = 0.001,
        xlab = expression(phi), ylab = expression(sigma[v]))
contour(gridth2, gridth3, dgrid3, labels = NULL, nlevels = 5, add = TRUE,
        col = "grey20", labcex = 0.001, lwd = 2)

# Mu versus phi (new proposal)
contour(post_muphi, xlim = c(-1, 1), ylim = c(0.88, 1.05), labels = NULL,
        bty = "n", col = "#7570B3", lwd = 1.5, labcex = 0.001,
        xlab = expression(mu), ylab = expression(phi))
contour(gridth1, gridth2, dgrid4, labels = NULL, nlevels = 5, add = TRUE,
        col = "grey20", labcex = 0.001, lwd = 2)

# Mu versus sigma_v (new proposal)
contour(post_musigma, xlim = c(-1, 1), ylim = c(0.00, 0.35), labels = NULL,
        bty = "n", col = "#E7298A", lwd = 1.5, labcex = 0.001,
        xlab = expression(mu), ylab = expression(sigma[v]))
contour(gridth1, gridth3, dgrid5, labels = NULL, nlevels = 5, add = TRUE,
        col = "grey20", labcex = 0.001, lwd = 2)

# Phi versus sigma_v (new proposal)
contour(post_phisigma, xlim = c(0.88, 1.05), ylim = c(0.00, 0.35),
        labels = NULL, bty = "n", col = "#66A61E", lwd = 1.5, labcex = 0.001,
        xlab = expression(phi), ylab = expression(sigma[v]))
contour(gridth2, gridth3, dgrid6, labels = NULL, nlevels = 5, add = TRUE,
        col = "grey20", labcex = 0.001, lwd = 2)

dev.off()

##############################################################################
# Section 6.3.1 (Figure 7)
# Parameter inference in the stochastic volatility model
# with the proposal tailored to the estimate of the posterior covariance
##############################################################################

# Display the estimate of the posterior covariance obtained from example3_sv
res_ex3$estCov

cairo_pdf("example4-sv.pdf", height = 10, width = 8)
res_ex4 <- example4_sv(noBurnInIterations = ifelse(long, 2500, 250),
                       noIterations = ifelse(long, 7500, 750))
dev.off()

# Write out the estimates of the posterior mean and standard deviation
res_ex4$thhat
res_ex4$thhatSD

# Write out the estimate of the IACT for each parameter
res_ex4$iact

##############################################################################
# Section 6.3.2
# Parameter inference in the stochastic volatility model
# with the reparametrised model
##############################################################################

cairo_pdf("example5-sv.pdf", height = 10, width = 8)
res_ex5 <- example5_sv(noBurnInIterations = ifelse(long, 2500, 250),
                       noIterations = ifelse(long, 7500, 750))
dev.off()

# Write out the estimates of the posterior mean and standard deviation
res_ex5$thhat
res_ex5$thhatSD

# Write out the estimate of the IACT for each parameter
res_ex5$iact
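# The IACT estimates reported above (and computed manually in Section 6.3.3
# below) are based on the standard estimator 1 + 2 * sum(rho_k) over lags
# k = 1, ..., lagMax, where rho_k is the empirical autocorrelation of the
# Markov chain. A minimal helper illustrating this; estimateIACT is a name
# introduced here and is not part of the pmhtutorial package.
estimateIACT <- function(x, lagMax = 100) {
  rho <- acf(x, plot = FALSE, lag.max = lagMax)$acf[-1]  # drop lag zero
  1 + 2 * sum(rho)
}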
##############################################################################
# Section 6.3.3
# Parameter inference in the stochastic volatility model
# estimating the performance while varying noParticles
##############################################################################

# Set the random seed to replicate results in tutorial
set.seed(10)

# True parameters estimated in example5-sv.R
theta <- c(-0.12, 0.96, 0.17)

# No. particles in the particle filter to try out
noParticles <- c(50, 100, 200, 300, 400, 500)

# No. repetitions of the log-likelihood estimate
noSimulations <- ifelse(long, 1000, 100)

# Load data
d <- Quandl("NASDAQOMX/OMXS30", start_date = "2012-01-02",
            end_date = "2014-01-02", type = "zoo")
y <- as.numeric(100 * diff(log(d$"Index Value")))

# Likelihood estimation using the particle filter
logLikelihoodEstimates <- matrix(0, nrow = length(noParticles),
                                 ncol = noSimulations)
logLikelihoodVariance <- rep(0, length(noParticles))
computationalTimePerSample <- rep(0, length(noParticles))

for (k in 1:length(noParticles)) {
  # Save the current time
  ptm <- proc.time()

  for (i in 1:noSimulations) {
    # Run the particle filter and save the log-likelihood estimate
    res <- particleFilterSVmodel(y, theta, noParticles[k])
    logLikelihoodEstimates[k, i] <- res$logLikelihood
  }

  # Compute the variance of the log-likelihood and the computational time
  # per sample
  logLikelihoodVariance[k] <- var(logLikelihoodEstimates[k, ])
  computationalTimePerSample[k] <- (proc.time() - ptm)[3] / noSimulations

  # Print progress to screen
  print(paste0("Simulation: ", k, " of ", length(noParticles), " completed."))
  print(paste0("No. particles: ", noParticles[k], " requires ",
               computationalTimePerSample[k],
               " seconds for computing one sample."))
}

# The initial guess of the parameters (use the estimate of the posterior
# mean to accelerate the algorithm, i.e., so that fewer PMH iterations are
# needed)
initialTheta <- theta

noBurnInIterations <- ifelse(long, 2000, 250)
noIterations <- ifelse(long, 17000, 750)

resTheta <- array(0, dim = c(length(noParticles),
                             noIterations - noBurnInIterations + 1, 3))
computationalTimePerIteration <- rep(0, length(noParticles))
acceptProbability <- rep(0, length(noParticles))

for (k in 1:length(noParticles)) {
  # Save the current time
  ptm <- proc.time()

  # Run the PMH algorithm
  res <- particleMetropolisHastingsSVmodel(y, initialTheta, noParticles[k],
                                           noIterations,
                                           stepSize = diag(c(0.10, 0.01, 0.05)^2))

  # Save the parameter trace
  resTheta[k, , ] <- res$theta[noBurnInIterations:noIterations, ]

  # Compute the acceptance probability and the computational time
  # per iteration
  computationalTimePerIteration[k] <- (proc.time() - ptm)[3] / noIterations
  acceptProbability[k] <-
    mean(res$proposedThetaAccepted[noBurnInIterations:noIterations])

  # Print progress to screen
  print(paste0("Simulation: ", k, " of ", length(noParticles), " completed."))
}

# Post-processing (computing the IACT and related efficiency measures)
resThetaIACT <- matrix(0, nrow = length(noParticles), ncol = 3)
resThetaIACTperSecond <- matrix(0, nrow = length(noParticles), ncol = 3)

for (k in 1:length(noParticles)) {
  acf_mu <- acf(resTheta[k, , 1], plot = FALSE, lag.max = 100)
  acf_phi <- acf(resTheta[k, , 2], plot = FALSE, lag.max = 100)
  acf_sigmav <- acf(resTheta[k, , 3], plot = FALSE, lag.max = 100)

  # Exclude the lag-zero autocorrelation (which equals one by definition)
  # from the doubled sum
  resThetaIACT[k, ] <- 1 + 2 * c(sum(acf_mu$acf[-1]), sum(acf_phi$acf[-1]),
                                 sum(acf_sigmav$acf[-1]))
  resThetaIACTperSecond[k, ] <-
    resThetaIACT[k, ] / computationalTimePerIteration[k]
}
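# A common rule of thumb in the PMH literature (e.g., Doucet, Pitt,
# Deligiannidis and Kohn, 2015) is to select noParticles such that the
# standard deviation of the log-likelihood estimate is around 1. As an
# illustration only, the candidate closest to this target given the
# estimates above is:
noParticles[which.min(abs(sqrt(logLikelihoodVariance) - 1))]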
# Summary table: one column per choice of noParticles
table <- rbind(noParticles,
               sqrt(logLikelihoodVariance),
               100 * acceptProbability,
               apply(resThetaIACT, 1, max),
               computationalTimePerIteration,
               apply(resThetaIACT, 1, max) * computationalTimePerIteration)
table <- round(table, 2)
rownames(table) <- c("noParticles",
                     "StDev of log-likelihood est.",
                     "Acceptance probability (%)",
                     "Maximum IACT",
                     "Time per PMH iteration (s)",
                     "Time per sample (s)")
print(table)