## R code from the JSS paper:
## A Major Update of the bayesTFR R Package: 
## Probabilistic Estimation and Projection of the Annual Total Fertility Rate 
## Accounting for Past Uncertainty
##
## Peiran Liu, Hana Sevcikova, Adrian E. Raftery
##
## bayesTFR package version 7.3-0

library("bayesTFR")

## Section 4.1
data("rawTFR", package = "bayesTFR")
head(rawTFR)

## Section 4.2
## Two-step estimation
set.seed(123) 

annual <- TRUE
nr.chains <- 3
total.iter <- 5000 
thin <- 1
simu.dir <- file.path(getwd(), "annual")
m2 <- run.tfr.mcmc(output.dir = simu.dir, nr.chains = nr.chains, iter = total.iter,
  thin = thin, annual = annual)
m3 <- run.tfr3.mcmc(sim.dir = simu.dir, nr.chains = nr.chains, iter = total.iter,
  thin = thin)

## For further processing we set the RNG seed for reproducibility
set.seed(123)

## One-step estimation
ar.phase2 <- TRUE
simu.dir.unc <- file.path(getwd(), "annual_unc")
m <- run.tfr.mcmc(output.dir = simu.dir.unc, nr.chains = nr.chains, iter = total.iter, annual = annual,
  thin = thin, uncertainty = TRUE, ar.phase2 = ar.phase2, iso.unbiased = c(124, 840))

## Continuing an existing simulation
m <- continue.tfr.mcmc(output.dir = simu.dir.unc, iter = 100) 

## Section 4.3
## Generating projections
pred <- tfr.predict(sim.dir = simu.dir.unc, end.year = 2100, 
  burnin = 2100, nr.traj = 1000, uncertainty = TRUE) 

## Section 4.4
m <- get.tfr.mcmc(simu.dir.unc)
m3 <- get.tfr3.mcmc(simu.dir.unc)
pred <- get.tfr.prediction(simu.dir.unc)

## Summary functions
thin <- 3 
burnin <- 2100

summary(m, thin = thin, burnin = burnin)
summary(m, par.names = c("rho_phase2", "sigma0"), thin = thin, burnin = burnin)

tfr.parameter.names(meta = m$meta)
tfr3.parameter.names()

summary(m, country = "Nigeria", par.names.cs = "tfr", thin = thin, burnin = burnin)
summary(m, country = "Nigeria", par.names.cs = c("tfr_1", "tfr_30", "tfr_70"),
  thin = thin, burnin = burnin)

## Exploring TFR estimation
nigeria_obj <- get.tfr.estimation(country = "NG", sim.dir = simu.dir.unc, burnin = burnin, 
  thin = thin, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))
dim(nigeria_obj$tfr_table)
head(nigeria_obj$tfr_quantile)

## Figure 1
plot <- tfr.estimation.plot(country = 566, sim.dir = simu.dir.unc, 
  burnin = burnin, thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot)

plot.usa <- tfr.estimation.plot(country = 840, sim.dir = simu.dir.unc, 
  burnin = burnin, thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot.usa)

## Exploring bias and standard deviation of observations (Table 2-3)
bias_sd <- tfr.bias.sd(sim.dir = simu.dir.unc, country = 566)
summary(bias_sd$model_bias)
head(bias_sd$table)

## Exploring TFR prediction (Figure 2)
tfr.trajectories.plot(pred, country = "Burkina Faso", nr.traj = 20, pi = c(80, 95),
  uncertainty = TRUE)
tfr.trajectories.plot(pred, country = "Burkina Faso", nr.traj = 20, pi = c(80, 95),
  uncertainty = FALSE)

tfr.trajectories.table(pred, country = "Burkina Faso")
summary(pred, country = "Burkina Faso")

## Exploring double logistic function (Figure 3)
DLcurve.plot(country = "BFA", mcmc.list = m, burnin = burnin, pi = c(95, 80), nr.curves = 100)

DLcurve.plot(country = "THA", mcmc.list = m, burnin = burnin, pi = c(95, 80), nr.curves = 100)

## MCMC traces, density and diagnosis (Figure 4 and 5)
tfr.partraces.plot(m, par.names = "rho_phase2", nr.points = 200)
tfr.partraces.cs.plot(m, country = "Nigeria", par.names = "tfr_36", nr.points = 200)
tfr.pardensity.plot(m, par.names = "rho_phase2", burnin = burnin)
tfr.pardensity.cs.plot(m, country = "Nigeria", par.names = "tfr_36", burnin = burnin)

tfr.diagnose(simu.dir.unc, thin = thin, burnin = burnin, express = TRUE) # cannot run for a toy simulation

## Section 4.5
countries <- c(566, 840)
myrawTFR <- subset(rawTFR, country_code %in% countries)
myrawTFR <- subset(myrawTFR, !(country_code == 566 & method == "Indirect" & source == "DHS-NS"))
write.csv(myrawTFR, file = "raw_tfr_user.csv", row.names = FALSE)

simu.dir.extra <- paste0(simu.dir.unc, "_extra")
dir.create(simu.dir.extra)
file.copy(list.files(simu.dir.unc, full.names = TRUE), simu.dir.extra, recursive = TRUE)

run.tfr.mcmc.extra(sim.dir = simu.dir.extra, countries = countries, iter = total.iter, burnin = burnin,
  uncertainty = TRUE, my.tfr.raw.file = "raw_tfr_user.csv", covariates = c("source", "method"))

## to create similar plots in Figure 6 (Figure 6)
plot1 <- tfr.estimation.plot(country = 566, sim.dir = simu.dir.extra, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
plot2 <- tfr.estimation.plot(country = 840, sim.dir = simu.dir.extra, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot1)
print(plot2)

## Section 5.2
## Recommendations for a real simulation
set.seed(123)
sim.dir.base <- file.path(getwd(), "baseline")

annual <- FALSE
nr.chains <- 3
total.iter <- 62000
thin <- 10
burnin <- 2000
m2.base <- run.tfr.mcmc(output.dir = sim.dir.base, nr.chains = nr.chains, iter = total.iter,
  thin = thin, annual = annual)
m3.base <- run.tfr3.mcmc(sim.dir = sim.dir.base, nr.chains = nr.chains, iter = total.iter,
  thin = thin)
pred.base <- tfr.predict(sim.dir = sim.dir.base, end.year = 2100, burnin = burnin,
  nr.traj = 1000)

## Figure 7a/8a
tfr.trajectories.plot(pred.base, country = "Switzerland", nr.traj = 20, pi = c(80, 95),
  half.child.variant = FALSE)

## Figure 9a
tfr.trajectories.plot(pred.base, country = "Nigeria", nr.traj = 20, pi = c(80, 95),
  half.child.variant = FALSE)


## basic update
sim.dir.update <- file.path(getwd(), "update")
set.seed(123)

annual <- TRUE
nr.chains <- 3
total.iter <- 62000
thin <- 10
burnin <- 2000
m.update <- run.tfr.mcmc(output.dir = sim.dir.update, nr.chains = nr.chains, sigma0.min = 0.001,
  iter = total.iter, annual = annual, thin = thin, uncertainty = TRUE, ar.phase2 = FALSE, parallel = FALSE)
set.seed(123)

pred.update <- tfr.predict(sim.dir = sim.dir.update, end.year = 2100, burnin = burnin,
  nr.traj = 1000, uncertainty = TRUE)

## Figure 7b
tfr.trajectories.plot(pred.update, country = "Switzerland", nr.traj = 20, pi = c(80, 95),
  uncertainty = TRUE, half.child.variant = FALSE)

## Figure 9b/11a
tfr.trajectories.plot(pred.update, country = "Nigeria", nr.traj = 20, pi = c(80, 95),
  uncertainty = TRUE, half.child.variant = FALSE)

## Figure 10
get.phase2.residual <- function(mcmc.list, index)
{
  start_c <- max(mcmc.list$meta$tau_c[index], 1)
  end_c <- mcmc.list$meta$lambda_c[index]
  country_obj <- get.country.object(country = index, meta = mcmc.list$meta, index = TRUE)

  tfr_estimates <- get.tfr.estimation(mcmc.list, country = country_obj$code, burnin = 2000, thin = 10)$tfr_table
  tfr_phase2 <- tfr_estimates[,start_c:(end_c-1)]
  tfr_phase2 <- apply(tfr_phase2, 2, median)

  # tfr_phase2 <- matrix(rep(tfr_phase2, 3000), nrow = 3000, byrow = T)
  tfr_phase2_diff <- tfr_estimates[,start_c:(end_c-1)] - tfr_estimates[,(start_c + 1):end_c]
  tfr_phase2_diff <- apply(tfr_phase2_diff, 2, median)
  # tfr_phase2_diff <- matrix(rep(tfr_phase2_diff, 3000), nrow = 3000, byrow = T)

  tfr_parameters <- get.tfr.parameter.traces.cs(mcmc.list$mcmc.list, country_obj, burnin = 2000, thin = 10,
    par.names = c("Triangle_c1", "Triangle_c2", "Triangle_c3", "Triangle_c4", "d_c"))
  tfr_parameters <- apply(tfr_parameters, 2, median)
  dl <- -tfr_parameters[2]/(1 + exp(-2 * log(9)/tfr_parameters[3] * (tfr_phase2 -
    0.5 * tfr_parameters[3] - tfr_parameters[4] - tfr_parameters[5] - tfr_parameters[1]))) +
    tfr_parameters[2]/(1 + exp(- 2 * log(9)/tfr_parameters[5] *
    (tfr_phase2 - tfr_parameters[1] - 0.5 * tfr_parameters[5])))

  # res_median <-apply(tfr_phase2_diff - dl, 2, median)
  res_median <- tfr_phase2_diff - dl
  return(res_median)
}
res_medians <- list()
acfs <- c()
for (i in 1:201) {
  res_medians[[i]] <- get.phase2.residual(m.update, index = i)
  acfs[i] <- acf(res_medians[[i]], plot = F)$acf[2, 1, 1]
}

hist(acfs, nclass = 20)

## Add AR1 for Phase II update
sim.dir.ar_phase2 <- file.path(getwd(), "ar_phase2")
set.seed(123)
annual <- TRUE
nr.chains <- 3
total.iter <- 62000
thin <- 10
burnin <- 2000
m.ar_phase2 <- run.tfr.mcmc(output.dir = sim.dir.ar_phase2, nr.chains = nr.chains, sigma0.min = 0.001,
  iter = total.iter, annual = annual, thin = thin, uncertainty = TRUE,
  ar.phase2 = TRUE, parallel = FALSE)

set.seed(123)
pred.ar_phase2 <- tfr.predict(sim.dir = sim.dir.ar_phase2, end.year = 2100, burnin = burnin,
  nr.traj = 1000, uncertainty = TRUE)

## Figure 11b/12a
tfr.trajectories.plot(pred.ar_phase2, country = "Nigeria", nr.traj = 20, pi = c(80, 95),
  uncertainty = TRUE, half.child.variant = FALSE)

## Figure 12c
plot_12c <- tfr.estimation.plot(country = 566, sim.dir = sim.dir.ar_phase2, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot_12c)

## Gold
sim.dir.gold <- file.path(getwd(), "sim_gold")
set.seed(123)
annual <- TRUE
nr.chains <- 3
total.iter <- 62000
thin <- 10
burnin <- 2000
iso.goodvr <- c(36, 40, 56, 124, 203, 208, 246, 250, 276, 300, 352, 372, 380,
  392, 410, 428, 442, 528, 554, 578, 620, 724, 752, 756, 792, 826, 840)
## There was an unexpected raw data change, and to show the exact result of the paper
## We need to manually adjust for it.
rawTFR_data <- rawTFR
rawTFR_data[rawTFR_data$country_code==756, "source"] <- "VR"
write.csv(rawTFR_data, file = "rawTFR_fixed.csv")
m.gold <- run.tfr.mcmc(output.dir = sim.dir.gold, nr.chains = nr.chains,
  iter = total.iter, annual = annual, thin = thin, uncertainty = TRUE,
  ar.phase2 = TRUE, iso.unbiased = iso.goodvr, parallel = FALSE, my.tfr.raw.file = "rawTFR_fixed.csv")

set.seed(123)
pred.gold <- tfr.predict(sim.dir = sim.dir.gold, end.year = 2100, burnin = burnin,
  nr.traj = 1000, uncertainty = TRUE)

## Figure 8b
tfr.trajectories.plot(pred.gold, country = "Switzerland", nr.traj = 20, pi = c(80, 95),
  uncertainty = TRUE, half.child.variant = FALSE)

## Figure 12b
tfr.trajectories.plot(pred.gold, country = "Nigeria", nr.traj = 20, pi = c(80, 95), uncertainty = TRUE,
  half.child.variant = FALSE)

## Figure 12d
plot_12d <- tfr.estimation.plot(country = 566, sim.dir = sim.dir.gold, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot_12d)

## session info
sessionInfo()
