## R code from the JSS paper:
## A Major Update of the bayesTFR R Package: 
## Probabilistic Estimation and Projection of the Annual Total Fertility Rate 
## Accounting for Past Uncertainty
##
## Peiran Liu, Hana Sevcikova, Adrian E. Raftery
##
## bayesTFR package version 7.3-0

library("bayesTFR")

## Section 4.1
data("rawTFR", package = "bayesTFR")
head(rawTFR)

## Section 4.2
## Two-step estimation
set.seed(123) 

annual <- TRUE
nr.chains <- 3
total.iter <- 50 # decreased from 5000 for faster processing (referred to as a toy simulation)
thin <- 1
simu.dir <- file.path(getwd(), "annual")
m2 <- run.tfr.mcmc(output.dir = simu.dir, nr.chains = nr.chains, iter = total.iter,
  thin = thin, annual = annual)
m3 <- run.tfr3.mcmc(sim.dir = simu.dir, nr.chains = nr.chains, iter = total.iter,
  thin = thin)

## For further processing we set the RNG seed for reproducibility
set.seed(123)

## One-step estimation
ar.phase2 <- TRUE
simu.dir.unc <- file.path(getwd(), "annual_unc")
m <- run.tfr.mcmc(output.dir = simu.dir.unc, nr.chains = nr.chains, iter = total.iter, annual = annual,
  thin = thin, uncertainty = TRUE, ar.phase2 = ar.phase2, iso.unbiased = c(124, 840))

## Continuing an existing simulation
m <- continue.tfr.mcmc(output.dir = simu.dir.unc, iter = 10)  # using a toy simulation

## Section 4.3
## Generating projections
pred <- tfr.predict(sim.dir = simu.dir.unc, end.year = 2100, burnin = 20, nr.traj = 50,
  uncertainty = TRUE) # using a toy simulation

## Section 4.4
m <- get.tfr.mcmc(simu.dir.unc)
m3 <- get.tfr3.mcmc(simu.dir.unc)
pred <- get.tfr.prediction(simu.dir.unc)

## Summary functions
thin <- 1 # working with a toy simulation
burnin <- 20 # using a toy simulation

summary(m, thin = thin, burnin = burnin)
summary(m, par.names = c("rho_phase2", "sigma0"), thin = thin, burnin = burnin)

tfr.parameter.names(meta = m$meta)
tfr3.parameter.names()

summary(m, country = "Nigeria", par.names.cs = "tfr", thin = thin, burnin = burnin)
summary(m, country = "Nigeria", par.names.cs = c("tfr_1", "tfr_30", "tfr_70"), thin = thin,
  burnin = burnin)

## Exploring TFR estimation
nigeria_obj <- get.tfr.estimation(country = "NG", sim.dir = simu.dir.unc, burnin = burnin,
  thin = thin, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))
dim(nigeria_obj$tfr_table)
head(nigeria_obj$tfr_quantile)

## Figure 1
plot <- tfr.estimation.plot(country = 566, sim.dir = simu.dir.unc, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot)

plot.usa <- tfr.estimation.plot(country = 840, sim.dir = simu.dir.unc, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot.usa)

## Exploring bias and standard deviation of observations (Table 2-3)
bias_sd <- tfr.bias.sd(sim.dir = simu.dir.unc, country = 566)
summary(bias_sd$model_bias)
head(bias_sd$table)

## Exploring TFR prediction (Figure 2)
tfr.trajectories.plot(pred, country = "Burkina Faso", nr.traj = 20, pi = c(80, 95),
  uncertainty = TRUE)
tfr.trajectories.plot(pred, country = "Burkina Faso", nr.traj = 20, pi = c(80, 95),
  uncertainty = FALSE)

tfr.trajectories.table(pred, country = "Burkina Faso")
summary(pred, country = "Burkina Faso")

## Exploring double logistic function (Figure 3)
DLcurve.plot(country = "BFA", mcmc.list = m, burnin = burnin, pi = c(95, 80), nr.curves = 100)

DLcurve.plot(country = "THA", mcmc.list = m, burnin = burnin, pi = c(95, 80), nr.curves = 100)

## MCMC traces, density and diagnosis (Figure 4 and 5)
tfr.partraces.plot(m, par.names = "rho_phase2", nr.points = 200)
tfr.partraces.cs.plot(m, country = "Nigeria", par.names = "tfr_36", nr.points = 200)
tfr.pardensity.plot(m, par.names = "rho_phase2", burnin = burnin)
tfr.pardensity.cs.plot(m, country = "Nigeria", par.names = "tfr_36", burnin = burnin)

# tfr.diagnose(simu.dir.unc, thin = thin, burnin = burnin, express = TRUE) 
## cannot run for a toy simulation

## Section 4.5
countries <- c(566, 840)
myrawTFR <- subset(rawTFR, country_code %in% countries)
myrawTFR <- subset(myrawTFR, !(country_code == 566 & method == "Indirect" & source == "DHS-NS"))
write.csv(myrawTFR, file = "raw_tfr_user.csv", row.names = FALSE)

simu.dir.extra <- paste0(simu.dir.unc, "_extra")
dir.create(simu.dir.extra)
file.copy(list.files(simu.dir.unc, full.names = TRUE), simu.dir.extra, recursive = TRUE)

run.tfr.mcmc.extra(sim.dir = simu.dir.extra, countries = countries, iter = total.iter, burnin = burnin,
  uncertainty = TRUE, my.tfr.raw.file = "raw_tfr_user.csv", covariates = c("source", "method"))

## reate similar plots (Figure 6)
plot1 <- tfr.estimation.plot(country = 566, sim.dir = simu.dir.extra, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
plot2 <- tfr.estimation.plot(country = 840, sim.dir = simu.dir.extra, burnin = burnin,
  thin = thin, pis = c(80, 95), plot.raw = TRUE)
print(plot1)
print(plot2)

## Section 5.2
## Recommendations for a real simulation
## It is commented out as it takes a long time to run.
# set.seed(123)
# sim.dir.base <- file.path(getwd(), "baseline")

# annual <- TRUE
# nr.chains <- 3
# total.iter <- 62000
# thin <- 10
# burnin <- 2000
# iso.goodvr <- c(36, 40, 56, 124, 203, 208, 246, 250, 276, 300, 352, 372, 380, 392,
#   410, 428, 442, 528, 554, 578, 620, 724, 752, 756, 792, 826, 840)
# m <- run.tfr.mcmc(output.dir = simu.dir.unc, nr.chains = nr.chains, iter = total.iter,
#   annual = annual, thin = thin, uncertainty = TRUE, ar.phase2 = TRUE, iso.unbiased = iso.goodvr,
#   parallel = TRUE)
# pred <- tfr.predict(sim.dir = simu.dir.unc, end.year = 2100, burnin = burnin, nr.traj = 1000,
#   uncertainty = TRUE)

## session info
sessionInfo()
