##### Preliminaries #####

# Load packages
library("dalmatian")
library("ggplot2")
library("dplyr")
library("tidyr")

# Set random seed to ensure reproducibility
set.seed(27182)

##### Section 4: Sample data #####

# Load pfdata
data("pied_flycatchers_1", package = "dalmatian")

# Define response variable
pfdata$logLoad <- log(pmax(pfdata$load, 0.01))

##### Section 4: Model fitting #####

# Section 4.1: Model definition

# Mean model
mymean <- list(fixed = list(name = "alpha",
    formula = ~ log(IVI) + broodsize + sex),
  random = list(name = "epsilon", formula = ~ -1 + indidx))

# Dispersion model
mydisp <- list(fixed = list(name = "psi", formula = ~ broodsize + sex),
  random = list(name = "xi", formula = ~ -1 + indidx), link = "log")

# Section 5.2: MCMC arguments

# Define list of arguments for jags.model()
jm.args <- list(file = "pied_flycatchers_1.jags", n.adapt = 1000)

# Define list of arguments for coda.samples()
cs.args <- list(n.iter = 2000)

# Add RNG samplers and seeds to initial values
jm.args$inits <- list(
  list(".RNG.name" = "base::Wichmann-Hill", ".RNG.seed" = 13),
  list(".RNG.name" = "base::Marsaglia-Multicarry", ".RNG.seed" = 1597),
  list(".RNG.name" = "base::Super-Duper", ".RNG.seed" = 196418))


# Section 4.4: Running the sampler

# Fit model
pfresults <- dalmatian(df = pfdata, mean.model = mymean,
  dispersion.model = mydisp, jags.model.args = jm.args,
  coda.samples.args = cs.args, response = "logLoad",
  overwrite = TRUE)

##### Section 6: Post-processing #####

# Secton 6.1: Convergence diagnostics

# Generate traceplots
pftraceplots <- traceplots(pfresults, show = FALSE)

# Fixed effects for mean
pftraceplots$meanFixed

# Fixed effects for didpersion
pftraceplots$dispersionFixed

# Random effects for mean
pftraceplots$meanRandom

# Random effects for dispersion
pftraceplots$dispersionRandom

# Compute convergence diagnostics
pfconvergence <- convergence(pfresults)

# Gelman-Rubin diagnostics
pfconvergence$gelman

# Raftery diagnostics
pfconvergence$raftery

# Effective sample size
round(pfconvergence$effectiveSize)

# Section 6.2: Posterior summaries

# Compute numerical summaries
summary(pfresults)

# Construct caterpillar plots
pfcaterpillar <- caterpillar(pfresults, show = FALSE)

# Fixed effects for mean
pfcaterpillar$meanFixed

# Fixed effects for dispersion
pfcaterpillar$dispersionFixed

# Random effects for mean (not shown in paper)
pfcaterpillar$meanRandom

# Random effects for variance (not shown in paper)
pfcaterpillar$dispersionRandom

# Section 6.3: Fitted values

# Compute fitted mean and dispersion for first indivdiual
newdata <- cbind(pfdata[1, c("broodsize", "sex", "indidx")],
   data.frame(IVI = 1:100))

predict(object = pfresults, newdata = newdata)

# Compute population level predictions of mean and dispersion for all
# combinations of sex and broodsize
newdata1 <- crossing(broodsize = factor(c("Decreased", "Increased")),
  IVI = 1:100, sex = factor(c("Female", "Male")))
newdata1$indidx <- factor(NA, levels(pfdata$indidx))

fitted1 <- predict(object = pfresults, newdata = newdata1, type = "response")

# Plot predicted values with 95% credible intervals
qplot(data = fitted1$mean, x = IVI, y = exp(Predicted), geom = "line",
  xlab = "IVI", ylab = "Fitted Mean", ymin = exp(`2.5%`), ymax = exp(`97.5%`),
  group = broodsize, colour = broodsize, facets = . ~ sex) + 
geom_ribbon(alpha = 0.2, aes(fill = broodsize))

##### Section 7.1: Rounding #####

# Set random seed to ensure reproducibility
set.seed(27182)

# Create variables bounding the true load
pfdata$lowerLoad <- log(pmax(0.001, pfdata$load - 0.049))
pfdata$upperLoad <- log(pfdata$load + 0.05)

# Update jags.model arguments including initial values
jm.args2 <- list(file = "pied_flycatchers_2.jags", n.adapt = 1000,
  inits = lapply(1:3,
    function(i) {
      last <- pfresults$coda[[i]][2000, ]

      setJAGSInits(mymean, mydisp, fixed.mean = last[1:4],
        random.mean = last[5:56], fixed.dispersion = last[57:59],
        sd.mean = last[60], sd.dispersion = last[61],
        random.dispersion = last[62:113],
        y = runif(nrow(pfdata), pfdata$lowerLoad, pfdata$upperLoad))
    }
  )
)

# Add RNG samplers and seeds to initial values
jm.args2$inits[[1]] <- c(jm.args2$inits[[1]], list(".RNG.name" = "base::Wichmann-Hill", ".RNG.seed" = 21))
jm.args2$inits[[2]] <- c(jm.args2$inits[[2]], list(".RNG.name" = "base::Marsaglia-Multicarry", ".RNG.seed" = 2584))
jm.args2$inits[[3]] <- c(jm.args2$inits[[3]], list(".RNG.name" = "base::Super-Duper", ".RNG.seed" = 317811))
                                                                          
# Fit model
pfresults2 <- dalmatian(df = pfdata, mean.model = mymean,
  dispersion.model = mydisp, jags.model.args = jm.args2,
  coda.samples.args = cs.args, rounding = TRUE, lower = "lowerLoad",
  upper = "upperLoad", overwrite = TRUE)

# Recompute numerical summaries
summary(pfresults2)

##### Section 7.2: Weights #####

# Compute summaries by day
pfdataByDay <- group_by(pfdata, indidx, broodsize, sex, date) %>%
  summarise(nvisit = n(), avgLoad = mean(load), avgIVI = mean(IVI)) %>%
  ungroup()

# Respecify model components
mymean <- list(fixed = list(name = "alpha",
  formula = ~ log(avgIVI) + broodsize + sex,
  priors = list(c("dnorm", 0, 0.001))))

mydisp <- list(fixed = list(name = "psi", formula = ~ broodsize + sex),
  link = "log", weights = "nvisit")

# Compute new response variable
pfdataByDay$logAvgLoad <- log(pmax(pfdataByDay$avgLoad, 0.001))

# Re-fit model
jm.args3 <- list(file = "pied_flycatchers_3.jags", n.adapt = 1000)

# Add RNG samplers and seeds to initial values
jm.args3$inits <- list(
  list(".RNG.name" = "base::Wichmann-Hill", ".RNG.seed" = 34),
  list(".RNG.name" = "base::Marsaglia-Multicarry", ".RNG.seed" = 4181),
  list(".RNG.name" = "base::Super-Duper", ".RNG.seed" = 514229))

pfresults3 <- dalmatian(df = pfdataByDay, mean.model = mymean,
  dispersion.model = mydisp, jags.model.args = jm.args3,
  coda.samples.args = cs.args, response = "logAvgLoad",
  overwrite = TRUE)

# Recompute numerical summaries
summary(pfresults3)