# set the random generators as they were in earlier R version <= 3.5.0
Sys.info()
suppressWarnings(RNGversion("3.5.0")) 

### Replication R code for the manual 'SimInf: An R package for Data-driven
### Stochastic Disease Spread Simulations' Note: The results reported here are
### based on SimInf version 6.1.0 Note: threads = 1 and a seed are defined in most
### examples for reproducible trajectories.

### Section 3: Load the SimInf R package Note: If you are installing SimInf from
### source than you need to install a C compiler and the GNU Scientific Library
### (GSL) (https://www.gnu.org/software/gsl/) To install GSL: apt-get install
### libgsl0-dev (package on e.g. Debian and Ubuntu) dnf install gsl-devel (package
### on e.g. Fedora, CentOS and RHEL) brew install gsl (Homebrew package on OS X) On
### a Windows machine you first need to download and install Rtools from
### https://cran.r-project.org/bin/windows/Rtools. The GSL files are downloaded, if
### needed, from https://github.com/rwinlib/gsl during the installation of SimInf.

library("SimInf")

### Section 4.1: A first example: The SIR model Subection: Specification of the SIR
### model without scheduled events

## Assume an initial population of S=999 susceptible, I=1 infected and R=0
## recovered (or removed) individuals. Let beta denote the transmission rate and
## gamma the recovery rate. The initial state of the node is defined by the u0
## data.frame. Define tspan to simulate 180 days and record the state every 7th
## day.  Since there are no between-node interactions in this example, the
## stochastic process in one node does not affect any other nodes in the model.
## Consequently, it is straightforward to run many realizations of this model,
## simply by replicating a node in u0, for example, n = 1000 times.
n <- 1000
u0 <- data.frame(S = rep(999, n), I = rep(1, n), R = rep(0, n))
tspan <- seq(from = 1, to = 180, by = 7)
model <- SIR(u0 = u0, tspan = tspan, beta = 0.16, gamma = 0.077)

## Run the stochastic simulation and attach one trajectory to the result object.
## Use the threads=1 argument and set.seed for reproducibility.
set.seed(123)
result <- run(model = model, threads = 1)

## Prints some basic information about the model, such as the global data
## parameters and the extremes, the mean and the quartiles of the count in each
## compartment across all nodes.
result

## Use the trajectory method to view the number of individuals in each compartment
## at the time-points specified in tspan.  Below is an excerpt of the simulated
## data from the first node that clearly shows there was an outbreak there. To
## extract all data from every node, you only have to remove the node argument.
head(trajectory(model = result, node = 1))

## Figure 3: The default plot will display the median count in each compartment
## across nodes as a colored line together with the inter-quartile range using the
## same color, but with trans- parency.
plot(result, main = "SIR model")

## Realizations from a subset of 10 nodes.
plot(result, node = 1:10, range = FALSE, main = "SIR model: 10 nodes")

### Section 4.1: A first example: The SIR model Subection: Specification of
### scheduled events in the SIR model

## Let us start with five empty nodes. Add susceptible individuals during the
## first ten time-steps. Then, add one infected individual at t = 25. Further, add
## some movements between t = 35 and t = 45.  Finally, at t = 70 and t = 110,
## reove twenty percent of the individuals.
u0 <- data.frame(S = rep(0, 5), I = rep(0, 5), R = rep(0, 5))

add <- data.frame(event = "enter", time = rep(1:10, each = 5),
                  node = 1:5, dest = 0, n = 1:5, proportion = 0,
                  select = 1, shift = 0)

infect <- data.frame(event = "enter", time = 25, node = 5,
                     dest = 0, n = 1, proportion = 0, 
                     select = 2, shift = 0)

move <- data.frame(event = "extTrans", time = 35:45,
                   node = c(5, 5, 5, 5, 4, 4, 4, 3, 3, 2, 1),
                   dest = c(4, 3, 3, 1, 3, 2, 1, 2, 1, 1, 2), n = 5, proportion = 0, 
                   select = 4, shift = 0)

remove <- data.frame(event = "exit", time = c(70, 110), node = rep(1:5, each = 2), 
                     dest = 0, n = 0, proportion = 0.2, select = 4, shift = 0)

## Combine all events and create model
events <- rbind(add, infect, move, remove)
model <- SIR(u0 = u0, tspan = 1:180, events = events, beta = 0.16, gamma = 0.077)

## Figure 4 left: Run trajectory and plot result
set.seed(3)
result <- run(model, threads = 1)
plot(result, node = 1:5, range = FALSE)

## Figure 4 right: Model without introducing an infected individual
model_no_infected <- SIR(u0 = u0, tspan = 1:180, events = rbind(add, move, remove), 
                         beta = 0.16, gamma = 0.077)
set.seed(3)
result_no_infected <- run(model_no_infected, threads = 1)
plot(result_no_infected, node = 1:5, range = FALSE)

## We can use 'replicate' (or similar) to generate many realizations, together
## with some custom analysis of each trajectory.  Here, we find that infection
## spread from the 5th node in about half of 1000 trajectories.
set.seed(123)
mean(replicate(n = 1000, {
  nI <- trajectory(run(model = model, threads = 1), node = 1:4)$I
  sum(nI) > 0
}))

### Section 4.2: A second example: The SISe_sp model This section illustrates the
### specification of the SISe_sp model, which is also predefined in the SimInf
### package. The SISe_sp model contains the two compartments susceptible S and
### infected I.  It contains an environmental compartment to model shedding of a
### pathogen to the environment.  Moreover, it also includes a spatial coupling of
### the environmental contamination among proximal nodes to capture between-node
### spread unrelated to moving infected individuals.

## We will now demonstrate how to construct and use the SISe_sp model together
## with synthetic cattle events and population data included with the package
## SimInf. The data defines the initial population in 1600 nodes and scheduled
## events over 4 x 365 days. Load the datasets for the example.
data("nodes", package = "SimInf")
u0 <- u0_SISe()
events <- events_SISe()

## To model local spread of the environmental infectious pressure among proximal
## nodes, we have to generate a distance matrix.  Let us use the synthetic data
## set 'nodes' included in the SimInf package, which defines a cattle population
## consisting of 1600 herds located in a 50 square kilometer region, and let
## proximal neighbors be defined as neighbors within 2500m.
d_ik <- distance_matrix(x = nodes$x, y = nodes$y, cutoff = 2500)

## Let a random sample of 10% of the herds start with 5% infected individuals.
set.seed(123)
i <- sample(x = 1:1600, size = 160)
u0$I[i] <- as.integer(u0$S[i] * 0.05)
u0$S[i] <- u0$S[i] - u0$I[i]

## The 'SISe_sp' model contains parameters at a global and local scale. Here, the
## parameter values were chosen such that the proportion of infected nodes in a
## trajectory is about 10% and displays a seasonal pattern. The global parameters
## are: the spatial coupling = 0.2 (D in Equation 20), the shedding rate alpha =
## 1, the recovery rate gamma = 0.1 and the indirect transmission rate upsilon =
## 0.012.  Moreover, the global parameter beta(t) captures decay of the pathogen
## in four seasons: beta_t1 = 0.1, beta_t2 = 0.12, beta_t3 = 0.12 and beta_t4 =
## 0.1.  However, the duration of each season is local to a node and is specified
## as the day of the year each season ends.  Here, for simplicity, we let end_t1 =
## 91, end_t2 = 182, end_t3 = 273 and end_t4 = 365 in all nodes.  Furthermore, the
## distances between nodes are local data extracted from distance = d_ik.
## Finally, we let phi = 0 at the beginning of the simulation (becomes v0 in the
## model object).
model <- SISe_sp(u0 = u0, tspan = 1:1460, events = events, phi = 0, upsilon = 0.012, 
                 gamma = 0.1, alpha = 1, beta_t1 = 0.1, beta_t2 = 0.12, beta_t3 = 0.12, beta_t4 = 0.1, 
                 end_t1 = 91, end_t2 = 182, end_t3 = 273, end_t4 = 365, distance = d_ik, coupling = 0.2)

## Figure 5: Let us use the prevalence() method to explore the proportion of
## infected nodes through time.
plot(NULL, xlim = c(0, 1500), ylim = c(0, 0.18), ylab = "Prevalance", xlab = "Time")
set.seed(123)
replicate(5, {
  result <- run(model = model, threads = 1)
  p <- prevalence(model = result, formula = I ~ S + I, type = "nop")
  lines(p)
})

## Let us use the function gdata() to change the global coupling parameter and
## then run some more trajectories.
gdata(model, "coupling") <- 0.1
replicate(5, {
  result <- run(model = model, threads = 1)
  p <- prevalence(model = result, formula = I ~ S + I, type = "nop")
  lines(p, col = "blue", lty = 2)
})

### Section 5: Extending SimInf: New models Note: Since extending SimInf requires
### that C code can be compiled, you will first need to install a compiler to run
### these examples. To read more about interfacing compiled code from R and
### creating R add-on packages, the ’Writing R extensions’
### (https://cran.r-project.org/doc/manuals/r-release/R-exts.html) manual is the
### official guide and describes the process in detail. Another useful resource is
### the ’R packages’ book by Wickham (2015) (http://r-pkgs.had.co.nz/).

### Section 5.1: Using the model parser to define a new model Subsection:
### Introductory examples of using mparse

## In a first example we will consider the SIR model in a closed population i.e.,
## no births or deaths. If we let b denote the transmission rate and g the
## recovery rate, the model can be described as,
transitions <- c("S -> b*S*I/(S+I+R) -> I", "I -> g*I -> R")
compartments <- c("S", "I", "R")
n <- 1000
u0 <- data.frame(S = rep(99, n), I = rep(5, n), R = rep(0, n))
model <- mparse(transitions = transitions, compartments = compartments,
                gdata = c(b = 0.16, g = 0.077), u0 = u0, tspan = 1:180)

## Figure 6: Run the model and plot the resulting trajectory.
set.seed(123)
result <- run(model = model, threads = 1)
plot(result)

## Let us elaborate on the previous example and explore the incidence cases per
## day. Add one compartment 'Icum' whose sole purpose is to keep track of how many
## individuals who become infected over time.  The right hand side 'I + Icum' of
## the transition 'S -> b*S*I/(S+I+R) -> I + Icum', means that both 'I' and 'Icum'
## are incremented by one each time the transition happens.
transitions <- c("S -> b*S*I/(S+I+R) -> I + Icum", "I -> g*I -> R")
compartments <- c("S", "I", "Icum", "R")

## Since there are no between-node movements in this example, the stochastic
## process in one node does not affect any other node in the model.  It is
## therefore straightforward to run many realizations of this model, simply by
## replicating a node in the initial condition u0, for example, n = 1000 times.
n <- 1000
u0 <- data.frame(S = rep(99, n), I = rep(1, n), Icum = rep(0, n), R = rep(0, n))
model <- mparse(transitions, compartments, gdata = c(b = 0.16, g = 0.077), u0 = u0, 
                tspan = 1:150)
set.seed(123)
result <- run(model = model, threads = 1)

## Post-process the simulated trajectory to compare the incidence cases in the
## first node with the average incidence cases among all realizations. Extract the
## trajectory and calculate successive differences of 'Icum' at each time-point.
traj <- trajectory(model = result, compartments = "Icum")
cases <- stepfun(result@tspan[-1], diff(c(0, traj$Icum[traj$node == 1])))
avg_cases <- c(0, diff(by(traj, traj$time, function(x) sum(x$Icum)))/n)

## Figure 7: Plot the result as an epidemic curve.
plot(cases, main = "", xlab = "Time", ylab = "Number of cases", do.points = FALSE)
lines(avg_cases, col = "blue", lwd = 2, lty = 2)

### Section 5.1: Using the model parser to define a new model Subsection:
### Incorporate scheduled events in an mparse model

## To illustrate how models generated using 'mparse' can incorporate scheduled
## events, consider an epidemic in a population consisting of 1600 nodes, for
## example, cattle herds, that are connected to each other by livestock movements.
## Assume an outbreak is detected on day twenty-one after introduction of an
## infection and that we wish to explore how vaccination could limit the outbreak,
## if resources for vaccination can handle 50 herds per day and 80% of the animals
## in each herd.  Let us add one compartment 'V to the model, to represent
## vaccinated individuals, so that the model now contains the 'S', 'I', 'Icum',
## 'R' and 'V' compartments.  As before, let 'b' denote the transmission rate and
## 'g' the recovery rate.
transitions <- c("S -> b*S*I/(S+I+R+V) -> I + Icum", "I -> g*I -> R")
compartments <- c("S", "I", "Icum", "R", "V")

## Load the example data for an SIR model in a population of 1600 nodes (cattle
## herds) with its associated scheduled events: births, deaths, and livestock
## movements. Let Icum = 0 and R = 0.
u0 <- u0_SIR()
u0$Icum <- 0
u0$V <- 0
events <- events_SIR()

## Now generate vaccination events i.e., internal transfer events.  Use select = 3
## and shift = 1 to move animals from the susceptible, infectious and recovered
## compartments to the vaccinated compartment, see the definitions of E and N
## below.  Let us start the vaccinations in nodes 1-50 on day twenty-one, and
## continue until all herds are vaccinated on day fifty-two, and use proportion =
## 0.8 to vaccinate 80\% of the animals in each herd.  Assume that vaccinated
## individuals become immune and non-infectious immediately.
vaccination <- data.frame(event = "intTrans", time = rep(21:52, each = 50), node = 1:1600, 
                          dest = 0, n = 0, proportion = 0.8, select = 3, shift = 1)

## To simulate from this model, we have define the select matrix E to handle which
## compartments to sample from when processing scheduled events. The first column
## is for 'enter' events (births); add newborn animals to the susceptible
## compartment S. The second column is for 'exit' events (deaths) and 'external
## transfer' events (livestock movements); sample animals from the S, I, R or V
## compartments. Finally, the third column is for 'internal transfer' events
## (vaccination); sample individuals from the S, I or R compartments.
E <- matrix(c(1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0), nrow = 5, ncol = 3, byrow = TRUE, 
            dimnames = list(c("S", "I", "Icum", "R", "V"), c("1", "2", "3")))

## Define the shift matrix N to process 'internal transfer' events (vaccination);
## move sampled animals from the S compartment four steps forward to the V
## compartment.  Similarly, move sampled animals from the I compartment three
## steps forward to the V compartment, and finally, move sampled individuals from
## the R compartment one step forward.
N <- matrix(c(4, 3, 0, 1, 0), nrow = 5, ncol = 1, dimnames = list(c("S", "I", "Icum", 
                                                                    "R", "V"), c("1")))

## Additionally, we have to redefine the \code{select} column in the events,
## since the data is from the predefined \code{SIR} model with another $E$
## matrix.  Let us change \code{select} for movements.
events$select[events$select == 4] <- 2

## Create an 'epicurve' function to estimate the average number of new cases per
## day from n = 1000 realizations. To clear infection that was introduced in the
## previous trajectory, animals are first moved to the susceptible compartment.
## Then, one infected individual is introduced into a randomly sampled node from
## the population.  Note that we use the 'L' suffix to create an integer value
## rather than a numeric value.  Run the model and accumulate 'Icum'.  For
## efficiency, we use 'as.is = TRUE', the internal matrix format, to extract
## 'Icum' in every node at each time-point in 'tspan'.
epicurve <- function(model, n = 1000) {
  Icum <- numeric(length(model@tspan))
  for (i in seq_len(n)) {
    ## Move all infected individuals to the susceptible compartment. This is to remove
    ## infection from the node with one infected individual from the previous
    ## trajectory.
    model@u0["S", ] <- model@u0["S", ] + model@u0["I", ]
    model@u0["I", ] <- 0L
    
    ## Sample one node in the population where to introduce infection with one
    ## infected individual.
    j <- sample(seq_len(Nn(model)), 1)
    model@u0["I", j] <- 1L
    model@u0["S", j] <- model@u0["S", j] - 1L
    
    ## Run the model
    result <- run(model = model)
    
    ## Accumulate Icum.
    traj <- trajectory(model = result, compartments = "Icum", as.is = TRUE)
    Icum <- Icum + colSums(traj)
  }
  
  stepfun(model@tspan[-1], diff(c(0, Icum/n)))
}

## Generate an epicurve with the average number of cases per day for the first
## three hundred days of the epidemic without vaccination.  Note that this
## calculation takes some minutes
model_no_vac <- mparse(transitions = transitions, compartments = compartments,
                       gdata = c(b = 0.16, 
                                 g = 0.077), u0 = u0, tspan = 1:300,
                       events = events, E = E, N = N)
cases_no_vac <- epicurve(model_no_vac)

## Similarly, generate an epicurve after incorporating the vaccination events.
## Note that this calculation takes some minutes
model_vac <- mparse(transitions = transitions, compartments = compartments,
                    gdata = c(b = 0.16, 
                              g = 0.077), u0 = u0, tspan = 1:300,
                    events = rbind(events, vaccination), E = E, 
                    N = N)
cases_vac <- epicurve(model_vac)

## Figure 8: As expected, the number of cases decrease rapidly after vaccination,
## while the outbreak is ongoing for a longer time in the unvaccinated population
plot(cases_no_vac, main = "", xlim = c(0, 300), xlab = "Time", ylab = "Number of cases", 
     do.points = FALSE)
lines(cases_vac, col = "blue", do.points = FALSE, lty = 2)
abline(v = 21, col = "red", lty = 3)
legend("topright", c("No vaccination", "Vaccination"), col = c("black", "blue"), 
       lty = 1:2)

### Section 5.2: Use the SimInf framework from another package Create an R add-on
### package that uses SimInf by linking to its core solver. The SimInf package
### includes the 'package_skeleton' method to facilitate this: it creates
### directories, saves R and C code files to appropriate places, and creates
### skeleton help files.

## Consider we wish to create a new add-on package \pkg{PredatorPrey} based on
## the Rosenzweig-MacArthur predator-prey model demonstrated in 'GillespieSSA:
## Implementing the Gillespie Stochastic Simulation Algorithm in R'
## (https://www.jstatsoft.org/article/view/v025i12).  The model has a
## density-dependent growth in the prey and and a nonlinear Type-2 functional
## response in the predator.  Let 'R' and 'F' denote the number of prey and
## predators, respectively.  The model consists of five transitions: i) prey
## birth, ii) prey death due to non-predatory events, iii) prey death due to
## predation, iv) predator birth, and v) predator death. Let 'bR', 'dR, 'bF', and
## 'dF' denote the per capita birth and death rate of the prey and predator,
## respectively.  Furthermore, let 'K' be the carrying capacity of the prey,
## 'alpha' the predation efficiency, and 'w' the degree of predator saturation.
## Using parameter values from 'GillespieSSA: Implementing the Gillespie
## Stochastic Simulation Algorithm in R', we define the model as
transitions <- c("@ -> bR*R -> R", "R -> (dR+(bR-dR)*R/K)*R -> @", "R -> alpha/(1+w*R)*R*F -> @", 
                 "@ -> bF*alpha/(1+w*R)*R*F -> F", "F -> dF*F -> @")
compartments <- c("R", "F")
parameters <- c(bR = 2, bF = 2, dR = 1, K = 1000, alpha = 0.007, w = 0.0035, dF = 2)

## Assume the initial population u0 consists of R=1000 prey and F=100 predators
## and we are interested in simulating n=1000 replicates over 100 days. Since
## there are no between-node movements in this example, we can generate replicates
## of the model simply by starting with n identical nodes.
n <- 1000
u0 <- data.frame(R = rep(1000, n), F = rep(100, n))
model <- mparse(transitions = transitions,
                compartments = compartments, gdata = parameters, 
                u0 = u0, tspan = 1:100)

## Now instead of running the model to generate data, let us use it to create an R
## add-on package. Save the files for a package skeleton to a folder. Here we use
## a temporary folder.
path <- tempdir()
package_skeleton(model = model, name = "PredatorPrey", path = path)

## Install and load the package. We let 'repos = NULL' to install from local files
## and use 'type = 'source'' to compile the files.
install.packages(file.path(path, "PredatorPrey"), repos = NULL, type = "source")

## If the installation was successful, the newly installed package PredatorPrey
## can be loaded in R with the following command.
library("PredatorPrey")

## Now create a model and run it to generate data.
model <- PredatorPrey(u0 = u0, tspan = 1:100, gdata = parameters)
set.seed(123)
result <- run(model, threads = 1)

## Because the 'PredatorPrey' model contains the 'SimInf_model' class, it can make
## use of utility functions provided in the SimInf package, for example, 'show'.
result

## Figure 9: Use the trajectory method to plot the phase plane from 1000
## realizations. Additionally extract the simulated data from the fourth node (use
## node = 4) to illustrate stochastic extinction of the predators.
opar <- par(mfrow = c(1, 2))
plot(R ~ F, trajectory(model = result), cex = 0.7, pch = 20, xlab = "Number of predators", 
     ylab = "Number of prey", col = rgb(0, 0, 0, alpha = 0.2))
plot(R ~ time, trajectory(model = result, node = 4), type = "l", xlab = "Time", ylab = "N")
lines(F ~ time, trajectory(model = result, node = 4), type = "l", lty = 2)
legend("right", c("Prey", "Predator"), lty = 1:2)
par(opar)

### Section 6: Benchmark Ten replicates were performed and average run-time was
### estimated to generate 1,000 realizations of an SIR model with parameters β =
### 0.16 and γ = 0.077 and initial conditions S = 1000, I = 10 and R = 0.

library("microbenchmark")

## SimInf
u0 <- data.frame(S = rep(1000, 1000), I = rep(10, 1000), R = rep(0, 1000))
tspan <- c(1, 150)
model <- SIR(u0 = u0, tspan = tspan, beta = 0.16, gamma = 0.077)
res1 <- microbenchmark(run(model, threads = 1, solver = "ssm"),
                       run(model, threads = 2, solver = "ssm"),
                       run(model, threads = 4, solver = "ssm"),
                       times = 10, unit = "ms")
res1

## adaptivetau
library("adaptivetau")
init.values <- c(S = 1000, I = 10, R = 0)
transitions <- list(c(S = -1, I = +1), c(I = -1, R = +1))
params <- list(beta = 0.16, gamma = 0.077)
SIRrateF <- function(x, p, t) {
  c(x["S"] * p$beta[1] * x["I"]/(x["S"] + x["I"] + x["R"]), p$gamma * x["I"])
}
res2 <- microbenchmark(replicate(1000, ssa.exact(init.values = init.values, transitions, 
                                                 SIRrateF, params = params, tf = 150)),
                       replicate(1000, ssa.adaptivetau(init.values = init.values, 
                                                       transitions, SIRrateF,
                                                       params = params, tf = 150)),
                       times = 10L, unit = "ms")
res2

## GillespieSSA Note: Based on the average runtimes given in Table 4, this last
## part of the replication script takes about 11-12 minutes.
library("GillespieSSA")
params <- c(beta=0.16, gamma=0.077)
x0  <- c(S = 1000, I = 10, R = 0)
a   <- c("beta*S*I/(S+I+R)","gamma*I")
nu  <- matrix(c(-1,0,+1,-1,0,+1),nrow=3,byrow=TRUE)
res3 <- microbenchmark(replicate(1000, ssa(x0,a,nu,params,tf=150)),
                       replicate(1000, ssa(x0,a,nu,params,tf=150, method = "OTL")),
                       times = 10L, unit = "ms")
res3

## Collect information about the current R session.
sessionInfo()