# set the random generators as they were in earlier R version <= 3.5.0 Sys.info() suppressWarnings(RNGversion("3.5.0")) ### Replication R code for the manual 'SimInf: An R package for Data-driven ### Stochastic Disease Spread Simulations' Note: The results reported here are ### based on SimInf version 6.1.0 Note: threads = 1 and a seed are defined in most ### examples for reproducible trajectories. ### Section 3: Load the SimInf R package Note: If you are installing SimInf from ### source than you need to install a C compiler and the GNU Scientific Library ### (GSL) (https://www.gnu.org/software/gsl/) To install GSL: apt-get install ### libgsl0-dev (package on e.g. Debian and Ubuntu) dnf install gsl-devel (package ### on e.g. Fedora, CentOS and RHEL) brew install gsl (Homebrew package on OS X) On ### a Windows machine you first need to download and install Rtools from ### https://cran.r-project.org/bin/windows/Rtools. The GSL files are downloaded, if ### needed, from https://github.com/rwinlib/gsl during the installation of SimInf. library("SimInf") ### Section 4.1: A first example: The SIR model Subection: Specification of the SIR ### model without scheduled events ## Assume an initial population of S=999 susceptible, I=1 infected and R=0 ## recovered (or removed) individuals. Let beta denote the transmission rate and ## gamma the recovery rate. The initial state of the node is defined by the u0 ## data.frame. Define tspan to simulate 180 days and record the state every 7th ## day. Since there are no between-node interactions in this example, the ## stochastic process in one node does not affect any other nodes in the model. ## Consequently, it is straightforward to run many realizations of this model, ## simply by replicating a node in u0, for example, n = 1000 times. n <- 1000 u0 <- data.frame(S = rep(999, n), I = rep(1, n), R = rep(0, n)) tspan <- seq(from = 1, to = 180, by = 7) model <- SIR(u0 = u0, tspan = tspan, beta = 0.16, gamma = 0.077) ## Run the stochastic simulation and attach one trajectory to the result object. ## Use the threads=1 argument and set.seed for reproducibility. set.seed(123) result <- run(model = model, threads = 1) ## Prints some basic information about the model, such as the global data ## parameters and the extremes, the mean and the quartiles of the count in each ## compartment across all nodes. result ## Use the trajectory method to view the number of individuals in each compartment ## at the time-points specified in tspan. Below is an excerpt of the simulated ## data from the first node that clearly shows there was an outbreak there. To ## extract all data from every node, you only have to remove the node argument. head(trajectory(model = result, node = 1)) ## Figure 3: The default plot will display the median count in each compartment ## across nodes as a colored line together with the inter-quartile range using the ## same color, but with trans- parency. plot(result, main = "SIR model") ## Realizations from a subset of 10 nodes. plot(result, node = 1:10, range = FALSE, main = "SIR model: 10 nodes") ### Section 4.1: A first example: The SIR model Subection: Specification of ### scheduled events in the SIR model ## Let us start with five empty nodes. Add susceptible individuals during the ## first ten time-steps. Then, add one infected individual at t = 25. Further, add ## some movements between t = 35 and t = 45. Finally, at t = 70 and t = 110, ## reove twenty percent of the individuals. u0 <- data.frame(S = rep(0, 5), I = rep(0, 5), R = rep(0, 5)) add <- data.frame(event = "enter", time = rep(1:10, each = 5), node = 1:5, dest = 0, n = 1:5, proportion = 0, select = 1, shift = 0) infect <- data.frame(event = "enter", time = 25, node = 5, dest = 0, n = 1, proportion = 0, select = 2, shift = 0) move <- data.frame(event = "extTrans", time = 35:45, node = c(5, 5, 5, 5, 4, 4, 4, 3, 3, 2, 1), dest = c(4, 3, 3, 1, 3, 2, 1, 2, 1, 1, 2), n = 5, proportion = 0, select = 4, shift = 0) remove <- data.frame(event = "exit", time = c(70, 110), node = rep(1:5, each = 2), dest = 0, n = 0, proportion = 0.2, select = 4, shift = 0) ## Combine all events and create model events <- rbind(add, infect, move, remove) model <- SIR(u0 = u0, tspan = 1:180, events = events, beta = 0.16, gamma = 0.077) ## Figure 4 left: Run trajectory and plot result set.seed(3) result <- run(model, threads = 1) plot(result, node = 1:5, range = FALSE) ## Figure 4 right: Model without introducing an infected individual model_no_infected <- SIR(u0 = u0, tspan = 1:180, events = rbind(add, move, remove), beta = 0.16, gamma = 0.077) set.seed(3) result_no_infected <- run(model_no_infected, threads = 1) plot(result_no_infected, node = 1:5, range = FALSE) ## We can use 'replicate' (or similar) to generate many realizations, together ## with some custom analysis of each trajectory. Here, we find that infection ## spread from the 5th node in about half of 1000 trajectories. set.seed(123) mean(replicate(n = 1000, { nI <- trajectory(run(model = model, threads = 1), node = 1:4)$I sum(nI) > 0 })) ### Section 4.2: A second example: The SISe_sp model This section illustrates the ### specification of the SISe_sp model, which is also predefined in the SimInf ### package. The SISe_sp model contains the two compartments susceptible S and ### infected I. It contains an environmental compartment to model shedding of a ### pathogen to the environment. Moreover, it also includes a spatial coupling of ### the environmental contamination among proximal nodes to capture between-node ### spread unrelated to moving infected individuals. ## We will now demonstrate how to construct and use the SISe_sp model together ## with synthetic cattle events and population data included with the package ## SimInf. The data defines the initial population in 1600 nodes and scheduled ## events over 4 x 365 days. Load the datasets for the example. data("nodes", package = "SimInf") u0 <- u0_SISe() events <- events_SISe() ## To model local spread of the environmental infectious pressure among proximal ## nodes, we have to generate a distance matrix. Let us use the synthetic data ## set 'nodes' included in the SimInf package, which defines a cattle population ## consisting of 1600 herds located in a 50 square kilometer region, and let ## proximal neighbors be defined as neighbors within 2500m. d_ik <- distance_matrix(x = nodes$x, y = nodes$y, cutoff = 2500) ## Let a random sample of 10% of the herds start with 5% infected individuals. set.seed(123) i <- sample(x = 1:1600, size = 160) u0$I[i] <- as.integer(u0$S[i] * 0.05) u0$S[i] <- u0$S[i] - u0$I[i] ## The 'SISe_sp' model contains parameters at a global and local scale. Here, the ## parameter values were chosen such that the proportion of infected nodes in a ## trajectory is about 10% and displays a seasonal pattern. The global parameters ## are: the spatial coupling = 0.2 (D in Equation 20), the shedding rate alpha = ## 1, the recovery rate gamma = 0.1 and the indirect transmission rate upsilon = ## 0.012. Moreover, the global parameter beta(t) captures decay of the pathogen ## in four seasons: beta_t1 = 0.1, beta_t2 = 0.12, beta_t3 = 0.12 and beta_t4 = ## 0.1. However, the duration of each season is local to a node and is specified ## as the day of the year each season ends. Here, for simplicity, we let end_t1 = ## 91, end_t2 = 182, end_t3 = 273 and end_t4 = 365 in all nodes. Furthermore, the ## distances between nodes are local data extracted from distance = d_ik. ## Finally, we let phi = 0 at the beginning of the simulation (becomes v0 in the ## model object). model <- SISe_sp(u0 = u0, tspan = 1:1460, events = events, phi = 0, upsilon = 0.012, gamma = 0.1, alpha = 1, beta_t1 = 0.1, beta_t2 = 0.12, beta_t3 = 0.12, beta_t4 = 0.1, end_t1 = 91, end_t2 = 182, end_t3 = 273, end_t4 = 365, distance = d_ik, coupling = 0.2) ## Figure 5: Let us use the prevalence() method to explore the proportion of ## infected nodes through time. plot(NULL, xlim = c(0, 1500), ylim = c(0, 0.18), ylab = "Prevalance", xlab = "Time") set.seed(123) replicate(5, { result <- run(model = model, threads = 1) p <- prevalence(model = result, formula = I ~ S + I, type = "nop") lines(p) }) ## Let us use the function gdata() to change the global coupling parameter and ## then run some more trajectories. gdata(model, "coupling") <- 0.1 replicate(5, { result <- run(model = model, threads = 1) p <- prevalence(model = result, formula = I ~ S + I, type = "nop") lines(p, col = "blue", lty = 2) }) ### Section 5: Extending SimInf: New models Note: Since extending SimInf requires ### that C code can be compiled, you will first need to install a compiler to run ### these examples. To read more about interfacing compiled code from R and ### creating R add-on packages, the ’Writing R extensions’ ### (https://cran.r-project.org/doc/manuals/r-release/R-exts.html) manual is the ### official guide and describes the process in detail. Another useful resource is ### the ’R packages’ book by Wickham (2015) (http://r-pkgs.had.co.nz/). ### Section 5.1: Using the model parser to define a new model Subsection: ### Introductory examples of using mparse ## In a first example we will consider the SIR model in a closed population i.e., ## no births or deaths. If we let b denote the transmission rate and g the ## recovery rate, the model can be described as, transitions <- c("S -> b*S*I/(S+I+R) -> I", "I -> g*I -> R") compartments <- c("S", "I", "R") n <- 1000 u0 <- data.frame(S = rep(99, n), I = rep(5, n), R = rep(0, n)) model <- mparse(transitions = transitions, compartments = compartments, gdata = c(b = 0.16, g = 0.077), u0 = u0, tspan = 1:180) ## Figure 6: Run the model and plot the resulting trajectory. set.seed(123) result <- run(model = model, threads = 1) plot(result) ## Let us elaborate on the previous example and explore the incidence cases per ## day. Add one compartment 'Icum' whose sole purpose is to keep track of how many ## individuals who become infected over time. The right hand side 'I + Icum' of ## the transition 'S -> b*S*I/(S+I+R) -> I + Icum', means that both 'I' and 'Icum' ## are incremented by one each time the transition happens. transitions <- c("S -> b*S*I/(S+I+R) -> I + Icum", "I -> g*I -> R") compartments <- c("S", "I", "Icum", "R") ## Since there are no between-node movements in this example, the stochastic ## process in one node does not affect any other node in the model. It is ## therefore straightforward to run many realizations of this model, simply by ## replicating a node in the initial condition u0, for example, n = 1000 times. n <- 1000 u0 <- data.frame(S = rep(99, n), I = rep(1, n), Icum = rep(0, n), R = rep(0, n)) model <- mparse(transitions, compartments, gdata = c(b = 0.16, g = 0.077), u0 = u0, tspan = 1:150) set.seed(123) result <- run(model = model, threads = 1) ## Post-process the simulated trajectory to compare the incidence cases in the ## first node with the average incidence cases among all realizations. Extract the ## trajectory and calculate successive differences of 'Icum' at each time-point. traj <- trajectory(model = result, compartments = "Icum") cases <- stepfun(result@tspan[-1], diff(c(0, traj$Icum[traj$node == 1]))) avg_cases <- c(0, diff(by(traj, traj$time, function(x) sum(x$Icum)))/n) ## Figure 7: Plot the result as an epidemic curve. plot(cases, main = "", xlab = "Time", ylab = "Number of cases", do.points = FALSE) lines(avg_cases, col = "blue", lwd = 2, lty = 2) ### Section 5.1: Using the model parser to define a new model Subsection: ### Incorporate scheduled events in an mparse model ## To illustrate how models generated using 'mparse' can incorporate scheduled ## events, consider an epidemic in a population consisting of 1600 nodes, for ## example, cattle herds, that are connected to each other by livestock movements. ## Assume an outbreak is detected on day twenty-one after introduction of an ## infection and that we wish to explore how vaccination could limit the outbreak, ## if resources for vaccination can handle 50 herds per day and 80% of the animals ## in each herd. Let us add one compartment 'V to the model, to represent ## vaccinated individuals, so that the model now contains the 'S', 'I', 'Icum', ## 'R' and 'V' compartments. As before, let 'b' denote the transmission rate and ## 'g' the recovery rate. transitions <- c("S -> b*S*I/(S+I+R+V) -> I + Icum", "I -> g*I -> R") compartments <- c("S", "I", "Icum", "R", "V") ## Load the example data for an SIR model in a population of 1600 nodes (cattle ## herds) with its associated scheduled events: births, deaths, and livestock ## movements. Let Icum = 0 and R = 0. u0 <- u0_SIR() u0$Icum <- 0 u0$V <- 0 events <- events_SIR() ## Now generate vaccination events i.e., internal transfer events. Use select = 3 ## and shift = 1 to move animals from the susceptible, infectious and recovered ## compartments to the vaccinated compartment, see the definitions of E and N ## below. Let us start the vaccinations in nodes 1-50 on day twenty-one, and ## continue until all herds are vaccinated on day fifty-two, and use proportion = ## 0.8 to vaccinate 80\% of the animals in each herd. Assume that vaccinated ## individuals become immune and non-infectious immediately. vaccination <- data.frame(event = "intTrans", time = rep(21:52, each = 50), node = 1:1600, dest = 0, n = 0, proportion = 0.8, select = 3, shift = 1) ## To simulate from this model, we have define the select matrix E to handle which ## compartments to sample from when processing scheduled events. The first column ## is for 'enter' events (births); add newborn animals to the susceptible ## compartment S. The second column is for 'exit' events (deaths) and 'external ## transfer' events (livestock movements); sample animals from the S, I, R or V ## compartments. Finally, the third column is for 'internal transfer' events ## (vaccination); sample individuals from the S, I or R compartments. E <- matrix(c(1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0), nrow = 5, ncol = 3, byrow = TRUE, dimnames = list(c("S", "I", "Icum", "R", "V"), c("1", "2", "3"))) ## Define the shift matrix N to process 'internal transfer' events (vaccination); ## move sampled animals from the S compartment four steps forward to the V ## compartment. Similarly, move sampled animals from the I compartment three ## steps forward to the V compartment, and finally, move sampled individuals from ## the R compartment one step forward. N <- matrix(c(4, 3, 0, 1, 0), nrow = 5, ncol = 1, dimnames = list(c("S", "I", "Icum", "R", "V"), c("1"))) ## Additionally, we have to redefine the \code{select} column in the events, ## since the data is from the predefined \code{SIR} model with another $E$ ## matrix. Let us change \code{select} for movements. events$select[events$select == 4] <- 2 ## Create an 'epicurve' function to estimate the average number of new cases per ## day from n = 1000 realizations. To clear infection that was introduced in the ## previous trajectory, animals are first moved to the susceptible compartment. ## Then, one infected individual is introduced into a randomly sampled node from ## the population. Note that we use the 'L' suffix to create an integer value ## rather than a numeric value. Run the model and accumulate 'Icum'. For ## efficiency, we use 'as.is = TRUE', the internal matrix format, to extract ## 'Icum' in every node at each time-point in 'tspan'. epicurve <- function(model, n = 1000) { Icum <- numeric(length(model@tspan)) for (i in seq_len(n)) { ## Move all infected individuals to the susceptible compartment. This is to remove ## infection from the node with one infected individual from the previous ## trajectory. model@u0["S", ] <- model@u0["S", ] + model@u0["I", ] model@u0["I", ] <- 0L ## Sample one node in the population where to introduce infection with one ## infected individual. j <- sample(seq_len(Nn(model)), 1) model@u0["I", j] <- 1L model@u0["S", j] <- model@u0["S", j] - 1L ## Run the model result <- run(model = model) ## Accumulate Icum. traj <- trajectory(model = result, compartments = "Icum", as.is = TRUE) Icum <- Icum + colSums(traj) } stepfun(model@tspan[-1], diff(c(0, Icum/n))) } ## Generate an epicurve with the average number of cases per day for the first ## three hundred days of the epidemic without vaccination. Note that this ## calculation takes some minutes model_no_vac <- mparse(transitions = transitions, compartments = compartments, gdata = c(b = 0.16, g = 0.077), u0 = u0, tspan = 1:300, events = events, E = E, N = N) cases_no_vac <- epicurve(model_no_vac) ## Similarly, generate an epicurve after incorporating the vaccination events. ## Note that this calculation takes some minutes model_vac <- mparse(transitions = transitions, compartments = compartments, gdata = c(b = 0.16, g = 0.077), u0 = u0, tspan = 1:300, events = rbind(events, vaccination), E = E, N = N) cases_vac <- epicurve(model_vac) ## Figure 8: As expected, the number of cases decrease rapidly after vaccination, ## while the outbreak is ongoing for a longer time in the unvaccinated population plot(cases_no_vac, main = "", xlim = c(0, 300), xlab = "Time", ylab = "Number of cases", do.points = FALSE) lines(cases_vac, col = "blue", do.points = FALSE, lty = 2) abline(v = 21, col = "red", lty = 3) legend("topright", c("No vaccination", "Vaccination"), col = c("black", "blue"), lty = 1:2) ### Section 5.2: Use the SimInf framework from another package Create an R add-on ### package that uses SimInf by linking to its core solver. The SimInf package ### includes the 'package_skeleton' method to facilitate this: it creates ### directories, saves R and C code files to appropriate places, and creates ### skeleton help files. ## Consider we wish to create a new add-on package \pkg{PredatorPrey} based on ## the Rosenzweig-MacArthur predator-prey model demonstrated in 'GillespieSSA: ## Implementing the Gillespie Stochastic Simulation Algorithm in R' ## (https://www.jstatsoft.org/article/view/v025i12). The model has a ## density-dependent growth in the prey and and a nonlinear Type-2 functional ## response in the predator. Let 'R' and 'F' denote the number of prey and ## predators, respectively. The model consists of five transitions: i) prey ## birth, ii) prey death due to non-predatory events, iii) prey death due to ## predation, iv) predator birth, and v) predator death. Let 'bR', 'dR, 'bF', and ## 'dF' denote the per capita birth and death rate of the prey and predator, ## respectively. Furthermore, let 'K' be the carrying capacity of the prey, ## 'alpha' the predation efficiency, and 'w' the degree of predator saturation. ## Using parameter values from 'GillespieSSA: Implementing the Gillespie ## Stochastic Simulation Algorithm in R', we define the model as transitions <- c("@ -> bR*R -> R", "R -> (dR+(bR-dR)*R/K)*R -> @", "R -> alpha/(1+w*R)*R*F -> @", "@ -> bF*alpha/(1+w*R)*R*F -> F", "F -> dF*F -> @") compartments <- c("R", "F") parameters <- c(bR = 2, bF = 2, dR = 1, K = 1000, alpha = 0.007, w = 0.0035, dF = 2) ## Assume the initial population u0 consists of R=1000 prey and F=100 predators ## and we are interested in simulating n=1000 replicates over 100 days. Since ## there are no between-node movements in this example, we can generate replicates ## of the model simply by starting with n identical nodes. n <- 1000 u0 <- data.frame(R = rep(1000, n), F = rep(100, n)) model <- mparse(transitions = transitions, compartments = compartments, gdata = parameters, u0 = u0, tspan = 1:100) ## Now instead of running the model to generate data, let us use it to create an R ## add-on package. Save the files for a package skeleton to a folder. Here we use ## a temporary folder. path <- tempdir() package_skeleton(model = model, name = "PredatorPrey", path = path) ## Install and load the package. We let 'repos = NULL' to install from local files ## and use 'type = 'source'' to compile the files. install.packages(file.path(path, "PredatorPrey"), repos = NULL, type = "source") ## If the installation was successful, the newly installed package PredatorPrey ## can be loaded in R with the following command. library("PredatorPrey") ## Now create a model and run it to generate data. model <- PredatorPrey(u0 = u0, tspan = 1:100, gdata = parameters) set.seed(123) result <- run(model, threads = 1) ## Because the 'PredatorPrey' model contains the 'SimInf_model' class, it can make ## use of utility functions provided in the SimInf package, for example, 'show'. result ## Figure 9: Use the trajectory method to plot the phase plane from 1000 ## realizations. Additionally extract the simulated data from the fourth node (use ## node = 4) to illustrate stochastic extinction of the predators. opar <- par(mfrow = c(1, 2)) plot(R ~ F, trajectory(model = result), cex = 0.7, pch = 20, xlab = "Number of predators", ylab = "Number of prey", col = rgb(0, 0, 0, alpha = 0.2)) plot(R ~ time, trajectory(model = result, node = 4), type = "l", xlab = "Time", ylab = "N") lines(F ~ time, trajectory(model = result, node = 4), type = "l", lty = 2) legend("right", c("Prey", "Predator"), lty = 1:2) par(opar) ### Section 6: Benchmark Ten replicates were performed and average run-time was ### estimated to generate 1,000 realizations of an SIR model with parameters β = ### 0.16 and γ = 0.077 and initial conditions S = 1000, I = 10 and R = 0. library("microbenchmark") ## SimInf u0 <- data.frame(S = rep(1000, 1000), I = rep(10, 1000), R = rep(0, 1000)) tspan <- c(1, 150) model <- SIR(u0 = u0, tspan = tspan, beta = 0.16, gamma = 0.077) res1 <- microbenchmark(run(model, threads = 1, solver = "ssm"), run(model, threads = 2, solver = "ssm"), run(model, threads = 4, solver = "ssm"), times = 10, unit = "ms") res1 ## adaptivetau library("adaptivetau") init.values <- c(S = 1000, I = 10, R = 0) transitions <- list(c(S = -1, I = +1), c(I = -1, R = +1)) params <- list(beta = 0.16, gamma = 0.077) SIRrateF <- function(x, p, t) { c(x["S"] * p$beta[1] * x["I"]/(x["S"] + x["I"] + x["R"]), p$gamma * x["I"]) } res2 <- microbenchmark(replicate(1000, ssa.exact(init.values = init.values, transitions, SIRrateF, params = params, tf = 150)), replicate(1000, ssa.adaptivetau(init.values = init.values, transitions, SIRrateF, params = params, tf = 150)), times = 10L, unit = "ms") res2 ## GillespieSSA Note: Based on the average runtimes given in Table 4, this last ## part of the replication script takes about 11-12 minutes. library("GillespieSSA") params <- c(beta=0.16, gamma=0.077) x0 <- c(S = 1000, I = 10, R = 0) a <- c("beta*S*I/(S+I+R)","gamma*I") nu <- matrix(c(-1,0,+1,-1,0,+1),nrow=3,byrow=TRUE) res3 <- microbenchmark(replicate(1000, ssa(x0,a,nu,params,tf=150)), replicate(1000, ssa(x0,a,nu,params,tf=150, method = "OTL")), times = 10L, unit = "ms") res3 ## Collect information about the current R session. sessionInfo()