##---------------------------------------------------------
## 00 --- functions that will be needed for Section 4 (line ~500)
##---------------------------------------------------------

# @param b_dat A list == outputs to births() below.
# @param quantiles the quantiles of the birth simulations you wish to return
# @param names Simplified names for the output
birth_quantiles <- function(b_dat, 
                            quantiles = c(.1, .25, .5, .75, .9),
                            nms = c("asian", "black_aa", "hispanic", "nat_amer", "pac_isl", "two_more", "white", "total")) {
  
  if (length(quantiles) > 1) {
    b_quants <- lapply(b_dat, function(l, quantiles, nms) {
      d <- data.frame(base::apply(l$birth_simulation, 2, quantile, quantiles))
      names(d) <- nms
      return(d)
    }, quantiles = quantiles, nms = nms)
  } else if (length(quantiles) == 1) { # only 1 quantile supplied, reduce to data.frame
    b_quants <- lapply(b_dat, function(l, quantiles) {
      base::apply(l$birth_simulation, 2, quantile, quantiles)
    }, quantiles = quantiles)
    
    b_quants <- data.frame(do.call("rbind", b_quants))
    rownames(b_quants) <- names(b_dat)
    names(b_quants) <- nms
  }
  
  return(b_quants)
}

# @param b_dat A list == outputs to deaths() below.
# @param quantiles the quantiles of the death simulations you wish to return
# @param names Simplified names for the output
death_quantiles <- function(b_dat, 
                            quantiles = c(.1, .25, .5, .75, .9),
                            nms = c("0_1", "1_4", "5_9", "10_14", "15_17", "18_24",
                                    "25_29", "30_34", "35_39", "40_44", "45_49", "50_54", "55_59",
                                    "60_64", "65_69", "70_74", "75_79", "80_84", "85up", "total")) {
  
  if (length(quantiles) > 1) {
    b_quants <- lapply(b_dat, function(l, quantiles, nms) {
      d <- data.frame(base::apply(l$death_simulation, 2, quantile, quantiles))
      names(d) <- nms
      return(d)
    }, quantiles = quantiles, nms = nms)
  } else if (length(quantiles) == 1) { # only 1 quantile supplied, reduce to data.frame
    b_quants <- lapply(b_dat, function(l, quantiles) {
      base::apply(l$death_simulation, 2, quantile, quantiles)
    }, quantiles = quantiles)
    
    b_quants <- data.frame(do.call("rbind", b_quants))
    rownames(b_quants) <- names(b_dat)
    names(b_quants) <- nms
  }
  
  return(b_quants)
}


# @title simulate births for a small area population.
# @description Simulate the births for a small area population based on the female population in the 
# small area. Population is input as population by race and age. Simulation of births controls
# for race, age, marital status, and multiple birth rate. Menopause is assumed to occur at age 
# 60, which is conservatively high (US avg is 51; typical range is 48-55).
# @param df_list A named list of data-frames contaning population data for a single small area where 
# each element of the list has populations of a different race (different fertility rates)
# @param fr_vec_list A list of named vectors, with each vector containing fertility rate data for
# women of a specific race (corresponding to the elemnts of df_list). 
# @param pct_births_unmar A named vector giving the percentage of births to unmarried women in a set
# of age brackets. 
# @param mult_births_list A list of named vectors, with each vector providing the multiple birth
# rate for each race as above.
# @param seed A seed for reproducibility.
# @param nsim An integer for the number of simulations to run. Defaults to \code{10000L}
# @return ...
births <- function(df_list, fr_vec_list, pct_births_unmar, mult_births_list,
                   seed = sample.int(10000L, size = 1), nsim = 10000L) {
  if (is.null(names(df_list))) stop("df_list must be named -- names should be racial groups")
  
  
  pct_births_unmar <- replicate(n = length(df_list), pct_births_unmar, simplify = FALSE)
  
  set.seed(seed)
  mc <- match.call()
  
  # output: list with one element per racial group. Each element is a vector of births per simulation
  births <- mapply(births_race, df = df_list, fr_vec = fr_vec_list, pct_births_unmar = pct_births_unmar,
                   mult_births = mult_births_list, nsim = nsim, SIMPLIFY = FALSE)
  # bind outputs together and aggregate
  births <- data.frame(do.call("cbind", births))
  names(births) <- c(names(df_list)); rownames(births) <- 1:nsim
  births$total <- base::apply(births, 1, sum, na.rm = TRUE)
  
  return(list(birth_simulation = births, nsim = nsim, seed = seed, call = mc))
}

# @title Simulate births for a small area population of females of a single race
# @description
# @param df A data-frame contaning data on the population of a single race in a single small area
# @param fr_vec A named vector of fertility rates of women by age
# @param pct_births_unmar A named vector giving the percentage of births to unmarried women in a set
# of age brackets. 
# @param mult_births A named vector providing the multiple birth rate for the specified race of women.
# @param nsim An integer for the number of simulations to run. Defaults to \code{10000L}
# @return ...
births_race <- function(df, fr_vec, pct_births_unmar, mult_births, nsim = 10000L) {
  # 01 setup 
  age_ht <- data.frame(dat = c("under15", "15_17", "18_24", "25_29", "30_34", "35_39", "40_44", "45_49", 
                               "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", "85up"),  
                       mar = c("u15", "15_17", "20_24", "25_29", "30_34", "35_39", "40_44", 
                               rep("45up", 3), rep(NA, 6)), stringsAsFactors = FALSE) # assume menopause at 60
  # subset to women only, breakout by age
  df_f <- df[df$gender == "Female", ]
  df_f <- synthACS:::split_df(df_f, "age")
  
  # 02 apply simulation to each subset
  # "births" is a matrix with rows = each simulation; columns = for each age group
  births <- do.call("cbind", lapply(df_f, sim_births, ht = age_ht, fr_vec = fr_vec, pct_births_unmar = pct_births_unmar,
                                    mult_births = mult_births, nsim = nsim))
  # aggregate age groups per simulation
  births <- base::apply(births, 1, sum, na.rm = TRUE) # output: vector of births per simulation
  
  return(births)
}


# @title Simulate births for a given age-cohort
# @description Simulate births for a given age-cohort accounting for twin/triplet births.
# @param df A data.frame containing population data on a single age group of females 
# from a single small area. 
# @param ht A symbol table (hash-table) mapping values of the 'age' variable in \code{df} to the 
# string patterns used in \code{fr_vec} below.
# @param fr_vec A named vector of fertility rates of women by age
# @param pct_births_unmar A named vector giving the percentage of births to unmarried women in a set
# of age brackets. 
# @param mult_births A named vector providing the multiple birth rate for the specified race of women.
# @param nsim An integer for the number of simulations to run. Defaults to \code{10000L}
# @return
sim_births <- function(df, ht, fr_vec, pct_births_unmar, mult_births, nsim = 10000L) {
  if (nrow(df) < 1L) return(0)
  
  # 01. Setup:
  # extract fertility rate for given age group
  # then convert overall rate by age to rates for married vs unmarried women in age bracket.
  cond_var_comp <- ht[, 2][which(df[, age][1] == ht[, 1])]
  if (is.na(cond_var_comp)) return(0) # if NA, memopause, no babies
  
  f_rate <- fr_vec[which(grepl(cond_var_comp, names(fr_vec)))]
  unmar_pct_b <- pct_births_unmar[which(grepl(cond_var_comp, names(pct_births_unmar)))] / 100
  tb_mar <- table(df$marital_status)
  unmar_pct <- 1 - (tb_mar[which(grepl("married", names(tb_mar)))] / sum(tb_mar))
  
  # fertility rates for age bracket by marital status
  fr_unmar <- f_rate * unmar_pct_b / unmar_pct
  fr_mar <- f_rate * (1 - unmar_pct_b) / (1 - unmar_pct)
  
  # 02. Simulate nsim times:
  # get cohorts of married/unmarried women, then apply birth simulation to each group.
  num_mar <- tb_mar[which(grepl("married", names(tb_mar)))]
  num_unmar <- sum(tb_mar[-which(grepl("married", names(tb_mar)))])
  
  birth_sim <- vector("numeric", length = nsim)
  for (i in 1:nsim) {
    mar_births <- mult_births(num_mar, fr_rate = fr_mar, mult_births = mult_births)
    unmar_births <- mult_births(num_unmar, fr_rate = fr_unmar, mult_births = mult_births)
    birth_sim[i] <- sum(mar_births, unmar_births)
  }
  
  # return
  return(birth_sim)
}

# Given a number of women (n_obs) simulate the number of births as well as the occurance
# of twins and triplets
# @param fr_rate = fertility rate for the population to simulate (married/unmarried) - births / 1000 women
# @param mult_births = rate of multiple births per 1000 live births
# @return An integer number of births
mult_births <- function(n_obs, fr_rate, mult_births) {
  # simulate birth events
  mult_births <- mult_births / 1000
  births <- sum(ifelse(runif(n_obs) < fr_rate / 1000, 1, 0))
  # simulate birth events resulting in a twin/triplet
  m2 <- runif(births)
  # generate multiple-birth babies
  twins_trips <- sum(ifelse(m2 < mult_births[2], 2, # two additional (triplets)
                            ifelse(m2 > mult_births[2] & m2 < mult_births[1], 1, 0))) # one additional (twins)
  return(sum(births, twins_trips)) # total number of infants
}


#----------------------------------------------------------
### MORTALITY FUNCTIONS
#----------------------------------------------------------

# @title simulate deaths for a small area population.
# @description Simulate the deaths for a small area population based on the population in the 
# small area. Simulation of deaths controls for race, age, and gender.
# @param df_list A named list of data-frames contaning population data for a single small area where 
# each element of the list has populations of a different race (different death rates)
# @param dr_mat A list of data.frame's containing death rate data for individuals of a specific race by age/gender
# @param u15_vec_list A list of named vectors, each containing the breakout of individuals under
# age 15 by gender and age brackete (0-4, 5-9, 10-14).
# @param seed A seed for reproducibility.
# @param nsim An integer for the number of simulations to run. Defaults to \code{10000L}
# @return ...
deaths <- function(df_list, dr_mat, u15_vec_list,
                   seed = sample.int(10000L, size = 1), nsim = 10000L) {
  
  if (is.null(names(df_list))) stop("df_list must be named -- names should be racial groups")
  if (!all.equal(names(df_list), names(u15_vec_list)) |
      !all.equal(names(df_list), names(dr_mat))) {
    stop("racial groups must correspond in df_list, dr_mat, and u15_vec_list")
  }
  
  # 01. Check which races have people! 
  # then subset to those
  which_pop <- which(unlist(lapply(df_list, nrow)) > 0)
  df_list <- df_list[which_pop]
  dr_mat  <- dr_mat[which_pop]
  u15_vec_list <- u15_vec_list[which_pop]
  
  set.seed(seed)
  mc <- match.call()
  
  # output: list with one element per racial group. Each element is a vector of deaths per simulation
  deaths <- mapply(deaths_race, df = df_list, dr_mat = dr_mat,  u15_vec = u15_vec_list,
                   nsim = nsim, SIMPLIFY = FALSE)
  # bind race outputs together 
  # output: matrix of deaths-- rows = per simulation, columns = age groups
  deaths <- Reduce("+", deaths)
  deaths <- cbind(deaths, base::apply(deaths, 1, sum, na.rm = TRUE))
  colnames(deaths)[ncol(deaths)] <- "total"
  
  return(list(death_simulation = deaths, nsim = nsim, seed = seed, call = mc))
}


# @title Simulate deaths for a small area population of a single race
# @description
# @param df A data-frame contaning data on the population of a single race in a single small area
# @param dr_mat A data.frame of death rates of men and women by age -- of a single race
# @param u15_vec A named vector of population counts for the population under 15
# @param nsim An integer for the number of simulations to run. Defaults to \code{10000L}
# @return ...
deaths_race <- function(df, dr_mat, u15_vec, nsim = 10000L) {
  # 01 setup 
  age_ht <- data.frame(dat = c("15_17", "18_24", "25_29", "30_34", "35_39", "40_44", "45_49", 
                               "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", "85up"),  
                       dr = c("15_19", "20_24", "25_29", "30_34", "35_39", "40_44", "45_49", 
                              "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", 
                              "85 and over"), stringsAsFactors = FALSE) 
  # split by gender and age
  df <- synthACS:::split_df(df, "gender")
  df_m <- synthACS:::split_df(df[[1]], "age")
  df_f <- synthACS:::split_df(df[[2]], "age")
  rm(df)
  
  # 02 apply simulation to each subset
  # first check for >0 people.
  pop_0m <- which(unlist(lapply(df_m, nrow)) > 0)
  pop_0f <- which(unlist(lapply(df_f, nrow)) > 0)
  
  # pre-allocate output matrices
  d_f <- d_m <- matrix(0, nrow = nsim, ncol = 19, 
                       dimnames = list(1:nsim, c("0_1", "1_4", "5_9", "10_14", "15_17",
                                                 "18_24", "25_29", "30_34", "35_39", "40_44",
                                                 "45_49", "50_54", "55_59", "60_64", "65_69",
                                                 "70_74", "75_79", "80_84", "85up")))
  
  # simulate by age group
  # insert outputs into d_m, d_f
  if (length(pop_0m) > 0) {
    deaths_m <- lapply(df_m[pop_0m], sim_deaths, ht = age_ht, dr_mat = dr_mat, u15_vec = u15_vec, nsim = nsim)  
    for (j in 1:length(deaths_m)) {
      if (names(deaths_m)[j] == "under15") { d_m[, 1:4] <- deaths_m[[j]] }
      else {
        idx <- pop_0m[which(names(deaths_m)[j] == names(pop_0m))] + 3
        d_m[, idx] <- deaths_m[[j]]
      }
    }
  }
  if (length(pop_0f) > 0) {
    deaths_f <- lapply(df_f[pop_0f], sim_deaths, ht = age_ht, dr_mat = dr_mat, u15_vec = u15_vec, nsim = nsim)
    for (j in 1:length(deaths_f)) {
      if (names(deaths_f)[j] == "under15") { d_f[, 1:4] <- deaths_f[[j]] }
      else {
        idx <- pop_0f[which(names(deaths_f)[j] == names(pop_0f))] + 3
        d_f[, idx] <- deaths_f[[j]]
      }
    }
  }
  
  # combine male and female deaths
  # output: matrix of deaths-- rows = per simulation, columns = age groups
  return(ifelse(is.na(d_m), 0, d_m) + ifelse(is.na(d_f), 0, d_f)) # slightly faster than Reduce("+", ...)
}


# @title Simulate deaths for a given cohort 
# @description Simulate deaths for a given cohort defined by gender, age, and race. 
# @param df A data.frame containing population data on a single cohort (gender, age, race)
# from a single small area. 
# @param ht A symbol table (hash-table) mapping values of the 'age' variable in \code{df} to the 
# string patterns used in \code{dr_vec} below.
# @param dr_mat A data.frame of death rates of individuals by age (for a specific race)
# @param nsim An integer for the number of simulations to run. Defaults to \code{10000L}
# @return
sim_deaths <- function(df, ht, dr_mat, u15_vec, nsim = 10000L) {
  if (nrow(df) < 1L) return(0)
  # overview:
  # 1. extract death rate for age and gender group in df
  # 2. run simulations for each individual in the df
  # 3. repeat (2) nsim times
  
  # (A) do U15 separately -- need to split out ages
  if (df$age[1] == "under15") { # allocation of ages, death rates by age groups
    if (df$gender[1] == "Male") {
      u15_vec <- u15_vec[1:3] / sum(u15_vec[1:3])
      d_rate  <- dr_mat[dr_mat$gender == "male" & dr_mat$age %in% c("Under 1", "1_4", "5_9", "10_14"), "death_rate", drop = TRUE]
    } else {
      u15_vec <- u15_vec[4:6] / sum(u15_vec[4:6])
      d_rate  <- dr_mat[dr_mat$gender == "female" & dr_mat$age %in% c("Under 1", "1_4", "5_9", "10_14"), "death_rate", drop = TRUE]
    }
    # split out age groups
    # map: 1:4 == c("0_1", "1_4", "5_9", "10_14")
    u15_ages <- sample.int(4L, size = nrow(df), replace =TRUE, 
                           prob = c(u15_vec[1] * .2, u15_vec[1] * .8, u15_vec[2], u15_vec[3])) # proportionally allocate <1, 1-4
    n_pop <- c(sum(u15_ages == 1L),
               sum(u15_ages == 2L),
               sum(u15_ages == 3L),
               sum(u15_ages == 4L))
    
    # 02./03. simulate
    death_sim <- matrix(0, nrow = nsim, ncol = 4, dimnames = list(1:nsim, c("0_1", "1_4", "5_9", "10_14")))
    for (i in 1:nsim) {
      death_sim[i, ] <- unlist(mapply(function(pop, dr) {
        sum(runif(n =pop) < dr)
      }, pop = n_pop, dr = d_rate / 100000)) # death rate = deaths per 100k people
    }
  } else { # (B) Ages not U15
    # extract death rate for given age & gender group
    age_grp <- ht[, 2][which(df[, age][1] == ht[, 1])]
    gender <- tolower(as.character(df$gender[1]))
    d_rate <- dr_mat[dr_mat$age == age_grp & dr_mat$gender == gender, "death_rate", drop = TRUE]
    
    # 02./03. Simulate nsim times:
    death_sim <- vector("numeric", length = nsim)
    for (i in 1:nsim) {
      death_sim[i] <- sum(runif(n = nrow(df)) < d_rate / 100000) # death rate = deaths per 100k people
    }
  }
  # return
  return(death_sim)
}

##---------------------------------------------------------
## YOU WILL NEED THE FOLLOWING PACKAGES TO FULLY EXECUTE THIS CODE.
##---------------------------------------------------------
req_pkgs <- c("tigris", "rgdal", "ggplot2", "stringr", "maptools", "scales", "RColorBrewer",
              "gpclib", "sf", "acs", "retry", "data.table")

needed_pkgs <- req_pkgs[!(req_pkgs %in% installed.packages()[, "Package"])]
if (length(needed_pkgs) > 0) {install.packages(needed_pkgs)}

##---------------------------------------------------------
## SECTION 3 - USER GUIDE 
##---------------------------------------------------------

# http://api.census.gov/data/key_signup.html
acs::api.key.install(key = "...<redacted>...")

library("data.table")
library("acs")
library("retry")
library("synthACS")

## PULL ACS DATA
ca_geo <- geo.make(state = "CA", county = "*")
ca_bachelors <- pull_bachelors(endyear = 2014, span = 5, geography = ca_geo)

### subsection: macroACS class
##----------------------
methods(class = "macroACS")

ca_county_pop <- pull_population(endyear = 2014, span = 5, geography = ca_geo)
get_span(ca_county_pop)
get_endyear(ca_county_pop)
get_geography(ca_county_pop)
get_dataset_names(ca_county_pop)

fetch_data(ca_county_pop, geography = c("Los Ang", "San Diego"), dataset = "estimate",
           choice = "median_age_by_sex")
fetch_data(ca_county_pop, geography = "*", dataset = "st.err", choice = "median_age_by_sex")

### subsection: synthetic micro datasets
##----------------------
## GENERATE SYNTHETIC DATSETS
ca_dat_SMSM <- pull_synth_data(2014, 5, ca_geo)
split_ca_dat <- split(ca_dat_SMSM, n_splits = 10)

library("parallel")
ca_synthetic <- derive_synth_datasets(ca_dat_SMSM, leave_cores = 0)


## marginalizing attributes
ca_marg <- marginalize_attr(ca_synthetic, varlist = c("race", "ind_income", "nativity", 
             "geog_mobility"), marginalize_out =  TRUE)


## adding attributes: transit/work data
ca_transit <- pull_transit_work(2014, 5, ca_geo)
get_dataset_names(ca_transit)

# setup / inputs
transit_work <- gen_attr_vectors(ca_transit, "mode_transit_by_age")
mode_levels <- c("no_work", "drove_alone", "carpool", "transit", "walk", "other", "work_at_home")

round(transit_work[[1]], 0) # round(transit_work[[1]][1:49], 0) -- displayed output

# augment attribute vector
transit_work <- lapply(transit_work, function(x, lev) {
  # augment vectors
  unemp <- data.frame(emp_status = "unemployed", age = NA, pct = 1, level = "no_work")
  NI_LF <- data.frame(emp_status = "not_in_labor_force", age = NA, pct = 1, level = "no_work")
  # mung X
  x <- x[!grepl(paste(c("_all", "pct_"), collapse = "|"), names(x))]
  emp_status <- rep("employed", length = length(x))
  age <- substr(names(x), nchar(names(x))-4, nchar(names(x)))
  age <- ifelse(substr(age, 1, 1) == "_", substr(age, 2, nchar(age)), age)
  age <- ifelse(age == "16_19", "15_17",
         ifelse(age == "20_24", "18_24", age))
  
  levels <- substr(names(x), 1, nchar(names(x))-5)
  levels <- ifelse(substr(levels, nchar(levels), nchar(levels)) == "_", substr(levels, 1, nchar(levels)-1), levels)
  
  # replication for ages
  employed <- data.frame(emp_status = emp_status, age = age, pct = x, level = levels, 
                         stringsAsFactors = FALSE)
  rownames(employed) <- NULL;
  employed <- split(employed, employed$age)  
  employed <- list(employed[['15_17']], employed[['18_24']], 
                   replicate(n = 4, employed[['25_44']], simplify =FALSE),
                   replicate(n = 2, employed[['45_54']], simplify =FALSE),
                   employed[['55_59']], employed[['60_64']],
                   replicate(n = 5, employed[['65up']], simplify =FALSE))

  employed[[3]][[1]]$age <- "25_29"
  employed[[3]][[2]]$age <- "30_34"
  employed[[3]][[3]]$age <- "35_39"
  employed[[3]][[4]]$age <- "40_44"
  employed[[4]][[1]]$age <- "45_49"
  employed[[4]][[2]]$age <- "50_54"
  employed[[7]][[1]]$age <- "65_69"
  employed[[7]][[2]]$age <- "70_74"
  employed[[7]][[3]]$age <- "75_79"
  employed[[7]][[4]]$age <- "80_84"
  employed[[7]][[5]]$age <- "85up"
  employed[[3]] <- do.call("rbind", employed[[3]])
  employed[[4]] <- do.call("rbind", employed[[4]])
  employed[[7]] <- do.call("rbind", employed[[7]])
  
  
  return(data.table::rbindlist(list(unemp, NI_LF, data.table::rbindlist(employed))))
}, lev = mode_levels)


ca_and_transit <- all_geog_synthetic_new_attribute(ca_marg, 
                                                   attr_name = "transit_work",
                                                   conditional_vars = c("emp_status", "age"),
                                                   st_list = transit_work) 


### subsection: synthACS class
##----------------------
## GENERATE CONSTRAINTS; CREATE CONSTRAINT LIST; OPTIMIZE MICRODATA

methods(class = "synthACS")

# A. do optimization with split/combine
n_splits <- 10
split_ca_dat <- split(ca_dat_SMSM, n_splits = n_splits)
tmp_opts <- vector("list", length= n_splits)

for (i in 1:n_splits) {
    # derive synthetics
    tmp_synth <- derive_synth_datasets(split_ca_dat[[i]], leave_cores = 0)
    
    # create constraints for simulated annealing
    a <- all_geog_constraint_age(tmp_synth, method = "macro.table")
    g <- all_geog_constraint_gender(tmp_synth, method = "macro.table")
    m <- all_geog_constraint_marital_status(tmp_synth, method = "macro.table")
    r <- all_geog_constraint_race(tmp_synth, method = "synthetic")
    e <- all_geog_constraint_edu(tmp_synth, method = "synthetic")
    
    cll <- all_geogs_add_constraint(attr_name = "age", attr_total_list = a, 
                                    macro_micro = tmp_synth)
    cll <- all_geogs_add_constraint(attr_name = "gender", attr_total_list = g, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "marital_status", attr_total_list = m, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "race", attr_total_list = r, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "edu_attain", attr_total_list = e, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    
    cll <- all_geogs_add_constraint(attr_name = "age", attr_total_list = a, 
                                    macro_micro = tmp_synth)
    cll <- all_geogs_add_constraint(attr_name = "gender", attr_total_list = g, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "marital_status", attr_total_list = m, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "race", attr_total_list = r, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "edu_attain", attr_total_list = e, 
                                    macro_micro = tmp_synth, constraint_list_list = cll)
    
    # anneal
    tmp_opts[[i]] <- all_geog_optimize_microdata(
            tmp_synth, seed = 6550L, verbose = TRUE,
            constraint_list_list = cll, p_accept = 0.4, max_iter = 10000L
        )
}

# create the string needed for combine_smsm(). 
paste0("tmp_opts[[", 1:n_splits, "]]", sep= ", ", collapse= "")

# B. recombine: copy and paste the resulting string, excluding the final trailing comma
opt_ca <- combine_smsm(tmp_opts[[1]], tmp_opts[[2]], tmp_opts[[3]], tmp_opts[[4]], tmp_opts[[5]], 
                       tmp_opts[[6]], tmp_opts[[7]], tmp_opts[[8]], tmp_opts[[9]], tmp_opts[[10]])

rm(a, g, m, r, e, tmp_synth, tmp_opts)

### subsection: examining outputs
##----------------------
LA_fit <- get_best_fit(opt_ca, geography = "Los Ang")
LA_tae <- get_final_tae(opt_ca, geography = "Los Ang")

plot_TAEpath(opt_ca, geography = "Santa Cl")
plot_TAEpath(opt_ca, geography = "Yuba")

summary(opt_ca)

##---------------------------------------------------------
## SECTION 4 - SIMULATIONS OF FERTILITY AND MORTALITY
##---------------------------------------------------------

# Author: Alex Whitworth
# Date: April - 2016; updated June-2016
# Desc: Example simulation of spatial microsimulation. 
#   Simulation of births for all census tracts w/in LA county
# References: 
#   Hamilton, Brady E. et al. "Births: Final Data for 2014." 
#    National Vital Statistical Reports Vol 64 No 12.
#   Xu JQ, Murphy S, Kochanek KD. "Deaths: Final data for 2013." 
#    National Vital Statistics Reports Vol 64 No 2.

#----------------------------------------------------------
require("tigris")
require("rgdal")
require("ggplot2")
require("stringr")
require("maptools")
require("scales")
require("RColorBrewer")
require("gpclib")
require("sf")
# preferred by maptools, but not required: require(rgeos)
# Warning (maptools v0.9-5): 
#   support for gpclib will be withdrawn from maptools at the next major release
maptools::gpclibPermit() 

#----------------------------------------------------------
# 01. Build synthetic data
#----------------------------------------------------------
library("synthACS")
library("parallel")

la_geo <- geo.make(state = "CA", county = "Los Angeles", tract = "*")
la_dat <- pull_synth_data(2014, 5, la_geo)

# remove tracts w/ pop < 120
la_ind <- which(unlist(la_dat$estimates$age_by_sex[, 1]) < 120)
la_dat$estimates <- lapply(la_dat$estimates, function(l, ind) {l[-ind, ]}, ind = la_ind)
la_dat$standard_error <- lapply(la_dat$standard_error, function(l, ind) {l[-ind, ]}, ind = la_ind)
la_dat$geography <- la_dat$geography[-la_ind, ]
rm(la_ind)

# B. do optimization with split/combine
n_splits <- 75
split_la_dat <- synthACS::split(la_dat, n_splits = n_splits)
tmp_opts <- vector("list", length= n_splits)

for (i in 1:n_splits) {
    la_syn <- derive_synth_datasets(split_la_dat[[i]], leave_cores = 0)
    
    # 02. build constraint lists
    a <- all_geog_constraint_age(la_syn, method = "macro.table")
    g <- all_geog_constraint_gender(la_syn, method = "macro.table")
    m <- all_geog_constraint_marital_status(la_syn, method = "macro.table")
    r <- all_geog_constraint_race(la_syn, method = "synthetic")
    e <- all_geog_constraint_edu(la_syn, method = "synthetic")
    
    cll <- all_geogs_add_constraint(attr_name = "age", attr_total_list = a, macro_micro = la_syn)
    cll <- all_geogs_add_constraint(attr_name = "gender", attr_total_list = g, macro_micro = la_syn,
                                    constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "marital_status", attr_total_list = m, macro_micro = la_syn, 
                                    constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "race", attr_total_list = r,  macro_micro = la_syn, 
                                    constraint_list_list = cll)
    cll <- all_geogs_add_constraint(attr_name = "edu_attain", attr_total_list = e, macro_micro = la_syn, 
                                    constraint_list_list = cll)
    
    tmp_opts[[i]] <- all_geog_optimize_microdata(la_syn, prob_name = "p", constraint_list_list = cll,
                                                 seed = 12389L, p_accept = 0.4, max_iter = 10000L)
}

# create the string needed for combine_smsm(). 
paste0("tmp_opts[[", 1:n_splits, "]]", sep= ", ", collapse= "")

# copy and paste the resulting string, excluding the final trailing comma
opt_la <- combine_smsm(tmp_opts[[1]], tmp_opts[[2]], tmp_opts[[3]], tmp_opts[[4]], tmp_opts[[5]], 
                       tmp_opts[[6]], tmp_opts[[7]], tmp_opts[[8]], tmp_opts[[9]], tmp_opts[[10]], 
                       tmp_opts[[11]], tmp_opts[[12]], tmp_opts[[13]], tmp_opts[[14]], tmp_opts[[15]], 
                       tmp_opts[[16]], tmp_opts[[17]], tmp_opts[[18]], tmp_opts[[19]], tmp_opts[[20]], 
                       tmp_opts[[21]], tmp_opts[[22]], tmp_opts[[23]], tmp_opts[[24]], tmp_opts[[25]], 
                       tmp_opts[[26]], tmp_opts[[27]], tmp_opts[[28]], tmp_opts[[29]], tmp_opts[[30]], 
                       tmp_opts[[31]], tmp_opts[[32]], tmp_opts[[33]], tmp_opts[[34]], tmp_opts[[35]], 
                       tmp_opts[[36]], tmp_opts[[37]], tmp_opts[[38]], tmp_opts[[39]], tmp_opts[[40]], 
                       tmp_opts[[41]], tmp_opts[[42]], tmp_opts[[43]], tmp_opts[[44]], tmp_opts[[45]], 
                       tmp_opts[[46]], tmp_opts[[47]], tmp_opts[[48]], tmp_opts[[49]], tmp_opts[[50]], 
                       tmp_opts[[51]], tmp_opts[[52]], tmp_opts[[53]], tmp_opts[[54]], tmp_opts[[55]],
                       tmp_opts[[56]], tmp_opts[[57]], tmp_opts[[58]], tmp_opts[[59]], tmp_opts[[60]], 
                       tmp_opts[[61]], tmp_opts[[62]], tmp_opts[[63]], tmp_opts[[64]], tmp_opts[[65]], 
                       tmp_opts[[66]], tmp_opts[[67]], tmp_opts[[68]], tmp_opts[[69]], tmp_opts[[70]], 
                       tmp_opts[[71]], tmp_opts[[72]], tmp_opts[[73]], tmp_opts[[74]], tmp_opts[[75]])

rm(la_syn, tmp_opts, split_la_dat)
rm(a, g, m, r, e, cll)

#----------------------------------------------------------
# SIMULATIONS
# 04. simulate births
#----------------------------------------------------------
# (A) input data to simulation -----------
# Tables 15, 16  from (Hamilton et al; 2014)
pct_births_unmar <- c(u15 = 99.4, "15_17" = 96.6, "18_19" = 87.0, "20_24" = 65.7, "25_29" = 36.7, "30_34" = 22.5,
                      "35_39" = 21.6, "40up" = 24.3)

# Tables 4, 7 from (Hamilton et al; 2014)
fr_list <- list(asian = c(u15 = 0.1, "15_17" = 3.3, "18_19" = 13.9, "20_24" = 37.5, "25_29" = 90.0, "30_34" = 121.3,
                         "35_39" = 68.9, "40_44" = 16.1, "45up" = 1.5), 
                black_aa = c(u15 = 0.6, "15_17" = 16.7, "18_19" = 61.9, "20_24" = 102.6, "25_29" = 103.1, "30_34" = 79.4,
                            "35_39" = 42.6, "40_44" = 10.2, "45up" = 0.8),
                hispanic = c(u15 = 0.4, "15_17" = 19.3, "18_19" = 66.1, "20_24" = 104.5, "25_29" = 118.7, "30_34" = 96.5,
                            "35_39" = 53.6, "40_44" = 13.5, "45up" = 0.9),
                nat_amer = c(u15 = 0.3, "15_17" = 13.2, "18_19" = 48.6, "20_24" = 73.2, "25_29" = 74.7, "30_34" = 52.3,
                            "35_39" = 24.1, "40_44" = 5.5, "45up" = 0.3),
                pac_isl = c(u15 = 0.1, "15_17" = 3.3, "18_19" = 13.9, "20_24" = 37.5, "25_29" = 90.0, "30_34" = 121.3,
                           "35_39" = 68.9, "40_44" = 16.1, "45up" = 1.5), 
                two_more = c(u15 = 0.3, "15_17" = 10.9, "18_19" = 43.8, "20_24" = 79.0, "25_29" = 105.8, "30_34" = 100.8,
                            "35_39" = 51.0, "40_44" = 10.6, "45up" = 0.8), # using 'all' category, no breakout in data summaries
                white = c(u15 = 0.2, "15_17" = 10.2, "18_19" = 42.0, "20_24" = 77.3, "25_29" = 108.6, "30_34" = 103.9,
                         "35_39" = 51.2, "40_44" = 10.2, "45up" = 0.7))

# Tables 27 from (Hamilton et al; 2014)
mb_list <- list(c("twins" = 33.9, "triplets" = 1.2), c("twins" = 40, "triplets" = 0.9), c("twins" = 24.1, "triplets" = 0.7), 
                c("twins" = 33.9, "triplets" = 1.2), c("twins" = 33.9, "triplets" = 1.2), c("twins" = 33.9, "triplets" = 1.2),
                c("twins" = 36.7, "triplets" = 1.4))

# adjust fertility rates downward as adjustment for CA vs USA
# Tables 12 from (Hamilton et al; 2014)
us_fr <- c(u15 = 0.3, "15_17" = 10.9, "18_19" = 43.8, "20_24" = 79.0, "25_29" = 105.8, "30_34" = 100.8,
           "35_39" = 51.0, "40_44" = 10.6, "45up" = 0.8)
ca_fr <- c(u15 = 0.2, "15_17" = 9.6, "18_19" = 38.0, "20_24" = 66.8, "25_29" = 93.9, "30_34" = 106.1,
           "35_39" = 63.8, "40_44" = 15.0, "45up" = 1.3)
adj_fr <- function(x, us, st) {return(round(x * st / us, 8))}

fr_list <- lapply(fr_list, adj_fr, us = us_fr, st = ca_fr); rm(us_fr, ca_fr, adj_fr)

# (B) Run simulation -----------
la_tracts <- lapply(opt_la$best_fit, function(l) split(l, l$race))

nnodes <- parallel::detectCores()
cl <- parallel::makeCluster(nnodes, type = "PSOCK")

clusterExport(cl, varlist = c("births", "births_race", "sim_births", "mult_births"))

births_la <- parallel::clusterMap(cl, RECYCLE = TRUE, SIMPLIFY = FALSE, .scheduling = "dynamic",
                                  fun = births, 
                                  df_list = la_tracts,
                                  fr_vec_list = list(fr_list),
                                  pct_births_unmar = list(pct_births_unmar),
                                  mult_births_list = list(mb_list), 
                                  seed = 234674L, nsim = 10000L)

parallel::stopCluster(cl)

names(births_la) <- names(la_tracts)


rm(cl, pct_births_unmar, births, births_race, mult_births, sim_births, mb_list, fr_list, la_tracts)

# 05. simulate deaths
#----------------------------------------------------------
# (A) input data to simulation -----------
## exact breakouts by age and race from base tables:
# B01001B (black_aa), B01001C (nat_amer), B01001D (asian), B01001E (pac_isl), B01001G (2 or more),
# B01001H (white, not hisp), B01001I (hispanic)
u15 <- list(
  asian    = c("m_u5" = 31127, "m_5_9" = 30208, "m_10_14" = 33646, "f_u5" = 29529, "f_5_9" = 29137, "f_10_14" = 32104),
  black_aa = c("m_u5" = 24553, "m_5_9" = 24822, "m_10_14" = 26775, "f_u5" = 23020, "f_5_9" = 23843, "f_10_14" = 25963),
  hispanic = c("m_u5" = 206083, "m_5_9" = 201089, "m_10_14" = 204756, "f_u5" = 198169, "f_5_9" = 193272, "f_10_14" = 197382),
  nat_amer = c("m_u5" = 1643, "m_5_9" = 1787, "m_10_14" = 1723, "f_u5" = 1550, "f_5_9" = 1446, "f_10_14" = 1621),
  pac_isl  = c("m_u5" = 955, "m_5_9" = 900, "m_10_14" = 933, "f_u5" = 770, "f_5_9" = 742, "f_10_14" = 960),
  two_more = c("m_u5" = 26179, "m_5_9" = 20097, "m_10_14" = 19653, "f_u5" = 23961, "f_5_9" = 19164, "f_10_14" = 17868),
  white    = c("m_u5" = 54441, "m_5_9" = 54949, "m_10_14" = 56844, "f_u5" = 51811, "f_5_9" = 51608, "f_10_14" = 53760)
)

data("AgeRaceDR", package = "synthACS")
AgeRaceDR <- AgeRaceDR[AgeRaceDR$age != "All ages" & AgeRaceDR$gender != "both", ]
dr_mat <- list(asian = AgeRaceDR[AgeRaceDR$race == "asian.isl", ],
               black_aa = AgeRaceDR[AgeRaceDR$race == "black", ],
               hispanic = AgeRaceDR[AgeRaceDR$race == "hispanic", ],
               nat_amer = AgeRaceDR[AgeRaceDR$race == "nat.amer", ],
               pac_isl = AgeRaceDR[AgeRaceDR$race == "asian.isl", ],
               two_more = AgeRaceDR[AgeRaceDR$race == "all", ],
               white = AgeRaceDR[AgeRaceDR$race == "white", ]
)
rm(AgeRaceDR)


# (B) Run simulation -----------
la_tracts <- lapply(opt_la$best_fit, function(l) split(l, l$race))
la_tracts <- lapply(la_tracts, function(l) {names(l) <- c("asian", "black_aa", "hispanic", "nat_amer",
                                                          "pac_isl", "two_more", "white"); return(l)})

nnodes <- parallel::detectCores()
cl <- parallel::makeCluster(nnodes, type = "PSOCK")

clusterExport(cl, varlist = c("deaths", "deaths_race", "sim_deaths"))

deaths_la <- parallel::clusterMap(cl, RECYCLE = TRUE, SIMPLIFY = FALSE, .scheduling = "dynamic",
                                  fun = deaths, 
                                  df_list = la_tracts,
                                  dr_mat = list(dr_mat),
                                  u15_vec_list = list(u15),
                                  seed = 234674L, nsim = 10000L)

parallel::stopCluster(cl)

names(deaths_la) <- names(la_tracts)
rm(cl, dr_mat, u15, la_tracts, deaths, deaths_race, sim_deaths, nnodes)

# (C) Extract outputs -----------
# birth_q <- birth_quantiles(births_la, quantiles = c(.1, .5, .9))
# death_q <- death_quantiles(deaths_la, quantiles = c(.1, .5, .9))

la_b50 <- birth_quantiles(births_la, quantiles = .5)
la_d50 <- death_quantiles(deaths_la, quantiles = .5)
la50 <- data.frame(la_b50, la_d50); rm(la_b50, la_d50)

#----------------------------------------------------------
# 06. Mapping -- See library requirements note (~line 515)
#----------------------------------------------------------
# remove geo-data which don't have simulation data (census tracts < 120 people)
idx <- which(rownames(la_dat$estimates$age_by_sex) %in% rownames(la50))
la_dat$estimates <- lapply(la_dat$estimates, function(l, ind) {l[ind,]}, ind= idx)
la_dat$standard_error <- lapply(la_dat$standard_error, function(l, ind) {l[ind,]}, ind= idx)
la_dat$geography <- la_dat$geography[idx,]
rm(idx)

la50$pop        <- la_dat$estimates$age_by_sex[, 1]
la50$male_pop   <- la_dat$estimates$age_by_sex[, 2]
la50$female_pop <- la_dat$estimates$age_by_sex[, 3]
la50 <- data.frame(geoid = paste0(str_pad(la_dat$geography$state, 2, "left", pad = "0"), 
                                 str_pad(la_dat$geography$county, 3, "left", pad = "0"), 
                                 str_pad(la_dat$geography$tract, 6, "left", pad = "0")),
                   la50)


la_geo <- tracts(state = "CA", county = "Los Angeles", cb = TRUE)
la_geo <- geo_join(la_geo, la50, "GEOID", "geoid")
la_geo <- la_geo[la_geo$ALAND > 0, ]
la_geo2 <- fortify(la_geo, region = "geoid")

data("la_hospitals", package = "synthACS")

## 50th percentile plots
#----------------------------------------------------------
### BIRTHS
png("./la_births.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total)) +
  lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Births by Census Tract \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "50th Percentile - Births",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0, 300, 100), limits = c(0,310)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### + hospitals
png("./la_births_hosp.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total)) +
  geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) +
  lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Births with Hospital Locations \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "50th Percentile - Births",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0, 300, 100), limits = c(0,310)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### DEATHS
png("./la_deaths.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total.1)) +
  lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Deaths by Census Tract \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "50th Percentile - Deaths",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0, 150, 25), limits = c(0, 135)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### + hospitals
png("./la_deaths_hosp.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total.1)) +
  geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) +
  lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Deaths with Hospital Locations \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "50th Percentile - Deaths",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0, 150, 25), limits = c(0, 135)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### HISPANIC BIRTHS
png("./hisp_births.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = hispanic)) +
  lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Births to Hispanic Mothers by Census Tract \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "50th Percentile - Births",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0, 200, 50), limits = c(0, 160)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### DEATHS OVER AGE 65
m <- data.frame(la_geo2[, 32:36]); m$geometry <- NULL
la_geo2$d_over65 <- base::apply(m, 1, sum)

png("./la_deaths65.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) +
  geom_sf(mapping = aes(fill = d_over65)) +
  lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Deaths (Age > = 65y.o.) \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "50th Percentile - Deaths",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0,100,25), limits = c(0, 110)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

## 90th percentile plots
#----------------------------------------------------------
la_b90 <- birth_quantiles(births_la, quantiles = .9)
la_d90 <- death_quantiles(deaths_la, quantiles = .9)
la90 <- data.frame(la_b90, la_d90); rm(la_b90, la_d90)

la90 <- data.frame(geoid = paste0(str_pad(la_dat$geography$state, 2, "left", pad = "0"), 
                                 str_pad(la_dat$geography$county, 3, "left", pad = "0"), 
                                 str_pad(la_dat$geography$tract, 6, "left", pad = "0")),
                   la90)

la_geo <- tracts(state = "CA", county = "Los Angeles", cb = TRUE)
la_geo <- geo_join(la_geo, la90, "GEOID", "geoid")
la_geo <- la_geo[la_geo$ALAND > 0, ]
la_geo2 <- fortify(la_geo, region = "geoid")

### BIRTHS
png("./la_births90.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total)) +
  lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Births by Census Tract \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "90th Percentile - Births",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0,300, 100), limits = c(0, 310)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### + hospitals
png("./la_births90_hosp.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total)) +
  geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) +
  lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Births with Hospital Locations \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "90th Percentile - Births",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0,300, 100), limits = c(0, 310)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### DEATHS
png("./la_deaths90.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total.1)) +
  lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Deaths by Census Tract \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "90th Percentile - Deaths",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0, 150, 50), limits = c(0, 135)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()

### + hospitals
png("./la_deaths90_hosp.png", height = 600, width = 600, units = "px")
ggplot(la_geo2) + 
  geom_sf(mapping = aes(fill = total.1)) +
  geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) +
  lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + 
  labs(x = "Longitude", y = "Latitude", title = "Number of Deaths with Hospital Locations \n Los Angeles County, CA") +
  scale_fill_gradientn(name = "90th Percentile - Deaths",
                       colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"),
                       breaks = seq(0, 150, 50), limits = c(0, 135)) +
  theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11),
        legend.background = element_rect(colour = "black", fill = "#DCDCDC"),
        title = element_text(face = "bold", size = 12),
        axis.title = element_text(face = "bold.italic", size = 11),
        axis.text = element_text(size = 10))
dev.off()