##--------------------------------------------------------- ## 00 --- functions that will be needed for Section 4 (line ~500) ##--------------------------------------------------------- # @param b_dat A list == outputs to births() below. # @param quantiles the quantiles of the birth simulations you wish to return # @param names Simplified names for the output birth_quantiles <- function(b_dat, quantiles = c(.1, .25, .5, .75, .9), nms = c("asian", "black_aa", "hispanic", "nat_amer", "pac_isl", "two_more", "white", "total")) { if (length(quantiles) > 1) { b_quants <- lapply(b_dat, function(l, quantiles, nms) { d <- data.frame(base::apply(l$birth_simulation, 2, quantile, quantiles)) names(d) <- nms return(d) }, quantiles = quantiles, nms = nms) } else if (length(quantiles) == 1) { # only 1 quantile supplied, reduce to data.frame b_quants <- lapply(b_dat, function(l, quantiles) { base::apply(l$birth_simulation, 2, quantile, quantiles) }, quantiles = quantiles) b_quants <- data.frame(do.call("rbind", b_quants)) rownames(b_quants) <- names(b_dat) names(b_quants) <- nms } return(b_quants) } # @param b_dat A list == outputs to deaths() below. # @param quantiles the quantiles of the death simulations you wish to return # @param names Simplified names for the output death_quantiles <- function(b_dat, quantiles = c(.1, .25, .5, .75, .9), nms = c("0_1", "1_4", "5_9", "10_14", "15_17", "18_24", "25_29", "30_34", "35_39", "40_44", "45_49", "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", "85up", "total")) { if (length(quantiles) > 1) { b_quants <- lapply(b_dat, function(l, quantiles, nms) { d <- data.frame(base::apply(l$death_simulation, 2, quantile, quantiles)) names(d) <- nms return(d) }, quantiles = quantiles, nms = nms) } else if (length(quantiles) == 1) { # only 1 quantile supplied, reduce to data.frame b_quants <- lapply(b_dat, function(l, quantiles) { base::apply(l$death_simulation, 2, quantile, quantiles) }, quantiles = quantiles) b_quants <- data.frame(do.call("rbind", b_quants)) rownames(b_quants) <- names(b_dat) names(b_quants) <- nms } return(b_quants) } # @title simulate births for a small area population. # @description Simulate the births for a small area population based on the female population in the # small area. Population is input as population by race and age. Simulation of births controls # for race, age, marital status, and multiple birth rate. Menopause is assumed to occur at age # 60, which is conservatively high (US avg is 51; typical range is 48-55). # @param df_list A named list of data-frames contaning population data for a single small area where # each element of the list has populations of a different race (different fertility rates) # @param fr_vec_list A list of named vectors, with each vector containing fertility rate data for # women of a specific race (corresponding to the elemnts of df_list). # @param pct_births_unmar A named vector giving the percentage of births to unmarried women in a set # of age brackets. # @param mult_births_list A list of named vectors, with each vector providing the multiple birth # rate for each race as above. # @param seed A seed for reproducibility. # @param nsim An integer for the number of simulations to run. Defaults to \code{10000L} # @return ... births <- function(df_list, fr_vec_list, pct_births_unmar, mult_births_list, seed = sample.int(10000L, size = 1), nsim = 10000L) { if (is.null(names(df_list))) stop("df_list must be named -- names should be racial groups") pct_births_unmar <- replicate(n = length(df_list), pct_births_unmar, simplify = FALSE) set.seed(seed) mc <- match.call() # output: list with one element per racial group. Each element is a vector of births per simulation births <- mapply(births_race, df = df_list, fr_vec = fr_vec_list, pct_births_unmar = pct_births_unmar, mult_births = mult_births_list, nsim = nsim, SIMPLIFY = FALSE) # bind outputs together and aggregate births <- data.frame(do.call("cbind", births)) names(births) <- c(names(df_list)); rownames(births) <- 1:nsim births$total <- base::apply(births, 1, sum, na.rm = TRUE) return(list(birth_simulation = births, nsim = nsim, seed = seed, call = mc)) } # @title Simulate births for a small area population of females of a single race # @description # @param df A data-frame contaning data on the population of a single race in a single small area # @param fr_vec A named vector of fertility rates of women by age # @param pct_births_unmar A named vector giving the percentage of births to unmarried women in a set # of age brackets. # @param mult_births A named vector providing the multiple birth rate for the specified race of women. # @param nsim An integer for the number of simulations to run. Defaults to \code{10000L} # @return ... births_race <- function(df, fr_vec, pct_births_unmar, mult_births, nsim = 10000L) { # 01 setup age_ht <- data.frame(dat = c("under15", "15_17", "18_24", "25_29", "30_34", "35_39", "40_44", "45_49", "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", "85up"), mar = c("u15", "15_17", "20_24", "25_29", "30_34", "35_39", "40_44", rep("45up", 3), rep(NA, 6)), stringsAsFactors = FALSE) # assume menopause at 60 # subset to women only, breakout by age df_f <- df[df$gender == "Female", ] df_f <- synthACS:::split_df(df_f, "age") # 02 apply simulation to each subset # "births" is a matrix with rows = each simulation; columns = for each age group births <- do.call("cbind", lapply(df_f, sim_births, ht = age_ht, fr_vec = fr_vec, pct_births_unmar = pct_births_unmar, mult_births = mult_births, nsim = nsim)) # aggregate age groups per simulation births <- base::apply(births, 1, sum, na.rm = TRUE) # output: vector of births per simulation return(births) } # @title Simulate births for a given age-cohort # @description Simulate births for a given age-cohort accounting for twin/triplet births. # @param df A data.frame containing population data on a single age group of females # from a single small area. # @param ht A symbol table (hash-table) mapping values of the 'age' variable in \code{df} to the # string patterns used in \code{fr_vec} below. # @param fr_vec A named vector of fertility rates of women by age # @param pct_births_unmar A named vector giving the percentage of births to unmarried women in a set # of age brackets. # @param mult_births A named vector providing the multiple birth rate for the specified race of women. # @param nsim An integer for the number of simulations to run. Defaults to \code{10000L} # @return sim_births <- function(df, ht, fr_vec, pct_births_unmar, mult_births, nsim = 10000L) { if (nrow(df) < 1L) return(0) # 01. Setup: # extract fertility rate for given age group # then convert overall rate by age to rates for married vs unmarried women in age bracket. cond_var_comp <- ht[, 2][which(df[, age][1] == ht[, 1])] if (is.na(cond_var_comp)) return(0) # if NA, memopause, no babies f_rate <- fr_vec[which(grepl(cond_var_comp, names(fr_vec)))] unmar_pct_b <- pct_births_unmar[which(grepl(cond_var_comp, names(pct_births_unmar)))] / 100 tb_mar <- table(df$marital_status) unmar_pct <- 1 - (tb_mar[which(grepl("married", names(tb_mar)))] / sum(tb_mar)) # fertility rates for age bracket by marital status fr_unmar <- f_rate * unmar_pct_b / unmar_pct fr_mar <- f_rate * (1 - unmar_pct_b) / (1 - unmar_pct) # 02. Simulate nsim times: # get cohorts of married/unmarried women, then apply birth simulation to each group. num_mar <- tb_mar[which(grepl("married", names(tb_mar)))] num_unmar <- sum(tb_mar[-which(grepl("married", names(tb_mar)))]) birth_sim <- vector("numeric", length = nsim) for (i in 1:nsim) { mar_births <- mult_births(num_mar, fr_rate = fr_mar, mult_births = mult_births) unmar_births <- mult_births(num_unmar, fr_rate = fr_unmar, mult_births = mult_births) birth_sim[i] <- sum(mar_births, unmar_births) } # return return(birth_sim) } # Given a number of women (n_obs) simulate the number of births as well as the occurance # of twins and triplets # @param fr_rate = fertility rate for the population to simulate (married/unmarried) - births / 1000 women # @param mult_births = rate of multiple births per 1000 live births # @return An integer number of births mult_births <- function(n_obs, fr_rate, mult_births) { # simulate birth events mult_births <- mult_births / 1000 births <- sum(ifelse(runif(n_obs) < fr_rate / 1000, 1, 0)) # simulate birth events resulting in a twin/triplet m2 <- runif(births) # generate multiple-birth babies twins_trips <- sum(ifelse(m2 < mult_births[2], 2, # two additional (triplets) ifelse(m2 > mult_births[2] & m2 < mult_births[1], 1, 0))) # one additional (twins) return(sum(births, twins_trips)) # total number of infants } #---------------------------------------------------------- ### MORTALITY FUNCTIONS #---------------------------------------------------------- # @title simulate deaths for a small area population. # @description Simulate the deaths for a small area population based on the population in the # small area. Simulation of deaths controls for race, age, and gender. # @param df_list A named list of data-frames contaning population data for a single small area where # each element of the list has populations of a different race (different death rates) # @param dr_mat A list of data.frame's containing death rate data for individuals of a specific race by age/gender # @param u15_vec_list A list of named vectors, each containing the breakout of individuals under # age 15 by gender and age brackete (0-4, 5-9, 10-14). # @param seed A seed for reproducibility. # @param nsim An integer for the number of simulations to run. Defaults to \code{10000L} # @return ... deaths <- function(df_list, dr_mat, u15_vec_list, seed = sample.int(10000L, size = 1), nsim = 10000L) { if (is.null(names(df_list))) stop("df_list must be named -- names should be racial groups") if (!all.equal(names(df_list), names(u15_vec_list)) | !all.equal(names(df_list), names(dr_mat))) { stop("racial groups must correspond in df_list, dr_mat, and u15_vec_list") } # 01. Check which races have people! # then subset to those which_pop <- which(unlist(lapply(df_list, nrow)) > 0) df_list <- df_list[which_pop] dr_mat <- dr_mat[which_pop] u15_vec_list <- u15_vec_list[which_pop] set.seed(seed) mc <- match.call() # output: list with one element per racial group. Each element is a vector of deaths per simulation deaths <- mapply(deaths_race, df = df_list, dr_mat = dr_mat, u15_vec = u15_vec_list, nsim = nsim, SIMPLIFY = FALSE) # bind race outputs together # output: matrix of deaths-- rows = per simulation, columns = age groups deaths <- Reduce("+", deaths) deaths <- cbind(deaths, base::apply(deaths, 1, sum, na.rm = TRUE)) colnames(deaths)[ncol(deaths)] <- "total" return(list(death_simulation = deaths, nsim = nsim, seed = seed, call = mc)) } # @title Simulate deaths for a small area population of a single race # @description # @param df A data-frame contaning data on the population of a single race in a single small area # @param dr_mat A data.frame of death rates of men and women by age -- of a single race # @param u15_vec A named vector of population counts for the population under 15 # @param nsim An integer for the number of simulations to run. Defaults to \code{10000L} # @return ... deaths_race <- function(df, dr_mat, u15_vec, nsim = 10000L) { # 01 setup age_ht <- data.frame(dat = c("15_17", "18_24", "25_29", "30_34", "35_39", "40_44", "45_49", "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", "85up"), dr = c("15_19", "20_24", "25_29", "30_34", "35_39", "40_44", "45_49", "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", "85 and over"), stringsAsFactors = FALSE) # split by gender and age df <- synthACS:::split_df(df, "gender") df_m <- synthACS:::split_df(df[[1]], "age") df_f <- synthACS:::split_df(df[[2]], "age") rm(df) # 02 apply simulation to each subset # first check for >0 people. pop_0m <- which(unlist(lapply(df_m, nrow)) > 0) pop_0f <- which(unlist(lapply(df_f, nrow)) > 0) # pre-allocate output matrices d_f <- d_m <- matrix(0, nrow = nsim, ncol = 19, dimnames = list(1:nsim, c("0_1", "1_4", "5_9", "10_14", "15_17", "18_24", "25_29", "30_34", "35_39", "40_44", "45_49", "50_54", "55_59", "60_64", "65_69", "70_74", "75_79", "80_84", "85up"))) # simulate by age group # insert outputs into d_m, d_f if (length(pop_0m) > 0) { deaths_m <- lapply(df_m[pop_0m], sim_deaths, ht = age_ht, dr_mat = dr_mat, u15_vec = u15_vec, nsim = nsim) for (j in 1:length(deaths_m)) { if (names(deaths_m)[j] == "under15") { d_m[, 1:4] <- deaths_m[[j]] } else { idx <- pop_0m[which(names(deaths_m)[j] == names(pop_0m))] + 3 d_m[, idx] <- deaths_m[[j]] } } } if (length(pop_0f) > 0) { deaths_f <- lapply(df_f[pop_0f], sim_deaths, ht = age_ht, dr_mat = dr_mat, u15_vec = u15_vec, nsim = nsim) for (j in 1:length(deaths_f)) { if (names(deaths_f)[j] == "under15") { d_f[, 1:4] <- deaths_f[[j]] } else { idx <- pop_0f[which(names(deaths_f)[j] == names(pop_0f))] + 3 d_f[, idx] <- deaths_f[[j]] } } } # combine male and female deaths # output: matrix of deaths-- rows = per simulation, columns = age groups return(ifelse(is.na(d_m), 0, d_m) + ifelse(is.na(d_f), 0, d_f)) # slightly faster than Reduce("+", ...) } # @title Simulate deaths for a given cohort # @description Simulate deaths for a given cohort defined by gender, age, and race. # @param df A data.frame containing population data on a single cohort (gender, age, race) # from a single small area. # @param ht A symbol table (hash-table) mapping values of the 'age' variable in \code{df} to the # string patterns used in \code{dr_vec} below. # @param dr_mat A data.frame of death rates of individuals by age (for a specific race) # @param nsim An integer for the number of simulations to run. Defaults to \code{10000L} # @return sim_deaths <- function(df, ht, dr_mat, u15_vec, nsim = 10000L) { if (nrow(df) < 1L) return(0) # overview: # 1. extract death rate for age and gender group in df # 2. run simulations for each individual in the df # 3. repeat (2) nsim times # (A) do U15 separately -- need to split out ages if (df$age[1] == "under15") { # allocation of ages, death rates by age groups if (df$gender[1] == "Male") { u15_vec <- u15_vec[1:3] / sum(u15_vec[1:3]) d_rate <- dr_mat[dr_mat$gender == "male" & dr_mat$age %in% c("Under 1", "1_4", "5_9", "10_14"), "death_rate", drop = TRUE] } else { u15_vec <- u15_vec[4:6] / sum(u15_vec[4:6]) d_rate <- dr_mat[dr_mat$gender == "female" & dr_mat$age %in% c("Under 1", "1_4", "5_9", "10_14"), "death_rate", drop = TRUE] } # split out age groups # map: 1:4 == c("0_1", "1_4", "5_9", "10_14") u15_ages <- sample.int(4L, size = nrow(df), replace =TRUE, prob = c(u15_vec[1] * .2, u15_vec[1] * .8, u15_vec[2], u15_vec[3])) # proportionally allocate <1, 1-4 n_pop <- c(sum(u15_ages == 1L), sum(u15_ages == 2L), sum(u15_ages == 3L), sum(u15_ages == 4L)) # 02./03. simulate death_sim <- matrix(0, nrow = nsim, ncol = 4, dimnames = list(1:nsim, c("0_1", "1_4", "5_9", "10_14"))) for (i in 1:nsim) { death_sim[i, ] <- unlist(mapply(function(pop, dr) { sum(runif(n =pop) < dr) }, pop = n_pop, dr = d_rate / 100000)) # death rate = deaths per 100k people } } else { # (B) Ages not U15 # extract death rate for given age & gender group age_grp <- ht[, 2][which(df[, age][1] == ht[, 1])] gender <- tolower(as.character(df$gender[1])) d_rate <- dr_mat[dr_mat$age == age_grp & dr_mat$gender == gender, "death_rate", drop = TRUE] # 02./03. Simulate nsim times: death_sim <- vector("numeric", length = nsim) for (i in 1:nsim) { death_sim[i] <- sum(runif(n = nrow(df)) < d_rate / 100000) # death rate = deaths per 100k people } } # return return(death_sim) } ##--------------------------------------------------------- ## YOU WILL NEED THE FOLLOWING PACKAGES TO FULLY EXECUTE THIS CODE. ##--------------------------------------------------------- req_pkgs <- c("tigris", "rgdal", "ggplot2", "stringr", "maptools", "scales", "RColorBrewer", "gpclib", "sf", "acs", "retry", "data.table") needed_pkgs <- req_pkgs[!(req_pkgs %in% installed.packages()[, "Package"])] if (length(needed_pkgs) > 0) {install.packages(needed_pkgs)} ##--------------------------------------------------------- ## SECTION 3 - USER GUIDE ##--------------------------------------------------------- # http://api.census.gov/data/key_signup.html acs::api.key.install(key = "......") library("data.table") library("acs") library("retry") library("synthACS") ## PULL ACS DATA ca_geo <- geo.make(state = "CA", county = "*") ca_bachelors <- pull_bachelors(endyear = 2014, span = 5, geography = ca_geo) ### subsection: macroACS class ##---------------------- methods(class = "macroACS") ca_county_pop <- pull_population(endyear = 2014, span = 5, geography = ca_geo) get_span(ca_county_pop) get_endyear(ca_county_pop) get_geography(ca_county_pop) get_dataset_names(ca_county_pop) fetch_data(ca_county_pop, geography = c("Los Ang", "San Diego"), dataset = "estimate", choice = "median_age_by_sex") fetch_data(ca_county_pop, geography = "*", dataset = "st.err", choice = "median_age_by_sex") ### subsection: synthetic micro datasets ##---------------------- ## GENERATE SYNTHETIC DATSETS ca_dat_SMSM <- pull_synth_data(2014, 5, ca_geo) split_ca_dat <- split(ca_dat_SMSM, n_splits = 10) library("parallel") ca_synthetic <- derive_synth_datasets(ca_dat_SMSM, leave_cores = 0) ## marginalizing attributes ca_marg <- marginalize_attr(ca_synthetic, varlist = c("race", "ind_income", "nativity", "geog_mobility"), marginalize_out = TRUE) ## adding attributes: transit/work data ca_transit <- pull_transit_work(2014, 5, ca_geo) get_dataset_names(ca_transit) # setup / inputs transit_work <- gen_attr_vectors(ca_transit, "mode_transit_by_age") mode_levels <- c("no_work", "drove_alone", "carpool", "transit", "walk", "other", "work_at_home") round(transit_work[[1]], 0) # round(transit_work[[1]][1:49], 0) -- displayed output # augment attribute vector transit_work <- lapply(transit_work, function(x, lev) { # augment vectors unemp <- data.frame(emp_status = "unemployed", age = NA, pct = 1, level = "no_work") NI_LF <- data.frame(emp_status = "not_in_labor_force", age = NA, pct = 1, level = "no_work") # mung X x <- x[!grepl(paste(c("_all", "pct_"), collapse = "|"), names(x))] emp_status <- rep("employed", length = length(x)) age <- substr(names(x), nchar(names(x))-4, nchar(names(x))) age <- ifelse(substr(age, 1, 1) == "_", substr(age, 2, nchar(age)), age) age <- ifelse(age == "16_19", "15_17", ifelse(age == "20_24", "18_24", age)) levels <- substr(names(x), 1, nchar(names(x))-5) levels <- ifelse(substr(levels, nchar(levels), nchar(levels)) == "_", substr(levels, 1, nchar(levels)-1), levels) # replication for ages employed <- data.frame(emp_status = emp_status, age = age, pct = x, level = levels, stringsAsFactors = FALSE) rownames(employed) <- NULL; employed <- split(employed, employed$age) employed <- list(employed[['15_17']], employed[['18_24']], replicate(n = 4, employed[['25_44']], simplify =FALSE), replicate(n = 2, employed[['45_54']], simplify =FALSE), employed[['55_59']], employed[['60_64']], replicate(n = 5, employed[['65up']], simplify =FALSE)) employed[[3]][[1]]$age <- "25_29" employed[[3]][[2]]$age <- "30_34" employed[[3]][[3]]$age <- "35_39" employed[[3]][[4]]$age <- "40_44" employed[[4]][[1]]$age <- "45_49" employed[[4]][[2]]$age <- "50_54" employed[[7]][[1]]$age <- "65_69" employed[[7]][[2]]$age <- "70_74" employed[[7]][[3]]$age <- "75_79" employed[[7]][[4]]$age <- "80_84" employed[[7]][[5]]$age <- "85up" employed[[3]] <- do.call("rbind", employed[[3]]) employed[[4]] <- do.call("rbind", employed[[4]]) employed[[7]] <- do.call("rbind", employed[[7]]) return(data.table::rbindlist(list(unemp, NI_LF, data.table::rbindlist(employed)))) }, lev = mode_levels) ca_and_transit <- all_geog_synthetic_new_attribute(ca_marg, attr_name = "transit_work", conditional_vars = c("emp_status", "age"), st_list = transit_work) ### subsection: synthACS class ##---------------------- ## GENERATE CONSTRAINTS; CREATE CONSTRAINT LIST; OPTIMIZE MICRODATA methods(class = "synthACS") # A. do optimization with split/combine n_splits <- 10 split_ca_dat <- split(ca_dat_SMSM, n_splits = n_splits) tmp_opts <- vector("list", length= n_splits) for (i in 1:n_splits) { # derive synthetics tmp_synth <- derive_synth_datasets(split_ca_dat[[i]], leave_cores = 0) # create constraints for simulated annealing a <- all_geog_constraint_age(tmp_synth, method = "macro.table") g <- all_geog_constraint_gender(tmp_synth, method = "macro.table") m <- all_geog_constraint_marital_status(tmp_synth, method = "macro.table") r <- all_geog_constraint_race(tmp_synth, method = "synthetic") e <- all_geog_constraint_edu(tmp_synth, method = "synthetic") cll <- all_geogs_add_constraint(attr_name = "age", attr_total_list = a, macro_micro = tmp_synth) cll <- all_geogs_add_constraint(attr_name = "gender", attr_total_list = g, macro_micro = tmp_synth, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "marital_status", attr_total_list = m, macro_micro = tmp_synth, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "race", attr_total_list = r, macro_micro = tmp_synth, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "edu_attain", attr_total_list = e, macro_micro = tmp_synth, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "age", attr_total_list = a, macro_micro = tmp_synth) cll <- all_geogs_add_constraint(attr_name = "gender", attr_total_list = g, macro_micro = tmp_synth, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "marital_status", attr_total_list = m, macro_micro = tmp_synth, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "race", attr_total_list = r, macro_micro = tmp_synth, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "edu_attain", attr_total_list = e, macro_micro = tmp_synth, constraint_list_list = cll) # anneal tmp_opts[[i]] <- all_geog_optimize_microdata( tmp_synth, seed = 6550L, verbose = TRUE, constraint_list_list = cll, p_accept = 0.4, max_iter = 10000L ) } # create the string needed for combine_smsm(). paste0("tmp_opts[[", 1:n_splits, "]]", sep= ", ", collapse= "") # B. recombine: copy and paste the resulting string, excluding the final trailing comma opt_ca <- combine_smsm(tmp_opts[[1]], tmp_opts[[2]], tmp_opts[[3]], tmp_opts[[4]], tmp_opts[[5]], tmp_opts[[6]], tmp_opts[[7]], tmp_opts[[8]], tmp_opts[[9]], tmp_opts[[10]]) rm(a, g, m, r, e, tmp_synth, tmp_opts) ### subsection: examining outputs ##---------------------- LA_fit <- get_best_fit(opt_ca, geography = "Los Ang") LA_tae <- get_final_tae(opt_ca, geography = "Los Ang") plot_TAEpath(opt_ca, geography = "Santa Cl") plot_TAEpath(opt_ca, geography = "Yuba") summary(opt_ca) ##--------------------------------------------------------- ## SECTION 4 - SIMULATIONS OF FERTILITY AND MORTALITY ##--------------------------------------------------------- # Author: Alex Whitworth # Date: April - 2016; updated June-2016 # Desc: Example simulation of spatial microsimulation. # Simulation of births for all census tracts w/in LA county # References: # Hamilton, Brady E. et al. "Births: Final Data for 2014." # National Vital Statistical Reports Vol 64 No 12. # Xu JQ, Murphy S, Kochanek KD. "Deaths: Final data for 2013." # National Vital Statistics Reports Vol 64 No 2. #---------------------------------------------------------- require("tigris") require("rgdal") require("ggplot2") require("stringr") require("maptools") require("scales") require("RColorBrewer") require("gpclib") require("sf") # preferred by maptools, but not required: require(rgeos) # Warning (maptools v0.9-5): # support for gpclib will be withdrawn from maptools at the next major release maptools::gpclibPermit() #---------------------------------------------------------- # 01. Build synthetic data #---------------------------------------------------------- library("synthACS") library("parallel") la_geo <- geo.make(state = "CA", county = "Los Angeles", tract = "*") la_dat <- pull_synth_data(2014, 5, la_geo) # remove tracts w/ pop < 120 la_ind <- which(unlist(la_dat$estimates$age_by_sex[, 1]) < 120) la_dat$estimates <- lapply(la_dat$estimates, function(l, ind) {l[-ind, ]}, ind = la_ind) la_dat$standard_error <- lapply(la_dat$standard_error, function(l, ind) {l[-ind, ]}, ind = la_ind) la_dat$geography <- la_dat$geography[-la_ind, ] rm(la_ind) # B. do optimization with split/combine n_splits <- 75 split_la_dat <- synthACS::split(la_dat, n_splits = n_splits) tmp_opts <- vector("list", length= n_splits) for (i in 1:n_splits) { la_syn <- derive_synth_datasets(split_la_dat[[i]], leave_cores = 0) # 02. build constraint lists a <- all_geog_constraint_age(la_syn, method = "macro.table") g <- all_geog_constraint_gender(la_syn, method = "macro.table") m <- all_geog_constraint_marital_status(la_syn, method = "macro.table") r <- all_geog_constraint_race(la_syn, method = "synthetic") e <- all_geog_constraint_edu(la_syn, method = "synthetic") cll <- all_geogs_add_constraint(attr_name = "age", attr_total_list = a, macro_micro = la_syn) cll <- all_geogs_add_constraint(attr_name = "gender", attr_total_list = g, macro_micro = la_syn, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "marital_status", attr_total_list = m, macro_micro = la_syn, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "race", attr_total_list = r, macro_micro = la_syn, constraint_list_list = cll) cll <- all_geogs_add_constraint(attr_name = "edu_attain", attr_total_list = e, macro_micro = la_syn, constraint_list_list = cll) tmp_opts[[i]] <- all_geog_optimize_microdata(la_syn, prob_name = "p", constraint_list_list = cll, seed = 12389L, p_accept = 0.4, max_iter = 10000L) } # create the string needed for combine_smsm(). paste0("tmp_opts[[", 1:n_splits, "]]", sep= ", ", collapse= "") # copy and paste the resulting string, excluding the final trailing comma opt_la <- combine_smsm(tmp_opts[[1]], tmp_opts[[2]], tmp_opts[[3]], tmp_opts[[4]], tmp_opts[[5]], tmp_opts[[6]], tmp_opts[[7]], tmp_opts[[8]], tmp_opts[[9]], tmp_opts[[10]], tmp_opts[[11]], tmp_opts[[12]], tmp_opts[[13]], tmp_opts[[14]], tmp_opts[[15]], tmp_opts[[16]], tmp_opts[[17]], tmp_opts[[18]], tmp_opts[[19]], tmp_opts[[20]], tmp_opts[[21]], tmp_opts[[22]], tmp_opts[[23]], tmp_opts[[24]], tmp_opts[[25]], tmp_opts[[26]], tmp_opts[[27]], tmp_opts[[28]], tmp_opts[[29]], tmp_opts[[30]], tmp_opts[[31]], tmp_opts[[32]], tmp_opts[[33]], tmp_opts[[34]], tmp_opts[[35]], tmp_opts[[36]], tmp_opts[[37]], tmp_opts[[38]], tmp_opts[[39]], tmp_opts[[40]], tmp_opts[[41]], tmp_opts[[42]], tmp_opts[[43]], tmp_opts[[44]], tmp_opts[[45]], tmp_opts[[46]], tmp_opts[[47]], tmp_opts[[48]], tmp_opts[[49]], tmp_opts[[50]], tmp_opts[[51]], tmp_opts[[52]], tmp_opts[[53]], tmp_opts[[54]], tmp_opts[[55]], tmp_opts[[56]], tmp_opts[[57]], tmp_opts[[58]], tmp_opts[[59]], tmp_opts[[60]], tmp_opts[[61]], tmp_opts[[62]], tmp_opts[[63]], tmp_opts[[64]], tmp_opts[[65]], tmp_opts[[66]], tmp_opts[[67]], tmp_opts[[68]], tmp_opts[[69]], tmp_opts[[70]], tmp_opts[[71]], tmp_opts[[72]], tmp_opts[[73]], tmp_opts[[74]], tmp_opts[[75]]) rm(la_syn, tmp_opts, split_la_dat) rm(a, g, m, r, e, cll) #---------------------------------------------------------- # SIMULATIONS # 04. simulate births #---------------------------------------------------------- # (A) input data to simulation ----------- # Tables 15, 16 from (Hamilton et al; 2014) pct_births_unmar <- c(u15 = 99.4, "15_17" = 96.6, "18_19" = 87.0, "20_24" = 65.7, "25_29" = 36.7, "30_34" = 22.5, "35_39" = 21.6, "40up" = 24.3) # Tables 4, 7 from (Hamilton et al; 2014) fr_list <- list(asian = c(u15 = 0.1, "15_17" = 3.3, "18_19" = 13.9, "20_24" = 37.5, "25_29" = 90.0, "30_34" = 121.3, "35_39" = 68.9, "40_44" = 16.1, "45up" = 1.5), black_aa = c(u15 = 0.6, "15_17" = 16.7, "18_19" = 61.9, "20_24" = 102.6, "25_29" = 103.1, "30_34" = 79.4, "35_39" = 42.6, "40_44" = 10.2, "45up" = 0.8), hispanic = c(u15 = 0.4, "15_17" = 19.3, "18_19" = 66.1, "20_24" = 104.5, "25_29" = 118.7, "30_34" = 96.5, "35_39" = 53.6, "40_44" = 13.5, "45up" = 0.9), nat_amer = c(u15 = 0.3, "15_17" = 13.2, "18_19" = 48.6, "20_24" = 73.2, "25_29" = 74.7, "30_34" = 52.3, "35_39" = 24.1, "40_44" = 5.5, "45up" = 0.3), pac_isl = c(u15 = 0.1, "15_17" = 3.3, "18_19" = 13.9, "20_24" = 37.5, "25_29" = 90.0, "30_34" = 121.3, "35_39" = 68.9, "40_44" = 16.1, "45up" = 1.5), two_more = c(u15 = 0.3, "15_17" = 10.9, "18_19" = 43.8, "20_24" = 79.0, "25_29" = 105.8, "30_34" = 100.8, "35_39" = 51.0, "40_44" = 10.6, "45up" = 0.8), # using 'all' category, no breakout in data summaries white = c(u15 = 0.2, "15_17" = 10.2, "18_19" = 42.0, "20_24" = 77.3, "25_29" = 108.6, "30_34" = 103.9, "35_39" = 51.2, "40_44" = 10.2, "45up" = 0.7)) # Tables 27 from (Hamilton et al; 2014) mb_list <- list(c("twins" = 33.9, "triplets" = 1.2), c("twins" = 40, "triplets" = 0.9), c("twins" = 24.1, "triplets" = 0.7), c("twins" = 33.9, "triplets" = 1.2), c("twins" = 33.9, "triplets" = 1.2), c("twins" = 33.9, "triplets" = 1.2), c("twins" = 36.7, "triplets" = 1.4)) # adjust fertility rates downward as adjustment for CA vs USA # Tables 12 from (Hamilton et al; 2014) us_fr <- c(u15 = 0.3, "15_17" = 10.9, "18_19" = 43.8, "20_24" = 79.0, "25_29" = 105.8, "30_34" = 100.8, "35_39" = 51.0, "40_44" = 10.6, "45up" = 0.8) ca_fr <- c(u15 = 0.2, "15_17" = 9.6, "18_19" = 38.0, "20_24" = 66.8, "25_29" = 93.9, "30_34" = 106.1, "35_39" = 63.8, "40_44" = 15.0, "45up" = 1.3) adj_fr <- function(x, us, st) {return(round(x * st / us, 8))} fr_list <- lapply(fr_list, adj_fr, us = us_fr, st = ca_fr); rm(us_fr, ca_fr, adj_fr) # (B) Run simulation ----------- la_tracts <- lapply(opt_la$best_fit, function(l) split(l, l$race)) nnodes <- parallel::detectCores() cl <- parallel::makeCluster(nnodes, type = "PSOCK") clusterExport(cl, varlist = c("births", "births_race", "sim_births", "mult_births")) births_la <- parallel::clusterMap(cl, RECYCLE = TRUE, SIMPLIFY = FALSE, .scheduling = "dynamic", fun = births, df_list = la_tracts, fr_vec_list = list(fr_list), pct_births_unmar = list(pct_births_unmar), mult_births_list = list(mb_list), seed = 234674L, nsim = 10000L) parallel::stopCluster(cl) names(births_la) <- names(la_tracts) rm(cl, pct_births_unmar, births, births_race, mult_births, sim_births, mb_list, fr_list, la_tracts) # 05. simulate deaths #---------------------------------------------------------- # (A) input data to simulation ----------- ## exact breakouts by age and race from base tables: # B01001B (black_aa), B01001C (nat_amer), B01001D (asian), B01001E (pac_isl), B01001G (2 or more), # B01001H (white, not hisp), B01001I (hispanic) u15 <- list( asian = c("m_u5" = 31127, "m_5_9" = 30208, "m_10_14" = 33646, "f_u5" = 29529, "f_5_9" = 29137, "f_10_14" = 32104), black_aa = c("m_u5" = 24553, "m_5_9" = 24822, "m_10_14" = 26775, "f_u5" = 23020, "f_5_9" = 23843, "f_10_14" = 25963), hispanic = c("m_u5" = 206083, "m_5_9" = 201089, "m_10_14" = 204756, "f_u5" = 198169, "f_5_9" = 193272, "f_10_14" = 197382), nat_amer = c("m_u5" = 1643, "m_5_9" = 1787, "m_10_14" = 1723, "f_u5" = 1550, "f_5_9" = 1446, "f_10_14" = 1621), pac_isl = c("m_u5" = 955, "m_5_9" = 900, "m_10_14" = 933, "f_u5" = 770, "f_5_9" = 742, "f_10_14" = 960), two_more = c("m_u5" = 26179, "m_5_9" = 20097, "m_10_14" = 19653, "f_u5" = 23961, "f_5_9" = 19164, "f_10_14" = 17868), white = c("m_u5" = 54441, "m_5_9" = 54949, "m_10_14" = 56844, "f_u5" = 51811, "f_5_9" = 51608, "f_10_14" = 53760) ) data("AgeRaceDR", package = "synthACS") AgeRaceDR <- AgeRaceDR[AgeRaceDR$age != "All ages" & AgeRaceDR$gender != "both", ] dr_mat <- list(asian = AgeRaceDR[AgeRaceDR$race == "asian.isl", ], black_aa = AgeRaceDR[AgeRaceDR$race == "black", ], hispanic = AgeRaceDR[AgeRaceDR$race == "hispanic", ], nat_amer = AgeRaceDR[AgeRaceDR$race == "nat.amer", ], pac_isl = AgeRaceDR[AgeRaceDR$race == "asian.isl", ], two_more = AgeRaceDR[AgeRaceDR$race == "all", ], white = AgeRaceDR[AgeRaceDR$race == "white", ] ) rm(AgeRaceDR) # (B) Run simulation ----------- la_tracts <- lapply(opt_la$best_fit, function(l) split(l, l$race)) la_tracts <- lapply(la_tracts, function(l) {names(l) <- c("asian", "black_aa", "hispanic", "nat_amer", "pac_isl", "two_more", "white"); return(l)}) nnodes <- parallel::detectCores() cl <- parallel::makeCluster(nnodes, type = "PSOCK") clusterExport(cl, varlist = c("deaths", "deaths_race", "sim_deaths")) deaths_la <- parallel::clusterMap(cl, RECYCLE = TRUE, SIMPLIFY = FALSE, .scheduling = "dynamic", fun = deaths, df_list = la_tracts, dr_mat = list(dr_mat), u15_vec_list = list(u15), seed = 234674L, nsim = 10000L) parallel::stopCluster(cl) names(deaths_la) <- names(la_tracts) rm(cl, dr_mat, u15, la_tracts, deaths, deaths_race, sim_deaths, nnodes) # (C) Extract outputs ----------- # birth_q <- birth_quantiles(births_la, quantiles = c(.1, .5, .9)) # death_q <- death_quantiles(deaths_la, quantiles = c(.1, .5, .9)) la_b50 <- birth_quantiles(births_la, quantiles = .5) la_d50 <- death_quantiles(deaths_la, quantiles = .5) la50 <- data.frame(la_b50, la_d50); rm(la_b50, la_d50) #---------------------------------------------------------- # 06. Mapping -- See library requirements note (~line 515) #---------------------------------------------------------- # remove geo-data which don't have simulation data (census tracts < 120 people) idx <- which(rownames(la_dat$estimates$age_by_sex) %in% rownames(la50)) la_dat$estimates <- lapply(la_dat$estimates, function(l, ind) {l[ind,]}, ind= idx) la_dat$standard_error <- lapply(la_dat$standard_error, function(l, ind) {l[ind,]}, ind= idx) la_dat$geography <- la_dat$geography[idx,] rm(idx) la50$pop <- la_dat$estimates$age_by_sex[, 1] la50$male_pop <- la_dat$estimates$age_by_sex[, 2] la50$female_pop <- la_dat$estimates$age_by_sex[, 3] la50 <- data.frame(geoid = paste0(str_pad(la_dat$geography$state, 2, "left", pad = "0"), str_pad(la_dat$geography$county, 3, "left", pad = "0"), str_pad(la_dat$geography$tract, 6, "left", pad = "0")), la50) la_geo <- tracts(state = "CA", county = "Los Angeles", cb = TRUE) la_geo <- geo_join(la_geo, la50, "GEOID", "geoid") la_geo <- la_geo[la_geo$ALAND > 0, ] la_geo2 <- fortify(la_geo, region = "geoid") data("la_hospitals", package = "synthACS") ## 50th percentile plots #---------------------------------------------------------- ### BIRTHS png("./la_births.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total)) + lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Births by Census Tract \n Los Angeles County, CA") + scale_fill_gradientn(name = "50th Percentile - Births", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0, 300, 100), limits = c(0,310)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### + hospitals png("./la_births_hosp.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total)) + geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) + lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Births with Hospital Locations \n Los Angeles County, CA") + scale_fill_gradientn(name = "50th Percentile - Births", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0, 300, 100), limits = c(0,310)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### DEATHS png("./la_deaths.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total.1)) + lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Deaths by Census Tract \n Los Angeles County, CA") + scale_fill_gradientn(name = "50th Percentile - Deaths", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0, 150, 25), limits = c(0, 135)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### + hospitals png("./la_deaths_hosp.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total.1)) + geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) + lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Deaths with Hospital Locations \n Los Angeles County, CA") + scale_fill_gradientn(name = "50th Percentile - Deaths", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0, 150, 25), limits = c(0, 135)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### HISPANIC BIRTHS png("./hisp_births.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = hispanic)) + lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Births to Hispanic Mothers by Census Tract \n Los Angeles County, CA") + scale_fill_gradientn(name = "50th Percentile - Births", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0, 200, 50), limits = c(0, 160)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### DEATHS OVER AGE 65 m <- data.frame(la_geo2[, 32:36]); m$geometry <- NULL la_geo2$d_over65 <- base::apply(m, 1, sum) png("./la_deaths65.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = d_over65)) + lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Deaths (Age > = 65y.o.) \n Los Angeles County, CA") + scale_fill_gradientn(name = "50th Percentile - Deaths", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0,100,25), limits = c(0, 110)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ## 90th percentile plots #---------------------------------------------------------- la_b90 <- birth_quantiles(births_la, quantiles = .9) la_d90 <- death_quantiles(deaths_la, quantiles = .9) la90 <- data.frame(la_b90, la_d90); rm(la_b90, la_d90) la90 <- data.frame(geoid = paste0(str_pad(la_dat$geography$state, 2, "left", pad = "0"), str_pad(la_dat$geography$county, 3, "left", pad = "0"), str_pad(la_dat$geography$tract, 6, "left", pad = "0")), la90) la_geo <- tracts(state = "CA", county = "Los Angeles", cb = TRUE) la_geo <- geo_join(la_geo, la90, "GEOID", "geoid") la_geo <- la_geo[la_geo$ALAND > 0, ] la_geo2 <- fortify(la_geo, region = "geoid") ### BIRTHS png("./la_births90.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total)) + lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Births by Census Tract \n Los Angeles County, CA") + scale_fill_gradientn(name = "90th Percentile - Births", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0,300, 100), limits = c(0, 310)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### + hospitals png("./la_births90_hosp.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total)) + geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) + lims(x = c(-119, -117.6), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Births with Hospital Locations \n Los Angeles County, CA") + scale_fill_gradientn(name = "90th Percentile - Births", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0,300, 100), limits = c(0, 310)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### DEATHS png("./la_deaths90.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total.1)) + lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Deaths by Census Tract \n Los Angeles County, CA") + scale_fill_gradientn(name = "90th Percentile - Deaths", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0, 150, 50), limits = c(0, 135)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off() ### + hospitals png("./la_deaths90_hosp.png", height = 600, width = 600, units = "px") ggplot(la_geo2) + geom_sf(mapping = aes(fill = total.1)) + geom_point(data = la_hospitals, aes(x = geo_long, y = geo_lat), colour = "red", size = rel(1)) + lims(x = c(-119, -117.5), y = c(33.65, 34.8)) + labs(x = "Longitude", y = "Latitude", title = "Number of Deaths with Hospital Locations \n Los Angeles County, CA") + scale_fill_gradientn(name = "90th Percentile - Deaths", colours = c("#eff3ff", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#3182bd", "#08519c"), breaks = seq(0, 150, 50), limits = c(0, 135)) + theme(legend.position = "bottom", legend.title = element_text(face = "bold", size = 11), legend.background = element_rect(colour = "black", fill = "#DCDCDC"), title = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold.italic", size = 11), axis.text = element_text(size = 10)) dev.off()