## This file contains the code required to reproduce the results presented ## in the paper. Before running the code, make sure you have the necessary packages ## installed. Open the file in your R environment and execute it. This should ## reproduce the results from the paper, including any tables, figures, or ## statistical analyses. The code may generate output files, plots, or tables. ## Check the code comments or documentation for details on where to find these ## outputs. ## * packages library("hdpGLM") library("PReMiuM") ## library("magrittr") library("glue") library("tictoc") library("SimDesign") library("ggplot2") ## * setup theme_set(theme_bw() + theme(legend.position = "top") + theme(strip.background = element_rect(colour="white", fill="white"), strip.text.x = element_text(size=11, face='bold', hjust=0), strip.text.y = element_text(size=11, face="bold", vjust=0), legend.background= element_rect(fill='transparent'), legend.key.height = grid::unit(.1, "cm"), legend.key.width = grid::unit(.8, "cm") ) ) ## * argument constant ## Example ## Parameters of prior distribution for a DPP with gaussian outcome ## that regress an outcome on two covariates const_list = list( ## For beta (vector of size tree for intercept and two linear coefficients) mu_beta = c(0, 0, 0), Sigma_beta = diag(3), ## For the sigma_k df_sigma=10, s2_sigma=10, ## for SBC alpha=1 ) ## * str of hdpGLM object, output of hdpGLM() options(digits = 5) set.seed(777) sim <- hdpGLM_simulateData(n = 200, nCov = 2, K = 2, nCovj = 1, J = 5, family = "gaussian") df <- sim$data mod <- hdpGLM(y ~ X1 + X2, y ~ W1, data = df, K = 50, mcmc = list(burn.in = 0, n.iter = 100), n.display = 30) mod$max_active class(mod) str(mod) ## * Summary of hdpGLM summary(mod) summary_tidy(mod) ## * Visualization (non-hierarchical DPP) set.seed(777) ## Simulate data to show examples of visualization using DPP model sim <- hdpGLM_simulateData(n = 500, nCov = 2, K = 3, nCovj = 1, J = 1, family = "gaussian") ## Estimate the model mod <- hdpGLM(y ~ X1 + X2, data = sim$data, mcmc = list(burn.in = 0, n.iter = 100)) ## Plots ## ----- g <- plot(mod); g ## with option to plot clusters separately g <- plot(mod, separate = T); g ## with option to plot clusters separately and selecting covars to display plot(mod, separate=T, term=c('X1', 'X2')) ## * Visualization (hierarchical DPP) set.seed(777) sim <- hdpGLM_simulateData(n = 500, nCov = 2, K = 3, nCovj = 1, J = 5, family = "gaussian") ## creating context labels sim$data <- sim$data %>% dplyr::mutate(school = paste0("School ", toupper(letters[C]))) ## estimation mod <- hdpGLM(y ~ X1 + X2, y ~ W1, data = sim$data, context.id = "school", mcmc = list(burn.in = 0, n.iter = 100)) ## Plots ## ----- g <- plot(mod, context.id = "school", term=c("X1", "X2")); g g <- plot_tau(mod); g g <- plot_pexp_beta(mod, smooth.line = T); g ## * Classification set.seed(777) ## Simulate data sim <- hdpGLM_simulateData(n = 500, nCov = 2, K = 3, nCovj = 1, J = 1, family = "gaussian") ## estimate the model mod <- hdpGLM(y ~ X1 + X2, data = sim$data, mcmc = list(burn.in = 0, n.iter = 100)) ## classify the data points hdpGLM_classify(data=sim$data, samples=mod) %>% head() ## * Examples ## ** Applied (DPP) data("welfare", package = "hdpGLM") head(welfare) welfare %>% tibble::as_tibble() mod <- lm(support ~ inequality + income + ideology, data = welfare) summary(mod) set.seed(777) mcmc <- list(burn.in = 1000, n.iter = 5000) mod <- hdpGLM(support ~ inequality + income + ideology, data = welfare, mcmc = mcmc, K = 30) summary(mod) nclusters(mod) classify(welfare, mod) %>% head() ## ** Applied (hDPP) help(welfare2) data("welfare2", package = "hdpGLM") head(welfare2) ## adding country labels welfare2 <- welfare2 %>% dplyr::mutate(country = paste0("Ctr ", country) ) ## estimation set.seed(777) mcmc <- list(burn.in = 1, n.iter = 50) mod <- hdpGLM(support ~ inequality + income + ideology, context.id = "country", support ~ gap, data = welfare2, mcmc = mcmc) summary_tidy(mod) plot(mod, ncol = 4, context.id = "country") plot_pexp_beta(mod, smooth.line = T) plot_tau(mod) ## saving the plots g <- plot(mod, ncol = 4, context.id = "country"); g g <- plot_pexp_beta(mod, smooth.line = T); g ## ** Simulated (DPP) ## Complete example ## ---------------- library(hdpGLM) set.seed(777) ## simlating model parameters of the DPP regression parameters <- hdpGLM_simParameters(nCov = 3, K = 2) ## setting all linear covariates of second cluster to be equal the first parameters$beta[[2]] <- parameters$beta[[1]] ## setting beta2 of the first cluster to be 2 parameters$beta[[1]]['beta2'] <- 2 ## setting beta2 of the second cluster to be -1.5 parameters$beta[[2]]['beta2'] <- -1.5 parameters ## simulating the data sim <- hdpGLM_simulateData(n = 2000, nCov = 3, K = 2, parameters = parameters, family = "gaussian") str(sim) head(sim$data) df <- sim$data mod <- hdpGLM(y ~ X1 + X2 + X3, data = df, mcmc = list(burn.in = 500, n.iter = 10000)) summary(mod) ## summary_tidy(mod) g <- plot(mod, separate = TRUE, ncol = 4); g dfc <- hdpGLM_classify(data = df, samples = mod) head(dfc) ## * Session info sessionInfo()