#---------------------------------- #load the library and the data library("BNPmix") data(CPP, package = "BNPmix") #---------------------------------- ### Univariate (Sec. 5.1) #---------------------------------- y <- CPP[CPP$hosp == 11,] # Prior elicitation assuming 3 clusters DPprior <- PYcalibrate(Ek = 3, n = nrow(y), discount = 0) prior <- list(strength = DPprior$strength, discount = 0) mcmc <- list(niter = 5000, nburn = 4000) output <- list(grid = seq(30, 46, length.out = 100), out_type = "FULL") # Run the estimation procedure and plot the results set.seed(42) fit1 <- PYdensity(y = y$gest, mcmc = mcmc, prior = prior, output = output) print(fit1) plot(fit1, show_hist = TRUE, xlab = "gestational age") plot(fit1, band = FALSE, show_clust = TRUE, xlab = "gestational age") #----------------------------------- # Multivariate example 1 (Sec. 5.2a) #----------------------------------- # Define y and tuning parameters y <- cbind(CPP$gest, log(CPP$dde), CPP$weight) prior <- list(strength = 0.05, discount = 0.1) grid <- expand.grid(seq(30, 46, length.out = 25), seq(0.7, 5.2, length.out = 25), seq(0, 130, length.out = 25)) output = list(grid = grid, out_type = "FULL") mcmc <- list(niter = 2000, nburn = 1000, m_imp = 100) # Run the estimation procedure set.seed(42) fit2 <- PYdensity(y = y, mcmc = mcmc, prior = prior, output = output) # Plot the results (intensive computation, be patient) p12 <- plot(fit2, dim = c(1,2), show_clust = TRUE, xlab="gestational age", ylab="log(DDE)") p21 <- plot(fit2, dim = c(2,1), show_clust = TRUE, ylab="gestational age", xlab="log(DDE)") p13 <- plot(fit2, dim = c(1,3), show_clust = TRUE, xlab="gestational age", ylab="weight") p23 <- plot(fit2, dim = c(2,3), show_clust = TRUE, xlab="log(DDE)", ylab="weight") p32 <- plot(fit2, dim = c(3,2), show_clust = TRUE, ylab="log(DDE)", xlab="weight") p31 <- plot(fit2, dim = c(3,1), show_clust = TRUE, ylab="gestational age", xlab="weight") p1 <- plot(fit2, dim = c(1,1), show_clust = TRUE, xlab="gestational age", ylab="density") p2 <- plot(fit2, dim = c(2,2), show_clust = TRUE, xlab="log(DDE)", ylab="density") p3 <- plot(fit2, dim = c(3,3), show_clust = TRUE, xlab="weight", ylab="density") gridExtra::grid.arrange( p1, p12, p13, p21, p2, p23, p31, p32, p3, layout_matrix = matrix(1:9,3,3)) #---------------------------------- # Multivariate example 2 (Sec. 5.2b) #---------------------------------- output <- list(out_type = "CLUST") mcmc <- list(niter = 5000, nburn = 4000, model = "DLS", method = "MAR") # Simulate the synthetic data set p <- 25 set.seed(42) ysim <- rbind( mnormt::rmnorm(50, mean = rep(1, p), varcov = diag(1, p)), mnormt::rmnorm(40, mean = sample(1:5, p, rep=TRUE), varcov = diag(1, p)), matrix(rt(10*p, df = 2), 10, p) * 2) ysim <- scale(ysim) # Fit the PY mixture prior <- list(strength = 1, discount = 0.1, hyper = TRUE) fit.sim <- PYdensity(y = ysim, mcmc = mcmc, prior = prior, output = output) # Explore the clustering structure fit.sim.part <- partition(fit.sim) sort(ftable(fit.sim.part$partitions[3, ]), decreasing = T) dissmat <- dist(1-fit.sim.part$psm) clus <- hclust(dissmat) heatmap(as.matrix(dissmat), Rowv = as.dendrogram(hclust(dissmat)), Colv = NA, labRow = FALSE, labCol = FALSE) pdf("dendro.pdf",7,5) heatmap(as.matrix(dissmat), Rowv = as.dendrogram(hclust(dissmat)), Colv = NA, labRow = FALSE, labCol = FALSE) dev.off() #--------------------------------------- # Density regression example (Sec. 5.3) #--------------------------------------- # Prior initialization and parameter setting PYpar <- PYcalibrate(Ek = 3, n = nrow(CPP), discount = 0.25) prior <- list(strength = PYpar$strength, discount = PYpar$discount) grid_y <- seq(26, 47, length.out = 100) grid_x <- round(quantile(CPP$dde, c(0,0.25,.5,.75,.95,.99))) mcmc <- list(niter = 5000, nburn = 4000) output = list(grid_x = grid_x, grid_y = grid_y, out_type = "FULL", out_param = TRUE) # Fit the model set.seed(42) fit.reg <- PYregression(y = CPP$gest, x = CPP$dde, prior = prior, mcmc = mcmc, output = output) # Plot the results regplot <- data.frame( dens = as.vector(apply(fit.reg$density, c(1,2), mean)), qlow = as.vector(apply(fit.reg$density, c(1,2), quantile, probs = 0.025)), qupp = as.vector(apply(fit.reg$density, c(1,2), quantile, probs = 0.975)), grid = rep(grid_y, 6), label = factor(rep(paste("DDE = ", grid_x), each = length(grid_y)), level = rep(paste("DDE = ", grid_x)))) library(ggplot2) ggplot(regplot) + theme_bw() + geom_line(data = regplot, map = aes(x = grid, y = dens)) + geom_ribbon(data = regplot, map = aes(x = grid, ymin = qlow, ymax = qupp), fill = "blue", alpha = 0.3) + facet_wrap(~label, ncol = 3, nrow = 2) + labs(x = "gestational age", y = "density") #------------------------------------------------------ # Density estimation for correlated samples (Sec. 5.4) #------------------------------------------------------ # Parameter setting mcmc <- list(niter = 5000, nburn = 4000) output <- list(grid = seq(30, 46, length.out = 100), out_type = "FULL") # Estimate the model fit.ddp <- DDPdensity(y = CPP$gest, group = CPP$hosp, mcmc = mcmc, output = output) fit.ddp$group <- factor(CPP$hosp, labels = paste("Hospital ", levels(CPP$hosp))) plot(fit.ddp, wrap_dim = c(3,4), ylab = "density", xlab = "gestiational age")