library("conting") ############################################################################################ ## ## ## Section 4.1: Alcohol, Obesity and Hypertension Complete Contingency Table ## ## ## ############################################################################################ set.seed(1) ## Set a seed for reproducibility data("AOH", package = "conting") ## Load the AOH data aoh_ex <- bcct(formula = y ~ alc * hyp * obe, data = AOH, n.sample = 1000, prior = "UIP") ## Initially request 1000 iterations of the MCMC algorithm for a complete contingency table. ## The maximal model is the saturated model including the 3-way interaction. aoh_ex <- bcctu(object = aoh_ex, n.sample = 9000) ## Request a further 9000 iterations. aoh_ex ## Print out the resultant object. This will give a brief summary of the analysis performed. ts.plot(aoh_ex$BETA[, 1], ylab = "Intercept parameter", xlab = "Iteration number") ## Produce a trace plot of the intercept parameter to check MCMC convergence. aoh_ex_summ <- summary(aoh_ex, n.burnin = 5000, cutoff = 0.05, best = 4) aoh_ex_summ ## Create a more detailed summary including posterior summary statistics. Use a burn-in phase ## of 5000 iterations. Provide posterior summary statistics for the log-linear parameters only ## where the posterior probability is greater than 0.05. Provide posterior model probabilities ## only for the four best models. aoh_ex_summ ## Print out this summary sub_model(object = aoh_ex, order = 2, n.burnin = 5000) ## Produce summary statistics for the model with second highest posterior model ## probability. ############################################################################################ ## ## ## Section 4.2: Risk factors for coronary heart disease Complete Contingency Table ## ## ## ############################################################################################ set.seed(1) ## Set a seed for reproducibility data("heart", package = "conting") ## Load the heart data heart_ex <- bcct(formula = y ~ (A + B + C + D + E + F)^6, data = heart, n.sample = 50000, start.formula = y ~ (A + B + C + D + E + F)^2, prior = "UIP") ## Request 50000 iterations of the MCMC algorithm for complete contingency tables. Let the ## maximal model be the saturated model with the 6-way interaction. Start the MCMC from ## the model only including all two-way interactions. Note that, unlike the example in the ## paper, this will not save output to external files. That can be accomplished by setting ## the "save" argument. First, set a sensible working directory first using the function ## setwd() and then use the following code: ## ## set.seed(1) ## heart_ex <- bcct(formula = y ~ (A + B + C + D + E + F)^6, data = heart, ## n.sample = 50000, start.formula = y ~ (A + B + C + D + E + F)^2, ## save = 1000, prior = "UIP") ## ## list.files() ## This will show you the output files in the working directory. ## ## unlink(x = c("BETA.txt", "MHACC.txt", "MODEL.txt", "RJACC.txt", "SIG.txt")) ## This will delete the external files. However you can still use the following code since ## the MCMC output has been read back into R. accept_rate(heart_ex) ## This will calculate the acceptance rates of the MH and RJ algorithms. heart_ex_bp <- bayespval(heart_ex, n.burnin=10000, statistic="deviance") heart_ex_bp ## Assess model adequacy using a burn-in of 10000 and the deviance discrepancy statistic. inter_stats(heart_ex, n.burnin = 10000, prob.level = 0.99) ## Produce posterior summary statistics for log-linear parameters, after a burn-in of ## 10000 and where the HPDIs have a probability content of 0.99. mod_probs(heart_ex, n.burnin = 10000, best = 4) ## Produce posterior model probabilities for the four best models after a burn-in of ## 10000. ############################################################################################ ## ## ## Section 4.3: Spina Bifida Incomplete Contingency Table ## ## ## ############################################################################################ data("spina", package = "conting") ## Load the rabbit data spina ## Print the data set.seed(1) ## Set a seed for reproducibility spina_ex_ind <- bict(formula = y ~ S1 + S2 + S3 + eth, data = spina, n.sample = 25000, null.move.prob = 1) ## For single model inference set the required model as the maximal model (here: the ## independence model with no interactions) and set the probability of null moves to 1 so ## that we will not propose a move to another model. Request 25000 iterations of the ## incomplete contingency table MCMC algorithm. summary(spina_ex_ind, n.burnin = 5000) ## Summarise this model using a burn-in of 5000 and the X2 discrepancy statistic. ## Bayesian p-value is small so we can conclude that it is inadequate. set.seed(1) ## Set a seed for reproducibility spina_ex <- bict(formula = y ~ (S1 + S2 + S3 + eth)^2, data = spina, n.sample = 25000) ## Allow for model uncertainty and request 25000 iterations of the incomplete contingency ## table MCMC algorithm. spina_ex_summ <- summary(spina_ex, n.burnin = 5000) ## Summarise the model using a burn-in of 5000 and the Freeman-Tukey discrepancy ## statistic.x spina_ex_summ ## Print the summary spina_tot <- total_pop(spina_ex, n.burnin = 5000) ## Use the total_pop function to access the MCMC sample from the total population size ## after a burn-in of 5000. round(mean(1/spina_tot$TOT) / mean(1/(spina_tot$TOT^2)),0) ## Calculate a point estimate derived from minimising the posterior expectation of the ## relative square error loss function. ############################################################################################ ## ## ## Section 4.4: Estimating PWID in Scotland Incomplete Contingency Table ## ## ## ############################################################################################ data("ScotPWID", package = "conting") ## Load the ScotPWID data cens <- find_cens(sources = ~ S1 + S2 + S3 + S4, cens_source = ~S4, data = ScotPWID) ## Find the censored cells by specifying the four sources as a formula for the argument ## "sources" and telling R that the "S4" source is censored using the argument ## "cens_source". ScotPWID[cens,] ## Print out the censored cells. set.seed(1) ## Set a seed for reproducibility ## NOTE THAT THE CODE BELOW WILL TAKE SEVERAL HOURS TO COMPLETE. scot_ex <- bict(formula = y ~ (S1 + S2 + S3 + S4 + Age + Gender + Region)^2, data = ScotPWID, cens = cens, n.sample = 2000000) ## Request 2 million iterations of the MCMC algorithm for incomplete contingency tables, ## using the Sabanes-Bove & Held prior and telling R which cells are censored usin the ## cens argument. Note that, unlike the example in the paper, this will not save output to ## external files. That can be accomplished by setting the "save" argument. First, set a ## sensible working directory using the function setwd() and then use the following code: ## ## scot_ex <- bict(formula = y ~ (S1 + S2 + S3 + S4 + Age + Gender + Region)^2, data = ScotPWID, cens = cens, ## n.sample = 2000000, save = 1000) ## ## list.files() ## This will show you the output files in the working directory. ## ## unlink(x = c("BETA.txt", "MHACC.txt", "MODEL.txt", "RJACC.txt", "SIG.txt", "Y0.txt")) ## This will delete the external files. However you can still use the following code since ## the MCMC output has been read back into R. scot_ex_bp <- bayespval(scot_ex, n.burnin = 200000, thin = 5, statistic = "FreemanTukey") ## Assess model adequacy using a burn-in of 200000 and the Freeman-Tukey discrepancy statistic. scot_ex_bp ## Print out the model adequacy assessment. inter_probs(scot_ex, n.burnin = 200000, thin = 5, cutoff = 0.70) ## Provide the posterior probability of the log-linear parameters after a burn-in of ## 200000 iterations and thinning the MCMC output to every 5 iterations. Note that only ## log-linear parameters with posterior probability greater than 0.70 will be displayed. scot_ex_tot <- total_pop(scot_ex, n.burnin = 200000, thin = 5) ## Access the MCMC sample from the total population size after a burn-in of 200000 ## iterations. scot_ex_tot ## Print this object to get summary statistics on the posterior distribution of the total ## population size. plot(scot_ex_tot) ## Plot a histogram of the MCMC sample from the total population size.