############################################################################# # mipfp: A R Package for Multidimensional Array Fitting # # and Simulating Multivariate Bernouilli Distributions # # # # This R script can be used to replicate the results of the paper. # # Version: 2 May 2016 # # Authors: J. Barthelemey and T. Suesse # # License: GPL-2 (http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) # ############################################################################# # loading the package library("mipfp") # setting a random seed set.seed(1234) # --------------------------------------------------------------- # Updating a initial contingency table according to known margins # --------------------------------------------------------------- # loading the data data("spnamur", package = "mipfp") # subsetting the data frame, keeping only the first 3 variables spnamur.sub <- subset(spnamur, select = Household.type:Prof.status) # true table true.table <- table(spnamur.sub) # extracting the margins tgt.hht <- apply(true.table, 1, sum) tgt.hht.gen <- apply(true.table, c(1,2), sum) tgt.gen.pro <- apply(true.table, c(2,3), sum) tgt.data <- list(tgt.hht, tgt.hht.gen, tgt.gen.pro) tgt.list.dims <- list(1, c(1,2), c(2,3)) # creating the seed, a 10% sample of spnamur sample.idx <- sample(nrow(spnamur), ceiling(nrow(spnamur) * 0.10)) seed.df <- spnamur.sub[sample.idx, ] seed.table <- table(seed.df) # applyting the different fitting methods r.ipfp <- Estimate(seed = seed.table, target.list = tgt.list.dims, target.data = tgt.data, method = "ipfp") r.ml <- Estimate(seed = seed.table, target.list = tgt.list.dims, target.data = tgt.data, method = "ml") r.chi2 <- Estimate(seed = seed.table, target.list = tgt.list.dims, target.data = tgt.data, method = "chi2") r.lsq <- Estimate(seed = seed.table, target.list = tgt.list.dims, target.data = tgt.data, method = "lsq") # printing the summary of ipfp method summary(r.ipfp) # 0.95 level confidence interval of the estimates (upper bounds) confint(r.ipfp) # print the maximum absolute deviation between targets and # generated margins CompareMaxDev(list(r.ipfp, r.ml, r.chi2, r.lsq), echo = TRUE) # compute the maximum absolute deviation between the true and # estimated tables CompareMaxDev(list(r.ipfp, r.ml, r.chi2, r.lsq), echo = TRUE, true.table = true.table) # ----------------------------------------------- # Simulating multivariate Bernoulli distributions # ----------------------------------------------- # loading the data data("Qaqish", package = "mipfp") or <- Qaqish$or or # marginal and joint probabilities p <- c(0.2, 0.4, 0.6, 0.8) p.joint <- ObtainMultBinaryDist(odds = or, marg.probs = p) # simulating 100,000 data of size 4 from the obtained joint-distribution y.sim <- RMultBinary(n = 1e5, mult.bin.dist = p.joint)$binary.sequences # verifying the simulated marginal probabilities apply(y.sim, 2, mean) # verifying the correlation and odd ratios structure cor(y.sim) Odds2Corr(Qaqish$or, p)$corr Corr2Odds(corr = cor(y.sim), marg.probs = apply(y.sim, 2, mean))$odds