############################################################################### # Extended Poisson Process Models using the matrix exponential function ############################################################################### library("BinaryEPPM") library("lmtest") ############################################################################### cat("Morgan, Statistics in Toxicology, 1996\n") cat("Data of Williams: only litters sizes greater than or equal to 7 used\n") data("Williams.litters", package = "BinaryEPPM") output.fn <- BinaryEPPM(number.surviving ~ 1 + vdose, data = Williams.litters) summary(output.fn) output.fn <- update(output.fn, model.name = "beta binomial") logLik(output.fn) output.fn <- update(output.fn, model.name = "correlated binomial") logLik(output.fn) output.fn <- BinaryEPPM(number.surviving ~ 1 + vdose, data = Williams.litters, method = "BFGS", control = list(REPORT = 1, trace = 1)) summary(output.fn) output.fn <- update(output.fn, model.name = "beta binomial") logLik(output.fn) output.fn <- update(output.fn, model.name = "correlated binomial") logLik(output.fn) a.method <- "BFGS" attr(a.method, which = "grad.method") <- "Richardson" output.fn <- BinaryEPPM(number.surviving ~ 1 + vdose, data = Williams.litters, method = a.method) summary(output.fn) output.fn <- update(output.fn, model.name = "beta binomial") logLik(output.fn) output.fn <- update(output.fn, model.name = "correlated binomial") logLik(output.fn) residuals(output.fn, type = "response") residuals(output.fn, type = "pearson") residuals(output.fn, type = "spearson") residuals(output.fn, type = "likelihood") residuals(output.fn, type = "deviance") residuals(output.fn, type = "sdeviance") fitted.values(output.fn) ############################################################################### cat("Number of deaths per number of implants data of Kupper & Haseman (1978)\n") data("KupperHaseman.case", package = "BinaryEPPM") ############################################################################### # The following two blocks of code are included to show what results when the # correlation parameter rho reaches the lower or upper value of a range within # which it has to lie in order for the probability mass function to be valid. # When a bounding value is reached the iteration terminates with a message that # a bounding value has been reached. ############################################################################### KupperHaseman.case.control <- data.frame(Number.Deaths = KupperHaseman.case$Number.Deaths[1:10], Number.Implants = KupperHaseman.case$Number.Implants[1:10]) KupperHaseman.case.treated <- data.frame(Number.Deaths = KupperHaseman.case$Number.Deaths[11:20], Number.Implants = KupperHaseman.case$Number.Implants[11:20]) initial <- c(log(- log(1 - 0.1653)), 0.0149) output.fn <- BinaryEPPM(Number.Deaths / Number.Implants ~ 1, data = KupperHaseman.case.control, model.type = "p only", model.name = "correlated binomial", initial = initial, method = "BFGS", control = list(trace = 1, REPORT = 1)) summary(output.fn) initial <- c(log(- log(1 - 0.1653)), 0.0216) output.fn <- BinaryEPPM(Number.Deaths / Number.Implants ~ 1, data = KupperHaseman.case.treated, model.type = "p only", model.name = "correlated binomial", initial = initial, method = "BFGS", control = list(trace = 1, REPORT = 1)) summary(output.fn) ############################################################################### # The following code duplicates results given in Kupper and Haseman (1978). In # particular it shows how to duplicate Table 1 of that article, which is a table # of limiting bounds for rho given ranges of values for n the sample size and p # the probability of a success. ############################################################################### # model with equal p but unequal scale-factors output.fn <- BinaryEPPM(Number.Deaths / Number.Implants ~ 1 | 0 + Group, data = KupperHaseman.case, model.name = "correlated binomial") summary(output.fn) # log likelihood for model in Kupper and Haseman (1978) with equal p's but unequal rho's # need to construct link because outside the package local.link <- "cloglog" attr(local.link, which = "p") <- make.link(local.link) covariates.matrix.p <- matrix(c(rep(1, 10)), ncol = 1) covariates.matrix.scalef <- matrix(c(rep(1, 10)), ncol = 1) offset.p <- matrix(c(rep(0, 10)), ncol = 1) offset.scalef <- matrix(c(rep(0, 10)), ncol = 1) # controls nsuccess <- list(c(1, rep(0, 5)), c(0, 0, 1, rep(0, 4)), c(1, rep(0, 7)), c(1, rep(0, 7)), c(1, rep(0, 8)), c(1, rep(0, 8)), c(1, rep(0, 8)), c(0, 1, rep(0, 8)), c(0, 0, 1, rep(0, 7)), c(0, 1, rep(0, 9))) ntrials <- list(c(rep(5, 6)), c(rep(6, 7)), c(rep(7, 8)), c(rep(7, 8)), c(rep(8, 9)), c(rep(8, 9)), c(rep(8, 9)), c(rep(9, 10)), c(rep(9, 10)), c(rep(10, 11))) weights <- list(c(rep(1, 6)), c(rep(1, 7)), c(rep(1, 8)), c(rep(1, 8)), c(rep(1, 9)), c(rep(1, 9)), c(rep(1, 9)), c(rep(1, 10)), c(rep(1, 10)), c(rep(1, 11))) p.0 <- log(- log( 1- 0.0765)) parameter <- c(p.0, 0.0023) loglik.controls <- LL.Regression.Binary(parameter = parameter, model.type = "p only", model.name = "correlated binomial", link = local.link, ntrials = ntrials, nsuccess = nsuccess, covariates.matrix.p = covariates.matrix.p, covariates.matrix.scalef = covariates.matrix.scalef, offset.p = offset.p, offset.scalef = offset.scalef, weights = weights) loglik.controls p.c <- log(- log(1 - 0.1653)) parameter <- c(p.c, 0.0149) loglik.controls <- LL.Regression.Binary(parameter, model.type = "p only", model.name = "correlated binomial", link = local.link, ntrials = ntrials, nsuccess = nsuccess, covariates.matrix.p = covariates.matrix.p, covariates.matrix.scalef = covariates.matrix.scalef, offset.p = offset.p, offset.scalef = offset.scalef, weights = weights) loglik.controls # treated p.1 <- log(- log(1 - 0.2260)) nsuccess <- list(c(1, rep(0, 5)), c(0, 0, 1, rep(0, 3)), c(0, 1, rep(0, 6)), c(1, rep(0, 8)), c(0, 0, 1, rep(0, 6)), c(rep(0, 3), 1, rep(0, 5)), c(1, rep(0, 9)), c(rep(0, 4), 1, rep(0, 5)), c(0, 1, rep(0, 9)), c(rep(0, 6), 1, rep(0, 4))) ntrials <- list(c(rep(5, 6)), c(rep(5, 6)), c(rep(7, 8)), c(rep(8, 9)), c(rep(8, 9)), c(rep(8, 9)), c(rep(9, 10)), c(rep(9, 10)), c(rep(10, 11)), c(rep(10, 11))) weights <- list(c(rep(1, 6)), c(rep(1, 6)), c(rep(1, 8)), c(rep(1, 9)), c(rep(1, 9)), c(rep(1, 9)), c(rep(1, 10)), c(rep(1, 10)), c(rep(1, 11)), c(rep(1, 11))) parameter <- c(p.1, 0.0267) loglik.treated <- LL.Regression.Binary(parameter, model.type = "p only", model.name = "correlated binomial", link = local.link, ntrials = ntrials, nsuccess = nsuccess, covariates.matrix.p = covariates.matrix.p, covariates.matrix.scalef = covariates.matrix.scalef, offset.p = offset.p, offset.scalef = offset.scalef, weights = weights) loglik.treated parameter <- c(p.c, 0.0216) loglik.treated <- LL.Regression.Binary(parameter, model.type = "p only", model.name = "correlated binomial", link = local.link, ntrials = ntrials, nsuccess = nsuccess, covariates.matrix.p = covariates.matrix.p, covariates.matrix.scalef = covariates.matrix.scalef, offset.p = offset.p, offset.scalef = offset.scalef, weights = weights) loglik.treated # Table 1 of Kupper & Haseman (1978) limit.matrix <- matrix(c(rep(0, 42)), ncol = 6) colnames(limit.matrix) <- c("0.1 lower", "0.1 upper", "0.3 lower", "0.3 upper", "0.5 lower", "0.5 upper") rownames(limit.matrix) <- c("2", "3", "5", "7", "10", "15", "20") vnt <- c(2, 3, 5, 7, 10, 15, 20) vp <- c(0.1, 0.3, 0.5) # need to construct link because outside the package local.link <- "cloglog" attr(local.link, which = "p") <- make.link(local.link) vlp <- attr(local.link, which = "p")$linkfun(vp) for (i in 1:length(vnt)) { for (j in 1:length(vlp)) { output.temp <- Model.BCBinProb(parameter = c(vlp[j], 0), model.type = "p only", model.name = "correlated binomial", link = local.link, ntrials = list(c(rep(vnt[i], (vnt[i] + 1)))), covariates.matrix.p = matrix(c(1), nrow = 1)) limit.matrix[i, 2*j-1] <- output.temp$Dparameters$lower.limit limit.matrix[i, 2*j] <- output.temp$Dparameters$upper.limit } } print(limit.matrix) ############################################################################### cat("Yorkshires pig litter data of Brooks, James, and Gray (1991)\n") data("Yorkshires.litters", package = "BinaryEPPM") # Fitting broken stick regression model output.fn.two <- BinaryEPPM(data = Yorkshires.litters, number.success ~ 1 | 1 ) mean.size <- mean(Yorkshires.litters$"vsize") mean.var <- c(output.fn.two$coefficients$p.est[1], output.fn.two$coefficients$scalef.est[1]) # Because Yorkshires.litters is a list using the following to add covariates Yorkshires.litters[["scaled.size"]] <- Yorkshires.litters$"vsize" - mean.size cat("beta binomial and generalized binomial, subsets (1:5), (6:9)\n") Results <- optim(par = mean.var, fn = function(mean.var, input.data, model.names, subsets, ...){ local.data <- input.data local.data[["mean.p"]] <- rep(mean.var[[1]], length(local.data[[1]])) local.data[["mean.scalef"]] <- rep(mean.var[[2]], length(local.data[[1]])) first.subset <- BinaryEPPM(data = local.data, model.name = model.names[1], subset = subsets[[1]], number.success ~ 0 + scaled.size + offset(mean.p) | 0 + scaled.size + offset(mean.scalef)) second.subset <- update(first.subset, model.name = model.names[2], subset = subsets[[2]]) sum.logL <- logLik(first.subset) + logLik(second.subset) attr(sum.logL, which = "df") <- attr(logLik(first.subset), which = "df") + attr(logLik(second.subset), which = "df") attr(sum.logL, which = "nobs") <- attr(logLik(first.subset), which = "nobs") + attr(logLik(second.subset), which = "nobs") return(sum.logL) }, input.data = Yorkshires.litters, model.names = c("beta binomial", "generalized binomial"), subsets = list(1:5, 6:9), control = list(fnscale = -1), hessian = TRUE) Results$value Yorkshires.litters[["mean.p"]] <- rep(Results$par[[1]], length(Yorkshires.litters[[1]])) Yorkshires.litters[["mean.scalef"]] <- rep(Results$par[[2]], length(Yorkshires.litters[[1]])) first.subset <- BinaryEPPM(data = Yorkshires.litters, model.name = "beta binomial", subset = 1:5, number.success ~ 0 + scaled.size + offset(mean.p) | 0 + scaled.size + offset(mean.scalef)) second.subset <- update(first.subset, model.name = "generalized binomial", subset = 6:9) print(data.frame(intercepts = c("p", "scale factor"), Results$par, se = c(sqrt(diag(- solve(Results$hessian))))), row.names = FALSE) print(data.frame(model = c("BB p", "BB scale factor", "GB p", "GB scale factor"), slope = c(first.subset$coefficients$p.est, first.subset$coefficients$scalef.est, second.subset$coefficients$p.est, second.subset$coefficients$scalef.est), se = c(sqrt(diag(first.subset$vcov)), sqrt(diag(second.subset$vcov)))), row.names = FALSE) print(data.frame(size = Yorkshires.litters$vsize, scaled = Yorkshires.litters$scaled.size, mean = c(predict(first.subset, type = "mean"), predict(second.subset, type = "mean")), variance = c(predict(first.subset, type = "variance"), predict(second.subset, type = "variance")), p = c(predict(first.subset, type = "p"), predict(second.subset, type = "p")), scale.factor = c(predict(first.subset, type = "scale.factor"), predict(second.subset, type = "scale.factor")), lower = c(predict(first.subset, type = "scale.factor.limits")[["lower"]], predict(second.subset, type = "scale.factor.limits")[["lower"]]), upper = c(predict(first.subset, type = "scale.factor.limits")[["upper"]], predict(second.subset, type = "scale.factor.limits")[["upper"]])), row.names = FALSE) ############################################################################### cat("Titanic survivors\n") data("Titanic.survivors.case", package = "BinaryEPPM") # link complementary log-log output.fn <- BinaryEPPM(number.survive / number.passengers ~ age + sex + pclass, model.type = "p only", model.name = "binomial", data = Titanic.survivors.case) names(output.fn$optim$par) <- c("Intercept", "age adult", "sex male", "class 2nd class", "class 3rd class") summary(output.fn) AIC(output.fn) output.fn.one <- update(output.fn, model.name = "beta binomial") names(output.fn.one$optim$par) <- c("Intercept", "age adult", "sex male", "class 2nd class", "class 3rd class", "theta") summary(output.fn.one) AIC(output.fn.one) lrtest(output.fn, output.fn.one) waldtest(output.fn, output.fn.one) waldtest(output.fn, output.fn.one, vcov = vcov) waldtest(output.fn, output.fn.one, test = c("Chisq", "F"), vcov = vcov) predict(output.fn.one, type = "distribution") predict(output.fn.one, type = "distribution.parameters") layout(matrix(1:6, byrow = TRUE, ncol = 2)) plot(output.fn.one, which = 1, type = "response") plot(output.fn.one, which = 2, type = "pearson") plot(output.fn.one, which = 3, type = "spearson") plot(output.fn.one, which = 4, type = "likelihood") plot(output.fn.one, which = 5, type = "deviance") plot(output.fn.one, which = 6, type = "sdeviance") ############################################################################### # Praters gasoline data as in the vignette for betareg cat("Praters gasoline data\n") # yield is a proportion between 0 and 1 not a binary variable # reconstructed as a binary variable in BinaryEPPMworkspace data("GasolineYield", package = "BinaryEPPM") a.method <- "BFGS" attr(a.method, which = "grad.method") <- "Richardson" # estimates from betareg article with phi translated to scalefactor then to intercept of # linear predictor i.e., scale-factor = log(1000/(1+440.3)) initial <- c(-6.1595710, 1.7277289, 1.3225969, 1.5723099, 1.0597141, 1.1337518, 1.0401618, 0.5436922, 0.4959007, 0.3857930, 0.0109669, log(1000 / 441.3)) names(initial) <- c("(Intercept)", "batch1", "batch2", "batch3", "batch4", "batch5", "batch6", "batch7", "batch8", "batch9", "temp", "(Intercept)") gy_logit <- BinaryEPPM(percent / vthousand ~ batch + temp | 1, data = GasolineYield, model.type = "p and scale-factor", model.name = "beta binomial", link = "logit", initial = initial, method = a.method) summary(gy_logit) # plots as in Beta Regression in R, Cribari-Neto and Zeileis, 2010. layout(matrix(1:6, byrow = TRUE, ncol = 2)) plot(gy_logit, which = 1:4) plot(gy_logit, which = 5, type = "sdeviance") plot(gy_logit, which = 1, type = "sdeviance") gy_logit4 <- update(gy_logit, initial = gy_logit$optim$par, subset = -4) coef(gy_logit4, prtpar = "scale.factor") initial <- c(gy_logit$optim$par, 0) names(initial) <- c("(Intercept)", "batch1", "batch2", "batch3", "batch4", "batch5", "batch6", "batch7", "batch8", "batch9", "temp", "(Intercept)", "temp") gy_logit2 <- BinaryEPPM(percent / vthousand ~ batch + temp | temp, data = GasolineYield, model.type = "p and scale-factor", model.name = "beta binomial", link = "logit", initial = initial, method = "BFGS") coef(gy_logit2, prtpar = "scale.factor") lrtest(gy_logit, gy_logit2) initial <- c(-2.788768392, 0.899816790, 0.651053334, 0.765141405, 0.532867686, 0.548732470, 0.515271564, 0.287060244, 0.248332244, 0.183942597, 0.005360537, 0.0001042918) names(initial) <- c("(Intercept)", "batch1", "batch2", "batch3", "batch4", "batch5", "batch6", "batch7", "batch8", "batch9", "temp", "(Intercept)") gy_loglog <- update(gy_logit, initial = initial, link = "loglog") summary(gy_logit)$pseudo.r.squared summary(gy_loglog)$pseudo.r.squared AIC(gy_logit, gy_logit2, gy_loglog) initial <- c(gy_logit$optim$par[1:11], 0, gy_logit$optim$par[12]) wk.regressor <- predict(gy_logit, type = "linear.predictor.p")^2 gy_temp <- update(gy_logit, initial = initial, formula = percent / vthousand ~ batch + temp + wk.regressor) lrtest(gy_logit, gy_temp) initial <- c(gy_loglog$optim$par[1:11], 0, gy_loglog$optim$par[12]) wk.regressor <- predict(gy_loglog, type = "linear.predictor.p")^2 gy_temp <- update(gy_loglog, initial = c(gy_loglog$optim$par, 0), formula = percent / vthousand ~ batch + temp + wk.regressor) lrtest(gy_loglog, gy_temp) # plots as in Beta Regression in R, Cribari-Neto and Zeileis, 2010. plot(abs(residuals(gy_loglog, type = "response")), abs(residuals(gy_logit, type = "response"))) abline(0, 1, lty = 2) ###############################################################################