#################################################################################### # Extended Poisson Process Models using the matrix exponential function #################################################################################### library("BinaryEPPM") library("lmtest") #################################################################################### cat("Faddy & Smith (2008) Applied Statistics\n") cat("Rope spores in potato flour data\n") data("ropespores.case", package = "BinaryEPPM") cat("Regressions of binomial models\n") cat("Model of Faddy & Smith (2012)\n") output.fn <- BinaryEPPM(data = ropespores.case, number.spores / number.tested ~ 1 + offset(logdilution), model.name = "binomial") # generalized binomial Faddy and Smith (2012) output.fn.one <- update(output.fn, model.type = "p only", model.name = "generalized binomial") summary(output.fn.one) lrtest(output.fn, output.fn.one) AIC(output.fn, output.fn.one) # profile likelihood for power of power logit output.fn.two <- update(output.fn.one, link = "powerlogit") Results <- optim(par = 1, fn = function(par, input.data, ...) { local.link <- "powerlogit" attr(local.link, which = "power") <- par slogL <- update(output.fn.two, link = local.link)$loglik return(slogL)}, input.data = ropespores.case, method = "Brent", lower = 1/3, upper = 3, control = list(fnscale = -1), hessian = TRUE) se <- sqrt(- solve(Results$hessian)[1, 1]) X2 <- round(-2 * (output.fn.one$loglik - Results$value), digits = 4) cat(paste("\n", "power", round(Results$par, digits = 4), "se", round(sqrt(- solve(Results$hessian)[1, 1]), digits = 4), "log likelihood", round(Results$value, digits = 4), "\n", sep = " ")) #################################################################################### cat("Yorkshires pig litter data of Brooks, James, and Gray (1991)\n") data("Yorkshires.litters", package = "BinaryEPPM") cat("binomial, litter size as a factor\n") output.fn <- BinaryEPPM(data = Yorkshires.litters, model.name = "binomial", number.success ~ 0 + fsize) cat(paste("\n", "generalized Pearson goodness of fit statistic", round(sum(residuals(output.fn, type = "pearson")^2), digits = 4), "on", sum(sapply(1:length(Yorkshires.litters$number.success), function(i) { sum(c(Yorkshires.litters$number.success[[i]]))})) - length(attr(Yorkshires.litters$fsize, which = "levels")), "df","\n", sep = " ")) cat("generalized binomial\n") output.fn <- BinaryEPPM(data = Yorkshires.litters, model.name = "binomial", number.success ~ 1 + vsize) output.fn.one <- BinaryEPPM(data = Yorkshires.litters, number.success ~ 1 + vsize | 1) output.fn.two <- BinaryEPPM(data = Yorkshires.litters, number.success ~ 1 + vsize | 1 + vsize) # likelihood ratio tests lrtest(output.fn, output.fn.one, output.fn.two) # print estimates of p and scale-factor with distribution parameters print(data.frame(size = Yorkshires.litters$vsize, mean = predict(output.fn.two, type = "mean"), variance = predict(output.fn.two, type = "variance"), p = predict(output.fn.two, type = "p"), scale.factor = predict(output.fn.two, type = "scale.factor"), lower = c(predict(output.fn.two, type = "scale.factor.limits")[["lower"]]), upper = c(predict(output.fn.two, type = "scale.factor.limits")[["upper"]])), row.names = FALSE) approx.lp.p <- predict(output.fn.two, type = "linear.predictor.p") approx.lp.sf <- predict(output.fn.two, type = "linear.predictor.scale.factor") exact.lp.p <- log( -log(1 - predict(output.fn.two, type = "p"))) exact.lp.sf <- log(predict(output.fn.two, type = "scale.factor")) # pdf(".../Figure_1.pdf") plot(x = c(5, 13), y = c(-0.40, 0.15), xlab = "litter size", ylab = "linear predictor values", type = "n") lines(x = Yorkshires.litters$vsize, y = approx.lp.p, lty = 1) points(x = Yorkshires.litters$vsize, y = exact.lp.p, pch = 1) lines(x = Yorkshires.litters$vsize, y = approx.lp.sf, lty = 2) points(x = Yorkshires.litters$vsize, y = exact.lp.sf, pch = 3) legend(5.1, -0.2, legend = c("log(-log(1-p_s)", "log(f_s)"), pch = c(1, 3), cex = 1.0) # dev.off() # Fitting regression model with single linear predictor function but split # error distributions in.par <- c(output.fn.two$coefficients$p.est, output.fn.two$coefficients$scalef.est) Results <- optim(par = in.par, fn = function(in.par, in.data, model.names, subsets, ...) { subset1 <- BinaryEPPM(data = in.data, model.name = model.names[1], subset = subsets[[1]], initial = in.par, number.success ~ 1 + vsize | 1 + vsize, control = list(maxit = 1)) subset2 <- BinaryEPPM(data = in.data, model.name = model.names[2], subset = subsets[[2]], initial = in.par, number.success ~ 1 + vsize | 1 + vsize, control = list(maxit = 1)) slogL <- logLik(subset1) + logLik(subset2) attr(slogL, which = "df") <- attr(logLik(subset1), which = "df") + attr(logLik(subset2), which = "df") attr(slogL, which = "nobs") <- attr(logLik(subset1), which = "nobs") + attr(logLik(subset2), which = "nobs") return(slogL) }, in.data = Yorkshires.litters, model.names = c("beta binomial", "generalized binomial"), subsets = list(1:5, 6:9), control = list(fnscale = -1), hessian = TRUE) cat(paste("\n", "log likelihood", round(Results$value, digits = 4), "\n", sep = " ")) print(data.frame(parameters = c("intercept p", "slope p", "intercept scale factor", "slope scale factor"), Results$par, se = c(sqrt(diag(solve(Results$hessian))))), row.names = FALSE) subset1 <- BinaryEPPM(data = Yorkshires.litters, model.name = "beta binomial", subset = 1:5, initial = Results$par, number.success ~ 1 + vsize | 1 + vsize, control = list(maxit = 1)) subset2 <- BinaryEPPM(data = Yorkshires.litters, model.name = "generalized binomial", subset = 6:9, initial = Results$par, number.success ~ 1 + vsize | 1 + vsize, control = list(maxit = 1)) print(data.frame(size = Yorkshires.litters$vsize, mean = c(predict(subset1, type = "mean"), predict(subset2, type = "mean")), variance = c(predict(subset1, type = "variance"), predict(subset2, type = "variance")), p = c(predict(subset1, type = "p"), predict(subset2, type = "p")), scale.factor = c(predict(subset1, type = "scale.factor"), predict(subset2, type = "scale.factor")), lower = c(predict(subset1, type = "scale.factor.limits")[["lower"]], predict(subset2, type = "scale.factor.limits")[["lower"]]), upper = c(predict(subset1, type = "scale.factor.limits")[["upper"]], predict(subset2, type = "scale.factor.limits")[["upper"]])), row.names = FALSE) # predicting scale factor and limits for the generalized binomial for a size of 14 newdata <- data.frame(vsize = 14, vnmax = 14, mean.p = Results$par[[1]], mean.scalef = Results$par[[2]]) print(data.frame(size = newdata$vsize, p = predict(subset2, newdata, type = "p"), scale.factor = predict(subset2, newdata, type = "scale.factor"), lower = predict(subset2, newdata = newdata, type = "scale.factor.limits")[["lower"]], upper = predict(subset2, newdata = newdata, type = "scale.factor.limits")[["upper"]]), row.names = FALSE) #################################################################################### cat("Hiroshima chromosome aberrations data from Prentice 1986\n") data("Hiroshima.grouped", package = "BinaryEPPM") output.group <- BinaryEPPM(number.aberrations ~ gz + gzz | gz + gzz, data = Hiroshima.grouped, model.type = "p and scale-factor", model.name = "beta binomial", link = "logit", pseudo.r.squared.type = "max-rescaled R squared") cat("Hiroshima chromosome aberrations data from Morel and Neerchal, 2012\n") data("Hiroshima.case", package = "BinaryEPPM") # using case data with initial estimates from dose grouped frequency data initial <- output.group$optim$par names(initial) <- c("(Intercept)", "z", "zz", "(Intercept)", "z", "zz") output.case <- BinaryEPPM(t/m ~ z + zz | z + zz, data = Hiroshima.case, initial = initial, model.type = "p and scale-factor", model.name = "beta binomial", link = "logit", pseudo.r.squared.type = "max-rescaled R squared") summary(output.case) ############################################################################## # foodstamp data set as mentioned in Kunsch, Stefanski, Carroll (1989) # and elsewhere. Logistic regression used. # duplicating using weights in BinaryEPPM cat("Foodstamp data\n") # data as a data frame data("foodstamp.case", package = "BinaryEPPM") output.fn <- BinaryEPPM(participation / n ~ tenancy + suppl.income + income, data = foodstamp.case, weights = foodstamp.case$weights1, model.name = "binomial", link = "logit", pseudo.r.squared.type = "max-rescaled R squared") summary(output.fn) # data as a list data("foodstamp.grouped", package = "BinaryEPPM") attr(foodstamp.grouped$l.weights1, which = "normalise") <- TRUE attr(foodstamp.grouped$l.weights1, which = "norm.to.n") <- 150 output.fn <- BinaryEPPM(l.participation ~ tenancy + suppl.income + income, data = foodstamp.grouped, model.name = "binomial", link = "logit", weights = foodstamp.grouped$l.weights1, pseudo.r.squared.type = "max-rescaled R squared") summary(output.fn) ##############################################################################