################################################################################ library("CountsEPPM") ################################################################################ data("ceriodaphnia.group") names.parameters <- c('log(mean) Intercept', 'log(mean) dose', 'log(mean) dose^2', 'log(variance) Intercept', 'log(variance) dose', 'log(variance) dose^2', 'log(b)') for ( i in 1:3 ) { if (i==1) { output.fn <- CountsEPPM(number.young ~ 1 + vdose + vdose2 | 1 + vdose + vdose2, data = ceriodaphnia.group, control=list(maxit = 2000)) output.fn$estses[[1]] <- names.parameters summary(output.fn) } else { initial <- output.fn$estses[[2]] names(initial) <- output.fn$estses[[1]] output.fn <- CountsEPPM(number.young ~ 1 + vdose + vdose2 | 1 + vdose + vdose2, data = ceriodaphnia.group, initial = initial, control = list(maxit = 2000)) summary(output.fn) }} # end of for loop output.fn <- CountsEPPM(number.young ~ 1 + vdose + vdose2 | 1 + vdose + vdose2, data = ceriodaphnia.group, initial = initial, optimization.method = 'nlm', control = list(print.level = 1)) summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes') print(output.distribution$FDparameters) # initial estimates from Faddy & Smith (2011) initial <- c(3.14040169, 0.17348624, -0.01961986, 4.43886332, -0.49261261, 0.02773696, -0.14002100) names(initial) <- output.fn$estses[[1]] output.fn <- CountsEPPM(number.young ~ 1 + vdose + vdose2 | 1 + vdose + vdose2, data = ceriodaphnia.group, initial = initial) summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes') print(output.distribution$FDparameters) #################################################################################### data("Williams.litters") output.fn <- CountsEPPM(number.implants ~ 0 + fdose | 1, Williams.litters, model = 'general', control=list(maxit = 2000), scale.factor.model = 'yes') output.fn$estses[[1]] <- c("dose 0", "dose 0.75", "dose 1.5", "dose 3.0", "log(scale factor)", "log(b)") summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes') print(output.distribution$FDparameters) initial <- output.fn$estses[[2]][1:5] names(initial) <- output.fn$estses[[1]][1:5] output.fn <- CountsEPPM(number.implants ~ 0 + fdose | 1, Williams.litters, model = 'limiting', initial = initial, scale.factor.model = 'yes') summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes') print(output.distribution$FDparameters) #################################################################################### data("Luningetal.litters") print("Data of Luning, et al., 1996") print("Faddy distribution model - mean & variance") output.fn <- CountsEPPM(number.trials ~ 0 + fdose | 0 + fdose, Luningetal.litters, ltvalue = 4, utvalue = 11, control = list(maxit = 2000), scale.factor.model = 'yes') summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.probabilities = 'yes') # limiting model output.fn <- CountsEPPM(number.trials ~ 0 + fdose | 0 + fdose, Luningetal.litters, model = 'limiting', ltvalue = 4, utvalue = 11, control = list(maxit = 1000), scale.factor.model = 'yes') summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes') print(output.distribution$FDparameters) # fixed b varying over range vlogfixed.b <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19) vloglikelihood <- rep(0, 20) for ( i in 1:20 ) { if (i == 1) { output.fn <- CountsEPPM(number.trials ~ 0 + fdose | 0 + fdose, Luningetal.litters, model = 'general fixed b', ltvalue = 4, utvalue = 11, fixed.b = exp(vlogfixed.b[i]), scale.factor.model = 'yes') output.fn$estses[[1]] <- c('mean dose 0', 'mean dose 300', 'mean dose 600', 'scale factor dose 0', 'scale factor 300', 'scale factor 600') } else { output.fn <- CountsEPPM(number.trials ~ 0 + fdose | 0 + fdose, Luningetal.litters, model = 'general fixed b', initial = initial, ltvalue = 4, utvalue = 11, fixed.b = exp(vlogfixed.b[i]), scale.factor.model = 'yes') } # end of if (i==1) initial <- output.fn$estses[[2]] names(initial) <- output.fn$estses[[1]] vloglikelihood[i] <- output.fn$loglikelihood } # end of for loop plot(vlogfixed.b, vloglikelihood, xlim = c(0, 25),ylim = c(-2658, -2653), main = "Profile likelihood for log(b) Luning litters data", xlab = "log(b)", ylab = "log(likelihood)") points(20, output.fn$loglikelihood, pch = 16) text(20.1, output.fn$loglikelihood, "limiting model", pos = 4,offset = .5,cex = .7) par(ask = TRUE) jpeg("C:\\Users\\U0117151\\Desktop\\Rjobs\\PoissonProcessModels\\JournalOfStatististicalSoftware\\Under_Over_Dispersed_Counts_Version_2.0\\ProfileLikelihoodLuningLitters.jpg") plot(vlogfixed.b,vloglikelihood,xlim=c(0,25),ylim=c(-2658,-2653), xlab="log(b)",ylab="log(likelihood)") points(20,output.fn$loglikelihood,pch=16) text(20.1,output.fn$loglikelihood,"limiting model",pos=4,offset=0.5,cex=0.7) dev.off() #################################################################################### data("herons.group") output.fn <- CountsEPPM(number.attempts ~ 0 + group, herons.group, model.type = 'mean only', model = 'negative binomial') output.fn$estses[[1]] <- c('Adult mean', 'Immature mean', 'log(b)') summary(output.fn) output.fn <- CountsEPPM(number.attempts ~ 0 + group, herons.group, model.type = 'mean only', model = 'Faddy distribution') output.fn$estses[[1]] <- c('Adult mean', 'Immature mean', 'c', 'log(b)') summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.probabilities = 'yes') print(output.distribution) output.fn$vnmax <- output.fn$vnmax + 60 output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes') print(output.distribution) #################################################################################### data("Titanic.survivors.case") # offset of log(cases) lncases <- log(Titanic.survivors.case$cases) output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, model.type = 'mean only', model = 'negative binomial', offset = list(lncases), control = list(maxit=2000)) output.fn$estses[[1]] <- c('(Intercept)', 'age adult', 'sex male', 'class 2nd class', 'class 3rd class', 'log(b)') summary(output.fn) # negative binomial, fixed b at value from negative binomial fit initial <- output.fn$estses[[2]][1:5] names(initial) <- c('(Intercept)', 'age adult', 'sex male', 'class 2nd class', 'class 3rd class') output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, model.type = 'mean only', model = 'negative binomial fixed b', initial = initial, offset = list(lncases), control = list(maxit=2000), fixed.b = exp(output.fn$estses[[2]][6])) summary(output.fn) # negative binomial, fixed b at value from Hilbe (2011, p268) initial <- output.fn$estses[[2]][1:5] names(initial) <- c('(Intercept)', 'age adult', 'sex male', 'class 2nd class', 'class 3rd class') output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, model.type = 'mean only', model = 'negative binomial fixed b', initial = initial, offset = list(lncases), control = list(maxit=2000), fixed.b = 9.615385) output.fn$estses[[1]] <- c('(Intercept)', 'age adult', 'sex male', 'class 2nd class', 'class 3rd class') summary(output.fn) output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, model.type = 'mean only', model = 'Faddy distribution', offset = list(lncases), control = list(maxit = 5000)) output.fn$estses[[1]] <- c('(Intercept)', 'age adult', 'sex male', 'class 2nd class', 'class 3rd class', 'c', 'log(b)') summary(output.fn) # Single variance, variance model # to get proper distributions for the initial estimates both mean & variance # need to be offset by lncases initial <- c(output.fn$estses[[2]][1:5],0, output.fn$estses[[2]][7]) names(initial) <- c('(Intercept)', 'age adult', 'sex male', 'class 2nd class', 'class 3rd class', 'intercept variance', 'log(b)') output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, initial = initial, offset = list(lncases,lncases), control = list(maxit = 3000)) summary(output.fn) # Single scale factor, scale factor model to get proper distributions # for the initial estimates both mean & scale factor # need to be offset by lncases for ( i in 1:3) { if (i==1) { output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, offset = list(lncases, lncases), control = list(maxit = 4000), scale.factor.model = 'yes') output.fn$estses[[1]] <- c('Intercept mean', 'age adult', 'sex male', 'class 2nd class', 'class 3rd class', 'Intercept scale', 'log(b)') } else { initial <- output.fn$estses[[2]] names(initial) <- output.fn$estses[[1]] output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, offset = list(lncases, lncases),initial = initial, control = list(maxit = 4000), scale.factor.model = 'yes') }} # end of for loop summary(output.fn) initial <- output.fn$estses[[2]] names(initial) <- output.fn$estses[[1]] output.fn <- CountsEPPM(survive ~ age + sex + class, Titanic.survivors.case, offset = list(lncases, lncases), initial = initial, optimization.method = 'nlm', control = list(print.level = 1,iterlim = 2000), scale.factor.model = 'yes') summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters='yes') print(output.distribution$FDparameters) ############################################################################## data("takeover.bids.case") # dropping unused variables from the data set takeover.bids.case <- transform(takeover.bids.case, TAKEOVER=NULL, CONSTANT=NULL) # taking out the count dependent variable NUMBIDS <- takeover.bids.case$NUMBIDS # taking out factors and setting them up as such LEGLREST <- factor(takeover.bids.case$LEGLREST, levels = c(0,1)) REALREST <- factor(takeover.bids.case$REALREST, levels = c(0,1)) FINREST <- factor(takeover.bids.case$FINREST, levels = c(0,1)) REGULATN <- factor(takeover.bids.case$REGULATN, levels = c(0,1)) WHTKNGHT <- factor(takeover.bids.case$WHTKNGHT, levels = c(0,1)) # dropping the count and factors from the data frame takeover.bids.case <- transform(takeover.bids.case, NUMBIDS = NULL, LEGLREST = NULL, REALREST = NULL, FINREST = NULL, REGULATN = NULL, WHTKNGHT = NULL) # calculating means and standard deviations for rescaling cont.var.mean <- lapply(as.list(takeover.bids.case), mean) cont.var.std <- lapply(as.list(takeover.bids.case), sd) # scaling the continuous variables takeover.bids.case <- as.data.frame(scale(takeover.bids.case, center = TRUE, scale = TRUE)) # putting the count and factors back into the data frame takeover.bids.case <- data.frame(NUMBIDS, takeover.bids.case, LEGLREST, REALREST, FINREST, REGULATN, WHTKNGHT) # mean and variance, only intercept in scale factor model initial <- c(-0.42697945, 0.33784345, -0.37762929, 0.01024449, 0.63770323, -0.16315422, -0.09519243, 0.73136256, -0.59911561, -0.02419913, -0.04438814, -46.28487805) names(initial) <- c('(Intercept) mean', 'LEGLREST mean', 'REALREST mean', 'FINREST mean', 'WHTKNGHT mean', 'BIDPREM mean', 'INSTHOLD mean', 'SIZE mean', 'SIZESQ mean', 'REGULATN mean', '(Intercept) scale' , 'log(b)') output.fn <- CountsEPPM(NUMBIDS ~ LEGLREST + REALREST + FINREST + WHTKNGHT + BIDPREM + INSTHOLD + SIZE + SIZESQ + REGULATN, data = takeover.bids.case, initial = initial, optimization.method = 'nlm', control=list(print.level = 1,iterlim = 1), scale.factor.model = 'yes') summary(output.fn) AIC(output.fn) BIC(output.fn) # saturated model in mean and variance initial <- c(-0.566854145, 0.267698554, -0.166646016, 0.679694180, 0.810712355, -0.314938580, -0.151897896, 0.932242219, -0.858111289, 0.245379572,-0.691209393, -0.104513779, 0.214444684, 0.533925230, 0.267838551, -0.159394702, -0.005098182, 0.451388246, -0.937629929, 0.264256025, -5.331921990) names(initial) <- c('(Intercept) mean', 'LEGLREST mean', 'REALREST mean', 'FINREST mean', 'WHTKNGHT mean', 'BIDPREM mean', 'INSTHOLD mean', 'SIZE mean', 'SIZESQ mean', 'REGULATN mean', '(Intercept) scale', 'LEGLREST scale', 'REALREST scale', 'FINREST scale', 'WHTKNGHT scale', 'BIDPREM scale', 'INSTHOLD scale', 'SIZE scale', 'SIZESQ scale', 'REGULATN scale', 'log(b)') output.fn <- CountsEPPM(NUMBIDS ~ LEGLREST + REALREST + FINREST + WHTKNGHT+BIDPREM + INSTHOLD + SIZE + SIZESQ + REGULATN | LEGLREST+REALREST + FINREST + WHTKNGHT + BIDPREM + INSTHOLD + SIZE + SIZESQ + REGULATN, data = takeover.bids.case, optimization.method = 'nlm', control = list(print.level = 1,iterlim = 1), initial = initial, scale.factor.model = 'yes') summary(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes', output.probabilities = 'yes') print(output.distribution$FDparameters) wkv <- takeover.bids.case$NUMBIDS + 1 Expected <- rep(0, 126) for ( i in 1:126 ) { wks <- wkv[i] Expected[i] <- output.distribution$probabilities[[i]][wks] if (round(Expected[i], digits = 9) == 0) { Expected[i] <- 1 } } Observed <- rep(1, 126) chisq <- sum((Observed - Expected) * (Observed - Expected) / Expected) print(chisq) Waldv <- rep(0, 126) for ( i in 1:126) { Waldv[i] <- ((takeover.bids.case$NUMBIDS[i] - output.distribution$means$mean.par[i])^2)/ output.distribution$means$mean.par[i] if (output.distribution$means$variance.par[i] == 0) { Waldv[i] <- 0 } } Waldv sum(Waldv) # rescaling the continuous variable estimates and standard errors # intercepts output.fn$estses[[2]][c(1,11)] <- output.fn$estses[[2]][c(1,11)] - output.fn$estses[[2]][c(6,16)] * cont.var.mean[[3]] / cont.var.std[[3]] - output.fn$estses[[2]][c(7,17)] * cont.var.mean[[4]] / cont.var.std[[4]] - output.fn$estses[[2]][c(8,18)] * cont.var.mean[[5]] / cont.var.std[[5]] - output.fn$estses[[2]][c(9,19)] * cont.var.mean[[6]] / cont.var.std[[6]] # BIDPREM, INSTHOLD, SIZE, SIZESQ output.fn$estses[[2]][c(6:9, 16:19)] <- output.fn$estses[[2]][c(6:9, 16:19)] / cont.var.std[[3]] output.fn$estses[[3]][c(6:9, 16:19)] <- output.fn$estses[[3]][c(6:9, 16:19)] / cont.var.std[[3]] summary(output.fn) AIC(output.fn) BIC(output.fn) output.distribution <- distribution.CountsEPPM(output.fn, output.FDparameters = 'yes') print(output.distribution$FDparameters) ################################################################################