### Computing code for reproducing the results of Sections 4 qand 5 in the ### paper. library("mdscore") #------------------------4. Simulation Experiment---------------------------# library("xtable") library("VGAM") waldfun <- function(rr, f1, terms) wald.test(f1[[rr]], terms = terms)[1] lrtfun <- function(rr, f0, f1) lr.test(f0[[rr]], f1[[rr]])[1] rate1 <- function(ss, aa, df) { cc <- qchisq(aa, df = df, lower = FALSE) round(100 * mean(ss >= cc), 2) } #------------------------4.1 Normal results---------------------------------# simul.norm <- function(r, nvec, alpha, beta0, phip) { set.seed(123) resf <- NULL a <- 2; b <- 0.2 for (n in nvec){ x2 <- rgamma(n, shape = a, scale = b) x3 <- rgamma(n, shape = a, scale = b) x4 <- rgamma(n, shape = a, scale = b) x5 <- rgamma(n, shape = a, scale = b) x6 <- rgamma(n, shape = a, scale = b) X <- model.matrix(~ x2 + x3 + x4 + x5 + x6) eta <- X %*% beta0 mu <- eta yr <- replicate(r, rnorm(n, mean = mu, sd = 1/sqrt(phip))) fit0 <- apply(yr, 2, function(yy) glm(yy ~ x5 + x6, family = gaussian())) out <- sapply(fit0, function(z) mdscore(z, X1 = X[, 2:4], phi = NULL)) sr <- as.numeric(out[1, ]) srcor <- as.numeric(out[2, ]) fitf <- apply(yr, 2, function(yy) glm(yy ~ x2 + x3 + x4 + x5 + x6, family = gaussian())) wld <- sapply(1:r, waldfun, f1 = fitf, terms = 2:4) lrt <- sapply(1:r, lrtfun, f0 = fit0, f1 = fitf) if(any(sr <= 0)) stop("Score statistic has non-positive values") if(any(srcor <= 0)) stop("Mod. score statistic has non-positive values") if(any(wld <= 0)) stop("Wald statistic has non-positive values") if(any(lrt <= 0)) stop("LRT statistic has non-positive values") res <- cbind(wld, lrt, sr, srcor) colnames(res) <- c(paste("wldn", n, sep = ""), paste("lrtn", n, sep = ""), paste("srn", n, sep = ""), paste("srcorn", n, sep = "")) resf <- cbind(resf, res) rm(fit0, fitf, wld, lrt, sr, srcor, res) } return(resf) } alpha <- c(0.10, 0.05, 0.01) nvec <- c(20, 25, 30, 40) beta0 <- c(20, 0, 0, 0, 10, 10) phip <- 4 stats <- simul.norm(r = 30000, nvec = nvec, beta0 = beta0, phip = phip) type1 <- rbind(apply(stats[, 1:4], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 1:4], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 1:4], 2, rate1, aa = alpha[3], df = 3), apply(stats[, 5:8], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 5:8], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 5:8], 2, rate1, aa = alpha[3], df = 3), apply(stats[, 9:12], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 9:12], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 9:12], 2, rate1, aa = alpha[3], df = 3), apply(stats[, 13:16], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 13:16], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 13:16], 2, rate1, aa = alpha[3], df = 3)) colnames(type1) <- c("Wald", "LR", "Score", "ModScore") nlab <- c(nvec[1], "", "", nvec[2], "", "", nvec[3], "", "", nvec[4], "", "") alab <- rep(100 * alpha, 4) type1 <- data.frame(nlab, alab, type1) # Results for Table 1 type1 print(xtable(type1, digits = c(0, 0, 0, 1, 1, 1, 1)), include.rownames = FALSE, file = "normal-simul.tex") #------------------------4.2 Gamma results----------------------------------# simul.gamma <- function(r, nvec, alpha, beta0, phip){ set.seed(123) resf <- NULL a <- 2; b <- 0.2 for (n in nvec){ x2 <- round(rgamma(n, shape = a, scale = b), 1) x3 <- round(rgamma(n, shape = a, scale = b), 1) x4 <- round(rgamma(n, shape = a, scale = b), 1) x5 <- round(rgamma(n, shape = a, scale = b), 1) x6 <- round(rgamma(n, shape = a, scale = b), 1) X <- model.matrix(~ x2 + x3 + x4 + x5 + x6) eta <- X %*% beta0 mu <- exp(eta) yr <- replicate(r, rgamma(n, shape = phip, scale = mu/phip)) fit0 <- apply(yr, 2, function(yy) glm(yy ~ x5 + x6, family = Gamma("log"))) out <- sapply(fit0, function(z) mdscore(z, X1 = X[, 2:4], phi = NULL)) sr <- as.numeric(out[1, ]) srcor <- as.numeric(out[2, ]) fitf <- apply(yr, 2, function(yy) glm(yy ~ x2 + x3 + x4 + x5 + x6, family = Gamma("log"))) wld <- sapply(1:r, waldfun, f1 = fitf, terms = 2:4) lrt <- sapply(1:r, lrtfun, f0 = fit0, f1 = fitf) res <- cbind(wld, lrt, sr, srcor) colnames(res) <- c(paste("wldn", n, sep = ""), paste("lrtn", n, sep = ""), paste("srn", n, sep = ""), paste("srcorn", n, sep = "")) resf <- cbind(resf, res) rm(fit0, fitf, wld, lrt, sr, srcor, res) } return(resf) } alpha <- c(0.10, 0.05, 0.01) nvec <- c(20, 25, 30, 40) beta0 <- c(2, 0, 0, 0, 1, 1) phip <- 40 stats <- simul.gamma(r = 30000, nvec = nvec, beta0 = beta0, phip = phip) type1 <- rbind(apply(stats[,1:4], 2, rate1, aa = alpha[1], df = 3), apply(stats[,1:4], 2, rate1, aa = alpha[2], df = 3), apply(stats[,1:4], 2, rate1, aa = alpha[3], df = 3), apply(stats[,5:8], 2, rate1, aa = alpha[1], df = 3), apply(stats[,5:8], 2, rate1, aa = alpha[2], df = 3), apply(stats[,5:8], 2, rate1, aa = alpha[3], df = 3), apply(stats[,9:12], 2, rate1, aa = alpha[1], df = 3), apply(stats[,9:12], 2, rate1, aa = alpha[2], df = 3), apply(stats[,9:12], 2, rate1, aa = alpha[3], df = 3), apply(stats[,13:16], 2, rate1, aa = alpha[1], df = 3), apply(stats[,13:16], 2, rate1, aa = alpha[2], df = 3), apply(stats[,13:16], 2, rate1, aa = alpha[3], df = 3)) colnames(type1) <- c("Wald", "LR", "Score", "ModScore") nlab <- c(nvec[1], "", "", nvec[2], "", "", nvec[3], "", "", nvec[4], "", "") alab <- rep(100 * alpha, 4) type1 <- data.frame(nlab, alab, type1) # Results for Table 1 type1 print(xtable(type1, digits = c(0, 0, 0, 1, 1, 1, 1)), include.rownames = FALSE, file = "gamma-simul.tex") #------------------------4.3 Inverse Gaussian results-----------------------# simul.invg <- function(r, nvec, alpha, beta0, phip){ set.seed(123) resf <- NULL a <- 2; b <- 0.2 for (n in nvec){ x2 <- rgamma(n, shape = a, scale = b) x3 <- rgamma(n, shape = a, scale = b) x4 <- rgamma(n, shape = a, scale = b) x5 <- rgamma(n, shape = a, scale = b) x6 <- rgamma(n, shape = a, scale = b) X <- model.matrix(~ x2 + x3 + x4 + x5 + x6) eta <- X %*% beta0 mu <- 1/eta yr <- replicate(r, rinv.gaussian(n, mu = mu, lambda = phip)) fit0 <- apply(yr, 2, function(yy) glm(yy ~ x5 + x6, family = inverse.gaussian("inverse"))) out <- sapply(fit0, function(z) mdscore(z, X1 = X[, 2:4], phi = NULL)) sr <- as.numeric(out[1, ]) srcor <- as.numeric(out[2, ]) fitf <- apply(yr, 2, function(yy) glm(yy ~ x2 + x3 + x4 + x5 + x6, family = inverse.gaussian("inverse"))) wld <- sapply(1:r, waldfun, f1 = fitf, terms = 2:4) lrt <- sapply(1:r, lrtfun, f0 = fit0, f1 = fitf) if(any(sr <= 0)) stop("Score statistic has non-positive values") if(any(srcor <= 0)) stop("Mod. score statistic has non-positive values") if(any(wld <= 0)) stop("Wald statistic has non-positive values") if(any(lrt <= 0)) stop("LRT statistic has non-positive values") res <- cbind(wld, lrt, sr, srcor) colnames(res) <- c(paste("wldn", n, sep = ""), paste("lrtn", n, sep = ""), paste("srn", n, sep = ""), paste("srcorn", n, sep = "")) resf <- cbind(resf, res) rm(fit0, fitf, wld, lrt, sr, srcor, res) } return(resf) } alpha <- c(0.10, 0.05, 0.01) nvec <- c(20, 25, 30, 40) beta0 <- c(2, 0, 0, 0, 1, 1)/15 phip <- 40 stats <- simul.invg(r = 30000, nvec = nvec, beta0 = beta0, phip = phip) type1 <- rbind(apply(stats[, 1:4], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 1:4], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 1:4], 2, rate1, aa = alpha[3], df = 3), apply(stats[, 5:8], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 5:8], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 5:8], 2, rate1, aa = alpha[3], df = 3), apply(stats[, 9:12], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 9:12], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 9:12], 2, rate1, aa = alpha[3], df = 3), apply(stats[, 13:16], 2, rate1, aa = alpha[1], df = 3), apply(stats[, 13:16], 2, rate1, aa = alpha[2], df = 3), apply(stats[, 13:16], 2, rate1, aa = alpha[3], df = 3)) colnames(type1) <- c("Wald", "LR", "Score", "ModScore") nlab <- c(nvec[1], "", "", nvec[2], "", "", nvec[3], "", "", nvec[4], "", "") alab <- rep(100 * alpha, 4) type1 <- data.frame(nlab, alab, type1) # Results for Table 1 type1 print(xtable(type1, digits = c(0, 0, 0, 1, 1, 1, 1)), include.rownames = FALSE, file = "invg-simul.tex") rm(list = ls()) #--------------------------------5. Applications----------------------------# #-----------------------------Example 5.1-----------------------------------# options(prompt = "R> ", continue = "+ ", width = 70, useFancyQuotes = FALSE) library("mdscore") data("case1102", package = "Sleuth3") d <- transform(case1102, TLrat = Brain/Liver, Ltime = log(Time), Lwrat = log((Weight+Loss)/Weight), Treat = factor(Treatment == "BD", labels = c("NS", "BD"))) fitf <- glm(TLrat ~ Ltime * Treat + Days + Sex + Lwrat + Tumor + Treat * Lwrat, data = d, family = Gamma("log")) summary(fitf, dispersion = gamma.dispersion(fitf)) gamma.shape(fitf) est <- c(as.numeric(coef(fitf)), gamma.shape(fitf)[[1]]) est <- c(coef(fitf), gamma.shape(fitf)[[1]]) se <- c(sqrt(diag(vcov(fitf, dispersion = 1/gamma.shape(fitf)$alpha))), gamma.shape(fitf)[[2]]) ests <- data.frame(Estimate = round(est, 5), SE = round(se, 5)) # Results for Table 2 xtable(ests, digits = c(0, 3, 3)) X <- model.matrix(fitf) fit0 <- glm(TLrat ~ Ltime * Treat + Sex + Lwrat + Tumor + Days, data = d, family = Gamma("log")) test <- mdscore(fit0, X1 = X[,9], phi = NULL) summary(test) sw <- wald.test(fitf, terms = 9)$W sw <- c(sw, pchisq(sw, df = test$df, lower = FALSE)) slr <- lr.test(fit1 = fit0,fit2 = fitf)$LR slr <- c(slr, pchisq(slr, df = test$df, lower = FALSE)) sr <- c(test$Sr, pchisq(test$Sr, df = test$df, lower = FALSE)) src <- c(test$Sr_cor, pchisq(test$Sr_cor, df = test$df, lower = FALSE)) res <- data.frame(Sw = round(sw, 4), SLR = round(slr, 4), SR = round(sr, 4), SRc = round(src, 4)) # Results for Table 3 library("xtable") print(xtable(res, digits = c(0,4, 4,4,4)), include.rownames = FALSE) ### Plot for Figure 1. The following code was adapted from the routines ### provided by Gilberto Alvarenga de Paula and are available at ### http://www.ime.usp.br/~giapaula/envel_gama. fit.model <- fitf par(mfrow = c(1,1)) X <- model.matrix(fit.model) n <- nrow(X) p <- ncol(X) w <- fit.model$weights W <- diag(w) H <- solve(t(X) %*% W %*% X) H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W) h <- diag(H) ro <- resid(fit.model,type = "response") fi <- (n - p)/sum((ro/(fitted(fit.model)))^2) td <- resid(fit.model,type = "deviance") * sqrt(fi/(1 - h)) # e <- matrix(0, n, 100) # for(i in 1:100){ resp <- rgamma(n,fi) resp <- (fitted(fit.model)/fi) * resp fit <- glm(resp ~ X, family = Gamma(link = log)) w <- fit$weights W <- diag(w) H <- solve(t(X) %*% W %*% X) H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W) h <- diag(H) ro <- resid(fit,type = "response") phi <- (n - p)/sum((ro/(fitted(fit)))^2) e[, i] <- sort(resid(fit, type = "deviance") * sqrt(phi/(1 - h))) } # e1 <- numeric(n) e2 <- numeric(n) # for(i in 1:n){ eo <- sort(e[i, ]) e1[i] <- (eo[2] + eo[3])/2 e2[i] <- (eo[97] + eo[98])/2} # med <- apply(e, 1, mean) faixa <- range(td, e1, e2) par(pty = "s") qqnorm(td, xlab = "Standard normal quantile", ylab = "Deviance residual", ylim = faixa, pch = 16, main = "") par(new = TRUE) # qqnorm(e1, main = "", axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 1, cex = 0.6) par(new = T) qqnorm(e2, main = "", axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 1, cex = 0.6) par(new = T) qqnorm(med, main = "", axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 2, cex = 0.6) rm(list = ls()) #-----------------------------Example 5.2-----------------------------------# options(prompt = "R> ", continue = "+ ", width = 70, useFancyQuotes = FALSE) library("mdscore") fit.model <- glm(y ~ cut * lot, data = strength, family = inverse.gaussian("inverse")) fitf <- glm(y ~ cut * lot, data = strength, family = inverse.gaussian("inverse")) summary(fitf) X <- model.matrix(fit.model, data = strength) fit0 <- glm(y ~ cut + lot, data = strength, family = inverse.gaussian("inverse")) test <- mdscore(fit0, X1 = X[, 7:10]) summary(test) sw <- wald.test(fitf, terms = 7:10)$W sw <- c(sw, pchisq(sw, df = test$df, lower = FALSE)) slr <- lr.test(fit1 = fit0, fit2 = fitf)$LR slr <- c(slr, pchisq(slr, df = test$df, lower = FALSE)) sr <- c(test$Sr, pchisq(test$Sr, df = test$df, lower = FALSE)) src <- c(test$Sr_cor, pchisq(test$Sr_cor, df = test$df, lower = FALSE)) res <- data.frame(Sw = round(sw, 4), SLR = round(slr, 4), SR = round(sr, 4), SRc = round(src, 4)) # Results for Table 4 library("xtable") print(xtable(res, digits = c(0, 4, 4, 4, 4)), include.rownames = FALSE) ### Plot for Figure 2. The following code was adapted from the routines ### provided by Gilberto Alvarenga de Paula and are available at ### http://www.ime.usp.br/~giapaula/envel_ninv. fit.model <- fitf rig <- function(n, mu = stop("no shape arg"), lambda = 1) { # Random variates from inverse Gaussian distribution # Reference: # Chhikara and Folks, The Inverse Gaussian Distribution, # Marcel Dekker, 1989, page 53. # GKS 15 Jan 98 # if (any(mu <= 0)) stop("mu must be positive") if (any(lambda <= 0)) stop("lambda must be positive") if (length(n) > 1) n <- length(n) if (length(mu) > 1 && length(mu) != n) mu <- rep(mu, length = n) if (length(lambda) > 1 && length(lambda) != n) lambda <- rep(lambda, length = n) y2 <- rchisq(n, 1) u <- runif(n) r1 <- mu/(2 * lambda) * (2 * lambda + mu * y2 - sqrt(4 * lambda * mu * y2 + mu^2 * y2^2)) r2 <- mu^2/r1 ifelse(u < mu/(mu + r1), r1, r2) } #----------------------------------------------------------------# # #par(mfrow=c(1, 1)) X <- model.matrix(fit.model) n <- nrow(X) p <- ncol(X) w <- fit.model$weights mu <- predict(fit.model, type = "response") W <- diag(w) H <- solve(t(X) %*% W %*% X) H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W) h <- diag(H) ro <- resid(fit.model,type="response") fi <- (n - p)/sum((ro^2)/(fitted(fit.model)^3)) td <- resid(fit.model,type = "deviance")*sqrt(fi/(1 - h)) # e <- matrix(0, n, 100) # for(i in 1:100){ resp <- rig(n, mu, fi) #fit <- glm(resp ~ X, family = inverse.gaussian(link = identity)) fit <- glm(resp ~ X, family = inverse.gaussian("inverse")) w <- fit$weights W <- diag(w) H <- solve(t(X) %*% W %*% X) H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W) h <- diag(H) ro <- resid(fit, type = "response") phi <- (n - p)/sum((ro^2)/(fitted(fit)^3)) e[, i] <- sort(resid(fit, type = "deviance")*sqrt(phi/(1 - h)))} # e1 <- numeric(n) e2 <- numeric(n) # for(i in 1:n){ eo <- sort(e[i, ]) e1[i] <- (eo[2] + eo[3])/2 e2[i] <- (eo[97] + eo[98])/2} # med <- apply(e, 1, mean) faixa <- range(td, e1, e2) par(pty = "s") qqnorm(td,xlab = "Standard normal quantile", ylab = "Deviance residual", ylim = faixa, pch = 16) par(new = TRUE) # qqnorm(e1, axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 1) par(new = TRUE) qqnorm(e2, axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 1) par(new = TRUE) qqnorm(med,axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 2)