### Computing code for reproducing the results of Sections 4 qand 5 in the 
### paper.
library("mdscore")

#------------------------4. Simulation Experiment---------------------------# 
library("xtable")
library("VGAM")

waldfun <- function(rr, f1, terms) wald.test(f1[[rr]], terms = terms)[1]
lrtfun <- function(rr, f0, f1) lr.test(f0[[rr]], f1[[rr]])[1]

rate1 <- function(ss, aa, df) {   
  cc   <- qchisq(aa, df = df, lower = FALSE)
  round(100 * mean(ss >= cc), 2)
}

#------------------------4.1 Normal results---------------------------------# 
simul.norm <- function(r, nvec, alpha, beta0, phip) {
  
  set.seed(123)
  resf <- NULL
  a <- 2; b <- 0.2
  
  for (n in nvec){
    
    x2 <- rgamma(n, shape = a, scale = b)
    x3 <- rgamma(n, shape = a, scale = b)
    x4 <- rgamma(n, shape = a, scale = b)
    x5 <- rgamma(n, shape = a, scale = b)
    x6 <- rgamma(n, shape = a, scale = b)
    
    X    <- model.matrix(~ x2 + x3 + x4 + x5 + x6)
    eta  <- X %*% beta0
    mu   <- eta
    yr   <- replicate(r, rnorm(n, mean = mu, sd = 1/sqrt(phip)))
    
    fit0 <- apply(yr, 2, function(yy) glm(yy ~ x5 + x6, family = gaussian()))
    
    out   <- sapply(fit0, function(z) mdscore(z, X1 = X[, 2:4], phi = NULL))
    sr    <- as.numeric(out[1, ])
    srcor <- as.numeric(out[2, ])
    
    fitf <- apply(yr, 2, 
                  function(yy) glm(yy ~ x2 + x3 + x4 + x5 + x6, 
                                   family = gaussian()))
    
    wld <- sapply(1:r, waldfun, f1 = fitf, terms = 2:4)
    lrt <- sapply(1:r, lrtfun, f0 = fit0, f1 = fitf)
    
    if(any(sr <= 0))    stop("Score statistic has non-positive values")
    if(any(srcor <= 0)) stop("Mod. score statistic has non-positive values")
    if(any(wld <= 0))   stop("Wald statistic has non-positive values")
    if(any(lrt <= 0))   stop("LRT statistic has non-positive values")
    
    res <- cbind(wld, lrt, sr, srcor)    
    colnames(res) <- c(paste("wldn", n, sep = ""),
                       paste("lrtn", n, sep = ""),  
                       paste("srn", n, sep = ""), 
                       paste("srcorn", n, sep = ""))
    resf <- cbind(resf, res)
    rm(fit0, fitf, wld, lrt, sr, srcor, res)
    
  }
  return(resf)
  
}

alpha <- c(0.10, 0.05, 0.01)
nvec  <- c(20, 25, 30, 40)
beta0 <- c(20, 0, 0, 0, 10, 10)
phip  <- 4

stats <- simul.norm(r = 30000, nvec = nvec, beta0 = beta0, phip = phip)
type1 <- rbind(apply(stats[, 1:4],   2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 1:4],   2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 1:4],   2, rate1, aa = alpha[3], df = 3),
               apply(stats[, 5:8],   2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 5:8],   2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 5:8],   2, rate1, aa = alpha[3], df = 3),
               apply(stats[, 9:12],  2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 9:12],  2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 9:12],  2, rate1, aa = alpha[3], df = 3),
               apply(stats[, 13:16], 2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 13:16], 2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 13:16], 2, rate1, aa = alpha[3], df = 3))
colnames(type1) <- c("Wald", "LR", "Score", "ModScore")

nlab <- c(nvec[1], "", "", nvec[2], "", "", 
          nvec[3], "", "", nvec[4], "", "")
alab <- rep(100 * alpha, 4)
type1 <- data.frame(nlab, alab, type1)

# Results for Table 1
type1
print(xtable(type1, digits = c(0, 0, 0, 1, 1, 1, 1)),
      include.rownames = FALSE,
      file = "normal-simul.tex")

#------------------------4.2 Gamma results----------------------------------# 
simul.gamma <- function(r, nvec, alpha, beta0, phip){
  
  set.seed(123)
  resf <- NULL
  a <- 2; b <- 0.2
  
  for (n in nvec){
    
    x2 <- round(rgamma(n, shape = a, scale = b), 1)
    x3 <- round(rgamma(n, shape = a, scale = b), 1)
    x4 <- round(rgamma(n, shape = a, scale = b), 1)
    x5 <- round(rgamma(n, shape = a, scale = b), 1)
    x6 <- round(rgamma(n, shape = a, scale = b), 1)
    
    X    <- model.matrix(~ x2 + x3 + x4 + x5 + x6)
    eta  <- X %*% beta0
    mu   <- exp(eta)
    yr   <- replicate(r, rgamma(n, shape = phip, scale = mu/phip))
    fit0 <- apply(yr, 2, function(yy) glm(yy ~ x5 + x6, family = Gamma("log")))
    
    out   <- sapply(fit0, function(z) mdscore(z, X1 = X[, 2:4], phi = NULL))
    sr    <- as.numeric(out[1, ])
    srcor <- as.numeric(out[2, ])
    
    fitf <- apply(yr, 2, 
                  function(yy) glm(yy ~ x2 + x3 + x4 + x5 + x6, 
                                   family = Gamma("log")))
    
    wld <- sapply(1:r, waldfun, f1 = fitf, terms = 2:4)
    lrt <- sapply(1:r, lrtfun, f0 = fit0, f1 = fitf)
    
    res <- cbind(wld, lrt, sr, srcor)    
    colnames(res) <- c(paste("wldn", n, sep = ""),
                       paste("lrtn", n, sep = ""),  
                       paste("srn", n, sep = ""), 
                       paste("srcorn", n, sep = ""))
    resf <- cbind(resf, res)
    rm(fit0, fitf, wld, lrt, sr, srcor, res)
    
  }
  return(resf)
  
}

alpha <- c(0.10, 0.05, 0.01)
nvec  <- c(20, 25, 30, 40)
beta0 <- c(2, 0, 0, 0, 1, 1)
phip  <- 40

stats <- simul.gamma(r = 30000, nvec = nvec, beta0 = beta0, phip = phip)
type1 <- rbind(apply(stats[,1:4],   2, rate1, aa = alpha[1], df = 3),
               apply(stats[,1:4],   2, rate1, aa = alpha[2], df = 3),
               apply(stats[,1:4],   2, rate1, aa = alpha[3], df = 3),
               apply(stats[,5:8],   2, rate1, aa = alpha[1], df = 3),
               apply(stats[,5:8],   2, rate1, aa = alpha[2], df = 3),
               apply(stats[,5:8],   2, rate1, aa = alpha[3], df = 3),
               apply(stats[,9:12],  2, rate1, aa = alpha[1], df = 3),
               apply(stats[,9:12],  2, rate1, aa = alpha[2], df = 3),
               apply(stats[,9:12],  2, rate1, aa = alpha[3], df = 3),
               apply(stats[,13:16], 2, rate1, aa = alpha[1], df = 3),
               apply(stats[,13:16], 2, rate1, aa = alpha[2], df = 3),
               apply(stats[,13:16], 2, rate1, aa = alpha[3], df = 3))
colnames(type1) <- c("Wald", "LR", "Score", "ModScore")

nlab <- c(nvec[1], "", "", nvec[2], "", "", 
          nvec[3], "", "", nvec[4], "", "")
alab <- rep(100 * alpha, 4)
type1 <- data.frame(nlab, alab, type1)

# Results for Table 1
type1
print(xtable(type1, digits = c(0, 0, 0, 1, 1, 1, 1)),
      include.rownames = FALSE,
      file = "gamma-simul.tex")

#------------------------4.3 Inverse Gaussian results-----------------------# 
simul.invg <- function(r, nvec, alpha, beta0, phip){
  
  set.seed(123)
  resf <- NULL
  a <- 2; b <- 0.2
  
  for (n in nvec){
    
    x2 <- rgamma(n, shape = a, scale = b)
    x3 <- rgamma(n, shape = a, scale = b)
    x4 <- rgamma(n, shape = a, scale = b)
    x5 <- rgamma(n, shape = a, scale = b)
    x6 <- rgamma(n, shape = a, scale = b)
    
    X    <- model.matrix(~ x2 + x3 + x4 + x5 + x6)
    eta  <- X %*% beta0
    mu   <- 1/eta
    yr   <- replicate(r, rinv.gaussian(n, mu = mu, lambda = phip))
    
    fit0 <- apply(yr, 2, function(yy) glm(yy ~ x5 + x6, family = inverse.gaussian("inverse")))
    
    out   <- sapply(fit0, function(z) mdscore(z, X1 = X[, 2:4], phi = NULL))
    sr    <- as.numeric(out[1, ])
    srcor <- as.numeric(out[2, ])
    
    fitf <- apply(yr, 2, 
                  function(yy) glm(yy ~ x2 + x3 + x4 + x5 + x6, 
                                   family = inverse.gaussian("inverse")))
    
    wld <- sapply(1:r, waldfun, f1 = fitf, terms = 2:4)
    lrt <- sapply(1:r, lrtfun, f0 = fit0, f1 = fitf)
    
    if(any(sr <= 0))    stop("Score statistic has non-positive values")
    if(any(srcor <= 0)) stop("Mod. score statistic has non-positive values")
    if(any(wld <= 0))   stop("Wald statistic has non-positive values")
    if(any(lrt <= 0))   stop("LRT statistic has non-positive values")
    
    res <- cbind(wld, lrt, sr, srcor)    
    colnames(res) <- c(paste("wldn", n, sep = ""),
                       paste("lrtn", n, sep = ""),  
                       paste("srn", n, sep = ""), 
                       paste("srcorn", n, sep = ""))
    resf <- cbind(resf, res)
    rm(fit0, fitf, wld, lrt, sr, srcor, res)
    
  }
  return(resf)
  
}

alpha <- c(0.10, 0.05, 0.01)
nvec  <- c(20, 25, 30, 40)
beta0 <- c(2, 0, 0, 0, 1, 1)/15
phip  <- 40

stats <- simul.invg(r = 30000, nvec = nvec, beta0 = beta0, phip = phip)
type1 <- rbind(apply(stats[, 1:4],   2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 1:4],   2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 1:4],   2, rate1, aa = alpha[3], df = 3),
               apply(stats[, 5:8],   2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 5:8],   2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 5:8],   2, rate1, aa = alpha[3], df = 3),
               apply(stats[, 9:12],  2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 9:12],  2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 9:12],  2, rate1, aa = alpha[3], df = 3),
               apply(stats[, 13:16], 2, rate1, aa = alpha[1], df = 3),
               apply(stats[, 13:16], 2, rate1, aa = alpha[2], df = 3),
               apply(stats[, 13:16], 2, rate1, aa = alpha[3], df = 3))
colnames(type1) <- c("Wald", "LR", "Score", "ModScore")

nlab <- c(nvec[1], "", "", nvec[2], "", "", 
          nvec[3], "", "", nvec[4], "", "")
alab <- rep(100 * alpha, 4)
type1 <- data.frame(nlab, alab, type1)

# Results for Table 1
type1
print(xtable(type1, digits = c(0, 0, 0, 1, 1, 1, 1)),
      include.rownames = FALSE,
      file = "invg-simul.tex")
rm(list = ls())

#--------------------------------5. Applications----------------------------#

#-----------------------------Example 5.1-----------------------------------# 
options(prompt = "R> ", continue = "+  ", width = 70, useFancyQuotes = FALSE)
library("mdscore")
data("case1102", package = "Sleuth3")
d <- transform(case1102, TLrat = Brain/Liver, Ltime = log(Time),
               Lwrat = log((Weight+Loss)/Weight),
               Treat = factor(Treatment == "BD", labels = c("NS", "BD")))
fitf <- glm(TLrat ~ Ltime * Treat + Days  + Sex + Lwrat + Tumor +
              Treat * Lwrat, data = d, family = Gamma("log"))
summary(fitf, dispersion = gamma.dispersion(fitf))
gamma.shape(fitf)

est <- c(as.numeric(coef(fitf)), gamma.shape(fitf)[[1]])
est <- c(coef(fitf), gamma.shape(fitf)[[1]])
se  <- c(sqrt(diag(vcov(fitf, dispersion = 1/gamma.shape(fitf)$alpha))),
         gamma.shape(fitf)[[2]])
ests <- data.frame(Estimate = round(est, 5), SE = round(se, 5))

# Results for Table 2
xtable(ests, digits = c(0, 3, 3))

X <- model.matrix(fitf)
fit0 <- glm(TLrat ~ Ltime * Treat + Sex + Lwrat + Tumor + Days,
            data = d, family = Gamma("log"))

test <- mdscore(fit0, X1 = X[,9], phi = NULL)
summary(test)
sw  <- wald.test(fitf, terms = 9)$W
sw  <- c(sw, pchisq(sw, df = test$df, lower = FALSE))
slr <- lr.test(fit1 = fit0,fit2 = fitf)$LR
slr <- c(slr, pchisq(slr, df = test$df, lower = FALSE))
sr  <- c(test$Sr, pchisq(test$Sr, df = test$df, lower = FALSE))
src <- c(test$Sr_cor, pchisq(test$Sr_cor, df = test$df, lower = FALSE))
res <- data.frame(Sw = round(sw, 4), SLR = round(slr, 4),
                  SR = round(sr, 4), SRc = round(src, 4))

# Results for Table 3
library("xtable")
print(xtable(res, digits = c(0,4, 4,4,4)), include.rownames = FALSE)

### Plot for Figure 1. The following code was adapted from the routines 
### provided by Gilberto Alvarenga de Paula and are available at
### http://www.ime.usp.br/~giapaula/envel_gama.

fit.model <- fitf
par(mfrow = c(1,1))
X <- model.matrix(fit.model)
n <- nrow(X)
p <- ncol(X)
w <- fit.model$weights
W <- diag(w)
H <- solve(t(X) %*% W %*% X)
H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W)
h <- diag(H)
ro <- resid(fit.model,type = "response")
fi <- (n - p)/sum((ro/(fitted(fit.model)))^2)
td <- resid(fit.model,type = "deviance") * sqrt(fi/(1 - h))
#
e <- matrix(0, n, 100)
#
for(i in 1:100){
  resp <- rgamma(n,fi)
  resp <- (fitted(fit.model)/fi) * resp
  fit <- glm(resp ~ X, family = Gamma(link = log))
  w <- fit$weights
  W <- diag(w)
  H <- solve(t(X) %*% W %*% X)
  H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W)
  h <- diag(H)
  ro <- resid(fit,type = "response")
  phi <- (n - p)/sum((ro/(fitted(fit)))^2)
  e[, i] <- sort(resid(fit, type = "deviance") * sqrt(phi/(1 - h)))
}
#
e1 <- numeric(n)
e2 <- numeric(n)
#
for(i in 1:n){
  eo <- sort(e[i, ])
  e1[i] <- (eo[2] + eo[3])/2
  e2[i] <- (eo[97] + eo[98])/2}
#
med <- apply(e, 1, mean)
faixa <- range(td, e1, e2)
par(pty = "s")
qqnorm(td, xlab = "Standard normal quantile",
       ylab = "Deviance residual", ylim = faixa, pch = 16,
       main = "")
par(new = TRUE)
#
qqnorm(e1, main = "", axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 1, cex = 0.6)
par(new = T)
qqnorm(e2, main = "", axes = F, xlab = "", ylab = "",  type = "l", ylim = faixa, lty = 1, cex = 0.6)
par(new = T)
qqnorm(med, main = "", axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 2, cex = 0.6)
rm(list = ls())

#-----------------------------Example 5.2-----------------------------------# 
options(prompt = "R> ", continue = "+  ", width = 70, useFancyQuotes = FALSE)
library("mdscore")

fit.model <- glm(y ~ cut * lot, data = strength, 
                 family = inverse.gaussian("inverse"))

fitf <- glm(y ~ cut * lot, data = strength, 
            family = inverse.gaussian("inverse"))
summary(fitf)

X    <- model.matrix(fit.model, data = strength)
fit0 <- glm(y ~ cut + lot, data = strength, 
            family = inverse.gaussian("inverse"))

test <- mdscore(fit0, X1 = X[, 7:10])
summary(test)

sw   <- wald.test(fitf, terms = 7:10)$W
sw   <- c(sw, pchisq(sw, df = test$df, lower = FALSE))
slr  <- lr.test(fit1 = fit0, fit2 = fitf)$LR
slr  <- c(slr, pchisq(slr, df = test$df, lower = FALSE))
sr   <- c(test$Sr, pchisq(test$Sr, df = test$df, lower = FALSE))
src  <- c(test$Sr_cor, pchisq(test$Sr_cor, df = test$df, lower = FALSE))
res  <- data.frame(Sw = round(sw, 4), SLR = round(slr, 4),
                   SR = round(sr, 4), SRc = round(src, 4))

# Results for Table 4
library("xtable")
print(xtable(res, digits = c(0, 4, 4, 4, 4)), include.rownames = FALSE)

### Plot for Figure 2. The following code was adapted from the routines 
### provided by Gilberto Alvarenga de Paula and are available at
### http://www.ime.usp.br/~giapaula/envel_ninv.

fit.model <- fitf
rig <- function(n, mu = stop("no shape arg"), lambda = 1)
{
  #  Random variates from inverse Gaussian distribution
  #  Reference:
  #      Chhikara and Folks, The Inverse Gaussian Distribution,
  #      Marcel Dekker, 1989, page 53.
  #  GKS  15 Jan 98
  #
  if (any(mu <= 0)) stop("mu must be positive")
  if (any(lambda <= 0)) stop("lambda must be positive")
  if (length(n) > 1) n <- length(n)
  if (length(mu) > 1 && length(mu) != n) mu <- rep(mu, length = n)
  if (length(lambda) > 1 && length(lambda) != n) lambda <- rep(lambda, length = n)
  y2 <- rchisq(n, 1)
  u <- runif(n)
  r1 <- mu/(2 * lambda) * (2 * lambda + mu * y2 - sqrt(4 * lambda * mu * y2 + mu^2 * y2^2))
  r2 <- mu^2/r1
  ifelse(u < mu/(mu + r1), r1, r2)
}
#----------------------------------------------------------------#                     
#
#par(mfrow=c(1, 1))
X <- model.matrix(fit.model)
n <- nrow(X)
p <- ncol(X)
w <- fit.model$weights
mu <- predict(fit.model, type = "response")
W <- diag(w)
H <- solve(t(X) %*% W %*% X)
H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W)
h <- diag(H)
ro <- resid(fit.model,type="response")
fi <- (n - p)/sum((ro^2)/(fitted(fit.model)^3))
td <- resid(fit.model,type = "deviance")*sqrt(fi/(1 - h))
#
e <- matrix(0, n, 100)
#
for(i in 1:100){
  resp <- rig(n, mu, fi)
  #fit <- glm(resp ~ X, family = inverse.gaussian(link = identity))
  fit <- glm(resp ~ X, family = inverse.gaussian("inverse"))
  w <- fit$weights
  W <- diag(w)
  H <- solve(t(X) %*% W %*% X)
  H <- sqrt(W) %*% X %*% H %*% t(X) %*% sqrt(W)
  h <- diag(H)
  ro <- resid(fit, type = "response")
  phi <- (n - p)/sum((ro^2)/(fitted(fit)^3))
  e[, i] <- sort(resid(fit, type = "deviance")*sqrt(phi/(1 - h)))}
#
e1 <- numeric(n)
e2 <- numeric(n)
#
for(i in 1:n){
  eo <- sort(e[i, ])
  e1[i] <- (eo[2] + eo[3])/2
  e2[i] <- (eo[97] + eo[98])/2}
#
med <- apply(e, 1, mean)
faixa <- range(td, e1, e2)
par(pty = "s")
qqnorm(td,xlab = "Standard normal quantile",
       ylab = "Deviance residual", ylim = faixa, pch = 16)
par(new = TRUE)
#
qqnorm(e1, axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 1)
par(new = TRUE)
qqnorm(e2, axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 1)
par(new = TRUE)
qqnorm(med,axes = F, xlab = "", ylab = "", type = "l", ylim = faixa, lty = 2)