######################################################################
## SQUAREM: An R Package for Off-the-Shelf Acceleration of EM
## License: GPL-2 | GPL-3 [expanded from: GPL (≥ 2)]
## Replication Material
######################################################################
library("SQUAREM")

## 4 Poisson Mixtures
## log likelihood function poissmix.loglik
poissmix.loglik <- function(p, y) {
    i <- 0:(length(y)-1)
    loglik <- y*log(p[1]*exp(-p[2])*p[2]^i/exp(lgamma(i+1)) + 
                    (1 - p[1])*exp(-p[3])*p[3]^i/exp(lgamma(i+1)))
    ## returns negative log likelihood of data
    return(-sum(loglik))
}

## basic EM algorithm
EM.poisson.mixture <- function(p, maxiter = 5000, tol = 1e-08, y) {
    ## y is the data
    iter <- 1
    conv <- FALSE
    pnew <- rep(NA_real_, 3)
    while (iter < maxiter) {
        ##  start of one EM step
        i <- 0:(length(y)-1)
        zi <- p[1]*exp(-p[2])*p[2]^i / 
            (p[1]*exp(-p[2])*p[2]^i + 
             (1 - p[1])*exp(-p[3])*p[3]^i)
        pnew[1] <- sum(y*zi)/sum(y)
        pnew[2] <- sum(y*i*zi)/sum(y*zi)
        pnew[3] <- sum(y*i*(1-zi))/sum(y*(1-zi))
        res <- sqrt(crossprod(pnew - p))
        p <- pnew
        ##  end of one EM step
        if (res < tol) {
            conv <- TRUE
            break
        }
        iter <- iter + 1
    }
    loglik <- poissmix.loglik(p, y = y)
    return(list(par = p, value.objfn = loglik, 
                fpevals = iter, convergence = conv))
}

## function poissmix.em argument for fixptfn in squarem()
poissmix.em <- function(p, y) {
    ## The fixed point mapping giving a single iteration of 
    ## the original EM algorithm
    ## y is the data        
    pnew <- rep(NA_real_, 3)
    i <- 0:(length(y)-1)
    zi <- p[1]*exp(-p[2])*p[2]^i/(p[1]*exp(-p[2])*p[2]^i + 
                                  (1 - p[1])*exp(-p[3])*p[3]^i)
    pnew[1] <- sum(y*zi)/sum(y)
    pnew[2] <- sum(y*i*zi)/sum(y*zi)
    pnew[3] <- sum(y*i*(1-zi))/sum(y*(1-zi))
    p <- pnew
    return(pnew)
}


## Compare Squarem and basic EM algorithm
poissmix.dat <- data.frame(death = 0:9, 
                           freq = c(162, 267, 271, 185, 111, 61, 27, 8, 3, 1))
y <- poissmix.dat$freq
p0 <- c(0.3, 1, 5)

## EM standard loop 
system.time(f0 <- EM.poisson.mixture(p = p0, y = y))
f0

## EM fpiter
system.time(f1 <- fpiter(par = p0, fixptfn = poissmix.em, 
                         objfn = poissmix.loglik, control = list(tol = 1.e-08), y = y))
f1
## Squarem
system.time(f2 <- squarem(par = p0, fixptfn = poissmix.em, 
                          objfn = poissmix.loglik, control = list(tol = 1.e-08), y = y)) 
f2

## randomly generate 5000 starting values and compare
set.seed(407)
sim <- 5000
tem <- tsqem <- fem <- fsqem <- rep(NA_real_, sim)
for (i in 1:sim) {
    print(i)
    p0 <- c(runif(1, 0.05, 0.95), runif(2, 0, 20))
    tem[i] <- system.time(f1 <- fpiter(par = p0, 
                                       fixptfn = poissmix.em, 
                                       objfn = poissmix.loglik, 
                                       control = list(tol = 1.e-08, 
                                                      maxiter = 10000), y = y))[3]
    fem[i] <- f1$fpevals
    tsqem[i] <- system.time(f2 <- squarem(par = p0, 
                                          fixptfn = poissmix.em, 
                                          objfn = poissmix.loglik, 
                                          control = list(tol = 1.e-08, 
                                                         maxiter = 4000), y = y))[3]
    fsqem[i] <- f2$fpevals
    print(c(f1$convergence, f2$convergence))
}
quantile(tem, probs = c(0.025, 0.5, 0.975))
quantile(tsqem, probs = c(0.025, 0.5, 0.975))
quantile(fem, probs = c(0.025, 0.5, 0.975))
quantile(fsqem, probs = c(0.025, 0.5, 0.975))

## Compute Error Curve
poissmix.dat <- data.frame(death = 0:9,
                           freq = c(162, 267, 271, 185, 111, 61, 27, 8, 3, 1))
y <- poissmix.dat$freq
p0 <- c(0.3, 1, 5)
true.theta <- squarem(par = p0, fixptfn = poissmix.em, objfn = poissmix.loglik, control = list(tol = 1.e-18), y = y)$par 
squarem1 <- function(par, fixptfn, objfn, ... , control = list()) {
    ## par = starting value of parameter vector
    ## fixptfn = fixed-point iteration F(x)
    ## for which the solution: F(x*) = x* is sought
    ## objfn = underlying objective function which is minimized at x*

    control.default <- list(K = 1, square = TRUE, method = 3, step.min0 = 1, step.max0 = 1, mstep = 4, kr = 1, objfn.inc = 1, 
                            tol = 1.e-07, maxiter = 1500, trace = FALSE)
        
    namc <- names(control)
    if (!all(namc %in% names(control.default))) 
        stop("unknown names in control: ", namc[!(namc %in% names(control.default))])
    ctrl <- modifyList(control.default, control)
        
    ##
    ## method = 1, 2, or 3, indicating the type of steplength to be used
    ## A key parameter is `step.min0'. This can be either positive or negative if the eigenvalues of the Jacobian of fixed-point mapping are all positive;
    ## We set it to +1 for contraction mappings such as EM and MM algorithms
    ## Must be negative, e.g. -1, if the fixed-point mapping can have negative eiegnvalues
    ##
    ## parameter "objfn.inc" dictates the amount of non-monotonicity in the objective function 
    ## setting objfn.inc=0 would enforce monotonicity, whereas objfn.inc=Inf would be a non-monotonic scheme
    ## The defalut objfn.inc=1 would enforce monotonicity far from solution, but allows for non-monotonicity closer to solution
    ##
    method <- ctrl$method
    maxiter <- ctrl$maxiter
    tol <- ctrl$tol
    step.min <- ctrl$step.min0
    step.max0 <- ctrl$step.max0
    step.max <- ctrl$step.max0
    mstep <- ctrl$mstep
    objfn.inc <- ctrl$objfn.inc
    trace <- ctrl$trace
    
    if (trace) cat("Squarem-1 \n")
    
    if (missing(objfn)) stop("\n squarem2 should be used if objective function is not available \n\n")
        
    iter <- 1
    p <- par
        
    lold <- objfn(p, ...)
    leval <- 1
    if (trace) cat("Objective fn: ", lold, "\n")
    feval <- 0
    conv <- TRUE
    error <- NULL
    while (feval < maxiter) {
        extrap <- TRUE
        p1 <- try(fixptfn(p, ...), silent = TRUE)
        feval <- feval + 1
        if (is(p1, "try-error") || any(is.nan(unlist(p1)))) stop("Error in function evaluation")
        q1 <- p1 - p
        sr2 <- crossprod(q1)
        if (sqrt(sr2) < tol) break
        
        p2 <- try(fixptfn(p1, ...), silent = TRUE)
        feval <- feval + 1
        if (is(p2, "try-error") || any(is.nan(unlist(p2)))) stop("Error in function evaluation")
        
        q2 <- p2 - p1
        sq2 <- sqrt(crossprod(q2))
        if (sq2 < tol) break
        sv2 <- crossprod(q2-q1)
        srv <- crossprod(q1, q2-q1)
                
        alpha <- switch(method, -srv/sv2, -sr2/srv, sqrt(sr2/sv2)) 
        
        alpha <- max(step.min, min(step.max, alpha))
        p.new <- p + 2*alpha*q1 + alpha^2*(q2-q1)
        if (abs(alpha - 1) > 0.01 ) {
            p.new <- try(fixptfn(p.new , ...), silent=TRUE)   # stabilization step
            feval <- feval + 1
        }
        
        if (is(p.new, "try-error") || any(is.nan(p.new))) {
            p.new <- p2
            lnew <- try(objfn(p2, ...), silent = TRUE)
            leval <- leval + 1
            if (alpha == step.max) step.max <- max(step.max0, step.max/mstep)
            alpha <- 1
            extrap <- FALSE
        } else {
            if (is.finite(objfn.inc)) {
                lnew <- try(objfn(p.new, ...), silent = TRUE)
                leval <- leval + 1
            } else lnew <- lold
            if (is(lnew, "try-error") || is.nan(lnew) || 
                (lnew > lold + objfn.inc)) {
                p.new <- p2
                lnew <- try(objfn(p2, ...), silent = TRUE)
                leval <- leval + 1
                if (alpha == step.max) step.max <- max(step.max0, step.max/mstep)
                alpha <- 1
                extrap <- FALSE
            }
        }	
        
        if (alpha == step.max) step.max <- mstep*step.max
        if (step.min < 0 & alpha == step.min) step.min <- mstep*step.min
        
        p <- p.new
        if (!is.nan(lnew)) lold <- lnew 
        if (trace) cat("Objective fn: ", lnew, "  Extrapolation: ", extrap, "  Steplength: ", alpha, "\n")
        iter <- iter + 1
        error <- rbind(error, c(feval, sqrt(crossprod(p - true.theta))))
        
    }
    if (feval > maxiter) conv <- FALSE
    if (is.infinite(objfn.inc)) {
        lold <- objfn(p, ...)
        leval <- leval + 1
    }
    
    return(list(par = p, value.objfn = lold, iter =  iter, fpevals = feval, objfevals  =  leval, convergence = conv, error = error))
}

fpiter1 <- function(par, fixptfn, objfn = NULL, control = list(), ...) {
    control.default <- list(tol = 1e-07, maxiter = 5000, trace = FALSE)
    namc <- names(control)
    if (!all(namc %in% names(control.default))) 
        stop("unknown names in control: ", namc[!(namc %in% names(control.default))])
    ctrl <- modifyList(control.default, control)
    tol <- ctrl$tol
    maxiter <- ctrl$maxiter
    trace <- ctrl$trace
    if (trace) 
        cat("fpiter \n")
    iter <- 1
    objeval <- 0
    conv <- FALSE
    error <- NULL
    while (iter < maxiter) {
        p.new <- fixptfn(par, ...)
        res <- sqrt(crossprod(p.new - par))
        if (res < tol) {
            conv <- TRUE
            break
        }
        if (trace) {
            if (!is.null(objfn)) {
                cat("Iter: ", iter, "Objective fn: ", objfn(par, 
                                                            ...), "\n")
                objeval <- objeval + 1
            }
            else cat("Iter: ", iter, "Residual: ", res, "\n")
        }
        par <- p.new
        iter <- iter + 1
        error <- rbind(error, c(iter, sqrt(crossprod(par - true.theta))))
    }
    loglik.best <- if (!is.null(objfn)) 
                       objfn(par, ...)
                   else NA_real_
    return(list(par = par, value.objfn = loglik.best, fpevals = iter, 
                objfevals = objeval, convergence = conv, error = error))
}

errsq <- squarem1(par = p0, fixptfn = poissmix.em, objfn = poissmix.loglik,
                  control = list(tol = 1.e-08), y = y)$error
errem <- fpiter1(par = p0, fixptfn = poissmix.em, objfn = poissmix.loglik,
                 control = list(tol = 1.e-08), y = y)$error

err.data <- data.frame(rbind(errsq, errem))
err.data$group <- rep(c("Squarem", "EM"), c(nrow(errsq), nrow(errem)))
names(err.data) <- c("feval", "error", "group")
library("ggplot2")
library("latex2exp")
p <- ggplot(err.data[err.data$feval <= 100, ], aes(x = feval, y = error, color = group)) + 
    geom_point() + geom_line() + theme_bw() + xlab("The number of EM Evaluations") + scale_y_log10() + 
    ylab(TeX(paste("Error", "$||\\theta^{(k)} - \\theta^{*}||$", "(log10 scale)"))) + 
    guides(colour = guide_legend(title = "Algorithm")) + 
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 15), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
p

## 5.1 Interval Censoring
library("interval")
## Function to compute alpha matrix and intervals
Aintmap <- function(L, R, Lin = NULL, Rin = NULL) {
    n <- length(L)
    Lin <- rep(FALSE, n)
    Rin <- rep(TRUE, n)
    Lin[L == R] <- TRUE
    Rin[R == Inf] <- FALSE
    if(n != length(R))
        stop("length of L and R must be the same")
    LRvalues <- sort(unique(c(0, L, R, Inf)))
    eps <- min(diff(LRvalues))/2
    Le <- L
    Re <- R
    Le[!Lin] <- L[!Lin]+eps
    Re[!Rin] <- R[!Rin]-eps
    oLR <- order(c(Le, Re+eps/2))
    Leq1.Req2 <- c(rep(1, n), rep(2, n))
    flag <- c(0, diff(Leq1.Req2[oLR]))
    R.right.of.L <- (1:(2*n))[flag == 1]
    intmapR <- c(L, R)[oLR][R.right.of.L]
    intmapL <- c(L, R)[oLR][R.right.of.L - 1]
    intmapRin <- c(Lin, Rin)[oLR][R.right.of.L]
    intmapLin <- c(Lin, Rin)[oLR][R.right.of.L - 1]
    intmap <- matrix(c(intmapL, intmapR), byrow = TRUE, nrow = 2)
    attr(intmap, "LRin") <- matrix(c(intmapLin, intmapRin), byrow = TRUE, nrow = 2)
    k <- dim(intmap)[[2]]
    Lbracket <- rep("(", k)
    Lbracket[intmapLin] <- "["
    Rbracket <- rep(")", k)
    Rbracket[intmapRin] <- "]"
    intname <- paste(Lbracket, intmapL, ", ", intmapR, Rbracket, sep = "")
    A <- matrix(0, n, k, dimnames = list(1:n, intname))
    intmapLe <- intmapL
    intmapLe[!intmapLin] <- intmapL[!intmapLin]+eps
    intmapRe <- intmapR
    intmapRe[!intmapRin] <- intmapR[!intmapRin]-eps
    for (i in 1:n) {
        tempint <- Le[i] <= intmapRe & Re[i] >= intmapLe
        A[i, tempint] <- 1
    }
        
    if (k==1 & intmap[1, 1] == 0 & intmap[2, 1] == Inf) A[A == 0] <- 1  
    return(A)
}

## A wrapper to EM and Squarem
IntervalCensor <- function(Imat,  algorithm = c("EM", "Squarem"), 
                           control.setting = list()) {
        ## Imat = a 2-column matrix containing 
        ## the intervals for each individual
        if (is.list(Imat)) Imat <- as.matrix(Imat)
        N <- nrow(Imat)
        
        ## Use Michael Fay's function to generate `A' matrix
        A <- Aintmap(Imat[, 1], Imat[, 2])
        m <- ncol(A)
        tA <- t(A)
        
        ## EM mapping - a single EM iteration
        intEM <- function(pvec) {  
            Ap <- pvec*tA
            pnew <- colMeans(t(Ap)/colSums(Ap))
            pnew * (pnew > 0)
        }
        
        ## Function of log-likelihood
        loglik <- function(pvec) {
            - sum(log(c(A %*% pvec)))
        }
        
        ## starting values
        pvec <- rep(1/m, length = m)
        
        if (algorithm == "Squarem") {
            ## SQUAREM acceleration
            control.sq.default <- list(K = 1, maxiter = 10000, 
                                       tol = 1.e-08)
            control <- modifyList(control.sq.default, control.setting)
            ans <- squarem(par = pvec, fixptfn = intEM, objfn = loglik, 
                           control = control)
            pvec <- ans$par
            iter <- ans$fpevals
        } else {
            control.em.default <- list(tol = 1.e-08, maxiter = 20000)
            control <- modifyList(control.em.default, control.setting)
            ans <- fpiter(par = pvec, fixptfn = intEM, objfn = loglik, 
                          control = control)
            pvec <- ans$par
            iter <- ans$fpevals
        }
        return(list(par = round(pvec, 4), 
                    loglik = -round(loglik(pvec = pvec), 4), 
                    iters = iter, convergence = ans$convergence))
}

## Modified EM-ICM to have same starting values
EMICMmac.mod <- function(A, EMstep = TRUE, ICMstep = TRUE, 
                         keepiter = FALSE, tol = 1e-07, 
                         tolbis = 1e-07, maxiter = 1000) {
    if (!EMstep && !ICMstep) {
        print("One of EMstep or ICMstep must be true.")
        return(NULL)
    }
    Meps <- .Machine$double.eps
    m <- dim(A)[1]
    n <- dim(A)[2]
    tA <- t(A)
    if (m == 1) {
        ret <- NULL
        ret$sigma <- 1
        ret$weights <- n
        ret$lastchange <- 0
        ret$numiter <- 0
        return(ret)
    }
    WW <- matrix(0, m, n)
    for (i in 1:(m - 1)) WW[i, ] <- A[i, ] - A[i + 1, ]
    WW[m, ] <- A[m, ]
    pvec <- rep(1/m, length = m)
    sigma <- cumsum(pvec)
    numiter <- 0
    oldsigma <- rep(-1, m)
    if (keepiter) 
        iter <- sigma
    while (sqrt(crossprod(oldsigma - sigma)) > tol && 
           numiter <= maxiter) {
               oldsigma <- sigma
               if (EMstep) {
                   pvec <- diff(c(0, sigma))
                   temp <- sweep(A, 1, pvec, FUN = "*")
                   if (sum(apply(temp, 2, sum) == 0) == 0) {
                       pvec <- apply(sweep(temp, 2, 
                                           apply(temp, 2, sum), 
                                           FUN = "/"), 1, sum)
                       sigma <- cumsum(pvec)/sum(pvec)
                   }
                   if (keepiter) 
                       iter <- rbind(iter, sigma)
               }
               if (ICMstep) {
                   Wps <- 1/(t(WW) %*% sigma)
                   weights <- (abs(WW) %*% Wps^2)
                   increment <- as.vector((WW %*% Wps)/weights)
                   sigma <- sigma + increment
                   sigma[m] <- 1
                   if (keepiter) 
                       iter <- rbind(iter, sigma)
                   temp <- PMGA(sigma[-m], weights[-m])
                   poolnum <- c(0, cumsum(temp$poolnum))
                   for (i in 2:length(poolnum)) for (j in (poolnum[i - 
                                                                   1] + 1):poolnum[i]) sigma[j] <- temp$est[i - 1]
                   if (keepiter) 
                       iter <- rbind(iter, sigma)
                   temp <- c(0, sigma)
                   pvec <- diff(c(0, oldsigma))
                   ndir <- diff(c(0, temp[2:(m + 1)])) - pvec
                   pvec <- Bisect(tA, pvec, ndir, Meps, tolbis = 1e-07)
                   sigma <- cumsum(pvec)
                   if (keepiter) 
                       iter <- rbind(iter, sigma)
               }
               numiter <- numiter + 1
           }
    if (numiter == maxiter) 
        warning("EM/ICM may have failed to converge.")
    pf <- diff(c(0, sigma))
    ret <- list(sigma = sigma, pf = pf, llk = sum(log(t(A) %*% pf)), 
                weights = as.vector(weights), 
                lastchange = sigma - oldsigma, numiter = numiter, 
                eps = tol)
    if (keepiter) {
        if (EMstep && ICMstep) 
            dimnames(iter) <- list(c("Seed", rep(c("EM", "Fisher", 
                                                   "PMGA", "Bisect"), numiter)), NULL)
        else if (EMstep) 
            dimnames(iter) <- list(rep("EM", numiter + 1), NULL)
        else dimnames(iter) <- list(c("Seed", rep(c("Fisher", 
                                                    "PMGA"), numiter)), NULL)
        ret$iter <- iter
    }
    ret
}

EMICM.mod <- function (A, EMstep = TRUE, ICMstep = TRUE, 
                       keepiter = FALSE, tol = 1e-07, maxiter = 1000) {
    if (ncol(A) == 2 && all(A[, 2] >= A[, 1])) {
        ml <- Maclist(A)
        intmap <- t(MLEintvl(A, ml))
        A <- Macmat(ml)$pmat
    }
    else intmap <- NULL
    temp <- EMICMmac.mod(A, EMstep = EMstep, ICMstep = ICMstep, 
                         keepiter = keepiter, 
                         tol = tol, maxiter = maxiter)
    if (is.null(temp)) 
        return(NULL)
    class(temp) <- "icsurv"
    temp$intmap <- intmap
        temp
}

## real data
dat <- c(45, 6, 0, 46, 46, 7, 17, 7, 37, 0, 4, 15, 11,  
         22, 46, 46, 25, 46, 26, 46, 27, 36, 46, 36, 37, 
         40, 17, 46, 11, 38, 5, 37, 0, 18, 24, 36, 5, 19, 
         17, 24, 32, 33, 19, 37, 34, 36, Inf, 10, 7, Inf, 
         Inf, 16, Inf, 14, 44, 8, 11, Inf, 15, Inf, Inf, Inf, 
         37, Inf, 40, Inf, 34, 44, Inf, 48, Inf, Inf, 25, 
         Inf, 18, Inf, 12, Inf, 5, Inf, Inf, Inf, 11, 35, 
         25, Inf, Inf, Inf, 26, Inf, Inf, Inf)
dat <- data.frame(matrix(dat, 46, 2))
names(dat) <- c("L", "R")
dat

system.time(ans1 <- IntervalCensor(Imat = dat, 
                                   algorithm = "EM"))
## Number of EM Steps
ans1$iters
system.time(ans2 <- IntervalCensor(Imat = dat, 
                                   algorithm = "Squarem"))
## Number of EM Steps
ans2$iters

## The maximum difference in absolute value between EM and Squarem estimated parameters
max(abs(ans1$par-ans2$par))

system.time(ans3 <- EMICM.mod(dat, EMstep = TRUE, 
                              ICMstep = TRUE, 
                              keepiter = FALSE, tol = 1e-08, 
                              maxiter = 1000))

## The maximum difference in absolute value between EM, Squarem and EM-ICM estimated parameters
max(abs(ans2$par-ans3$pf))


## Apply EM, Squarem individually without the wrapper
## Function of one EM evaluation
intEM <- function(pvec, A) {
    tA <- t(A)
    Ap <- pvec*tA
    pnew <- colMeans(t(Ap)/colSums(Ap))
    pnew * (pnew > 0)
}

## Function of negative log-likelihood
loglik <- function(pvec, A) {
    - sum(log(c(A %*% pvec)))
}


A <- Aintmap(dat[, 1], dat[, 2])
m <- ncol(A)
## starting values
pvec <- rep(1/m, length = m)
## EM
system.time(ans1 <- fpiter(par = pvec, fixptfn = intEM, 
                           objfn = loglik, A = A, 
                           control = list(tol = 1e-8)))

## Squarem
system.time(ans2 <- squarem(par = pvec, fixptfn = intEM, 
                            objfn = loglik, A = A, 
                            control = list(tol = 1e-8)))

## simulation example
## data generation
gendata <- function(n, mu.nexam = 5) {   
    foo <- matrix(NA_real_, nrow = n, ncol = 3)
    for (i in 1:n) {
        st <- rweibull(1, shape = 1, scale = 5)
        nexam <- rpois(1, mu.nexam)
        exam <- round(runif(nexam, 0, 10), 1)  
        exam <- c(0, exam, Inf) 
        foo[i, ] <- c(time = st, L = max(exam[st > exam]), 
                      R = min(exam[st <= exam]))
    }
    return(foo)
}

## sample size 200
library("ggplot2")
set.seed(350)
nsim <- 100
t.sq <- t.emicm <- t.sqemicm <- t.em <- rep(NA_real_, nsim)
f.sq <- f.em <-  rep(NA_real_, nsim)
for (i in 1:nsim) {
    dat.simul <- gendata(200)[, 2:3]
    ## EM
    t.em[i] <- system.time(ans1 <- IntervalCensor(Imat = dat.simul, 
                                                  algorithm = "EM", 
                                                  control.setting = list()))[3]
    f.em[i] <- ans1$iters
        
    ## SQUAREM - interval censoring
    t.sq[i] <- system.time(ans2 <- IntervalCensor(Imat = dat.simul, 
                                                  algorithm = "Squarem", 
                                                  control.setting = list()))[3]
    f.sq[i] <- ans2$iters
    
    ## EMICM
    t.emicm[i] <- system.time(ans3 <- EMICM.mod(dat.simul, 
                                                EMstep = TRUE, 
                                                ICMstep = TRUE, 
                                                keepiter = FALSE, 
                                                tol = 1e-08, 
                                                maxiter = 1000))[3]
}

times <- c(t.em, t.sq, t.emicm)
method <- rep(c("EM", "SQUAREM", "EMICM"), each = nsim)

comp.algor <- data.frame(Algorithm = factor(method, 
                                            levels = c("EM", "SQUAREM", "EMICM")), 
                         time = times)
p <- ggplot(comp.algor, aes(x = Algorithm, y = time)) + 
    geom_boxplot(aes(fill = Algorithm)) + scale_y_log10() + 
    xlab("Algorithm") + ylab("CPU running time (log10 scale)") +
    theme_bw() + guides(colour = guide_legend(title = "Algorithm")) +
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 15), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
p

mean(f.em); sd(f.em)
mean(f.sq); sd(f.sq)

## sample size 2000
library("ggplot2")
set.seed(350)
nsim <- 100
t.sq <- t.emicm <- t.em <- rep(NA_real_, nsim)
f.sq <- f.em <-  rep(NA_real_, nsim)

for (i in 1:nsim) {
    dat.simul <- gendata(2000)[, 2:3]
    ## EM
    t.em[i] <- system.time(ans1 <- IntervalCensor(Imat = dat.simul, 
                                                  algorithm = "EM", 
                                                  control.setting = list()))[3]
    f.em[i] <- ans1$iters
    
    ## SQUAREM - interval censoring
    t.sq[i] <- system.time(ans2 <- IntervalCensor(Imat = dat.simul, 
                                                  algorithm = "Squarem", 
                                                  control.setting = list()))[3]
    f.sq[i] <- ans2$iters
    
    ## EMICM
    t.emicm[i] <- system.time(ans3 <- EMICM.mod(dat.simul, 
                                                EMstep = TRUE, 
                                                ICMstep = TRUE, 
                                                keepiter = FALSE, 
                                                tol = 1e-08, 
                                                maxiter = 1000))[3]
}

times <- c(t.em, t.sq, t.emicm)
method <- rep(c("EM", "SQUAREM", "EMICM"), each = nsim)

comp.algor <- data.frame(Algorithm = factor(method, 
                                            levels = c("EM", "SQUAREM", "EMICM")), 
                         time = times)
p <- ggplot(comp.algor, aes(x = Algorithm, y = time)) + 
    geom_boxplot(aes(fill = Algorithm)) + scale_y_log10() + 
    xlab("Algorithm") + ylab("CPU running time (log10 scale)") +
    theme_bw() + guides(colour = guide_legend(title = "Algorithm")) +
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 15), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
p

mean(f.em); sd(f.em)
mean(f.sq); sd(f.sq)

## 5.2 Genetics Global Ancestry Estimation Problem
## log likelihood function
loglike <- function(param, X, K) {
    n <- nrow(X); p <- ncol(X)
        
    F <- matrix(param[1: (p * K)], p, K)
    Q <- matrix(param[(p * K + 1): (p * K + n * K)], n, K)
    
    loglikelihood <- sum(X * log(Q %*% t(F)) + 
                         (2 - X) * log(Q %*% (1-t(F))))
    ##returns negative loglikelihood for minimization
    return(-loglikelihood)
}

admixture.em <- function(param, X, K) {
    eps <- 1e-6
    n <- nrow(X); p <- ncol(X)
    
    ## Initialize the counts.
    m <- matrix(eps, n, K)
    n0 <- n1 <- matrix(eps, p, K)
    
    ## recover the parameters F, Q
    F <- matrix(param[1: (p * K)], p, K)
    Q <- matrix(param[(p * K + 1): (p * K + n * K)], n, K)
        
    ## Initialize storage for the posterior probabilities of the hidden
    ## (phased) genotypes x population indicators for a single sample.
    r <- array(0, dim = c(p, 4, K, K))
        
    ## Update the expected allele counts and population counts. Repeat
    ## for each sample.
    for (i in 1:n) {
        colnames(r) <- c("00", "01", "10", "11")
        ## Repeat for each combination of ancestral populations.
        for (j in 1:K) {
            for (k in 1:K) {
                ## Compute the posterior probabilities 
                r[, "00", j, k] <- (X[i, ] == 0) * 
                    (1 - F[, j]) * (1 - F[, k])
                r[, "01", j, k] <- (X[i, ] == 1) * 
                    (1 - F[, j]) * F[, k]
                r[, "10", j, k] <- (X[i, ] == 1) * 
                    F[, j]       * (1 - F[, k])
                r[, "11", j, k] <- (X[i, ] == 2) * 
                    F[, j]       * F[, k]
                r[, , j, k]     <- r[, , j, k] * 
                    Q[i, j] * Q[i, k]
            }
        }
        ## Normalize the posterior probabilities.
        dim(r) <- c(p, 4*K^2)
        r      <- r/rowSums(r)
        ## Add the posterior probabilities to sufficient statistics.
        dim(r)      <- c(p, 4, K, K)
        colnames(r) <- c("00", "01", "10", "11")
        m[i, ]       <- m[i, ] + apply(r, 3, sum) + apply(r, 4, sum) 
        for (k in 1:K) {
            n0[, k] <- n0[, k] + rowSums(drop(r[, "00", k, ])) +
                rowSums(drop(r[, "01", k, ])) +
                rowSums(drop(r[, "00", , k])) +
                rowSums(drop(r[, "10", , k]))
            n1[, k] <- n1[, k] + rowSums(drop(r[, "10", k, ])) +
                rowSums(drop(r[, "11", k, ])) +
                rowSums(drop(r[, "01", , k])) +
                rowSums(drop(r[, "11", , k]))
        }
    }
    F <- n1 / (n0 + n1)
    Q <- m / rowSums(m)
    return(c(as.vector(F), as.vector(Q)))
}

load("geno.sim.RData")
set.seed(413)
## starting value
p <- 100
n <- 150
K <- 3
F <- matrix(runif(p*K), p, K)
Q <- matrix(1/K, n, K)
param.start <- c(as.vector(F), as.vector(Q))
## EM
system.time(f1 <- fpiter(par = param.start, 
                         fixptfn = admixture.em, 
                         objfn = loglike, 
                         control = list(tol = 1e-4), 
                         X = geno, K = 3))
f1$fpevals
## Squarem
system.time(f2 <- squarem(par = param.start, 
                          fixptfn = admixture.em, 
                          objfn = loglike, 
                          control = list(tol = 1e-4, 
                                         maxiter = 2000), 
                          X = geno, K = 3))
f2$fpevals

## 5.3 MM Problem
ld <- read.table("lee_data.txt")
head(ld, 5)

## log likelihood function
binom.loglike <- function(par, Z, y) {
    zb <- c(Z %*% par)
    pib <- 1 / (1 + exp(-zb))
    return(as.numeric(-t(y) %*% (Z %*% par) - sum(log(1 - pib))))
}

## quadratic majorization uniform bound
qmub.update <- function(par, Z, y) {
    Zmat <- solve(crossprod(Z)) %*% t(Z)
    zb <- c(Z %*% par)
    pib <- 1 / (1 + exp(-zb))
    ub <-  pib - y
    par <- par - 4 * c(Zmat %*% ub)
    par
}

## quadratic majorization non uniform bound
qmvb.update <- function(par, Z, y) {
    zb <- c(Z %*% par)
    pib <- 1 / (1 + exp(-zb))
    wmat <- diag((2 * pib - 1)/(2 * zb))
    ub <-  pib - y
    Zmat <- solve(t(Z) %*% wmat %*% Z) %*% t(Z)
    par <- par - c(Zmat %*% ub)
    par
}

Z <- as.matrix(ld[, 1:7])
y <- ld[, 8]
p0 <- rep(10, 7)
## uniform bound
system.time(ans1 <- fpiter(par = p0, fixptfn = qmub.update, 
                           objfn = binom.loglike, 
                           control = list(maxiter = 20000), 
                           Z = Z, y = y))
ans1
## squared uniform bound
system.time(ans2 <- squarem(par = p0, fixptfn = qmub.update, 
                            objfn = binom.loglike, 
                            Z = Z, y = y))
ans2

## non-uniform bound
system.time(ans3 <- fpiter(par = p0, fixptfn = qmvb.update, 
                           objfn = binom.loglike, 
                           control = list(maxiter = 20000), 
                           Z = Z, y = y))
ans3
## squared non-uniform bound
system.time(ans4 <- squarem(par = p0, fixptfn = qmvb.update, 
                            objfn = binom.loglike, 
                            Z = Z, y = y))
ans4

## simulation
set.seed(412)
sim <- 500
tqm1 <- tsqqm1 <- tqm2 <- tsqqm2 <- rep(NA_real_, sim)
fqm1 <- fsqqm1 <- fqm2 <- fsqqm2 <- rep(NA_real_, sim)
for(i in 1:sim) {
    p0 <- runif(7, 0, 10)
    ## QM-uniform bound
    tqm1[i] <- system.time(ans1 <- fpiter(par = p0, 
                                          fixptfn = qmub.update, 
                                          objfn = binom.loglike, 
                                          control = list(maxiter = 20000), 
                                          Z = Z, y = y))[3]
    tsqqm1[i] <- system.time(ans2 <- squarem(par = p0, 
                                             fixptfn = qmub.update, 
                                             objfn = binom.loglike, 
                                             Z = Z, y = y))[3]
    
    ## Squared QM-uniform bound
    fqm1[i] <- ans1$fpevals
    fsqqm1[i] <- ans2$fpevals
    
    ## QM-non-uniform bound
    tqm2[i] <- system.time(ans3 <- fpiter(par = p0, 
                                          fixptfn = qmvb.update, 
                                          objfn = binom.loglike, 
                                          control = list(maxiter = 20000), 
                                          Z = Z, y = y))[3]
    tsqqm2[i] <- system.time(ans4 <- squarem(par = p0, 
                                             fixptfn = qmvb.update, 
                                             objfn = binom.loglike, 
                                             Z = Z, y = y))[3]
    
    ## Squared QM-non-uniform bound
    fqm2[i] <- ans3$fpevals
    fsqqm2[i] <- ans4$fpevals
}

times <- c(tqm1, tsqqm1, tqm2, tsqqm2)
fevals <- c(fqm1, fsqqm1, fqm2, fsqqm2)
method <- rep(c("QM_Unif", "SQUARE QM_Unif", 
                "QM_Non", "SQUARE QM_Non"), each = sim)

mean(tqm1); sd(tqm1)
mean(tsqqm1); sd(tsqqm1)
mean(tqm2); sd(tqm2)
mean(tsqqm2); sd(tsqqm2)

mean(fqm1); sd(fqm1)
mean(fsqqm1); sd(fsqqm1)
mean(fqm2); sd(fqm2)
mean(fsqqm2); sd(fsqqm2)

comp.algorithm <- data.frame(Algorithm = factor(method, 
                                                levels = c("QM_Unif", "SQUARE QM_Unif", 
                                                           "QM_Non", "SQUARE QM_Non")), 
                             time = times, fevals = fevals)
p <- ggplot(comp.algorithm, aes(x = Algorithm, y = time)) + 
    geom_boxplot(aes(fill = Algorithm)) + scale_y_log10() + 
    xlab("Algorithm") + 
    ylab("CPU running time(log10 scale)") + 
    theme_bw() + guides(colour = guide_legend(fill = "Algorithm")) + 
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 15), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
p

q <- ggplot(comp.algorithm, aes(x = Algorithm, y = fevals)) + 
    geom_boxplot(aes(fill = Algorithm)) + scale_y_log10() + 
    xlab("Algorithm") + 
    ylab("The number of QM updates(log10 scale)") + 
    theme_bw() + guides(fill = guide_legend(title = "Algorithm")) + 
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 15), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
q

## Appendix Factor Analysis
## Real data example
## log-likelihood
factor.loglik <- function(param, cyy) {
    ## extract beta matrix and tau2 from param
    beta.vec <- param[1:36]
    beta.mat <- matrix(beta.vec, 4, 9)
    tau2 <- param[37:45]
    tau2.mat <- diag(tau2)
    
    Sig <- tau2.mat + t(beta.mat) %*% beta.mat
    ## suppose n=145 since this does not impact the parameter estimation
    loglik <- -145/2 * log(det(Sig)) - 145/2 * sum(diag(solve(Sig, cyy)))
    ## the negative log-likelihood is returned
    return(-loglik)
}

## one EM update
factor.em <- function(param, cyy) {
    ## extract beta matrix and tau2 from param
    beta.vec <- param[1:36]
    beta.mat <- matrix(beta.vec, 4, 9)
    tau2 <- param[37:45]
    tau2.mat <- diag(tau2)
    
    ## compute delta/Delta
    inv.quantity <- solve(tau2.mat + t(beta.mat) %*% beta.mat)
    small.delta <- inv.quantity %*% t(beta.mat)
    big.delta <- diag(4) - beta.mat %*% inv.quantity %*% t(beta.mat)
    
    cyy.inverse <- t(small.delta) %*% cyy %*% small.delta + big.delta
    cyy.mat <- t(small.delta) %*% cyy
    
    ## update betas and taus
    beta.new <- matrix(0, 4, 9)
    beta.p1 <- solve(cyy.inverse[1:3, 1:3]) %*% cyy.mat[1:3, 1:4]
    beta.p2 <- solve(cyy.inverse[c(1, 2, 4), c(1, 2, 4)]) %*% 
        cyy.mat[c(1, 2, 4), 5:9]
    beta.new[1:3, 1:4] <- beta.p1
    beta.new[c(1, 2, 4), 5:9] <- beta.p2
    
    tau.p1 <- diag(cyy)[1:4] - diag(t(cyy.mat[1:3, 1:4]) %*% 
                                    solve(cyy.inverse[1:3, 1:3]) %*% cyy.mat[1:3, 1:4])
    tau.p2 <- diag(cyy)[5:9] - diag(t(cyy.mat[c(1, 2, 4), 5:9]) %*% 
                                    solve(cyy.inverse[c(1, 2, 4), c(1, 2, 4)]) %*% 
                                    cyy.mat[c(1, 2, 4), 5:9])
    tau.new <- c(tau.p1, tau.p2)
    
    param.new <- c(as.numeric(beta.new), tau.new)
    return(param.new)
}

## one ECME update
factor.ecme <- function(param, cyy) {
    n <- 145
    ## extract beta matrix and tau2 from param
    beta.vec <- param[1:36]
    beta.mat <- matrix(beta.vec, 4, 9)
    tau2 <- param[37:45]
    tau2.mat <- diag(tau2)
        
    ## compute delta/Delta
    inv.quantity <- solve(tau2.mat + t(beta.mat) %*% beta.mat)
    small.delta <- inv.quantity %*% t(beta.mat)
    big.delta <- diag(4) - beta.mat %*% inv.quantity %*% t(beta.mat)
    
    cyy.inverse <- t(small.delta) %*% cyy %*% small.delta + big.delta
    cyy.mat <- t(small.delta) %*% cyy
    
    ## update betas
    beta.new <- matrix(0, 4, 9)
    beta.p1 <- solve(cyy.inverse[1:3, 1:3]) %*% cyy.mat[1:3, 1:4]
    beta.p2 <- solve(cyy.inverse[c(1, 2, 4), c(1, 2, 4)]) %*% 
        cyy.mat[c(1, 2, 4), 5:9]
    beta.new[1:3, 1:4] <- beta.p1
    beta.new[c(1, 2, 4), 5:9] <- beta.p2
    
    ## update taus given betas
    A <- solve(tau2.mat + t(beta.new) %*% beta.new)
    sum.B <- A %*% (n * cyy) %*% A
    gradient <- - tau2/2 * (diag(n*A) - diag(sum.B))
    hessian <- (0.5 * (tau2 %*% t(tau2))) * (A * (n * A - 2 * sum.B))
    diag(hessian) <- diag(hessian) + gradient
    U <- log(tau2)
    U <- U - solve(hessian, gradient)  # Newton step
    
    tau.new <- exp(U)
    param.new <- c(as.numeric(beta.new), tau.new)
    return(param.new)
}

## Cyy
cyy <- diag(9)
cyy[upper.tri(cyy)] <- c(.554, .227, .296, .189, .219, .769, 
                         .461, .479, .237, .212, .506, .530, 
                         .243, .226, .520, .408, .425, .304, 
                         .291, .514, .473, .280, .311, .718, 
                         .681, .313, .348, .374, .241, .311, 
                         .730, .661, .245, .290, .306, .672)
cyy[lower.tri(cyy)] <- t(cyy)[lower.tri(t(cyy))]

## starting value
beta.trans <- matrix(c(0.5954912, 0.6449102, 0.7630006, 0.7163828, 0.6175647, 0.6464100, 0.6452737, 0.7868222, 0.7482302, 
                       -0.4893347, -0.4408213, 0.5053083, 0.5258722, -0.4714808, -0.4628659, -0.3260013, 0.3690580, 0.4326963, 
                       -0.3848925, -0.3555598, -0.0535340, 0.0219100, 0, 0, 0, 0, 0, 
                       0, 0, 0, 0, 0.1931459, 0.4606456, -0.3622682, 0.0630371, 0.0431256), 9, 4)
beta.start <- t(beta.trans)
tau2.start <- rep(10^(-8), 9)

param.start <- c(as.numeric(beta.start), tau2.start)

## EM
library("SQUAREM")
system.time(f1 <- fpiter(par = param.start, cyy = cyy, fixptfn = factor.em, 
                         objfn = factor.loglik, 
                         control = list(tol = 10^(-8), 
                                        maxiter = 20000)))
f1$fpevals

## ECME
system.time(f2 <- fpiter(par = param.start, cyy = cyy, 
                         fixptfn = factor.ecme, objfn = factor.loglik, 
                         control = list(tol = 10^(-8), maxiter = 20000)))
f2$fpevals

## Squared EM
system.time(f3 <- squarem(par = param.start, cyy = cyy, 
                          fixptfn = factor.em, 
                          objfn = factor.loglik, 
                          control = list(tol = 10^(-8))))
f3$fpevals

## Squarem ECME
system.time(f4 <- squarem(par = param.start, cyy = cyy, 
                          fixptfn = factor.ecme, 
                          objfn = factor.loglik, 
                          control = list(tol = 10^(-8))))
f4$fpevals

## compare elapsed time and iterations
tem <- tsqem <- tecme <- tsqecme <- rep(NA_real_, 100)

for (i in 1:100) {
    tem[i] <- system.time(f1 <- fpiter(par = param.start, cyy = cyy, fixptfn = factor.em, objfn = factor.loglik, control = list(tol = 10^(-8), maxiter = 20000)))[3]
    tsqem[i] <- system.time(f2 <- squarem(par = param.start, cyy = cyy, fixptfn = factor.em, objfn = factor.loglik, control = list(tol = 10^(-8))))[3]
    tecme[i] <- system.time(f3 <- fpiter(par = param.start, cyy = cyy, fixptfn = factor.ecme, objfn = factor.loglik, control = list(tol = 10^(-8), maxiter = 10000)))[3]
    tsqecme[i] <- system.time(f4 <- squarem(par = param.start, cyy = cyy, fixptfn = factor.ecme, objfn = factor.loglik, control = list(tol = 10^(-8))))[3]
}

mean(tem); sd(tem); f1$fpevals
mean(tsqem); sd(tsqem); f2$fpevals
mean(tecme); sd(tecme); f3$fpevals
mean(tsqecme); sd(tsqecme); f4$fpevals

## Simulation Example
library("wakefield")
library("BiocParallel")
library("ggplot2")
simulate.FAEM <- function(seed) {
    set.seed(seed)
    ## generate 200*32 score dataframe
    generate.score <- function(a) {
        set.seed(a + seed)
        integer <- ceiling(a/8)
        if (integer == 1) {
            return(grade(200, mean = 60, sd = 5, digits = 0))
        } else if(integer == 2) {
            return(grade(200, mean = 70, sd = 7, digits = 0))
        } else if(integer == 3) {
            return(grade(200, mean = 80, sd = 6, digits = 0))
        } else {
            return(grade(200, mean = 90, sd = 8, digits = 0))
        }
    }
    Y.List <- bplapply(1:32, generate.score)
    Y.Matrix <- do.call(rbind, Y.List)
    Y.dat <- data.frame(t(Y.Matrix))
    row.names(Y.dat) <- NULL
    names(Y.dat) <- paste0("Subject", 1:32)
    
    ## Compute the factor estimates
    ## Center the data Y.dat
    Y.dat <- scale(Y.dat, center = TRUE, scale = TRUE)
    cyy <- cov(Y.dat)
        
    ## EM Iteration
    factor.em <- function(param, cyy) {
        ## extract beta matrix and tau2 from param
        beta.vec <- param[1:128]
        beta.mat <- matrix(beta.vec, 4, 32)
        tau2 <- param[129:160]
        tau2.mat <- diag(tau2)
        
        ## compute delta/Delta
        inv.quantity <- solve(tau2.mat + t(beta.mat) %*% beta.mat)
        small.delta <- inv.quantity %*% t(beta.mat)
        big.delta <- diag(4) - beta.mat %*% inv.quantity %*% 
            t(beta.mat)
        cyy.inverse <- t(small.delta) %*% cyy %*% small.delta + 
            big.delta
        cyy.mat <- t(small.delta) %*% cyy
        
        ## update betas and taus
        beta.new <- solve(cyy.inverse, cyy.mat)
        tau.new <- diag(cyy - t(cyy.mat) %*% 
                        solve(cyy.inverse, cyy.mat))
                
        param.new <- c(as.numeric(beta.new), tau.new)
        return(param.new)
    }
    
    ## log-likelihood function
    factor.loglik <- function(param, cyy) {
        ## extract beta matrix and tau2 from param
        beta.vec <- param[1:128]
        beta.mat <- matrix(beta.vec, 4, 32)
        tau2 <- param[129:160]
        tau2.mat <- diag(tau2)
        
        Sig <- tau2.mat + t(beta.mat) %*% beta.mat
        ## n=200 
        loglik <- -200/2 * log(det(Sig)) - 200/2 * 
            sum(diag(solve(Sig, cyy)))
        return(-loglik)
    }
        
    ## starting value
    set.seed(seed + 1000)
    beta.start <- matrix(runif(4*32, -1, 1), 4, 32)
    tau2.start <- rep(10^(-8), 32)
    
    param.start <- c(as.numeric(beta.start), tau2.start)
        
    ## EM 
    t1 <- system.time(f1 <- fpiter(par = param.start, cyy = cyy, 
                                   fixptfn = factor.em, 
                                   objfn = factor.loglik, 
                                   control = list(tol = 10^(-8), 
                                                  maxiter = 200000)))[3]
        
    ## Squared EM
    t2 <- system.time(f2 <- squarem(par = param.start, cyy = cyy, 
                                    fixptfn = factor.em, 
                                    objfn = factor.loglik, 
                                    control = list(tol = 10^(-8), 
                                                   maxiter = 20000)))[3]
        
    return(c(f1$fpevals, f2$fpevals, f1$convergence, 
             f2$convergence, t1, t2))
}

result.matrix <- matrix(0, 50, 6)
for (i in 1:50) {
    result.matrix[i, ] <- simulate.FAEM(1000 + i)
}

result.dat <- data.frame(number = rep(1:50, 2), feval = c(result.matrix[, 1], result.matrix[, 2]), time = c(result.matrix[, 5], result.matrix[, 6]), 
                         group = rep(c("EM", "SQUAREM"), each = 50), convergence = c(result.matrix[, 3], result.matrix[, 4]))
result.dat <- within(result.dat, convergence <- ifelse(convergence == 1, "converge", "notconverge"))

## Distribution Function of Runtime
r <- ggplot(result.dat, aes(x = log(time, base = 10), colour = factor(group))) +
    stat_ecdf() + xlab("CPU running time") + ylab("Cumulative probability") + 
    guides(colour = guide_legend(title = "Algorithm")) + theme_bw() +
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 19), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
r

## Distribution Function of Evaluations
e <- ggplot(result.dat, aes(x = log(feval, base = 10), colour = factor(group))) +
    stat_ecdf() +
    xlab("The number of EM evaluations") +
    ylab("Cumulative probability") + 
    guides(colour = guide_legend(title = "Algorithm")) + theme_bw() +
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 19), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
e

## Scatterplot of running time and iterations
sr <- ggplot(data.frame(result.matrix), aes(x = X6, y = X5)) +
    geom_point(colour = "red") +
    geom_abline(slope = 10, intercept = 0, colour = "blue") +
    xlab("CPU running time for Squarem") + 
    ylab("CPU running time for EM") + 
    theme_bw() + annotate("text", label = "y = 10x", x = 5, y = 40, size = 8) +
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 19), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
sr

se <- ggplot(data.frame(result.matrix), aes(x = X2, y = X1)) +
    geom_point(colour = "red") +
    geom_abline(slope = 10, intercept = 0, colour = "blue") + 
    xlab("The fevals for Squarem") + 
    ylab("The fevals for EM") + 
    theme_bw() + annotate("text", label = "y = 10x", x = 8000, y = 50000, size = 8) +
    theme(plot.title = element_text(size = 20), 
          axis.title = element_text(size = 19), 
          legend.title = element_text(size = 15), 
          legend.text = element_text(size = 12))
se