library("spldv") library("spdep") library("ProbitSpatial") library("spatialprobit") library("microbenchmark") library("dplyr") library("INLA") library("car") library("spdep") spprobit <- function(form, inst = NULL, winst = NULL, wmat = NULL, shpfile = NULL, blockid = NULL, minblock = NULL, maxblock = NULL, data = NULL, silent = FALSE, minp = NULL) { if (!identical(winst, NULL)){wdata <- model.matrix(winst, data = data)} wvar = FALSE if (identical(wmat, NULL)&identical(shpfile, NULL)){wvar = TRUE} if (identical(data, NULL)) { data <- model.frame(form) xnames <- names(data) if (!identical(inst, NULL)){ data1 <- model.frame(inst) xnames1 <- names(data1) newnames <- setdiff(xnames1, xnames) if (length(newnames)>0) { data <- cbind(data, data1[, newnames]) names(data) <- c(xnames, newnames) xnames <- names(data) } } if (!identical(winst, NULL)){ data1 <- model.frame(winst) xnames1 <- names(data1) newnames <- setdiff(xnames1, xnames) if (length(newnames)>0) { data <- cbind(data, data1[, newnames]) names(data) <- c(xnames, newnames) xnames <- names(data) } } } n <- nrow(data) xmat <- model.frame(form, data = data) nk <- ncol(xmat)+1 if (identical(minblock, NULL)) {minblock = nk} if (identical(maxblock, NULL)) {maxblock = n+1} y <- xmat[, 1] if (identical(blockid, NULL)) {blockid <- array(1, dim = n)} block <- factor(blockid) lblock <- levels(block) nblock <- length(lblock) if (nblock > 1){ cat("Block diagonal W matrix will be created from shpfile; wmat will be ignored if specified", "\n")} needw <- (!identical(shpfile, NULL) & identical(wmat, NULL)) | nblock > 1 tblock <- table(block) nbad <- sum(tblockmaxblock) if (nbad > 0) { badblock <- lblock[tblockmaxblock] lblock <- lblock[!lblock %in% badblock] cat("Some blocks have fewer observations than minblock", "\n") cat("The following blocks are removed from the data set prior to estimation:", "\n") print(tblock[rownames(tblock) %in% badblock]) sampvar <- block %in% lblock data <- data[sampvar, ] block <- block[sampvar] shpfile <- shpfile[sampvar, ] } n <- nrow(data) nblock <- length(lblock) probit <- glm(form, family = binomial(link = "probit"), data = data) if (silent == FALSE) { print(summary(probit)) cat("STANDARD PROBIT ESTIMATES", "\n") } xb <- probit$linear.predictors if (!identical(minp, NULL)) { xb <- ifelse(xb < qnorm(minp), qnorm(minp), xb) xb <- ifelse(xb > 1 - qnorm(minp), 1 - qnorm(minp), xb) } p <- pnorm(xb) u <- (y-p) * dnorm(xb)/(p * (1 - p)) gmat <- model.matrix(form, data = data) xnames <- colnames(gmat) gmat <- cbind(gmat, 1) nk <- ncol(gmat) for (i in lblock) { xmat <- model.matrix(form, data = data[block == i, ]) if (needw == TRUE & wvar == FALSE) { neighbors <- poly2nb(shpfile[block == i, ], queen = TRUE) wmat <- nb2mat(neighbors, zero.policy = TRUE) } if (identical(inst, NULL)&identical(winst, NULL)) { zmat <- cbind(xmat, wmat %*% xmat[, -1])} if (identical(inst, NULL)&!identical(winst, NULL)) { zmat <- cbind(xmat, wmat %*% (model.matrix(winst, data = data[block == i, ])[, -1])) } if (!identical(inst, NULL)&identical(winst, NULL)) { zmat <- model.matrix(inst, data = data[block == i, ])} if (!identical(inst, NULL)&!identical(winst, NULL)&wvar == FALSE) { zmat <- cbind(model.matrix(inst, data = data[block == i, ]), wmat %*% (model.matrix(winst, data = data[block == i, ])[, -1])) } if (!identical(inst, NULL)&!identical(winst, NULL)&wvar == TRUE) { zmat <- cbind(model.matrix(inst, data = data[block == i, ]), model.matrix(winst, data = data[block == i, ])[, -1]) } grad <- as.vector(u[block == i] * (u[block == i]+xb[block == i])) gmat[block == i, -nk] <- grad * xmat u[block == i] <- u[block == i] + gmat[block == i, -nk] %*% probit$coef if (wvar == FALSE){wxb <- grad * (wmat %*% xb[block == i])} if (wvar == TRUE){wxb <- grad * (wdata[block == i, ] %*% probit$coef)} gmat[block == i, nk] <- wxb gmat[block == i, ] <- zmat %*% (solve(crossprod(zmat)) %*% (t(zmat) %*% gmat[block == i, ])) } fit <- lm(u ~ gmat + 0) v <- diag(hccm(fit)) summat <- cbind(fit$coef, sqrt(v), fit$coef/sqrt(v), 2 * (1 - pnorm(abs(fit$coef)/sqrt(v) )) ) rownames(summat) <- c(xnames, "WXB") colnames(summat) <- c("Estimate", "Std. Error", "z-value", "Pr(>|z|)") if (silent == FALSE) { cat("LINEARIZED GMM PROBIT ESTIMATES", "\n") print(round(summat, 5)) cat("Number of observations = ", n, "\n") } names(fit$coef) <- c(xnames, "WXB") v <- sqrt(v) names(v) <- names(fit$coef) out <- list(fit$coef, v, u, gmat) names(out) <- c("coef", "se", "u", "gmat") return(out) } gmmprobit <- function(form, inst = NULL, winst = NULL, wmat = NULL, shpfile, startb = NULL, startrho = 0, blockid = 0, cvcrit = .0001, data = NULL, silent = FALSE) { library("car") if (identical(wmat, NULL)) { library(spdep) neighbors <- poly2nb(shpfile, queen = TRUE) wmat <- nb2mat(neighbors, zero.policy = TRUE) } if (identical(data, NULL)) { data <- model.frame(form) xnames <- names(data) if (!identical(inst, NULL)){ data1 <- model.frame(inst) xnames1 <- names(data1) newnames <- setdiff(xnames1, xnames) if (length(newnames)>0) { data <- cbind(data, data1[, newnames]) names(data) <- c(xnames, newnames) xnames <- names(data) } } if (!identical(winst, NULL)){ data1 <- model.frame(winst) xnames1 <- names(data1) newnames <- setdiff(xnames1, xnames) if (length(newnames)>0) { data <- cbind(data, data1[, newnames]) names(data) <- c(xnames, newnames) xnames <- names(data) } } } xmat <- model.frame(form, data = data) y <- xmat[, 1] xmat <- model.matrix(form, data = data) if (identical(inst, NULL)&identical(winst, NULL)) { zmat <- cbind(xmat, wmat %*% xmat[, -1])} if (identical(inst, NULL)&!identical(winst, NULL)) { zmat <- cbind(xmat, wmat %*% (model.matrix(winst, data = data)[, -1])) } if (!identical(inst, NULL)&identical(winst, NULL)) { zmat <- model.matrix(inst, data = data)} if (!identical(inst, NULL)&!identical(winst, NULL)) { zmat <- cbind(model.matrix(inst, data = data), wmat %*% (model.matrix(winst, data = data)[, -1])) } if (!identical(inst, NULL)&identical(winst, NULL)) { cat("Warning: list provided for inst but not winst", "\n") cat("inst list should include variables that are omitted from orginal explanatory variable list", "\n") cat("\n") } n <- nrow(xmat) nk <- ncol(xmat) nk1 <- nk+1 block <- factor(blockid) for (i in levels(block)) { wmat[blockid == i, blockid != i] <- 0 wsum <- rowSums(wmat[blockid == i, blockid == i]) wsum[wsum == 0] <- 1 wmat[blockid == i, blockid != i] <- wmat[blockid == i, blockid != i]/wsum } if (length(startb) == 0) { if (silent == FALSE) {cat("STANDARD PROBIT ESTIMATES", "\n")} probit <- glm(form, family = binomial(link = "probit"), data = data) if (silent == FALSE) {print(summary(probit))} startb <- probit$coef } b0 <- c(startb, startrho) subprobit <- function(b) { bmat <- b[1:nk] rho <- b[nk1] xb <- xmat %*% bmat xstarb <- array(0, dim = n) u <- array(0, dim = n) ws <- array(0, dim = n) gmat <- cbind(xmat, 1) block <- factor(blockid) for (i in levels(block)) { w <- wmat[block == i, block == i] iwmat <- solve(diag(nrow(w)) - rho * w) vmat <- tcrossprod(iwmat) svar <- sqrt(diag(vmat)) ws[block == i] <- diag(iwmat)/svar gmat[block == i, 1:nk] <- iwmat %*% xmat[block == i, ]/svar xstarb[block == i] <- gmat[block == i, 1:nk] %*% bmat cphi <- pnorm(xstarb[block == i]) dphi <- dnorm(xstarb[block == i]) u[block == i] <- (y[block == i] - cphi) * dphi/(cphi * (1 - cphi)) grho1 <- iwmat %*% (w %*% xstarb[block == i]) grho2 <- diag(vmat %*% (w + t(w) - 2 * rho * crossprod(w, y = NULL)) %*% vmat) * xstarb[block == i]/(2 * svar * svar) gmat[block == i, nk+1] <- grho1-as.vector(grho2) } du <- u * (xstarb + u) for (j in seq(1:nk1)) { gmat[, j] <- du * gmat[, j] gmat[, j] <- fitted(lm(gmat[, j]~zmat)) } fit <- lm(u~gmat+0) chmat <- fit$coef out <- list(chmat, gmat, ws, u, xstarb) names(out) <- c("chmat", "gmat", "ws", "u", "xstarb") return(out) } chk = cvcrit+1 iter = 0 chmat <- array(0, dim = nk1) while (chk > cvcrit | iter <= 1) { iter <- iter+1 b0 <- b0 + chmat gmm <- subprobit(b0) chmat <- gmm$chmat chk = max(abs(chmat)) if (silent == FALSE) {print(c(iter, chk))} } ws <- gmm$ws u <- gmm$u xstarb <- gmm$xstarb b <- b0 gg <- solve(crossprod(gmm$gmat)) for (j in seq(1:nk1)) { gmm$gmat[, j] <- abs(gmm$u) * gmm$gmat[, j] } vmat <- diag(gg %*% crossprod(gmm$gmat, y = NULL) %*% gg) outmat <- cbind(b, sqrt(vmat), b/sqrt(vmat), 2 * (1-pnorm(abs(b)/sqrt(vmat) )) ) v <- rownames(outmat) v[nrow(outmat)] = "WXB" rownames(outmat) <- v colnames(outmat) <- c("Estimate", "Std. Error", "z-value", "Pr(>|z|)") if (silent == FALSE) { cat("SPATIAL GMM PROBIT ESTIMATES", "\n") print(outmat) } out <- list(b, sqrt(vmat)) names(out) <- c("coef", "se") return(out) } spprobitml <- function(form, wmat = NULL, shpfile = NULL, blockid = NULL, minblock = NULL, maxblock = NULL, stdprobit = TRUE, data = NULL) { library("spdep") if (identical(data, NULL)) { data <- model.frame(form) } n <- nrow(data) xmat <- model.frame(form, data = data) nk <- ncol(xmat)+1 if (identical(minblock, NULL)) {minblock = nk} if (identical(maxblock, NULL)) {maxblock = n+1} y <- xmat[, 1] if (identical(blockid, NULL)) {blockid <- array(1, dim = n)} block <- factor(blockid) lblock <- levels(block) nblock = length(lblock) if (nblock > 1){ cat("Block diagonal W matrix will be created from shpfile; wmat will be ignored if specified", "\n")} needw <- (!identical(shpfile, NULL)&identical(wmat, NULL)) | nblock > 1 if (needw == TRUE & identical(shpfile, NULL)){ cat("ERROR: shpfile required for estimation", "\n")} tblock <- table(block) nbad = sum(tblockmaxblock) if (nbad > 0) { badblock <- lblock[tblockmaxblock] lblock <- lblock[!lblock %in% badblock] cat("Some blocks have fewer observations than minblock", "\n") cat("The following blocks are removed from the data set prior to estimation:", "\n") print(tblock[rownames(tblock) %in% badblock]) sampvar <- block %in% lblock data <- data[sampvar, ] block <- block[sampvar] shpfile <- shpfile[sampvar, ] } n = nrow(data) nblock = length(lblock) if (stdprobit == TRUE) { fit <- glm(form, family = binomial(link = "probit"), data = data) cat("Standard Probit Estimates", "\n") print(summary(fit)) } makevar <- function(rho) { v <- array(1, dim = n) xstar <- model.matrix(form, data = data) for (i in lblock) { xmat <- model.matrix(form, data = data[block == i, ]) if (needw == TRUE) { neighbors <- poly2nb(shpfile[block == i, ], queen = TRUE) wmat <- nb2mat(neighbors, zero.policy = TRUE) } vmat <- solve(diag(nrow(xmat)) - rho * wmat) xstar[block == i, ] <- vmat %*% xmat vmat <- tcrossprod(vmat) v[block == i] <- sqrt(diag(vmat)) } out <- list(xstar, v) names(out) <- c("xstar", "v") return(out) } probitrho <- function(rho) { fit <- makevar(rho) xstar <- as.matrix(as.data.frame(fit$xstar)/fit$v) fit <- glm(y~xstar+0, family = binomial(link = "probit")) xb <- fit$linear.predictors lvar <- sum(ifelse(y == 1, log(pnorm(xb)), log(1-pnorm(xb)))) out <- list(fit$coef, lvar) names(out) <- c("coef", "logl") return(out) } logl <- function(rho) {-probitrho(rho)$logl} rho <- optimize(logl, lower = -.99, upper = .99)$minimum fit <- probitrho(rho) bvect <- c(fit$coef, rho) names(bvect) <- c(colnames(xmat), "rho") nk <- length(bvect)-1 rho <- bvect[nk+1] b <- bvect[1:nk] fit <- makevar(rho) xb <- as.numeric(fit$xstar %*% b)/fit$v p <- pnorm(xb) g <- (dnorm(xb)^2)/(p * (1-p)) gmat <- as.matrix((sqrt(g)/fit$v) * data.frame(fit$xstar)) vmat1 <- solve(crossprod(gmat)) # Use numeric derivatives to calculate unconditional standard errors logl <- function(rho) { fit <- makevar(rho) sv <- fit$v xb <- as.numeric(as.matrix(fit$xstar) %*% b)/sv lvar <- ifelse(y == 1, log(pnorm(xb)), log(pnorm(-xb))) return(lvar) } lvar <- sum(logl(rho)) g <- dnorm(xb) * (y-p)/(p * (1-p)) gmat <- as.matrix(data.frame(fit$xstar) * (g/fit$v)) g1 <- logl(rho+.001) g0 <- logl(rho-.001) g <- (g1-g0)/.002 vmat2 <- solve(crossprod(cbind(gmat, g))) semat1 <- sqrt(diag(vmat1)) semat2 <- sqrt(diag(vmat2)) cat("Conditional on rho", "\n") cat("rho = ", rho, "\n") outmat <- cbind(bvect[1:nk], semat1, bvect[1:nk]/semat1, 2 * (1 - pnorm(abs(bvect[1:nk])/semat1)) ) colnames(outmat) <- c("Estimate", "Std. Error", "z-value", "Pr(>|z|)") rownames(outmat) <- colnames(xmat) print(outmat) cat("Unconditional Standard Errors", "\n") outmat <- cbind(bvect, semat2, bvect/semat2, 2 * (1-pnorm(abs(bvect)/semat2)) ) colnames(outmat) <- c("Estimate", "Std. Error", "z-value", "Pr(>|z|)") rownames(outmat) <- c(colnames(xmat), "rho") print(outmat) cat("Number of observations = ", n, "\n") out <- list(bvect, lvar, vmat1, vmat2) names(out) <- c("coef", "logl", "vmat1", "vmat2") return(out) } compare_time <- function(n, nneigh, beta = c(0.5, 1, -1), rho = 0.6, times = 1, seed = 1, n1 = 1500, n2 = 2500, n3 = 7500){ W <- generate_W(n, nneigh, seed = seed) lW <- mat2listw(W) X <- cbind(rep(1, n), rnorm(n), rnorm(n)) y <- sim_binomial_probit(W, X, beta, rho, model = "SAR", M = NULL, lambda = NULL, sigma2 = 1, ord_iW = 6, seed = seed) mydata <- data.frame(cbind(y, X[, -1])) colnames(mydata) <- c("y", "x", "z") slm <- y ~ x + z if(n < n1){ results <- microbenchmark( LGMM_sbinaryLGMM_os = sbinaryLGMM(slm, link = "probit", listw = W, data = mydata, nins = 2) , LGMM_spprobit = spprobit(slm, data = mydata, wmat = as.matrix(W), silent = TRUE), OS_GMM_sbinaryGMM = { probit <- glm(slm, family = binomial("probit"), data = mydata); sbinaryGMM(slm, link = "probit", listw = W, data = mydata, type = "onestep", winitial = "optimal", s.matrix = "iid", nins = 1, start = c(coef(probit), 0), method = "nr");}, OS_GMM_gmmprobit = { probit <- glm(slm, family = binomial("probit"), data = mydata); mcSgmm <- gmmprobit(slm, data = mydata, wmat = as.matrix(W), startb = coef(probit), startrho = 0, cvcrit = .001);}, OS_GMM_sbinaryGMM_approx = { probit <- glm(slm, family = binomial("probit"), data = mydata); sbinaryGMM(slm, link = "probit", listw = W, data = mydata, type = "onestep", winitial = "optimal", nins = 1, approximation = TRUE, pw = 6, reltol = 0.0001, start = c(coef(probit), 0));}, Bayes_sarprobit = sarprobit(slm, W = W, data = mydata, showProgress = FALSE), # ML_ProbitSpatialFit = ProbitSpatialFit(slm, W = W, data = mydata, DGP = "SAR", # method = "full-lik", varcov = "varcov"), condML_ProbitSpatialFit = ProbitSpatialFit(slm, W = W, data = mydata, DGP = "SAR", method = "conditional", varcov = "varcov"), inlaR = { n <- nrow(mydata); mydata$idx <- 1:n; rho.max <- 1; rho.min <- -1; mm <- model.matrix(slm, mydata); betaprec1 <- 0.001; Q.beta1 <- Diagonal(n = ncol(mm), x = 1); Q.beta1 <- betaprec1 * Q.beta1; hyper.slm <- list(prec = list(initial = log(1), fixed = TRUE), theta2 = list(prior = "logitbeta", param = c(1, 1))) inla(y ~ -1 + f(idx, model = "slm", args.slm = list(rho.min = rho.min, rho.max = rho.max, W = W, X = mm, Q.beta = Q.beta1), hyper = hyper.slm), data = mydata, family = "binomial", control.family = list(link = "probit"), control.compute = list(dic = TRUE, cpo = TRUE, config = TRUE)); }, times = times)} else if(n < n2) { results <- microbenchmark( LGMM_sbinaryLGMM_os = sbinaryLGMM(slm, link = "probit", listw = W, data = mydata, nins = 2) , LGMM_spprobit = spprobit(slm, data = mydata, wmat = as.matrix(W), silent = TRUE), OS_GMM_sbinaryGMM = {NA}, OS_GMM_gmmprobit = { probit <- glm(slm, family = binomial("probit"), data = mydata) mcSgmm <- gmmprobit(slm, data = mydata, wmat = as.matrix(W), startb = coef(probit), startrho = 0, cvcrit = .001);}, OS_GMM_sbinaryGMM_approx = { probit <- glm(slm, family = binomial("probit"), data = mydata) sbinaryGMM(slm, link = "probit", listw = W, data = mydata, type = "onestep", winitial = "optimal", nins = 1, approximation = TRUE, pw = 6, reltol = 0.0001, start = c(coef(probit), 0));}, Bayes_sarprobit = sarprobit(slm, W = W, data = mydata, showProgress = FALSE), # ML_ProbitSpatialFit = ProbitSpatialFit(slm, W = W, data = mydata, DGP = "SAR", # method = "full-lik", varcov = "varcov"), condML_ProbitSpatialFit = ProbitSpatialFit(slm, W = W, data = mydata, DGP = "SAR", method = "conditional", varcov = "varcov"), inlaR = { n <- nrow(mydata); mydata$idx <- 1:n; rho.max <- 1; rho.min <- -1; mm <- model.matrix(slm, mydata); betaprec1 <- 0.001; Q.beta1 <- Diagonal(n = ncol(mm), x = 1); Q.beta1 <- betaprec1 * Q.beta1; hyper.slm <- list(prec = list(initial = log(1), fixed = TRUE), theta2 = list(prior = "logitbeta", param = c(1, 1))); inla(y ~ -1 + f(idx, model = "slm", args.slm = list(rho.min = rho.min, rho.max = rho.max, W = W, X = mm, Q.beta = Q.beta1), hyper = hyper.slm), data = mydata, family = "binomial", control.family = list(link = "probit"), control.compute = list(dic = TRUE, cpo = TRUE, config = TRUE)); }, times = times) } else if (n < n3) { results <- microbenchmark( LGMM_sbinaryLGMM_os = sbinaryLGMM(slm, link = "probit", listw = W, data = mydata, nins = 2) , LGMM_spprobit = spprobit(slm, data = mydata, wmat = as.matrix(W), silent = TRUE), OS_GMM_sbinaryGMM = {NA}, OS_GMM_gmmprobit = {NA}, OS_GMM_sbinaryGMM_approx = { probit <- glm(slm, family = binomial("probit"), data = mydata) sbinaryGMM(slm, link = "probit", listw = W, data = mydata, type = "onestep", winitial = "optimal", nins = 1, approximation = TRUE, pw = 6, reltol = 0.0001, start = c(coef(probit), 0));}, Bayes_sarprobit = sarprobit(slm, W = W, data = mydata, showProgress = FALSE), condML_ProbitSpatialFit = ProbitSpatialFit(slm, W = W, data = mydata, DGP = "SAR", method = "conditional", varcov = "varcov"), inlaR = { n <- nrow(mydata); mydata$idx <- 1:n; rho.max <- 1; rho.min <- -1; mm <- model.matrix(slm, mydata); betaprec1 <- 0.001; Q.beta1 <- Diagonal(n = ncol(mm), x = 1); Q.beta1 <- betaprec1 * Q.beta1; hyper.slm <- list(prec = list(initial = log(1), fixed = TRUE), theta2 = list(prior = "logitbeta", param = c(1, 1))); inla(y ~ -1 + f(idx, model = "slm", args.slm = list(rho.min = rho.min, rho.max = rho.max, W = W, X = mm, Q.beta = Q.beta1), hyper = hyper.slm), data = mydata, family = "binomial", control.family = list(link = "probit"), control.compute = list(dic = TRUE, cpo = TRUE, config = TRUE)); }, times = times) } else { results <- microbenchmark( LGMM_sbinaryLGMM_os = sbinaryLGMM(slm, link = "probit", listw = W, data = mydata, nins = 2) , LGMM_spprobit = {NA;}, OS_GMM_sbinaryGMM = {NA;}, OS_GMM_gmmprobit = {NA;}, OS_GMM_sbinaryGMM_approx = { probit <- glm(slm, family = binomial("probit"), data = mydata) sbinaryGMM(slm, link = "probit", listw = W, data = mydata, type = "onestep", winitial = "optimal", nins = 1, approximation = TRUE, pw = 6, reltol = 0.0001, start = c(coef(probit), 0));}, Bayes_sarprobit = sarprobit(slm, W = W, data = mydata, showProgress = FALSE), condML_ProbitSpatialFit = ProbitSpatialFit(slm, W = W, data = mydata, DGP = "SAR", method = "conditional", varcov = "varcov"), inlaR = { n <- nrow(mydata); mydata$idx <- 1:n; rho.max <- 1; rho.min <- -1; mm <- model.matrix(slm, mydata); betaprec1 <- 0.001; Q.beta1 <- Diagonal(n = ncol(mm), x = 1); Q.beta1 <- betaprec1 * Q.beta1; hyper.slm <- list(prec = list(initial = log(1), fixed = TRUE), theta2 = list(prior = "logitbeta", param = c(1, 1))); inla(y ~ -1 + f(idx, model = "slm", args.slm = list(rho.min = rho.min, rho.max = rho.max, W = W, X = mm, Q.beta = Q.beta1), hyper = hyper.slm), data = mydata, family = "binomial", control.family = list(link = "probit"), control.compute = list(dic = TRUE, cpo = TRUE, config = TRUE)); }, times = times) } results <- data.frame(results) %>% group_by(expr) %>% summarize(mean = mean(time)/1000000) if(!(n < n1)) results$mean[results$expr %in% c("OS_GMM_sbinaryGMM")] <- NA if(!(n < n2)) results$mean[results$expr %in% c("OS_GMM_sbinaryGMM", "OS_GMM_gmmprobit")] <- NA if(!(n < n3)) results$mean[results$expr %in% c("OS_GMM_sbinaryGMM", "OS_GMM_gmmprobit", "LGMM_spprobit")] <- NA results$n = n results } myseed = 2512 result_300 <- compare_time(n = 300, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 2500, n3 = 50000) # result_500 <- compare_time(n = 500, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 2500, n3 = 50000) # result_750 <- compare_time(n = 750, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 2500, n3 = 50000) # result_1000 <- compare_time(n = 1000, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 2500, n3 = 50000) # result_1500 <- compare_time(n = 1500, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 2500, n2 = 7500, n3 = 50000) # result_2500 <- compare_time(n = 2500, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 2500, n2 = 7500, n3 = 50000) # result_5000 <- compare_time(n = 5000, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 7500, n3 = 50000) # result_7500 <- compare_time(n = 7500, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 2500, n3 = 50000) result_10000 <- compare_time(n = 10000, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 7500, n3 = 50000) # result_15000 <- compare_time(n = 15000, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 2500, n3 = 50000) result_25000 <- compare_time(n = 25000, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 7500, n3 = 50000) result_50000 <- compare_time(n = 50000, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 7500, n3 = 50000) result_100000 <- compare_time(n = 100000, nneigh = 4, beta = c(0.5, 1, -1.5), rho = 0.6, times = 1, seed = myseed, n1 = 1500, n2 = 7500, n3 = 50000) results <- rbind(result_300, result_500, result_750, result_1000, result_1500, result_2500, result_5000, result_7500, result_10000, result_15000, result_25000, result_50000, result_100000) results %>% ggplot( aes(x = n, y = mean, group = expr, color = expr)) + geom_line() + coord_trans(x = "sqrt", y = "sqrt") + scale_x_discrete(limits = c(unique(results$n)), name = "sqrt(n)") + labs(y = "sqrt(milliseconds)") + theme_classic() + geom_hline(yintercept = 1000 * 60) + annotate("text", x = 2500, y = 1000 * 70, label = "1 minute") + geom_hline(yintercept = 5000 * 60) + annotate("text", x = 2500, y = 5000 * 60 + 20000, label = "5 minutes") + geom_hline(yintercept = 15000 * 60) + annotate("text", x = 2500, y = 15000 * 60+35000, label = "15 minutes") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + labs(colour = "ESTIMATOR")