library("NonpModelCheck") set.seed(1) runs <- 1000 p <- 9 # p is the number of covariate values included R n <- 100 d <- 12 result <- c(0, 0, 0) for (index in 1:runs) { for (theta in 0:2) { U <- matrix(0, n, d) for (i in 1:d) { U[, i] <- rnorm(n) } Y <- theta/4 * (sin(2 * U[, 2]) + 2 * exp(-16 * U[, 2]^2)) + sqrt(abs(U[, 5] * (1 - U[, 5]))) * sin(2 * pi * 1.05/(U[, 5] + 0.05)) + rnorm(n, 0, 0.5) if (npmodelcheck(U, Y, 2)$p_value < 0.05) { result[theta + 1] <- result[theta + 1] + 1 } } } result/runs set.seed(1) runs <- 1000 p <- 9 # p is the number of covariate values included R n <- 100 d <- 12 result <- c(0, 0, 0) for (index in 1:runs) { for (theta in 0:2) { U <- matrix(0, n, d) for (i in 1:d) { U[, i] <- rnorm(n) } Y <- theta/4 * (sin(2 * U[, 2]) + 2 * exp(-16 * U[, 2]^2)) + sqrt(abs(U[, 5] * (1 - U[, 5]))) * sin(2 * pi * 1.05/(U[, 5] + 0.05)) + rnorm(n, 0, 0.5) if (npmodelcheck(U, Y, 2, dim.red = 0)$p_value < 0.05) { result[theta + 1] <- result[theta + 1] + 1 } } } result/runs #---------------------------------------------------------------------------------------------------------------------------------------- set.seed(1) n <- 50 d <- 4 U <- matrix(0, n, d) for (i in 1:d) { U[, i] <- rnorm(n) } Y <- abs(U[, 3])^2/3 + rnorm(n, 0, 0.5) npmodelcheck(U, Y, 3, p = 3) npmodelcheck(U, Y, 3, p = 7) npmodelcheck(U, Y, 3, p = 15) #---------------------------------------------------------------------------------------------------------------------------------------- library("MASS") set.seed(1) runs <- 1000 n <- 100 d <- 6 result <- 0 sigma <- matrix(c(1, 0, 0.55, 0, 0, 0, 0, 1, 0, 0.6, 0.7, 0, 0.55, 0, 1, 0, 0, 0, 0, 0.6, 0, 1, 0.6, 0, 0, 0.7, 0, 0.6, 1, 0, 0, 0, 0, 0, 0, 1), 6, 6) for (index in 1:runs) { X <- mvrnorm(n, rep(0, 6), sigma) Y <- (X[, 2] + X[, 4] + X[, 5])/4 + rnorm(n) if (npmodelcheck(X, Y, ind_test = c(2, 4, 5))$p_value < 0.05) { result <- result + 1 } } result/runs #---------------------------------------------------------------------------------------------------------------------------------------- ## Results for GLRT library("gam") get_quantile_GLRT <- function(Y, X) { null <- gam(Y ~ lo(X[, 1]) + lo(X[, 3]) + lo(X[, 6])) unrestr <- gam(Y ~ lo(X[, 1]) + lo(X[, 2]) + lo(X[, 3]) + lo(X[, 4]) + lo(X[, 5]) + lo(X[, 6])) RSS0 <- sum((null$residuals)^2) RSS1 <- sum((unrestr$residuals)^2) lambda <- (n/2) * log(RSS0/RSS1) lambda_star <- 0 Y_star <- 0 for (ind_MC in 1:400) { for (i in 1:n) # form a conditional bootstrap sample Y_star[i] <- as.numeric(unrestr$smooth[, 1])[i] + sample(unrestr$residuals, 1) + as.numeric(unrestr$smooth[, 3])[i] + as.numeric(unrestr$smooth[, 6])[i] null_star <- gam(Y_star ~ lo(X[, 1]) + lo(X[, 3]) + lo(X[, 6])) unrestr_star <- gam(Y_star ~ lo(X[, 1]) + lo(X[, 2]) + lo(X[, 3]) + lo(X[, 4]) + lo(X[, 5]) + lo(X[, 6])) RSS0_star <- sum((null_star$residuals)^2) RSS1_star <- sum((unrestr_star$residuals)^2) lambda_star[ind_MC] <- (n/2) * log(RSS0_star/RSS1_star) } return(quantile(lambda_star, .95)) } set.seed(1) runs <- 1000 rej_Fan <- 0 for (index in 1:runs) { X <- matrix(0, n, d) Sigma <- diag(6) Sigma[1, 3] <- .55 Sigma[2, 4] <- .6 Sigma[2, 5] <- .7 Sigma[3, 1] <- .55 Sigma[4, 2] <- .6 Sigma[4, 5] <- .6 Sigma[5, 2] <- .7 Sigma[5,4] <- .6 X <- mvrnorm(n, mu = c(0, 0, 0, 0, 0, 0), Sigma) Y <- (X[, 2] + X[, 4] + X[, 5])/4 + rnorm(n) null <- gam(Y ~ lo(X[, 1]) + lo(X[, 3]) + lo(X[, 6])) unrestr <- gam(Y ~ lo(X[, 1]) + lo(X[, 2]) + lo(X[, 3]) + lo(X[, 4]) + lo(X[, 5]) + lo(X[, 6])) RSS0 <- sum((null$residuals)^2) RSS1 <- sum((unrestr$residuals)^2) lambda <- (n/2) * log(RSS0/RSS1) quan <- get_quantile_GLRT(Y, X) if (lambda > quan) rej_Fan <- rej_Fan + 1 } cat("\n runs: ", runs, "\n rej_Fan/runs: ", rej_Fan/runs, "\n") #---------------------------------------------------------------------------------------------------------------------------------------- library("mnormt") set.seed(1) n <- 110 p <- 9 d <- 25 Cov_Matrix <- diag(d) X <- rmnorm(n, rep(0, d), Cov_Matrix) beta <- c(3, 1.5, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0) Y <- as.numeric(X %*% beta + rnorm(n, 0, 3)) npvarselec(X, Y, method = "backward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") #---------------------------------------------------------------------------------------------------------------------------------------- library("cosso") library("mnormt") ### TABLE 1 Independent set.seed(1) runs <- 100 n <- 110 d <- 25 Cov_Matrix <- diag(d) result_Np_back <- c(0, 0) result_Np_for <- c(0, 0) result_Np_for2 <- c(0, 0) result_Cosso <- c(0, 0) for (index in 1:runs) { X <- rmnorm(n, rep(0, d), Cov_Matrix) beta <- c(3, 1.5, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0) Y <- as.numeric(X%*%beta + rnorm(n, 0, 3)) fit <- npvarselec(X, Y, method = "backward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_back[1] <- result_Np_back[1] + length(intersect(fit$selected, c(1, 2, 5, 7, 17))) result_Np_back[2] <- result_Np_back[2] + length(intersect(fit$selected, c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) fit <- npvarselec(X, Y, method = "forward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for[1] <- result_Np_for[1] + length(intersect(fit$selected, c(1, 2, 5, 7, 17))) result_Np_for[2] <- result_Np_for[2] + length(intersect(fit$selected, c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) fit <- npvarselec(X, Y, method = "forward2", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for2[1] <- result_Np_for2[1] + length(intersect(fit$selected, c(1, 2, 5, 7, 17))) result_Np_for2[2] <- result_Np_for2[2] + length(intersect(fit$selected, c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) fit <- cosso(X, Y) h <- tune.cosso(fit, plot.it = FALSE)$OptM fit2 <- predict.cosso(fit, M = h, type = "c")$theta result_Cosso[1] <- result_Cosso[1] + length(intersect(which(fit2 != 0), c(1, 2, 5, 7, 17))) result_Cosso[2] <- result_Cosso[2] + length(intersect(which(fit2 != 0), c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) } cat("\n\n n = ", runs, "\n Np_back correct = ", result_Np_back[1]/runs, "\n Np_back incorrect = ", result_Np_back[2]/runs, "\n Np_for correct = ", result_Np_for[1]/runs, "\n Np_for incorrect = ", result_Np_for[2]/runs, "\n Np_for2 correct = ", result_Np_for2[1]/runs, "\n Np_for2 incorrect = ", result_Np_for2[2]/runs, "\n COSSO correct = ", result_Cosso[1]/runs, "\n Cosso incorrect = ", result_Cosso[2]/runs, "\n") ### TABLE 1 Dependent set.seed(1) runs <- 100 n <- 110 d <- 25 Cov_Matrix <- matrix(0, d, d) cols <- seq(1, d) for (i in 1:d) Cov_Matrix[, i] <- 0.5^(abs(i - cols)) result_Np_back <- c(0, 0) result_Np_for <- c(0, 0) result_Np_for2 <- c(0, 0) result_Cosso <- c(0, 0) for (index in 1:runs) { X <- rmnorm(n, rep(0, d), Cov_Matrix) beta <- c(3, 1.5, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0) Y <- as.numeric(X%*%beta + rnorm(n, 0, 3)) fit <- npvarselec(X, Y, method = "backward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_back[1] <- result_Np_back[1] + length(intersect(fit$selected, c(1, 2, 5, 7, 17))) result_Np_back[2] <- result_Np_back[2] + length(intersect(fit$selected, c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) fit <- npvarselec(X, Y, method = "forward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for[1] <- result_Np_for[1] + length(intersect(fit$selected, c(1, 2, 5, 7, 17))) result_Np_for[2] <- result_Np_for[2] + length(intersect(fit$selected, c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) fit <- npvarselec(X, Y, method = "forward2", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for2[1] <- result_Np_for2[1] + length(intersect(fit$selected, c(1, 2, 5, 7, 17))) result_Np_for2[2] <- result_Np_for2[2] + length(intersect(fit$selected, c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) fit <- cosso(X, Y) h <- tune.cosso(fit, plot.it = FALSE)$OptM fit2 <- predict.cosso(fit, M = h, type = "c")$theta result_Cosso[1] <- result_Cosso[1] + length(intersect(which(fit2 != 0), c(1, 2, 5, 7, 17))) result_Cosso[2] <- result_Cosso[2] + length(intersect(which(fit2 != 0), c(3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25))) } cat("\n\n n = ", runs, "\n Np_back correct = ", result_Np_back[1]/runs, "\n Np_back incorrect = ", result_Np_back[2]/runs, "\n Np_for correct = ", result_Np_for[1]/runs, "\n Np_for incorrect = ", result_Np_for[2]/runs, "\n Np_for2 correct = ", result_Np_for2[1]/runs, "\n Np_for2 incorrect = ", result_Np_for2[2]/runs, "\n COSSO correct = ", result_Cosso[1]/runs, "\n Cosso incorrect = ", result_Cosso[2]/runs, "\n") ### TABLE 2 g1 set.seed(1) runs <- 100 n <- 40 d <- 8 Cov_Matrix <- diag(d) result_Np_back <- c(0, 0) result_Np_for <- c(0, 0) result_Np_for2 <- c(0, 0) result_Cosso <- c(0, 0) for (index in 1:runs) { X <- rmnorm(n, rep(0, d), Cov_Matrix) Y <- sin(pi * X[, 1]) + rnorm(n, 0, .3) fit <- npvarselec(X, Y, method = "backward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_back[1] <- result_Np_back[1] + length(intersect(fit$selected, 1)) result_Np_back[2] <- result_Np_back[2] + length(intersect(fit$selected, c(2, 3, 4, 5, 6, 7, 8))) fit <- npvarselec(X, Y, method = "forward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for[1] <- result_Np_for[1] + length(intersect(fit$selected, 1)) result_Np_for[2] <- result_Np_for[2] + length(intersect(fit$selected, c(2, 3, 4, 5, 6, 7, 8))) fit <- npvarselec(X, Y, method = "forward2", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for2[1] <- result_Np_for2[1] + length(intersect(fit$selected, 1)) result_Np_for2[2] <- result_Np_for2[2] + length(intersect(fit$selected, c(2, 3, 4, 5, 6, 7, 8))) fit <- cosso(X, Y) tryCatch({ h <- tune.cosso(fit, plot.it = FALSE)$OptM }, error = function(e) {}) fit2 <- predict.cosso(fit, M = h, type = "c")$theta result_Cosso[1] <- result_Cosso[1] + length(intersect(which(fit2 != 0), 1)) result_Cosso[2] <- result_Cosso[2] + length(intersect(which(fit2 != 0), c(2, 3, 4, 5, 6, 7, 8))) } cat("\n\n n = ", runs, "\n Np_back correct = ", result_Np_back[1]/runs, "\n Np_back incorrect = ", result_Np_back[2]/runs, "\n Np_for correct = ", result_Np_for[1]/runs, "\n Np_for incorrect = ", result_Np_for[2]/runs, "\n Np_for2 correct = ", result_Np_for2[1]/runs, "\n Np_for2 incorrect = ", result_Np_for2[2]/runs, "\n COSSO correct = ", result_Cosso[1]/runs, "\n Cosso incorrect = ", result_Cosso[2]/runs, "\n") ### TABLE 2 g2 set.seed(1) runs <- 100 n <- 40 d <- 8 Cov_Matrix <- diag(d) result_Np_back <- c(0, 0) result_Np_for <- c(0, 0) result_Np_for2 <- c(0, 0) result_Cosso <- c(0, 0) for (index in 1:runs) { X <- rmnorm(n, rep(0, d), Cov_Matrix) Y <- sin((3/4)*pi*X[, 1]) - 3*pnorm(-abs(X[, 5])^3) + rnorm(n, 0, .3) fit <- npvarselec(X, Y, method = "backward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_back[1] <- result_Np_back[1] + length(intersect(fit$selected, c(1, 5))) result_Np_back[2] <- result_Np_back[2] + length(intersect(fit$selected, c(2, 3, 4, 6, 7, 8))) fit <- npvarselec(X, Y, method = "forward", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for[1] <- result_Np_for[1] + length(intersect(fit$selected, c(1, 5))) result_Np_for[2] <- result_Np_for[2] + length(intersect(fit$selected, c(2, 3, 4, 6, 7, 8))) fit <- npvarselec(X, Y, method = "forward2", p = 9, degree.pol = 1, kernel.type = "epanech", bandwidth = "CV") result_Np_for2[1] <- result_Np_for2[1] + length(intersect(fit$selected, c(1, 5))) result_Np_for2[2] <- result_Np_for2[2] + length(intersect(fit$selected, c(2, 3, 4, 6, 7, 8))) tryCatch({ fit <- cosso(X, Y) h <- tune.cosso(fit, plot.it = FALSE)$OptM fit2 <- predict.cosso(fit, M = h, type = "c")$theta }, error = function(e) {}) result_Cosso[1] <- result_Cosso[1] + length(intersect(which(fit2 != 0), c(1, 5))) result_Cosso[2] <- result_Cosso[2] + length(intersect(which(fit2 != 0), c(2, 3, 4, 6, 7, 8))) } cat("\n\n n = ", runs, "\n Np_back correct = ", result_Np_back[1]/runs, "\n Np_back incorrect = ", result_Np_back[2]/runs, "\n Np_for correct = ", result_Np_for[1]/runs, "\n Np_for incorrect = ", result_Np_for[2]/runs, "\n Np_for2 correct = ", result_Np_for2[1]/runs, "\n Np_for2 incorrect = ", result_Np_for2[2]/runs, "\n COSSO correct = ", result_Cosso[1]/runs, "\n Cosso incorrect = ", result_Cosso[2]/runs, "\n") #---------------------------------------------------------------------------------------------------------------------------------------- library("supclust") data <- read.table("prostate.txt") Y <- as.numeric(data[, 1] == "normal") X <- matrix(as.matrix(data[, (2:dim(data)[2])]), ncol = ncol(data[, (2:dim(data)[2])]), dimnames = NULL) n <- nrow(X) d <- 60 # Note that despite setting a random seed wilma() does not return # identical results and results subsequently obtained using this code # might differ from the results shown in the manuscript. set.seed(1) fit <- wilma(X, Y, d, once.per.clust = TRUE) groups <- vector("list", d) for (i in 1:d) { groups[[i]] <- fit$clist[[i]] } group.npvarselec(X, Y, groups, method = "forward2", fitSPC = TRUE) #---------------------------------------------------------------------------------------------------------------------------------------- set.seed(1) X <- runif(180, -4, 4) Y <- sin(pi * X) * X^2 + rnorm(180, 0, 1) fit <- localpoly.reg(X, Y, degree.pol = 2) fit2 <- loess(Y ~ X) fit3 <- loess(Y ~ X, span = 0.2) plot(X, Y) lines(X[order(X)], fit$predicted[order(X)], col = 2) lines(X[order(X)], fit2$fitted[order(X)], col = 1) lines(X[order(X)], fit3$fitted[order(X)], col = 4) library("sm") set.seed(11) X <- runif(150, -3, 10) Y <- sqrt(abs(X * (7 - X))) * (cos(1/6 * pi * X)) + rnorm(150, 0, 0.2) h <- h.select(X, Y) sm.regression(X, Y, h = h, col = 4) fit <- localpoly.reg(X, Y, kernel.type = "box") lines(X[order(X)], fit$predicted[order(X)]) fit2 <- localpoly.reg(X, Y, kernel.type = "epanech") lines(X[order(X)], fit2$predicted[order(X)], col = 2) #---------------------------------------------------------------------------------------------------------------------------------------- library("KernSmooth") library("locfit") set.seed(123456789) X <- runif(150, 2, 12) Y <- exp(1/X) * (1 - 1/X)/sqrt(1 + abs(8 - X))/4.5 + rnorm(150, 0, 0.01) fit <- localpoly.reg(X, Y, bandwidth = "CV", degree.pol = 2, kernel.type = "triweight") fit2 <- locpoly(X, Y, bandwidth = fit$bandwidth, degree = 2) fit3 <- locfit(Y ~ lp(X, deg = 2)) plot(fit3, get.data = TRUE) lines(fit2$x, fit2$y, col = 4) lines(X[order(X)], fit$predicted[order(X)], col = 2) library("locpol") set.seed(123) X <- runif(150, 2, 12) Y <- exp(1/X)/sqrt(2 + sqrt(abs(pi * (6 - X)))) + rnorm(150, 0, 0.01) d <- data.frame(X) d$Y <- Y fit <- locpol(Y ~ X, d) plot(X, Y) lines(fit$xeval, fit$lpFit[, 2]) fit2 <- localpoly.reg(X, Y, bandwidth = "GCV", degree.pol = 1) lines(X[order(X)], fit2$predicted[order(X)], col = 2) set.seed(123) X <- c(runif(30, 0, 15), runif(100, 15, 25), runif(50, 25, 55), runif(300, 55, 230)) Y <- c(rnorm(30, 0, 0.1), sin(1/2 * pi * X[31:130]) + rnorm(100, 0, 0.3), rnorm(50, 0, 0.15), cos(1/25 * pi * X[181:480]) + rnorm(300, 0, 0.2)) fit <- localpoly.reg(X, Y, bandwidth = "Adp", degree.pol = 1) h <- h.select(X, Y, method = "cv") fit2 <- sm.regression(X, Y, h = h) fit3 <- locfit(Y ~ lp(X, deg = 1, nn = 0.05)) plot(fit3, get.data = TRUE) lines(fit2$eval.points, fit2$estimate, col = 4) lines(fit$points, fit$predicted, col = 2) #---------------------------------------------------------------------------------------------------------------------------------------- set.seed(1111) X <- matrix(0, 200, 2) X[, 1] <- runif(200, -3, 3) X[, 2] <- runif(200, -3, 3) Y <- 4 * sin(pi * X[, 1]) + 4 * sin(pi * X[, 2]) + rnorm(200, 0, 0.3) par(mfrow = c(1, 2)) plot3d.localpoly.reg(X, Y, bandwidth = "CV2", degree.pol = 0, gridsurface = 30, main = "Estimated function") f <- function(x1, x2) { 4 * sin(pi * x1) + 4 * sin(pi * x2) } persp(seq(min(X[, 1]), max(X[, 1]), length = 30), seq(min(X[, 2]), max(X[, 2]), length = 30), outer(seq(min(X[, 1]), max(X[, 1]), length = 30), seq(min(X[, 2]), max(X[, 2]), length = 30), f), theta = 30, phi = 30, expand = 0.5, col = "lightblue", ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "X_1", ylab = "X_2", zlab = "Y", main = "Original function")