## 2.3 treated <- read.table(file = "nswre74_treated.txt") controls <- read.table(file = "cps3_controls.txt") nsw <- rbind(treated, controls) ue <- function(x) factor(ifelse(x > 0, 0, 1)) UE74 <- mapply(ue, nsw[, 8]) UE75 <- mapply(ue, nsw[, 9]) nsw[, 4:7] <- lapply(nsw[, 4:7], factor) lalonde <- cbind(nsw[, 1:9], UE74, UE75, nsw[, 10]) colnames(lalonde) <- c("treat", "age", "educ", "black", "hisp", "married", "nodegr", "re74", "re75", "u74", "u75", "re78") ## a readily prepared data.frame is provided with the package data("lalonde", package = "CovSel") lalonde[1:10, ] ## 4.1 library("CovSel") set.seed(9327529) n <- 1000 eta <- mvrnorm(n, rep(0, 2), diag(1, 2, 2)) Sigma <- diag(1, 10, 10) Sigma[7, 8] <- Sigma[8, 7] <- 0.5 X <- mvrnorm(n, rep(0, 10), Sigma) y0 <- 2 + 2 * X[, 1] + 2 * X[, 2] + 2 * X[, 5] + 2 * X[, 6] + 2 * X[, 8] + eta[, 1] y1 <- 4 + 2 * X[, 1] + 2 * X[, 2] + 2 * X[, 5] + 2 * X[, 6] + 2 * X[, 8] + eta[, 2] e <- 1/(1 + exp(-0.5 * X[, 1] - 0.5 * X[, 2] - 0.5 * X[, 3] - 0.5 * X[, 4] - 0.5 * X[, 7])) T <- rbinom(n, 1, e) y <- y1 * T + y0 * (1 - T) datc <- data.frame(x1 = X[, 1], x2 = X[, 2], x3 = X[, 3], x4 = X[, 4], x5 = X[, 5], x6 = X[, 6], x7 = X[, 7], x8 = X[, 8], x9 = X[, 9], x10 = X[, 10], y0, y1, y, T) ans <- cov.sel(T = datc$T, Y = datc$y, X = datc[, 1:10], type = "dr", alg = 3, scope = NULL, alpha = 0.1, trace = 0) ans summary(ans) ans <- cov.sel(T = datc$T, Y = datc$y, X = datc[, 1:10], type = "dr", alg = 3, scope = NULL, alpha = 0.3, trace = 0, method = "save") ans ## 4.2 library(bindata) set.seed(9327529) n <- 500 x1 <- rbinom(n, 1, prob = 0.5) x25 <- rmvbin(n, bincorr = cbind(c(1, 0.7), c(0.7, 1)), margprob = c(0.5, 0.5)) x34 <- rmvbin(n, bincorr = cbind(c(1, 0.7), c(0.7, 1)), margprob = c(0.5, 0.5)) x2 <- x25[, 1] x3 <- x34[, 1] x4 <- x34[, 2] x5 <- x25[, 2] x6 <- rbinom(n, 1, prob = 0.5) x7 <- rbinom(n, 1, prob = 0.5) x8 <- rbinom(n, 1, prob = 0.5) e0 <- rnorm(n) e1 <- rnorm(n) p <- 1/(1 + exp(3 - 1.5 * x1 - 1.5 * x2 - 1.5 * x3 - 0.1 * x4 - 0.1 * x5 - 1.3 * x8)) T <- rbinom(n, 1, prob = p) y0 <- 4 + 2 * x1 + 3 * x4 + 5 * x5 + 2 * x6 + e0 y1 <- 2 + 2 * x1 + 3 * x4 + 5 * x5 + 2 * x6 + e1 y <- y1 * T + y0 * (1 - T) datf <- data.frame(x1, x2, x3, x4, x5, x6, x7, x8, y0, y1, y, T) datf[, 1:8] <- lapply(datf[, 1:8], factor) datf[, 12] <- as.numeric(datf[, 12]) ans <- cov.sel(T = datf$T, Y = datf$y, X = datf[, 1:8], type = "np", alpha = 0.1, alg = 3, scope = NULL, thru = 0.5, thro = 0.25, thrc = 100) ans ## 4.3 set.seed(9327529) n <- 500 x1 <- rnorm(n, mean = 0, sd = 1) x2 <- rbinom(n, 1, prob = 0.5) x25 <- rmvbin(n, bincorr = cbind(c(1, 0.7), c(0.7, 1)), margprob = c(0.5, 0.5)) x2 <- x25[, 1] Sigma <- matrix(c(1, 0.5, 0.5, 1), ncol = 2) x34 <- mvrnorm(n, rep(0, 2), Sigma) x3 <- x34[, 1] x4 <- x34[, 2] x5 <- x25[, 2] x6 <- rbinom(n, 1, prob = 0.5) x7 <- rnorm(n, mean = 0, sd = 1) x8 <- rbinom(n, 1, prob = 0.5) e0 <- rnorm(n) e1 <- rnorm(n) p <- 1/(1 + exp(3 - 1.2 * x1 - 3.7 * x2 - 1.5 * x3 - 0.3 * x4 - 0.3 * x5 - 1.9 * x8)) T <- rbinom(n, 1, prob = p) y0 <- 4 + 2 * x1 + 3 * x4 + 5 * x5 + 2 * x6 + e0 y1 <- 2 + 2 * x1 + 3 * x4 + 5 * x5 + 2 * x6 + e1 y <- y1 * T + y0 * (1 - T) datfc <- data.frame(x1, x2, x3, x4, x5, x6, x7, x8, y0, y1, y, T) datfc[, c(2, 5, 6, 8)] <- lapply(datfc[, c(2, 5, 6, 8)], factor) datfc[, 12] <- as.numeric(datfc[, 12]) ans <- cov.sel(T = datfc$T, Y = datfc$y, X = datfc[, 1:8], type = "np", alpha = 0.1, alg = 3, scope = NULL, thru = 0.5, thro = 0.25, thrc = 100) ans ## 4.4 cs <- cov.sel(T = lalonde[, 1], Y = lalonde[, 12], X = lalonde[, 2:11], type = "np", alg = 1, thru = 0.5, thro = 0.25, thrc = 100) cs cs <- cov.sel(T = lalonde[, 1], Y = lalonde[, 12], X = lalonde[, 2:11], type = "np", alg = 2, thru = 0.5, thro = 0.25, thrc = 100) cs