# Section "Using TSCI" # Example 1 # Loading and preprocessing the dataset used in this section: ## ----loading_card_Data, echo = TRUE, eval = TRUE------------------------------------------ library("TSCI") library("ivmodel") data("card.data", package = "ivmodel") card.data$fatheduc_na <- as.numeric(is.na(card.data$fatheduc)) card.data$motheduc_na <- as.numeric(is.na(card.data$motheduc)) card.data$fatheduc[is.na(card.data$fatheduc)] <- mean(card.data$fatheduc, na.rm = T) card.data$motheduc[is.na(card.data$motheduc)] <- mean(card.data$motheduc, na.rm = T) card.data$parenteduc <- card.data$fatheduc * card.data$motheduc # Violation space creation: ## ----violation space creation 1, echo = TRUE, eval = TRUE--------------------------------- vio_space <- with(card.data, list(nearc4, nearc4 * cbind(exper, expersq, black, south, smsa, smsa66, reg661, reg662, reg663, reg664, reg665, reg666, reg667, reg668))) # Apply TSCI package to data: ## ----tsci fit 1 evaluation, echo = TRUE, eval = TRUE-------------------------------------- RNGkind("L'Ecuyer-CMRG") set.seed(10) Xname <- c("exper", "expersq", "black", "south", "smsa", "smsa66", "reg661", "reg662", "reg663", "reg664", "reg665", "reg666", "reg667", "reg668") fit_boosting <- tsci_boosting(Y = card.data$lwage, D = card.data$educ, Z = card.data$nearc4, X = card.data[, Xname], vio_space = vio_space, nsplits = 5, nrounds = 15, eta = 0.6, max_depth = 6, nfolds = 1, B = 10, parallel = "snow", ncores = 5) # Present summary output: ## ----summary 1, echo = TRUE, eval = TRUE-------------------------------------------------- summary(fit_boosting) # Present extended summary output: ## ----summary 2, echo = TRUE, eval = TRUE-------------------------------------------------- summary(fit_boosting, extended_output = TRUE) # User-specified covariates: ## ----exampleWspline, echo = TRUE, eval = TRUE--------------------------------------------- suppressPackageStartupMessages(library("fda")) X <- card.data[, c("exper", "expersq")] head(X, 4) nknots <- 2 norder <- 3 nbasis <- nknots + norder - 2 W <- matrix(ncol = NCOL(X) * nbasis, nrow = NROW(X)) for (j in seq_len(NCOL(X))) { knots <- quantile(unique(X[, j]), seq(0, 1, length = nknots)) basis <- create.bspline.basis(rangeval = range(knots), breaks = knots, norder = norder) W[, c(((j - 1) * nbasis + 1):(j * nbasis))] <- eval.basis(X[, j], basis) } head(W, 4) # Subsection "Specifying an individual hat matrix" # Preprocess data and violation space creation: ## ----data generating 2, echo = TRUE, eval = TRUE------------------------------------------- card.data0 <- subset(card.data, nearc4 == 0) fit_college_absence <- lm(educ ~ reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668 + smsa66 + age + black + momdad14 + sinmom14 + step14 + fatheduc + fatheduc_na + motheduc + motheduc_na + parenteduc, data = card.data0) card.data$family_background <- predict(fit_college_absence, newdata = card.data) Z <- with(card.data, cbind(nearc4, nearc4 * family_background)) vio_space <- list(card.data$nearc4) # Specifying an individual hat matrix: ## ----hat matrix 1, echo = TRUE, eval = TRUE------------------------------------------------ Xname <- c("exper", "expersq", "black", "south", "smsa", "smsa66", "reg661", "reg662", "reg663", "reg664", "reg665", "reg666", "reg667", "reg668", "fatheduc", "fatheduc_na", "motheduc", "motheduc_na", "parenteduc", "momdad14", "sinmom14", "step14") X <- card.data[, Xname] A <- cbind(1, Z, as.matrix(X)) omega <- A %*% chol2inv(chol(t(A) %*% A)) %*% t(A) # Applying package TSCI ## ----tsci fit 3, echo = TRUE, eval = TRUE-------------------------------------------------- fit_secondstage <- tsci_secondstage(Y = card.data$lwage, D = card.data$educ, Z = Z, W = X, vio_space = vio_space, weight = omega) summary(fit_secondstage, extended_output = TRUE)