## ## Best Subset Selection ## subset_selection <- function(A, b, k, beta_lb = -1000, beta_ub = 1000, count_intercept = FALSE, solver = "auto", ...) { control <- list(...) Q <- 2 * t(A) %*% A L <- -2 * t(b) %*% A x <- OP(objective = Q_objective(Q = Q, L = L), bounds = V_bound(li = seq_len(nrow(Q)), lb = rep.int(-Inf, nrow(Q)))) y <- ROI_reformulate(x, "socp") n <- length(objective(x)) x <- OP(objective = c(terms(objective(y))$L, double(n))) L <- constraints(y)$L L <- cbind(L, simple_triplet_zero_matrix(nrow(L), n)) if (length(beta_lb) == 1L) beta_lb <- rep.int(beta_lb, n) if (length(beta_lb) == 1L) beta_ub <- rep.int(beta_ub, n) LB <- cbind(simple_triplet_diag_matrix(-1, n), simple_triplet_zero_matrix(n, 1), simple_triplet_diag_matrix(beta_lb, n)) UB <- cbind(simple_triplet_diag_matrix(1, n), simple_triplet_zero_matrix(n, 1), simple_triplet_diag_matrix(-beta_ub, n)) if (count_intercept) { SUM <- cbind(simple_triplet_zero_matrix(1, n+1), matrix(1, 1, n)) } else { SUM <- cbind(simple_triplet_zero_matrix(1, n+2), matrix(1, 1, n-1)) } constraints(x) <- C_constraint(rbind(L, LB, UB, SUM), c(constraints(y)$cones, K_lin(n), K_lin(n), K_lin(1L)), c(constraints(y)$rhs, double(n), double(n), k)) types(x) <- c(rep.int("C", length(objective(y))), rep.int("B", n)) len <- length(objective(x)) bounds(x) <- V_bound(li = seq_len(len), lb = rep.int(-Inf, len)) maximum(x) <- maximum(y) if (isTRUE(control$dry_run)) return(x) z <- ROI_solve(x) head(solution(z), n) } ## ## SON Clustering ## convex_clust <- function(A, gamma, solver = "auto", control = list()) { m <- nrow(A) n <- ncol(A) ncombn <- (m * (m-1) / 2) k <- ncombn * (n+1) obj <- c(rep.int(0, n * m), 1/2, rep.int(gamma, ncombn)) b <- c(1, -1, 2*as.double(A), rep.int(0, k)) L <- simple_triplet_zero_matrix(nrow = 2 + n * m + k, ncol = n * m + 1 + ncombn) L[1, n*m+1] <- -1 L[2, n*m+1] <- -1 L[2+seq_len(n * m), seq_len(n * m)] <- diag(2, n * m) ko <- combn(seq_len(m), 2) M <- matrix(seq_len(n*m), m, n, byrow = FALSE) irow <- n * m + 3 cones <- K_soc(n*m+2) for (i in seq_len(ncol(ko))) { L[irow, n * m + 1 + i] <- -1 cones <- c(cones, K_soc(n+1)) irow <- irow + 1 for (j in seq_len(n)) { L[irow, M[ko[,i], j]] <- c(-1, 1) irow <- irow + 1 } } op <- OP(objective = L_objective(obj), constraints = C_constraint(L = L, cones = cones, rhs = b), bounds = V_bound(ld = -Inf, nobj = length(obj))) if (isTRUE(control$dry_run)) return(op) x <- ROI_solve(op, solver, control) matrix(x$solution[seq_len(n*m)], m, n) } ## ## Graphical Lasso ## index_to_vech <- function(i, j, n) { ind_to_vech <- function(i, j, n) { if (j > i) return(NA) (j - 1) * n + i - (j - 1) * j / 2 } unlist(mapply(ind_to_vech, i, j, MoreArgs = list(n = n), SIMPLIFY = FALSE, USE.NAMES = FALSE), recursive = FALSE, use.names = FALSE) } ROI_glasso <- function(s, rho, solver = "auto", ...) { stopifnot(nrow(s) > 1) control <- list(...) stm <- simple_triplet_matrix n <- nrow(s); nv <- (n + 1) * n / 2; nij <- choose(n, 2) ndzx <- (2 * n + 1) * 2 * n / 2 seqn <- seq_len(n); seqnv <- seq_len(nv); seqnij <- seq_len(nij) obj <- c(as.vector(vech(s + s * lower.tri(s))), double(ndzx)) A <- simple_triplet_diag_matrix(-1, nv + ndzx) cones <- K_psd(c(nv, ndzx)) ij <- combn(seqn, 2) k <- index_to_vech(n + ij[1,], ij[2,], 2 * n) Z_UPPER <- stm(seqnij, nv + k, rep.int(1, nij), nrow = nij, ncol = nv + ndzx) cones <- c(cones, K_zero(nij)) k <- index_to_vech(ij[2,], ij[1,], 2 * n) D_DIAG <- stm(seqnij, nv + k, rep.int(1, nij), nrow = nij, ncol = nv + ndzx) cones <- c(cones, K_zero(nij)) kd <- index_to_vech(seqn, seqn, 2 * n) kz <- index_to_vech(n + seqn, seqn, 2 * n) EQ_DIAG <- stm(c(seqn, seqn), nv + c(kd, kz), c(rep.int(-1, n), rep.int(1, n)), nrow = n, ncol = nv + ndzx) cones <- c(cones, K_zero(n)) A <- rbind(A, rbind(Z_UPPER, D_DIAG, EQ_DIAG)) EQ_X <- stm(c(seqnv, seqnv), c(seqnv, ndzx + seqnv), c(rep.int(-1, nv), rep.int(1, nv)), nrow = nv, ncol = ncol(A)) cones <- c(cones, K_zero(nrow(EQ_X))) A <- rbind(A, EQ_X) rhs <- double(nrow(A)) obj <- c(obj, rep.int(-1, n)) j <- nv + kd LOG <- stm(c(3 * seqn, 3 * seqn - 2), c(j, ncol(A) + seqn), rep.int(-1, 2 * n), nrow = 3 * n, ncol = ncol(A) + n) A <- rbind(cbind(A, simple_triplet_zero_matrix(nrow(A), n)), LOG) cones <- c(cones, K_expp(n)) rhs <- c(rhs, rep.int(c(0, 1, 0), n)) rho_matrix <- matrix(rho, n, n) obj <- c(obj, vech(rho_matrix + rho_matrix * lower.tri(rho_matrix))) NORM <- stm(i = c(seq_len(2 * nv), seq_len(2 * nv)), j = c(seqnv, seqnv, ncol(A) + seqnv, ncol(A) + seqnv), v = c(rep.int(1, nv), rep.int(-1, 3 * nv)), nrow = 2 * nv, ncol = ncol(A) + nv) A <- rbind(cbind(A, simple_triplet_zero_matrix(nrow(A), nv)), NORM) cones <- c(cones, K_lin(2 * nv)) rhs <- c(rhs, double(2 * nv)) model <- OP(objective = L_objective(obj), constraints = C_constraint(A, cones = cones, rhs = rhs), bounds = V_bound(ld = -Inf, nobj = length(obj))) if (isTRUE(control$dry_run)) return(model) so <- ROI_solve(model, solver = solver, control) Y <- matrix(0, n, n) Y[lower.tri(Y, diag = TRUE)] <- so$solution[seq_len(n * (n+1) / 2)] Y[upper.tri(Y)] <- t(Y)[upper.tri(Y)] list(w = chol2inv(chol(Y)), wi = Y, errflag = so$status$code, niter = so$message$info$iter) } ## ## Examples Sys.setenv(ROI_LOAD_PLUGINS = FALSE) library("slam") library("ROI") library("ROI.plugin.ecos") ## ## Best Subset Selection simluate_data <- function(beta, m, n) { stopifnot(length(beta) <= n) id <- sample(seq_len(n), length(beta)) beta_new <- double(n) beta_new[id] <- beta beta_new <- c(9, beta_new) X <- matrix(100 * runif(m * n), m, n) X <- cbind(rep.int(1, NROW(X)), X) y <- as.vector(X %*% beta_new) + rnorm(m) id <- id + 1 return(list(X = X, y = y, beta_new = beta_new, id = id)) } set.seed(0) d <- simluate_data(1:4, 300, 10) sol <- subset_selection(d$X, d$y, k = 5, -100, 100, count_intercept = TRUE) round(sol, 2) - d$beta_new ## ## SON Clustering ## X <- scale(as.matrix(USArrests), center = TRUE, scale = FALSE) cc <- convex_clust(X, gamma = 1) cc ## ## Graphical Lasso ## library("ROI.plugin.scs") library("glasso") set.seed(1) x <- matrix(rnorm(5*2), ncol = 2) s <- var(x) b <- ROI_glasso(s, 0.01, tol = 1e-12) a <- glasso(s, rho = 0.01, w.init = b$w, , wi.init = b$wi) max(abs(a$w - b$w)) set.seed(2) x <- matrix(rnorm(70*30), ncol = 30) s <- var(x) b <- ROI_glasso(s, 0.01) a <- glasso(s, rho = 0.01, w.init = b$w, , wi.init = b$wi) max(abs(a$w - b$w))