##################### ## Initialization ## # has to be run before any of the examples ##################### library("tclust") ## standard margins mmar <- c(5.1, 4.1, 4.1, 1.1) og <- grey(0) ## color for outliers ################ ## Figure 1 ## ################ set.seed(100) data("geyser2") clus <- tkmeans(geyser2, k = 3, alpha=0.03) plot(clus, col = c(og, 2:4), tol.lwd = 1, tol.lty = 2) ################ ## Figure 2 ## ################ data("M5data") cl <- M5data[, "cluster"] plot(M5data[, 1:2], col = cl + 1, pch = cl + 1, main = "The M5data data set") ################ ## Figure 3 ## ################ data("M5data") x <- M5data[, 1:2] set.seed(100) ## applying tclust with restr.fact = 1, restr= "eigen" res.a <- tclust(x, k = 3, alpha=0.1, restr.fact = 1, restr= "eigen", equal.weights = TRUE, warnings = 1) ## applying tclust with restr= "sigma" res.b <- tclust(x, k = 3, alpha=0.1, restr.fact = 1, restr= "sigma", equal.weights = TRUE) ## applying tclust with restr.fact = 1, restr= "deter" res.c <- tclust(x, k = 3, alpha=0.1, restr.fact = 1, restr= "deter", equal.weights = TRUE, warnings = 1) ## applying tclust with restr.fact = 50, restr= "eigen" res.d <- tclust(x, k = 3, alpha=0.1, restr.fact = 50, restr= "eigen", equal.weights = FALSE) op <- par(mfrow = c(2, 2), mar = c(2.1, 2.1, 3.6, 1.1)) col <- c(og, 4, 2, 3) pch <- c(1, 4, 2, 3) ## plotting results.. plot(res.a, col = col, pch = pch, tol.lty = 2, xlab = "", ylab = "", main.pre ="(a)", main = "/r") plot(res.b, col = col, pch = pch, tol.lty = 2, xlab = "", ylab = "", main.pre ="(b)", main = "/r") plot(res.c, col = col, pch = pch, tol.lty = 2, xlab = "", ylab = "", main.pre ="(c)", main = "/r") plot(res.d, col = col, pch = pch, tol.lty = 2, xlab = "", ylab = "", main.pre ="(d)", main = "/r") par(op) ################ ## Figure 4 ## ################ ## data generation set.seed(10) x <- rbind(rmvnorm(200, c(0, 0), diag(2)), rmvnorm(200, c(5, 0), diag(2))) ## applying tclust clus.4 <- tclust(x, k = 3, alpha = 0.0, restr.fact = 1, warnings = 0) ## plotting results.. plot(clus.4, xlim = c(-3, 9), ylim = c(-6, 6)) ################ ## Figure 5 ## ################ library("cluster") data("geyser2") set.seed(0) op <- par(mfrow = c(1, 3), mar = c(3.1, 3.1, 3.1, 2.1)) xlab <- "" #names(geyser2)[1] ylab <- "" #names(geyser2)[2] ## applying PAM on the geyser2 data set clus <- pam(geyser2, 3) plot(geyser2, xlab = xlab, ylab = ylab, col = clus$clustering + 1, main = "(a) PAM with Geyser") points(clus$medoids, pch = "X", cex = 1.4) ## modifying the geyser2 data set geyser2.mod <- geyser2 idx <- c(16, 21, 36, 171, 236, 265) geyser2.mod[idx,] <- geyser2[idx,] - 12 ## applying PAM on the modified geyser2 data set clus <- pam(geyser2.mod, 3) plot(geyser2.mod, xlab = xlab, ylab = ylab, col = clus$clustering + 1, main = "(b) PAM with modified Geyser") ## applying tkmeans on the modified geyser2 data set clus <- tkmeans(geyser2.mod, k = 3, alpha = 0.03) plot(clus, xlab = xlab, ylab = ylab, main = "(c) TCLUST with modified Geyser", sub = "") par(op) ################ ## Figure 6 ## ################ fig.elt <- list() library("sn") op <- par(mfrow = c(2, 3), mar = c(3.1, 2, 3.1, 2)) set.seed(3) n <- 200 xlab <- "" #"x1" ylab <- "" #"x2" ## data generation: Mixture of elliptical t's xi <- c(0.5, -1) Omega <- diag(2) / 2 + 0.5 alpha <- c(0, 0) rnd1 <- rmst(n, xi, Omega, alpha, df = 2) xi <- c(6, -3) Omega <- matrix(c(4, 1, 1, 2), nrow = 2, ncol = 2) alpha <- c(0, 0) rnd2 <- rmst(n, xi, Omega, alpha, df = 3) xi <- c(-3, 3) Omega <- matrix(c(2, 1, 1, 4), nrow = 2, ncol = 2) alpha <- c(0, 0) rnd3 <- rmst(n, xi, Omega, alpha, df = 4) rnd.1 <- rbind(rnd1, rnd2, rnd3) real.clus1 <- c(rep(1, n), rep(2, n), rep(3, n)) ylim <- c(-25, 15) ## Plotting the real clusters plot(rnd.1, xlim = c(-15, 80), ylim = ylim, xlab = xlab, ylab = ylab, col = real.clus1 + 1, pch = real.clus1 + 1) title("(a) Elliptical t components", line = 1.6) ## applying tclust without trimming clus <- tclust(rnd.1, k = 3, alpha = 0.0) plot(clus, xlim = c(-15, 80), ylim = ylim, xlab = xlab, ylab = ylab, main.pre = "(b)") ## applying tclust with trimming(5%) clus <- tclust(rnd.1, k = 3, alpha = 0.05) plot(clus, xlim = c(-15, 80), ylim = ylim, xlab = xlab, ylab = ylab, main.pre = "(c)") ## data generation: Mixture of non-elliptical t's xi <- c(0.5, -1) Omega <- diag(2) / 2 + 0.5 alpha <- c(2, 80) rnd1 <- rmst(n, xi, Omega, alpha, df = 2) xi <- c(6, -3) Omega <- matrix(c(4, 1, 1, 2), nrow = 2, ncol = 2) alpha <- c(2, 2) rnd2 <- rmst(n, xi, Omega, alpha, df = 3) xi <- c(-3, 3) Omega <- matrix(c(2, 1, 1, 4), nrow = 2, ncol = 2) alpha <- c(4, 4) rnd3 <- rmst(n, xi, Omega, alpha, df = 4) rnd.2 <- rbind(rnd1, rnd2, rnd3) real.clus2 <- c(rep(1, n), rep(2, n), rep(3, n)) ylim <- c(-10, 20) ## Plotting the real clusters plot(rnd.2, xlim = c(-10, 20), ylim = ylim, xlab = xlab, ylab = ylab, col = real.clus2 + 1, pch=real.clus2 + 1) title("(d) Non-elliptical t components", line = 1.6) ## applying tclust without trimming clus <- tclust(rnd.2, k = 3, alpha = 0.0) plot(clus, xlim = c(-10, 20), ylim = ylim, xlab = xlab, ylab = ylab, main.pre = "(e)") ## applying tclust with trimming(5%) clus <- tclust(rnd.2, k = 3, alpha = 0.05) plot(clus, xlim = c(-10, 20), ylim = ylim, xlab = xlab, ylab = ylab, main.pre = "(f)") par(op) ################ ## Figure 7 ## ################ library("mclust") ## function for plotting mclust-objects in tclust-style. mclustplottcluststyle2d <- function(x, clus, clus.perm, tol.lty = 3, size = sqrt(qchisq(0.95, 2)), ...) { if(missing(clus.perm)) cuse <- clus$classification else cuse <- clus.perm [clus$classification + 1] plot(x, col = cuse + 1, pch = cuse + 1, ...) k <- ncol(clus$parameters$mean) for(i in 1:k) tclust:::.doEllipses(cov = clus$parameters$variance$sigma[,, i], center = clus$parameters$mean[, i], size = size, lty = tol.lty) } op <- par(mfrow = c(2, 2), mar = c(3.1, 3.1, 3.1, 2.1)) ## example 1 - uniformly distributed noise ## data generation set.seed(0) x <- rbind( rmvnorm(360, c(0.0, 0), matrix(c( 1, 0, 0, 1), ncol = 2)), rmvnorm(540, c(5.0, 10), matrix(c( 6, -2, -2, 6), ncol = 2)), cbind(runif(100, -5, 15), runif(100, -10, 20))) ## applying mclust noiseInit <- sample(c(TRUE, FALSE), size = nrow(x), replace = TRUE, prob = c(0.1, 0.9)) clus <- Mclust(x, initialization = list(noise = noiseInit), G = 2) mclustplottcluststyle2d(x, clus, c(0, 2, 1), xlab = "", ylab = "", main = "(a) mclust", tol.lty = 2) ## applying tclust clus <- tclust(x, k = 2, alpha = 0.1) plot(clus, xlab = "", ylab = "", main = "(b) tclust", sub = "", tol.lty = 2) ## example 2 - structured noise ## data generation set.seed(0) v <- runif(100, -2 * pi, 2 * pi) noise <- cbind(100 + 25 * sin(v), 10 + 5 * v) x <- rbind( rmvnorm(360, c(0.0, 0), matrix(c(1, 0, 0, 1), ncol = 2)), rmvnorm(540, c(5.0, 10), matrix(c(6, -2, -2, 6), ncol = 2)), noise) ## applying mclust noiseInit <- sample(c(TRUE, FALSE), size = nrow(x), replace = TRUE, prob = c(0.1, 0.9)) clus <- Mclust(x, initialization = list(noise = noiseInit), G = 2) mclustplottcluststyle2d(x, clus, c(0, 2, 1), xlab = "", ylab = "", main = "(c) mclust", tol.lty = 2) ## applying tclust clus <- tclust(x, k = 2, alpha = 0.1) plot(clus, xlab = "", ylab = "", main = "(d) tclust", sub = "", tol.lty = 2) par(op) ################ ## Figure 8 ## ################ ## data generation set.seed(100) mixt <- rbind(rmvnorm(360, c( 0, 0), matrix(c(1, 0, 0, 1), ncol = 2)), rmvnorm(540, c( 5, 10), matrix(c(6, -2, -2, 6), ncol = 2)), rmvnorm(100, c(2.5, 5), matrix(c(50, 0, 0, 50), ncol = 2))) ## applying tclust set.seed(100) clus.1 <- tclust(mixt, k = 3, alpha = 0.0, restr.fact = 50, warnings = 2) clus.2 <- tclust(mixt, k = 2, alpha = 0.05, restr.fact = 8, warnings = 2) ## plotting results op <- par(mfrow = c(1, 2)) plot(clus.1, by.clust = TRUE, col = c(og, 2, 3, 4), pch = c(1, 2, 3, 4), tol.lty = 2, main.pre = "(a)") plot(clus.2, by.clust = TRUE, col = c(og, 2, 3), pch = c(1, 2, 3), tol.lty = 2, main.pre = "(b)") par(op) ################ ## Figure 9 ## ################ ## computing ctlcurves ctl <- ctlcurves(mixt, restr.fact = 50, alpha = seq(0, 0.2, by = 0.05)) op <- par(mfrow = c(1, 1)) plot(ctl) par(op) ################# ## Figure 10 ## ################# data("geyser2") op <- par(mfrow = c(1, 2)) set.seed(10) ## creating a one dimensional data "matrix" geyser1 <- geyser2[, 1, drop = FALSE] ## applying tkmeans plot(tkmeans(geyser1, k = 2, alpha = 0.03), jitter = TRUE, tol.lwd = 2, main.pre = "(a)") ## adding a random dimension to geyser2 geyser3 <- cbind(geyser2, rnorm(nrow(geyser2))) ## applying tkmeans plot(tkmeans(geyser3, k = 3, alpha = 0.03), main.pre = "(b)") par(op) ################# ## Figure 11 ## ################# ## data generation set.seed(100) mixt2 <- rbind( rmvnorm(360, c(0, 0), matrix(c(1, 0, 0, 1), ncol = 2)), rmvnorm(540, c(5, 10), matrix(c(6, -2, -2, 6), ncol = 2)), rmvnorm(100, c(2.5, 5), matrix(c(50, 0, 0, 50), ncol = 2)) ) ## applying tclust set.seed(100) clus.w <- tclust(mixt2, k = 3, alpha = 0.1, restr.fact = 1, equal.weights = TRUE, warnings = 1) ## applying DiscrFact discr.clus.w <- DiscrFact(clus.w) ## plotting results op <- par(mfrow = c(1, 3), mar = mmar) plot(clus.w, col = c(og, 2, 3, 4), tol.lty = 2, main.pre = "(a)") plot_DiscrFact_p2(discr.clus.w, xlim = c(-70, 0), main.pre = "(b)") plot_DiscrFact_p3(discr.clus.w, tol.lty = 2, main.pre = "(c)") par(op) ################# ## Figure 12 ## ################# data("swissbank") set.seed(0) fig6.ctl <- ctlcurves(swissbank, k = 1:4, alpha = seq(0, 0.3, by = 0.025), restr.fact = 50, iter.max = 100, nstart = 100) op <- par(mfrow = c(1, 1), mar = mmar) plot(fig6.ctl) par(op) ################# ## Figure 13 ## ################# fig7.clus <- tclust(swissbank, k = 2, alpha = .1, restr.fact = 50) fig7.discrfact <- DiscrFact(fig7.clus, threshold = .000125) op <- par(mfrow = c(1, 3), mar = mmar) plot(fig7.discrfact, enum = TRUE) par(op) ################# ## Figure 14 ## ################# clus <- tclust(swissbank, k = 2, alpha=.1, restr.fact=50) discrfact <- DiscrFact(clus) pch <- c(rep("G", 100), rep("F", 100)) condition <- discrfact$assignfact > log(.000125) cl <- clus$cluster data("swissbank") op <- par(mfrow = c(1, 3), mar = mmar) xlab <- "Distance of the inner frame to lower border" ylab <- "Length of the diagonal" plot(swissbank[, 4], swissbank[, 6], col = "darkgrey", pch = pch, main = "(a) Cluster1", xlab = xlab, ylab = ylab) cl1 <- cl == 1 points(swissbank[cl1, 4], swissbank[cl1, 6], pch = pch[cl1], col = 2) idx <-(cl1) & condition points(swissbank[idx, 4], swissbank[idx, 6], pch = 1, cex = 4, col = "blue") plot(swissbank[, 4], swissbank[, 6], col = "darkgrey", pch = pch, main = "(b) Cluster2", xlab = xlab, ylab = ylab) cl2 <- cl == 2 points(swissbank[cl2, 4], swissbank[cl2, 6], pch = pch[cl2], col = 3) idx <-(cl2) & condition points(swissbank[idx, 4], swissbank[idx, 6], pch = 1, cex = 4, col = "blue") cl0 <- cl == 0 plot(swissbank[, 4], swissbank[, 6], col = "darkgrey", pch = pch, main = "(c) Trimmed", xlab = xlab, ylab = ylab) points(swissbank[cl0, 4], swissbank[cl0, 6], pch = pch[cl0]) idx <-(cl0) & condition points(swissbank[idx, 4], swissbank[idx, 6], pch = 1, cex = 4, col = "blue") par(op) ## #################################################### ## Example code chunks as included in the paper ## #################################################### ## ############## library("tclust") data("geyser2") clus <- tkmeans(geyser2, k = 3, alpha = 0.03) plot(clus) ############## data("M5data") x <- M5data[, 1:2] res.a <- tclust(x, k = 3, alpha = 0.1, restr.fact = 1, restr = "eigen", equal.weights = TRUE) res.b <- tclust(x, k = 3, alpha = 0.1, restr.fact = 1, restr = "sigma", equal.weights = TRUE) res.c <- tclust(x, k = 3, alpha = 0.1, restr.fact = 1, restr = "deter", equal.weights = TRUE) res.d <- tclust(x, k = 3, alpha = 0.1, restr.fact = 50, restr = "eigen", equal.weights = FALSE) plot(res.a, main = "/r") plot(res.b, main = "/r") plot(res.c, main = "/r") plot(res.d, main = "/r") ################# set.seed(10) x <- rbind(rmvnorm(200, c(0, 0), diag(2)), rmvnorm(200, c(5, 0), diag(2))) clus <- tclust(x, k = 3, alpha = 0, restr.fact = 1) plot(clus) ############### sigma1 <- diag(2) ## EigenValues: 1, 1 sigma2 <- diag(2) * 8 - 2 ## EigenValues: 8, 4 sigma3 <- diag(2) * 50 ## EigenValues: 50, 50 mixt <- rbind( rmvnorm(360, mean = c(0.0, 0), sigma = sigma1), rmvnorm(540, mean = c(5.0, 10), sigma = sigma2), rmvnorm(100, mean = c(2.5, 5), sigma = sigma3)) plot(tclust(mixt, k = 3, alpha = 0.00, restr.fact = 50)) plot(tclust(mixt, k = 2, alpha = 0.05, restr.fact = 12)) ############### plot(ctlcurves(mixt, k = 1:4, alpha = seq(0, 0.2, by = 0.05))) ############### geyser1 <- geyser2[, 1, drop = FALSE] geyser3 <- cbind(geyser2, rnorm(nrow(geyser2))) plot(tkmeans(geyser1, k = 2, alpha = 0.03), jitter = TRUE) plot(tkmeans(geyser3, k = 3, alpha = 0.03)) ############### clus.w <- tclust(mixt, k = 3, alpha = 0.1, restr.fact = 1, equal.weights = TRUE) discr.clus.w <- DiscrFact(clus.w, threshold = 0.1) plot(discr.clus.w) ############### data("swissbank") plot(ctlcurves(swissbank, k = 1:4, alpha = seq(0, 0.3, by = 0.025))) ################## clus <- tclust(swissbank, k = 2, alpha = 0.1, restr.fact = 50) plot(DiscrFact(clus, threshold = 0.0001)) ################# tclust(swissbank, k = 2, alpha = 0.1, restr.fact = 40) clus$size