library("ClickClust") ### Example from Section 3.1 data("synth", package = "ClickClust") synth$data repl.levs <- function(x, ch.lev) { for (j in 1:length(ch.lev)) x <- gsub(ch.levs[j], j, x) return(x) } d <- paste(synth$data, collapse = " ") d <- strsplit(d, " ")[[1]] ch.levs <- levels(as.factor(d)) S <- strsplit(synth$data, " ") S <- sapply(S, repl.levs, ch.levs) S <- sapply(S, as.numeric); S C <- click.read(S) C ### Example 1 from Section 3.2 set.seed(123) N2 <- click.EM(X = C$X, K = 2) N2 # Figure 2 library("WeightedCluster") s <- strsplit(synth$data, " ") SS <- matrix(NA, 250, 50) for (i in 1:250){ SS[i,1:length(s[[i]])] <- s[[i]] } aa <- seqdef(SS) seqdplot(aa, group = N2$id) ### Example 2 from Section 3.2 set.seed(123) M2 <- click.EM(X = C$X, y = C$y, K = 2) M2 ### Example 1 from Section 3.3 click.plot(X = C$X, y = C$y, id = M2$id, states = ch.levs, obs.lwd = 0.3) ### Example 2 from Section 3.3 click.plot(X = C$X, id = N2$id, states = ch.levs) ### Example from Section 3.4 V <- click.var(X = C$X, alpha = N2$alpha, gamma = N2$gamma, z = N2$z) st.err <- sqrt(diag(V)) Estimates <- c(N2$alpha[-2], as.vector(apply(N2$gamma[, -5, ], 3, t))) Lower <- Estimates - qnorm(0.975) * st.err Upper <- Estimates + qnorm(0.975) * st.err cbind(Estimates, Lower, Upper) ### Example 1 from Section 3.5 set.seed(1234) F2 <- click.forward(X = C$X, K = 2) F2 ### Example 2 from Section 3.5 set.seed(1234) B2 <- click.backward(X = C$X, K = 2) B2 ### Example from Section 3.6 T <- click.predict(M = 3, gamma = N2$gamma, pr = N2$z[1, ]) colnames(T) <- ch.levs T[S[[1]][length(S[[1]])], ] ### Example from Section 3.7 set.seed(123) n.seq <- 250 p <- 5 K <- 2 mix.prop <- c(0.3, 0.7) TP <- array(rep(NA, p * p * K), c(p, p, K)) TP[, , 1] <- matrix(c(0.20, 0.10, 0.15, 0.15, 0.40, 0.10, 0.10, 0.20, 0.20, 0.40, 0.15, 0.10, 0.20, 0.20, 0.35, 0.15, 0.10, 0.20, 0.20, 0.35, 0.30, 0.30, 0.10, 0.10, 0.20), byrow = TRUE, ncol = p) TP[, , 2] <- matrix(c(0.15, 0.35, 0.20, 0.20, 0.10, 0.40, 0.10, 0.20, 0.20, 0.10, 0.25, 0.20, 0.15, 0.15, 0.25, 0.25, 0.20, 0.15, 0.15, 0.25, 0.10, 0.20, 0.20, 0.20, 0.30), byrow = TRUE, ncol = p) A <- click.sim(n = n.seq, int = c(10, 50), alpha = mix.prop, gamma = TP) A C <- click.read(A$S) C ### Experiment from Section 4 set.seed(1234) n.seq <- 100 p <- 15 K <- 3 mix.prop <- c(0.25, 0.35, 0.40) TP1 <- matrix(c(0.30, 0.01, 0.03, 0.03, 0.10, 0.15, 0.10, 0.05, 0.05, 0.05, 0.02, 0.02, 0.05, 0.02, 0.02, 0.01, 0.30, 0.05, 0.05, 0.05, 0.10, 0.10, 0.07, 0.07, 0.07, 0.03, 0.03, 0.01, 0.03, 0.03, 0.05, 0.05, 0.20, 0.20, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.02, 0.02, 0.12, 0.02, 0.02, 0.05, 0.05, 0.20, 0.20, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.02, 0.02, 0.12, 0.02, 0.02, 0.02, 0.05, 0.10, 0.10, 0.40, 0.05, 0.05, 0.02, 0.02, 0.02, 0.04, 0.04, 0.01, 0.04, 0.04, 0.02, 0.02, 0.01, 0.01, 0.02, 0.35, 0.01, 0.05, 0.05, 0.05, 0.10, 0.10, 0.01, 0.10, 0.10, 0.03, 0.05, 0.10, 0.10, 0.05, 0.05, 0.15, 0.02, 0.02, 0.02, 0.10, 0.10, 0.01, 0.10, 0.10, 0.05, 0.10, 0.05, 0.05, 0.10, 0.02, 0.05, 0.15, 0.15, 0.15, 0.03, 0.03, 0.01, 0.03, 0.03, 0.05, 0.10, 0.05, 0.05, 0.10, 0.02, 0.05, 0.15, 0.15, 0.15, 0.03, 0.03, 0.01, 0.03, 0.03, 0.05, 0.10, 0.05, 0.05, 0.10, 0.02, 0.05, 0.15, 0.15, 0.15, 0.03, 0.03, 0.01, 0.03, 0.03, 0.10, 0.05, 0.05, 0.05, 0.10, 0.03, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.02, 0.10, 0.10, 0.10, 0.05, 0.05, 0.05, 0.10, 0.03, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.02, 0.10, 0.10, 0.03, 0.01, 0.15, 0.15, 0.01, 0.10, 0.05, 0.10, 0.10, 0.10, 0.03, 0.03, 0.08, 0.03, 0.03, 0.10, 0.05, 0.05, 0.05, 0.10, 0.03, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.02, 0.10, 0.10, 0.10, 0.05, 0.05, 0.05, 0.10, 0.03, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.02, 0.10, 0.10), byrow = TRUE, ncol = p) TP2 <- matrix(c(0.01, 0.01, 0.03, 0.03, 0.10, 0.05, 0.10, 0.05, 0.05, 0.05, 0.12, 0.12, 0.04, 0.12, 0.12, 0.01, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.16, 0.16, 0.16, 0.02, 0.02, 0.03, 0.02, 0.02, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.15, 0.15, 0.15, 0.03, 0.03, 0.08, 0.03, 0.03, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.15, 0.15, 0.15, 0.03, 0.03, 0.08, 0.03, 0.03, 0.10, 0.05, 0.10, 0.10, 0.10, 0.05, 0.05, 0.04, 0.04, 0.04, 0.05, 0.05, 0.13, 0.05, 0.05, 0.02, 0.02, 0.01, 0.01, 0.02, 0.05, 0.02, 0.05, 0.05, 0.05, 0.10, 0.10, 0.30, 0.10, 0.10, 0.13, 0.02, 0.10, 0.10, 0.05, 0.05, 0.10, 0.03, 0.03, 0.03, 0.05, 0.05, 0.16, 0.05, 0.05, 0.11, 0.01, 0.05, 0.05, 0.03, 0.02, 0.03, 0.05, 0.05, 0.05, 0.10, 0.10, 0.15, 0.10, 0.10, 0.11, 0.01, 0.05, 0.05, 0.03, 0.02, 0.03, 0.05, 0.05, 0.05, 0.10, 0.10, 0.15, 0.10, 0.10, 0.11, 0.01, 0.05, 0.05, 0.03, 0.02, 0.03, 0.05, 0.05, 0.05, 0.10, 0.10, 0.15, 0.10, 0.10, 0.10, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05, 0.06, 0.01, 0.15, 0.15, 0.01, 0.10, 0.05, 0.10, 0.10, 0.10, 0.03, 0.03, 0.05, 0.03, 0.03, 0.10, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.10, 0.15, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.05, 0.05), byrow = TRUE, ncol = p) TP3 <- matrix(c(0.10, 0.10, 0.05, 0.05, 0.10, 0.10, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10, 0.10, 0.05, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.04, 0.05, 0.05, 0.04, 0.04, 0.04, 0.01, 0.01, 0.50, 0.01, 0.01, 0.02, 0.04, 0.05, 0.05, 0.12, 0.05, 0.02, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10, 0.10, 0.10, 0.15, 0.10, 0.10, 0.10, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.01, 0.07, 0.07, 0.02, 0.03, 0.10, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10, 0.10, 0.10, 0.05, 0.01, 0.07, 0.07, 0.02, 0.03, 0.10, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10, 0.10, 0.10, 0.05, 0.01, 0.07, 0.07, 0.02, 0.03, 0.10, 0.05, 0.05, 0.05, 0.10, 0.10, 0.10, 0.10, 0.10, 0.04, 0.15, 0.03, 0.03, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.07, 0.07, 0.12, 0.07, 0.07, 0.04, 0.15, 0.03, 0.03, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.07, 0.07, 0.12, 0.07, 0.07, 0.02, 0.03, 0.05, 0.05, 0.60, 0.01, 0.04, 0.03, 0.03, 0.03, 0.02, 0.02, 0.03, 0.02, 0.02, 0.04, 0.15, 0.03, 0.03, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.07, 0.07, 0.12, 0.07, 0.07, 0.04, 0.15, 0.03, 0.03, 0.05, 0.10, 0.05, 0.05, 0.05, 0.05, 0.07, 0.07, 0.12, 0.07, 0.07), byrow = TRUE, ncol = p) TP <- array(rep(NA, p * p * K), c(p, p, K)) TP[, , 1] <- TP1 TP[, , 2] <- TP2 TP[, , 3] <- TP3 A <- click.sim(n = n.seq, int = c(10, 100), alpha = mix.prop, gamma = TP) C <- click.read(A$S) M1 <- click.EM(X = C$X, y = C$y, K = 1) M2 <- click.EM(X = C$X, y = C$y, K = 2) M3 <- click.EM(X = C$X, y = C$y, K = 3) M4 <- click.EM(X = C$X, y = C$y, K = 4) M5 <- click.EM(X = C$X, y = C$y, K = 5) N1 <- click.EM(X = C$X, K = 1) N2 <- click.EM(X = C$X, K = 2) N3 <- click.EM(X = C$X, K = 3) N4 <- click.EM(X = C$X, K = 4) N5 <- click.EM(X = C$X, K = 5) F1 <- click.forward(C$X, K = 1) F2 <- click.forward(C$X, K = 2) F3 <- click.forward(C$X, K = 3) F4 <- click.forward(C$X, K = 4) F5 <- click.forward(C$X, K = 5) B1 <- click.backward(C$X, K = 1) B2 <- click.backward(C$X, K = 2) B3 <- click.backward(C$X, K = 3) B4 <- click.backward(C$X, K = 4) B5 <- click.backward(C$X, K = 5) # Table 11 library("MixSim") rbind(c(M1$BIC, M2$BIC, M3$BIC, M4$BIC, M5$BIC), c(N1$BIC, N2$BIC, N3$BIC, N4$BIC, N5$BIC), c(F1$BIC, F2$BIC, F3$BIC, F4$BIC, F5$BIC), c(B1$BIC, B2$BIC, B3$BIC, B4$BIC, B5$BIC)) rbind(c(RandIndex(A$id, M1$id)$AR, RandIndex(A$id, M2$id)$AR, RandIndex(A$id, M3$id)$AR, RandIndex(A$id, M4$id)$AR, RandIndex(A$id, M5$id)$AR), c(RandIndex(A$id, N1$id)$AR, RandIndex(A$id, N2$id)$AR, RandIndex(A$id, N3$id)$AR, RandIndex(A$id, N4$id)$AR, RandIndex(A$id, N5$id)$AR), c(RandIndex(A$id, F1$id)$AR, RandIndex(A$id, F2$id)$AR, RandIndex(A$id, F3$id)$AR, RandIndex(A$id, F4$id)$AR, RandIndex(A$id, F5$id)$AR), c(RandIndex(A$id, B1$id)$AR, RandIndex(A$id, B2$id)$AR, RandIndex(A$id, B3$id)$AR, RandIndex(A$id, B4$id)$AR, RandIndex(A$id, B5$id)$AR)) # Table 12 library("clickstream") library("TraMineR") set.seed(1234) A <- click.sim(n = n.seq, int = c(10, 100), alpha = mix.prop, gamma = TP) C <- click.read(A$S) B <- lapply(A$S, FUN = as.character) AR <- matrix(0, 6, 5) for (k in 2:5){ id <- rep(NA, length(A$S)) D <- clusterClickstreams(B, order = 0, center = k) for (g in 1:(k-1)){ for (j in 1:length(D$clusters[[g]])){ a <- as.numeric(D$clusters[[g]][[j]]) for (i in 1:length(A$S)){ b <- as.numeric(A$S[[i]]) if (length(a) == length(b)){ if ((sum(a - b) == 0) & (is.na(id[i]))){ id[i] <- g } } } } } id[is.na(id)] <- k AR[1,k] <- RandIndex(A$id, id)$AR } SS <- matrix(NA, 100, 99) for (i in 1:n.seq){ SS[i,1:length(A$S[[i]])] <- A$S[[i]] } aa <- seqdef(SS) diss <- seqdist(aa, method = "LCS") allClust <- wcCmpCluster(diss, maxcluster=5, method=c("all")) for (k in 1:4){ id2 <- as.numeric(allClust$pam$clustering[,k]) id3 <- as.numeric(allClust$ward$clustering[,k]) id4 <- as.numeric(allClust$single$clustering[,k]) id5 <- as.numeric(allClust$complete$clustering[,k]) id6 <- as.numeric(allClust$average$clustering[,k]) AR[2,k+1] <- RandIndex(id2, A$id)$AR AR[3,k+1] <- RandIndex(id3, A$id)$AR AR[4,k+1] <- RandIndex(id4, A$id)$AR AR[5,k+1] <- RandIndex(id5, A$id)$AR AR[6,k+1] <- RandIndex(id6, A$id)$AR } AR dev.new(width = 11, height = 11) click.plot(X = C$X, id = B3$id, colors = c("lightyellow", "red", "darkred"), col.levels = 10) # Or loading the dataset and producing the code with: data("utilityB3", package = "ClickClust") dev.new(width = 11, height = 11) click.plot(X = C$X, id = B3$id, colors = c("lightyellow", "red", "darkred"), col.levels = 10) ### Experiment from Section 5 set.seed(1234) data("msnbc323", package = "ClickClust") n <- length(msnbc323) C <- click.read(msnbc323) M1 <- click.EM(X = C$X, y = C$y, K = 1, iter = 10, r = 250, scale.const = 2) M2 <- click.EM(X = C$X, y = C$y, K = 2, iter = 10, r = 250, scale.const = 2) M3 <- click.EM(X = C$X, y = C$y, K = 3, iter = 10, r = 250, scale.const = 2) M4 <- click.EM(X = C$X, y = C$y, K = 4, iter = 10, r = 250, scale.const = 2) M5 <- click.EM(X = C$X, y = C$y, K = 5, iter = 10, r = 250, scale.const = 2) N1 <- click.EM(X = C$X, K = 1, iter = 10, r = 250, scale.const = 2) N2 <- click.EM(X = C$X, K = 2, iter = 10, r = 250, scale.const = 2) N3 <- click.EM(X = C$X, K = 3, iter = 10, r = 250, scale.const = 2) N4 <- click.EM(X = C$X, K = 4, iter = 10, r = 250, scale.const = 2) N5 <- click.EM(X = C$X, K = 5, iter = 10, r = 250, scale.const = 2) # Table 13 rbind(c(M1$BIC, M2$BIC, M3$BIC, M4$BIC, M5$BIC), c(N1$BIC, N2$BIC, N3$BIC, N4$BIC, N5$BIC)) state.names <- c("frontpage", "news", "tech", "local", "opinion", "on-air", "misc", "weather", "msn-news", "health", "living", "business", "msn-sports", "sports", "summary", "bbs", "travel") dev.new(width = 11, height = 11) click.plot(X = C$X, y = C$y, id = M3$id, col.levels = 10, top.srt = 90, colors = c("lightyellow", "red", "darkred"), font.cex = 1.5, marg = 4, states = state.names)