library("clickstream")
library("arulesSequences")

## 2. Clickstreams

# Three ways of obtaining a clickstream list are shown in turn. Each one
# overwrites `cls`, so only the data read from "sample.csv" is used by the
# analysis in section 3.

# (a) Written out by hand: one character vector of clicks per session, tagged
#     with the class "Clickstreams".
cls <- list(
  Session1 = c("P1", "P2", "P1", "P3", "P4", "Defer"),
  Session2 = c("P3", "P4", "P1", "P3", "Defer"),
  Session3 = c("P5", "P1", "P6", "P7", "P6", "P7", "P8", "P7", "Buy"),
  Session4 = c("P9", "P2", "P11", "P12", "P11", "P13", "P11", "Buy"),
  Session5 = c("P4", "P6", "P11", "P6", "P1", "P3", "Defer"),
  Session6 = c("P3", "P13", "P12", "P4", "P12", "P1", "P4", "P1", "P3",
               "Defer"),
  Session7 = c("P10", "P5", "P10", "P8", "P8", "P5", "P1", "P7", "Buy"),
  Session8 = c("P9", "P2", "P1", "P9", "P3", "P1", "Defer"),
  Session9 = c("P5", "P8", "P5", "P7", "P4", "P1", "P6", "P4", "Defer")
)
class(cls) <- "Clickstreams"

# (b) Simulated from a two-state transition matrix.
cls <- randomClickstreams(
  states = c("P1", "P2"),
  startProbabilities = c(0.5, 0.5),
  transitionMatrix = matrix(c(0.2, 0.8, 0.4, 0.6), nrow = 2),
  meanLength = 10,
  n = 100
)

# (c) Read from a comma-separated file with a header row.
cls <- readClickstreams(file = "sample.csv", sep = ",", header = TRUE)
cls
summary(cls)

## 3. Analyzing clickstreams

# Second-order Markov chain fitted with the "quadratic" optimizer.
mc <- fitMarkovChain(
  clickstreamList = cls,
  order = 2,
  control = list(optimizer = "quadratic")
)

# Show the fit with two significant digits, then put back whatever `digits`
# setting was active before (options() returns the previous values), instead
# of assuming the default of 7.
oldOptions <- options(digits = 2)
mc
options(oldOptions)
summary(mc)
plot(mc)

# Cluster the sessions into three groups; the seed fixes the random state
# before clustering so the result is reproducible.
set.seed(123)
clusters <- clusterClickstreams(clickstreamList = cls, order = 1, centers = 3)
clusters
summary(clusters)

# Predict the next click after the pattern P9 -> P2.
pattern <- new("Pattern", sequence = c("P9", "P2"))
resultPattern <- predict(mc, startPattern = pattern, dist = 1)
resultPattern

# Same start pattern, now with initial Buy/Defer probabilities attached and
# a prediction distance of two clicks.
pattern <- new(
  "Pattern",
  sequence = c("P9", "P2"),
  absorbingProbabilities = data.frame(Buy = 0.333, Defer = 0.667)
)
resultPattern <- predict(mc, startPattern = pattern, dist = 2)
resultPattern

# Manual Buy/Defer probabilities for the pattern: start from equal weights,
# multiply by the Buy/Defer entries that mc@absorbingProbabilities holds for
# each state of the sequence, then rescale so the two values sum to 1.
absorbingProbabilities <- c(0.5, 0.5)
sequence <- c("P9", "P2")
for (s in sequence) {
  absorbingProbabilities <- absorbingProbabilities * data.matrix(subset(
    mc@absorbingProbabilities,
    state == s,
    select = c("Buy", "Defer")
  ))
}
absorbingProbabilities <- absorbingProbabilities / sum(absorbingProbabilities)
absorbingProbabilities

## 4.
## Example with simulated data

# Fit Markov chains of order 1..maxOrder to `clickstreams` and collect the
# information criteria of every fit.
#
# clickstreams: clickstream list handed to fitMarkovChain().
# maxOrder:     highest order to fit; seq_len() makes 0 yield no fits at all.
#
# Returns a data frame with one row per order and the columns Order, AIC and
# BIC (NULL when maxOrder is 0). summary() is evaluated once per fit and the
# rows are bound together in a single step after all fits are done.
compareMarkovOrders <- function(clickstreams, maxOrder) {
  rows <- lapply(seq_len(maxOrder), function(k) {
    fitSummary <- summary(
      fitMarkovChain(clickstreamList = clickstreams, order = k)
    )
    data.frame(Order = k, AIC = fitSummary$aic, BIC = fitSummary$bic)
  })
  do.call(rbind, rows)
}

# Simulate 100000 sessions over seven pages plus the states Defer and Buy.
set.seed(123)
cls <- randomClickstreams(
  states = c("P1", "P2", "P3", "P4", "P5", "P6", "P7", "Defer", "Buy"),
  startProbabilities = c(0.2, 0.25, 0.1, 0.15, 0.1, 0.1, 0.1, 0, 0),
  # matrix() fills column by column, so every row of numbers below is one
  # column of the 9 x 9 matrix. The first seven columns each sum to 1; the
  # last two are all zero (presumably the columns for Defer and Buy,
  # following the order of `states` -- confirm against the package docs).
  transitionMatrix = matrix(
    c(
      0.01, 0.09, 0.05, 0.21, 0.12, 0.17, 0.11, 0.2, 0.04,
      0.1, 0, 0.29, 0.06, 0.11, 0.13, 0.21, 0.1, 0,
      0.07, 0.16, 0.03, 0.25, 0.23, 0.08, 0.03, 0.12, 0.03,
      0.16, 0.14, 0.07, 0, 0.05, 0.22, 0.19, 0.1, 0.07,
      0.24, 0.27, 0.17, 0.13, 0, 0.03, 0.09, 0.06, 0.01,
      0.11, 0.18, 0.04, 0.15, 0.26, 0, 0.1, 0.11, 0.05,
      0.21, 0.07, 0.08, 0.2, 0.14, 0.18, 0.02, 0.08, 0.02,
      0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0
    ),
    nrow = 9
  ),
  meanLength = 50,
  n = 100000
)
summary(cls)

# AIC/BIC for orders 1 to 5 on the full simulated data set.
maxOrder <- 5
result <- compareMarkovOrders(cls, maxOrder)
result

# Split the sessions into five clusters and inspect the first one.
clusters <- clusterClickstreams(clickstreamList = cls, order = 1, centers = 5)
summary(clusters$clusters[[1]])

# The same order comparison, restricted to the first cluster.
maxOrder <- 5
result <- compareMarkovOrders(clusters$clusters[[1]], maxOrder)
result

# Second-order chain on the first cluster, used to predict two clicks ahead
# of the pattern P1 -> P4 -> P6.
mc <- fitMarkovChain(clickstreamList = clusters$clusters[[1]], order = 2)
summary(mc)
pattern <- new(
  "Pattern",
  sequence = c("P1", "P4", "P6"),
  absorbingProbabilities = data.frame(Buy = 0.22, Defer = 0.78)
)
resultPattern <- predict(mc, startPattern = pattern, dist = 2)
resultPattern

# Same prediction from a fifth-order chain fitted to all sessions; the start
# pattern is unchanged, so it is reused rather than built a second time.
mc <- fitMarkovChain(clickstreamList = cls, order = 5)
resultPattern <- predict(mc, startPattern = pattern, dist = 2)
resultPattern

## 5.
## Alternative approaches

# Tabulate the clickstreams with frequencies() and display the table
# (presumably click counts per session -- confirm against the clickstream
# documentation).
frequencyDF <- frequencies(cls)
frequencyDF

# Convert the clickstreams to transactions, mine them with cspade() using a
# support setting of 0, and coerce the mined sequences to a data frame.
trans <- as.transactions(cls)
sequences <- as(
  object = cspade(trans, parameter = list(support = 0)),
  Class = "data.frame"
)
sequences