library("rEMM") ### Basic usage data(EMMTraffic) EMMTraffic emm <- EMM(threshold=0.2, measure="eJaccard") emm <- build(emm, EMMTraffic) size(emm) cluster_counts(emm) cluster_centers(emm) plot(emm, method="graph") transition_matrix(emm) transition_matrix(emm, type="counts") transition(emm, "1", "2", type="probability") predict(emm, n=2, current="2") predict(emm, n=2, current="2", probabilities=TRUE) ### Manipulating EMMs emm_3removed <- remove_clusters(emm, "3") plot(emm_3removed, method = "graph") emm_52removed <- remove_transitions(emm, "5", "2") plot(emm_52removed, method = "graph") emm_25merged <- merge_clusters(emm, c("2","5")) plot(emm_25merged, method = "graph") ### Using cluster structure fading and pruning emm_fading <- EMM(threshold=0.2, measure="eJaccard", lambda = 1) emm_fading <- build(emm_fading, EMMTraffic) plot(emm_fading, method = "graph") emm_pruned <- prune(emm_fading, count_threshold=0.1) plot(emm_pruned, method = "graph") ### Visualization options data("EMMsim") plot(EMMsim_train, col="gray", pch=EMMsim_sequence_train) lines(EMMsim_test, col ="gray") points(EMMsim_test, col="red", pch=5) text(EMMsim_test, labels=1:nrow(EMMsim_test), pos=3) emm <- EMM(threshold=0.1, measure="euclidean") emm <- build(emm, EMMsim_train) plot(emm, method="graph") plot(emm) plot(emm, data=EMMsim_train) ### Scoring new sequences score(emm, EMMsim_test, method="prod", match_cluster="exact", plus_one=FALSE, initial_transition = FALSE) score(emm, EMMsim_test, method="sum", match_cluster="exact", plus_one=FALSE, initial_transition = FALSE) transition_table(emm, EMMsim_test, match_cluster="exact", plus_one=FALSE) score(emm, EMMsim_test, method="prod") score(emm, EMMsim_test, method="sum") ### Reclustering states k <- 2:10 emmc <- recluster_hclust(emm, k=k, method ="average") plot(attr(emmc, "cluster_info")$dendrogram) sc <- sapply(emmc, score, EMMsim_test) names(sc) <- sq sc plot(emmc[[which.max(sc)]], method="graph") plot(emmc[[which.max(sc)]], data=EMMsim_train) ### Analyzing river flow data data(Derwent) summary(Derwent) plot(Derwent[,1], type="l", ylab="Gauged flows", main=colnames(Derwent)[1]) Derwent_scaled <- scale(Derwent) emm <- EMM(measure="euclidean", threshold=3) emm <- build(emm, Derwent_scaled) plot(emm, method = "state_counts", log="y") plot(emm) rare_threshold <- sum(cluster_counts(emm))*0.005 rare_threshold plot(prune(emm, rare_threshold)) catchment <- 1 plot(Derwent[,catchment], type="l", ylab="Gauged flows", main=colnames(Derwent)[catchment]) state_sequence <- find_clusters(emm, Derwent_scaled) mark_states <- function(states, state_sequence, ys, col=0, label=NULL, ...) { x <- which(state_sequence %in% states) points(x, ys[x], col=col, ...) if(!is.null(label)) text(x, ys[x], label, pos=4, col=col) } mark_states("11", state_sequence, Derwent[,catchment], col="blue", label="11") mark_states("12", state_sequence, Derwent[,catchment], col="red", label="12") catchment <- 6 plot(Derwent[,catchment], type="l", ylab="Gauged flows", main=colnames(Derwent)[catchment]) mark_states("11", state_sequence, Derwent[,catchment], col="blue", label="11") mark_states("12", state_sequence, Derwent[,catchment], col="red", label="12") ### Genetic sequence analysis data("16S") emm <- EMM("Kullback", threshold=0.1) emm <- build(emm, Mollicutes16S+1) plot(emm, method = "graph") it <- initial_transition(emm) it[it>0]