# load the package
library("TSclust")

##### EXAMPLE 1 SYNTHETIC SERIES DATASET #####

# load the dataset
data("synthetic.tseries")
# create the true cluster solution
true_cluster <- rep(1:6, each = 3)

# create distance matrix for the integrated periodogram dissimilarity
IP.dis <- diss(synthetic.tseries, "INT.PER")
# hierarchical cluster solution
IP.hclus <- cutree(hclust(IP.dis), k = 6)
# rate the solution based on the implemented cluster similarity
cluster.evaluation(true_cluster, IP.hclus)

library("cluster")
# partition against medoids solution
IP.pamclus <- pam(IP.dis, k = 6)$clustering
# rate the solution based on the implemented cluster similarity
cluster.evaluation(true_cluster, IP.pamclus)
# show the solution
IP.pamclus

# analyzed Autocorrelation based solution
ACF.dis <- diss(synthetic.tseries, "ACF", p = 0.05)
ACF.hclus <- cutree(hclust(ACF.dis), k = 6)
cluster.evaluation(true_cluster, ACF.hclus)

ACF.pamclus <- pam(ACF.dis, k = 6)$clustering
cluster.evaluation(true_cluster, ACF.pamclus)
ACF.hclus

# dissimilarity based on ARMA models, take the p-value

AR.MAH.PVAL.dis <- diss(synthetic.tseries, "AR.MAH")$p_value
# p-value clustering method
AR.MAH.PVAL.clus <- pvalues.clust(AR.MAH.PVAL.dis, 0.05)
cluster.evaluation(true_cluster, AR.MAH.PVAL.clus)
AR.MAH.PVAL.clus
pvalues.clust(AR.MAH.PVAL.dis, 0.6)

# nonparametric spectral approximation dissimilarity, high computational
# requirements
LLR.dis <- diss(synthetic.tseries, "SPEC.LLR", meth = "LK", n = 500)
LLR.pam <- pam(LLR.dis, k = 6)$clustering
cluster.evaluation(true_cluster, LLR.pam)
LLR.pam


##### EXAMPLE 2.1 INTEREST RATES DATASET - PREDICTION BASED DISTANCE #####

# load the dataset
data("interest.rates")

# since the dissimilarity is bootstrap based, set a seed for reproducibility
set.seed(35000)

# sample plot of the prediction densities from which the distance is calculated,
# the prediction is done at an horizon h=6 steps

diss.PRED(interest.rates[, 13], interest.rates[, 16], h = 6, B = 2000, logarithm.x = TRUE, 
  logarithm.y = TRUE, differences.x = 1, differences.y = 1, plot = T)$L1dist

# prepare the correct differences and logarithms for all the countries involved
# in the dataset
diffs <- rep(1, ncol(interest.rates))
logs <- rep(TRUE, ncol(interest.rates))

set.seed(74748)
# compute the distance at for the dataset, high computational requirements
dpred <- diss(interest.rates, "PRED", h = 6, B = 1200, logarithms = logs, differences = diffs, 
  plot = T)
# preform hierarchical clustering and plot the dendogram
hc.dpred <- hclust(dpred$dist)

plot(hc.dpred, main = "", sub = "", xlab = "", ylab = "")


##### EXAMPLE 2.2 INTEREST RATES DATASET - DISSIMILARITY COMPARISON #####

# data are properly transformed
relative.rate.change <- diff(log(interest.rates), 1)
# 5-cluster solutions will be stored in 'Five.cluster.sol'' matrix
Five.cluster.sol <- matrix(0, nrow = 18, ncol = 3)
colnames(Five.cluster.sol) <- c("ACF", "LNP", "PIC")
rownames(Five.cluster.sol) <- colnames(relative.rate.change)
# obtaining the 5-cluster solutions with diss.ACF, diss.PER and diss.AR.PIC
Five.cluster.sol[, 1] <- cutree(hclust(diss(relative.rate.change, "ACF", p = 0.05)), 
  k = 5)
Five.cluster.sol[, 2] <- cutree(hclust(diss(relative.rate.change, "PER", normalize = TRUE, 
  logarithm = TRUE)), k = 5)
Five.cluster.sol[, 3] <- cutree(hclust(diss(relative.rate.change, "AR.PIC")), k = 5)
# show the solution
Five.cluster.sol


##### PAIRS DATASET #####

# load time series dataset
data("paired.tseries")

# ground truth for the first branch experiment, when two elements are joined
# together they appear in the merge field with negative sign, see ?hclust
true_pairs <- data.frame(-matrix(1:36, nrow = 2, byrow = F))

# euclidean
deucl <- diss(paired.tseries, "EUCL")
hceucl <- hclust(deucl, "complete")
# count within the hierarchical cluster the pairs
Qeucl <- sum(true_pairs %in% data.frame(t(hceucl$merge)))/18
Qeucl

# CID distance
dcid <- diss(paired.tseries, "CID")
hccid <- hclust(dcid, "complete")
Qcid <- sum(true_pairs %in% data.frame(t(hccid$merge)))/18
Qcid

# PDC with automatic selection of the embedding dimension
dpdc <- diss(paired.tseries, "PDC")
hcpdc <- hclust(dpdc, "complete")
Qpdc <- sum(true_pairs %in% data.frame(t(hcpdc$merge)))/18
Qpdc

# apply 'as-is' CDM
dcdm <- diss(paired.tseries, "CDM", "gz")
hccdm <- hclust(dcdm, "complete")
Qcdm <- sum(true_pairs %in% data.frame(t(hccdm$merge)))/18
Qcdm

# SAX illustration
testseries <- as.ts(cbind(paired.tseries[, 16], paired.tseries[, 28]))
colnames(testseries) <- c("Series 16", "Series 28")
SAX.plot(testseries, w = 20, alpha = 5)


# for numerical reasons, a symbolic representation could provide better results
# we can use SAX, since it is used in the reference paper and implemented in the
# package

# normalization, PAA reduction and SAX symbolization
tsersax <- apply(paired.tseries, 2, function(x) {
  x <- (x - mean(x))/sd(x)
  x <- PAA(x, w = 343)
  convert.to.SAX.symbol(x, alpha = 27)
})
tsersax <- as.ts(tsersax)

dcdmsymb <- diss(tsersax, "CDM", "gz")
hccdmsymb <- hclust(dcdmsymb, "ward")
Qcdmsymb <- sum(true_pairs %in% data.frame(t(hccdmsymb$merge)))/18
Qcdmsymb

# force embedding dimension of 5 and time delay 8, for better results
dpdc_m5t8 <- diss(paired.tseries, "PDC", m = 5, t = 8)
hcpdc_m5t8 <- hclust(dpdc_m5t8, "complete")
Qpdc_m5t8 <- sum(true_pairs %in% data.frame(t(hcpdc_m5t8$merge)))/18
Qpdc_m5t8

##### EXTRA EXAMPLE : ELECTRICITY DATASET #####

# load the electricity dataset
data("electricity")

# single pair example
diss.CORT(electricity[, 1], electricity[, 21], k = 3, deltamethod = "DTW")


# CORT dissimilarity
distcort1 <- diss(electricity, "CORT", k = 3, deltamethod = "DTW")

# perform hierarchical cluster and plot the results
hc1 <- hclust(distcort1, "average")
plot(hc1)

# select a three cluster solution from the dendogram and show the results
clustcort1 <- cutree(hc1, k = 3)
clustcort1


# alternative cluster using CORT but with no weight for the dynamic behavior (raw DTW)
distcort2 <- diss(electricity, "CORT", k = 0, deltamethod = "DTW")
hc2 <- hclust(distcort2, "average")
# select three cluster and compare with the previous solution that uses dynamics
# behavior
clustcort2 <- cutree(hc2, k = 3)
clustcort2

# compare their silhouette coefficients
library("cluster")
# compute the Silhouette coefficients with diss.CORT and k=3
silh1 <- silhouette(clustcort1, distcort1)
# compute the Silhouette coefficients with diss.CORT and k=0
silh2 <- silhouette(clustcort2, distcort2)
# Silhouette plots associated with silh1 and silh2
plot(silh1, main = "k=3")
plot(silh2, main = "k=0")