# Code used for the analyses reported in
#   "intRinsic: An R Package for Model-Based Estimation of the
#    Intrinsic Dimension of a Dataset"
# by Francesco Denti

# Loading required packages and basic set up ----------------------------------
library("dplyr")
library("patchwork")
library("ggplot2")
theme_set(theme_bw())

library("knitr")
library("ggdendro")
library("RColorBrewer")
library("pheatmap")
library("reshape2")
library("randomForest")
library("mcclust")
library("latex2exp")
library("salso")
library("sads")
library("kableExtra")

## install.packages("intRinsic")
library("intRinsic")
## for the most up-to-date version, please download the package from
## devtools::install_github("Fradenti/intRinsic")

# Auxiliary functions -----------------------------------------------------
## this replaces the deprecated gg3d package functions, which produced warnings
gg_scatter3d <- function(Data, theta, phi, col, alpha) {
  pmat <- plot3D::perspbox(z = diag(2), plot = FALSE, theta = theta, phi = phi)
  x_axis <- plot3D::trans3D(x = 0:1, y = 0, z = 0, pmat = pmat) |>
    data.frame() |>
    mutate(axis = "x")
  y_axis <- plot3D::trans3D(x = 0, y = 0:1, z = 0, pmat = pmat) |>
    data.frame() |>
    mutate(axis = "y")
  z_axis <- plot3D::trans3D(x = 0, y = 0, z = 0:1, pmat = pmat) |>
    data.frame() |>
    mutate(axis = "z")

  Axes <- bind_rows(x_axis, y_axis, z_axis)

  data <- Data |> mutate(
    x = scales::rescale(x, from = range(Data$x), to = c(0, 1)),
    y = scales::rescale(y, from = range(Data$y), to = c(0, 1)),
    z = scales::rescale(z, from = range(Data$z), to = c(0, 1)))

  XY <- plot3D::trans3D(
    x = data$x,
    y = data$y,
    z = data$z,
    pmat = pmat) |>
    data.frame()

  Q <- ggplot() +
    theme_void() +
    geom_segment(aes(x = Axes[1, 1], xend = Axes[2, 1], y = Axes[1, 2], yend = Axes[2, 2])) +
    geom_segment(aes(x = Axes[3, 1], xend = Axes[4, 1], y = Axes[3, 2], yend = Axes[4, 2])) +
    geom_segment(aes(x = Axes[5, 1], xend = Axes[6, 1], y = Axes[5, 2], yend = Axes[6, 2])) +
    geom_text(aes(x = Axes[c(2, 4, 6), 1], y = Axes[c(2, 4, 6), 2]),
              label = c("x-axis", "y-axis", "z-axis"),
              hjust = c(1, 1.1, 1),
              vjust = c(1, 1, -0.2),
              angle = c(-5, 10, 90)) +
    ylab("3D scatterplot") + xlab("3D scatterplot") +
    geom_point(data = XY, aes(x = x, y = y), col = col, alpha = alpha)+
    theme(text = element_text(size = 15))

  return(Q)
}
## Custom function to plot distance path
dist_path <- function(X, class = NULL, alpha = .2) {
  f <- as.matrix(stats::dist(X))
  f <- apply(f, 2, cummean)

  D <- reshape2::melt(f)

  if (is.null(class)) {
    Q <-  ggplot(data = D) +
      geom_path(aes(x = Var1, y = value, group = Var2),
                alpha = alpha,
                col = 4)
  } else {
    D <- data.frame(D, class = rep(class, rep(nrow(X), nrow(X))))
    pal <- RColorBrewer::brewer.pal(9, "Set1")
    Q <- ggplot(D) +
      geom_path(aes(
        x = Var1,
        y = value,
        group = Var2,
        col = class),
      alpha = alpha) +
      scale_color_manual(values = pal[1:nlevels(factor(class))])
  }

  Q + theme(text = element_text(size = 15), legend.position = "bottom") +
    xlab("NN order") + ylab("Cumulative mean of distances")
}

# Section 2---------------------------------------------------------------------

## Figure 2 Overlapping Paretos ------------------------------------------------
nd <- 4
mycolors <- colorRampPalette(brewer.pal(9, "Blues")[4:9])(nd)

x  <- seq(1.001, 4, by = .02)
y1 <- cbind(.5, x, sads::dpareto(x, .5, 1))
y2 <- cbind(1, x, sads::dpareto(x, 1, 1))
y3 <- cbind(1.5, x, sads::dpareto(x, 1.5, 1))
y4 <- cbind(2.5, x, sads::dpareto(x, 2, 1))
YY <- data.frame(rbind(y1, y2, y3, y4))

ggplot2::ggplot() +
  ggplot2::geom_line(
    data = YY,
    ggplot2::aes(
      x = .data$x,
      y = .data$V3,
      col = factor(.data$V1),
      group = .data$V1
    ),
    linewidth = 1
  ) +
  ggplot2::scale_color_manual("d", values = mycolors) +
  ggplot2::ggtitle("Overlapping Pareto(1, d) Densities") +
  ggplot2::xlab(bquote(mu)) +
  ggplot2::ylab("Density") +
  ggplot2::theme_bw() +
  theme(
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    title = element_text(size = 15),
    legend.text = element_text(size = 16),
    legend.position = "bottom"
  )

# Section 3.1 -----------------------------------------------------------------

## Creation of Simulated Datasets ----------------------------------------------
set.seed(123456)

## Swissroll dataset -----------------------------------------------------------
Swissroll <- Swissroll(n = 1000)

## HyperCube dataset (five dimensional uniform distribution) -------------------
HyperCube <- replicate(5, runif(500))
HyperCube <- cbind(HyperCube, 0, 0, 0)

##  Mixture of three Gaussians--------------------------------------------------
x0 <- rnorm(500, mean = -5, sd = 1)
x1 <- cbind(x0, 3 * x0, 0, 0, 0)
x2 <- cbind(replicate(3, rnorm(500)), 0, 0)
x3 <- replicate(5, rnorm(500, 5))
GaussMix <- rbind(x1, x2, x3)
class_GMix <- rep(c("A", "B", "C"), rep(500, 3))

## Scatterplot Figure 3 --------------------------------------------------------
sw_plo_1 <- ggplot(Swissroll) +
  geom_point(aes(x = x, y = y), alpha = .5, col = 4) +
  theme(text = element_text(size = 15))
sw_plo_2 <- ggplot(Swissroll) +
  geom_point(aes(x = z, y = y), alpha = .5, col = 4) +
  theme(text = element_text(size = 15))
sw_plo_3 <- ggplot(Swissroll) +
  geom_point(aes(x = x, y = z), alpha = .5, col = 4) +
  theme(text = element_text(size = 15))

g2 <- gg_scatter3d(Data = Swissroll,
                   theta = 25,
                   phi = 0,
                   col = 4,
                   alpha = .5)
G <- (sw_plo_1 + sw_plo_2) / (sw_plo_3 + g2)
G

# Section 3.2 -----------------------------------------------------------------

## Computing ratios on simulated datasets --------------------------------------
mus_Swissroll <- compute_mus(X = Swissroll)
mus_HyperCube <- compute_mus(X = HyperCube)
mus_GaussMix <- compute_mus(X = GaussMix)

mus_Swissroll_1 <- compute_mus(X = Swissroll, n1 = 5, n2 = 10)
mus_HyperCube_1 <- compute_mus(X = HyperCube, n1 = 5, n2 = 10)
mus_GaussMix_1 <- compute_mus(X = GaussMix, n1 = 5, n2 = 10)

mus_Swissroll_2 <- compute_mus(X = Swissroll, n1 = 10, n2 = 20)
mus_HyperCube_2 <- compute_mus(X = HyperCube, n1 = 10, n2 = 20)
mus_GaussMix_2 <- compute_mus(X = GaussMix, n1 = 10, n2 = 20)

mus_GaussMix_2
# extra:
plot(mus_Swissroll_2, range_d = c(1, 2, 3, 4))

## Figure 4 --------------------------------------------------------------------
P2 <- rbind(
  cbind(rbind(
    data.frame(mus = mus_Swissroll,  order = "n1 = 1, n2 = 2"),
    data.frame(mus = mus_Swissroll_1, order = "n1 = 5, n2 = 10"),
    data.frame(mus = mus_Swissroll_2, order = "n1 = 10, n2 = 20")
  ),
  name = "Swissroll"),
  cbind(rbind(
    data.frame(mus = mus_HyperCube,  order = "n1 = 1, n2 = 2"),
    data.frame(mus = mus_HyperCube_1, order = "n1 = 5, n2 = 10"),
    data.frame(mus = mus_HyperCube_2, order = "n1 = 10, n2 = 20")
  ),
  name = "HyperCube"),
  cbind(rbind(
    data.frame(mus = mus_GaussMix,   order = "n1 = 1, n2 = 2"),
    data.frame(mus = mus_GaussMix_1, order = "n1 = 5, n2 = 10"),
    data.frame(mus = mus_GaussMix_2, order = "n1 = 10, n2 = 20")
  ),
  name = "GaussMix")
)

P2$name <- factor(P2$name,
                  levels = c("Swissroll", "HyperCube", "GaussMix"))
P2$order <-  factor(P2$order,
                    levels = c("n1 = 1, n2 = 2",
                               "n1 = 5, n2 = 10",
                               "n1 = 10, n2 = 20"))

ggplot(P2 |> filter(mus < 15)) +
  geom_histogram(
    aes(x = mus, y = after_stat(density)),
    col = "darkblue",
    fill = "lightgray",
    bins = 90) +
  ylab("Density") +
  xlab(bquote(mu)) +
  facet_wrap( ~ name + order, nrow = 3, scales = "free_y") +
  theme(
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    title = element_text(size = 15),
    strip.text.x = element_text(size = 15)) +
  coord_cartesian(xlim = c(1, 4)) +
  geom_vline(xintercept = 5,
             col = "darkblue",
             lty = 3)

## Table 2 ---------------------------------------------------------------------
summaGM <- data.frame(rbind(
  summary(mus_GaussMix),
  summary(mus_GaussMix_1),
  summary(mus_GaussMix_2)
))
summaGM <- round(summaGM, 4)
summaGM <-
  cbind(c("n1 = 1, n2 = 2", "n1 = 5, n2 = 10", "n1 = 10, n2 = 20"),
        summaGM)
colnames(summaGM) =
  c("NN orders",
    "Minimum",
    "1st quartile",
    "Median",
    "Mean",
    "3rd quartile",
    "Maximum")

kbl(
  x = summaGM,
  booktabs = TRUE,
  align = c("lcccccc"),
  caption = "Summary statistics for the \\texttt{GaussMix} dataset. Each row corresponds to a different combination of NN orders.",
  label = "summary_gmx"
)
# 1  & 2   & 1.0002 & 1.1032 & 1.3048 & 18.4239 & 1.9228 & 5874.3666\\
# 5  & 10  & 1.0234 & 1.1627 & 1.2858 & 1.5344  & 1.7043 & 6.9533   \\
# 10 & 20  & 1.0472 & 1.1685 & 1.2613 & 1.4987  & 1.7250 & 4.0069   \\

## ---Example of failure with duplicates p12------------------------------------
# !!!!! this code will produce a warning!
Dummy_Data_with_replicates <- rbind(c(1, 2, 3), c(1, 2, 3),
                                    c(1, 4, 3), c(1, 4, 3),
                                    c(1, 4, 5))
mus_shorter <- compute_mus(X = Dummy_Data_with_replicates)

# Section 3.3 - TWO-NN ---------------------------------------------------------

## linear fit ----------------------------------------------------------------------
lin_1 <- twonn(X = Swissroll,
               method = "linfit",
               c_trimmed = 0)
lin_2 <- twonn(X = Swissroll,
               method = "linfit",
               c_trimmed = 0.001)
lin_3 <- twonn(X = Swissroll,
               method = "linfit",
               c_trimmed = 0.01)
lin_4 <- twonn(X = Swissroll,
               method = "linfit",
               c_trimmed = 0.05)
lin_5 <- twonn(X = Swissroll,
               method = "linfit",
               c_trimmed = 0.1)
lin_2
summary(lin_2)
### Table 3 --------------------------------------------------------------------
tab3 <- t(round(rbind(
  lin_1$est,
  lin_2$est,
  lin_3$est,
  lin_4$est,
  lin_5$est
), 4))
tab3 <- data.frame(x = c("Lower bound", "Estimate", "Upper bound"), tab3)
colnames(tab3) =
  c("", "0%",
    "0.1%",
    "1%",
    "5%",
    "10%")
kbl(x = tab3,
    booktabs = TRUE,
    align = c("lcccccc"))

## extra - example method
plot(lin_2)

### Figure 5 -------------------------------------------------------------------
l1 <- autoplot(lin_1, title = "No trimming") +
  theme(text = element_text(size = 15))
l2 <- autoplot(lin_5, title = "10% trimming") +
  theme(text = element_text(size = 15))
l1 + l2

## MLE -------------------------------------------------------------------------
dist_Eucl_D2 <- dist(HyperCube)
dist_Manh_D2 <- dist(HyperCube, method = "manhattan")
dist_Canb_D2 <- dist(HyperCube, method = "canberra")

mle_11 <- twonn(dist_mat = dist_Eucl_D2)
mle_12 <- twonn(dist_mat = dist_Eucl_D2, alpha = .99)
mle_21 <- twonn(dist_mat = dist_Manh_D2)
mle_22 <- twonn(dist_mat = dist_Manh_D2, alpha = .99)
mle_31 <- twonn(dist_mat = dist_Canb_D2)
mle_32 <- twonn(dist_mat = dist_Canb_D2, alpha = .99)

summary(mle_12)

### Table 4 --------------------------------------------------------------------
Dml <- t(round(
  rbind(
    mle_11$est,
    mle_12$est,
    mle_21$est,
    mle_22$est,
    mle_31$est,
    mle_32$est),
  4))
Dml <- data.frame(x = c("Lower bound", "Estimate", "Upper bound"), Dml)

# version for spin file
colnames(Dml) <-
  c("",
    "Eucl. - alpha = 0.95",
    "Eucl. - alpha = 0.99",
    "Manh. - alpha = 0.95",
    "Manh. - alpha = 0.99",
    "Canb. - alpha = 0.95",
    "Canb. - alpha = 0.99")

kbl(x = Dml,
    booktabs = TRUE,
    align = c("lcccccc"))

## Bayesian Fit ----------------------------------------------------------------
bay_1 <- twonn(X = Swissroll, method = "bayes")
bay_2 <- twonn(X = Swissroll, method = "bayes", alpha = 0.99)
bay_3 <- twonn(
  X = Swissroll,
  method = "bayes",
  a_d = 1,
  b_d = 1)
bay_4 <- twonn(
  X = Swissroll,
  method = "bayes",
  a_d = 1,
  b_d = 1,
  alpha = 0.99)
bay_5 <- twonn(
  X = Swissroll,
  method = "bayes",
  a_d = 10,
  b_d = 10,
  alpha = 0.99)

summary(bay_5)

# extra - example method
plot(bay_5)

### Figure 6 -------------------------------------------------------------------
pbay1 <- autoplot(
  bay_4,
  plot_upp = 3,
  by = 0.01,
  title = TeX("Prior: $d \\sim $ Gamma(1, 1)"))
pbay2 <- autoplot(
  bay_5,
  plot_upp = 3,
  by = 0.01,
  title = TeX("Prior: $d \\sim $ Gamma(10, 10)"))

pbay1 + pbay2

### Table 5 -------------------------------------------------------------------
Dbay <- round(cbind(bay_1$est,
                    bay_2$est,
                    bay_3$est,
                    bay_4$est), 4)
Dbay <-
  data.frame(x = c("Lower bound", "Mean", "Median", "Mode", "Upper bound"),
             Dbay)

colnames(Dbay) <-
  c("alpha",
    "0.95",
    "0.99",
    "0.95",
    "0.99")

kbl(x = Dbay,
    booktabs = TRUE,
    align = c("lcccc"))

# Section 3.4 - Hidalgo  -------------------------------------------------------

## Gaussmix dataset ------------------------------------------------------------

### Figure 10 ------------------------------------------------------------------
mus_gm <- compute_mus(GaussMix)
summary(twonn(mus = mus_gm, method = "linfit"))
summary(twonn(mus = mus_gm, method = "mle"))

mus_gmA <- compute_mus(GaussMix[class_GMix == "A", ])
mus_gmB <- compute_mus(GaussMix[class_GMix == "B", ])
mus_gmC <- compute_mus(GaussMix[class_GMix == "C", ])

n       <- base::length(mus_gm)
F_mui   <- (0:(n - 1)) / n
y       <- -base::log(1 - (F_mui))
x       <- base::sort(base::log(mus_gm))
modlin1 <- stats::lm(y ~ x - 1)
Res     <- coef(modlin1)

g11 <-
  autoplot(intRinsic:::twonn_linfit(mus = mus_gmA), title = "Component A")
g12 <-
  autoplot(intRinsic:::twonn_linfit(mus = mus_gmB), title = "Component B")
g13 <-
  autoplot(intRinsic:::twonn_linfit(mus = mus_gmC), title = "Component C")

pal <- RColorBrewer::brewer.pal(9, "Set1")

p1 <-   ggplot() +
  geom_point(aes(x = x, y = y, col = class_GMix))+
  geom_abline(
    intercept = 0,
    slope = intRinsic:::twonn_linfit(mus = mus_gm)$est[2],
    col = I("black")) +
  geom_abline(
    intercept = 0,
    slope = intRinsic:::twonn_linfit(mus = mus_gmA)$est[2],
    col = I(pal[1]),
    lty = 2) +
  geom_abline(
    intercept = 0,
    slope = intRinsic:::twonn_linfit(mus = mus_gmB)$est[2],
    col = I(pal[2]),
    lty = 2) +
  geom_abline(
    intercept = 0,
    slope = intRinsic:::twonn_linfit(mus = mus_gmC)$est[2],
    col = I(pal[3]),
    lty = 2) +
  scale_color_manual("Mixture Component", values = c(pal[1], pal[2], pal[3])) +
  ylab("-log(1-(i/N))") +
  xlab(expression(log(mu))) +
  annotate(
    "label", -Inf,
    Inf,
    label = paste("ID:", round(Res, 3)),
    hjust = -0.05,
    vjust = 1.1,
    size = 4) +
  annotate(
    "label",
    Inf, -Inf,
    label = paste("R^2:", round(summary(modlin1)$r.squared, 3)),
    parse = TRUE,
    hjust = 1.05,
    vjust = -0.1,
    size = 4) +
  geom_point(aes(x = x, y = y, col = class_GMix), alpha = .9) +
  theme(
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    title = element_text(size = 15),
    legend.text = element_text(size = 16),
    legend.position = "bottom") +
  ggtitle("GaussMix dataset")

p1 / (g11 + g12 + g13)

## Figure 8 ------------------------------------------------------------------
(dist_path(X = (HyperCube)) + ggtitle("HyperCube dataset")) +
  (dist_path(X = (GaussMix), class = class_GMix) + ggtitle("GaussMix dataset"))

## Hidalgo preprocessing -------------------------------------------------------

## Computing matrices Nq
set.seed(12345)
ind <- sort(sample(1:1500, 100, FALSE))
Nq1 <- compute_mus(GaussMix[ind, ], Nq = TRUE, q = 1)$NQ
Nq2 <- compute_mus(GaussMix[ind, ], Nq = TRUE, q = 5)$NQ
Nq3 <- compute_mus(GaussMix[ind, ], Nq = TRUE, q = 10)$NQ

X1  <- data.frame(reshape2::melt(Nq1), ind = "q = 1")
X2  <- data.frame(reshape2::melt(Nq2), ind = "q = 5")
X3  <- data.frame(reshape2::melt(Nq3), ind = "q = 10")

NQs     <- rbind(X1, X2, X3)
NQs$ind <- factor(NQs$ind, levels = c("q = 1", "q = 5", "q = 10"))

## Figure  9 ------------------------------------------------------------------
pal <- RColorBrewer::brewer.pal(8, "Blues")

ggplot(data = NQs) +
  geom_tile(aes(x = Var1, y = Var2, fill = as.factor(value))) +
  scale_fill_manual("Neighbors?", values = pal[c(1, 8)]) +
  theme_bw() +
  xlab("Observation") +
  ylab("Observation") +
  theme(legend.position = "none") +
  facet_wrap(~ ind) +
  theme(text = element_text(size = 15))

## Hidalgo: running the model --------------------------------------------------
## Note: Running the models is time consuming for 1500 data points.
## To load provided pre-saved runs please change the path
## below to the suitable folder

personal_path <- "rds_files"
FILE <- file.path(personal_path, "HID01_DEC22.rds")
if (file.exists(FILE)) {
    hid_fit    <- readRDS(FILE)
} else {
    set.seed(1234)
    hid_fit <- Hidalgo(
        X = GaussMix,
        K = 10,
        alpha_Dirichlet = .05,
        nsim = 2000,
        burn_in = 2000,
        thinning = 5)
}
FILE <- file.path(personal_path, "HID02_DEC22.rds")
if (file.exists(FILE)) {
    hid_fit_TR <- readRDS(FILE)
} else {
    set.seed(12345)
    hid_fit_TR <- Hidalgo(
        X = GaussMix,
        K = 10,
        prior_type = "Truncated_PointMass",
        D = 5,
        alpha_Dirichlet = .05,
        nsim = 2000,
        burn_in = 2000,
        thinning = 5)
}
hid_fit_TR

## Figure 10 -----------------------------------------------------------------
autoplot(hid_fit) / autoplot(hid_fit_TR)

## Figure 11 -------------------------------------------------------------------
autoplot(hid_fit, type = "point_estimates", title = "Conjugate prior") +
    coord_cartesian(ylim = c(0, 5.5)) +
    autoplot(hid_fit_TR, type = "point_estimates", title = "Truncated prior") +
    coord_cartesian(ylim = c(0, 5.5)) +
    geom_hline(yintercept = 5, lty = 2)

## Figure 12 ---------------------------------------------------------------------------
psm_cl <- clustering(object = hid_fit_TR,
                     clustering_method = "dendrogram",
                     K = 3)
psm_cl

## plotting large heatmap, commented since it can be time consuming
autoplot(hid_fit_TR, type = "clustering")

## Figure 13 -------------------------------------------------------------------
autoplot(
  hid_fit_TR,
  type = "class",
  class = class_GMix,
  class_plot_type = "boxplot") +
autoplot(
  hid_fit_TR,
  type = "class",
  class = class_GMix,
  class_plot_type = "histogram")

id_by_class(hid_fit_TR, class_GMix)

# Section 4 - ALON MICROARRAY EXPERIMENT ---------------------------------------

## Load dataset and data preparation -------------------------------------------
data("AlonDS", package = "HiDimDA")
status   <- factor(AlonDS$grouping, labels = c("Cancer", "Healthy"))
Xalon    <- as.matrix(AlonDS[, -1])
nd       <- 40
mycolors <- colorRampPalette(brewer.pal(9, "Blues")[2:9])(nd)

## Figure 14 -------------------------------------------------------------------
Ann <- data.frame("Health Status" = status)
rownames(Xalon) <- rownames(Ann) <- paste("Patient", 1:62)
ann_colors  <-
  list(Health.Status = c(Healthy = "blue", Cancer = "lightblue"))
pheatmap::pheatmap(
  log(Xalon),
  show_colnames = FALSE,
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  annotation_row = Ann,
  angle_col = "0",
  annotation_colors = ann_colors,
  color = mycolors)

## Homogeneous ID Analysis------------------------------------------------------
Alon_twonn_1 <- twonn(Xalon, method = "linfit")
summary(Alon_twonn_1)
Alon_twonn_2 <- twonn(Xalon, method = "bayes")
summary(Alon_twonn_2)
Alon_twonn_3 <- twonn(Xalon, method = "mle")
summary(Alon_twonn_3)

## Figure 15 ------------------------------------------------------------------
autoplot(Alon_twonn_1) + autoplot(Alon_twonn_2)

## Heterogeneous ID Analysis ---------------------------------------------------

## Figure 16 - Distance paths -------------------------------------------------
dist_path(X = Xalon, class = status, alpha = .4) + ggtitle("Alon dataset")

## Hidalgo --------------------------------------------------------------------
## same as before, personalize your path to load the pre-saved version
FILE <- file.path(personal_path, "Alon_hidalgo_Dec22.rds")
if (file.exists(FILE)) {
    Alon_hid <- readRDS(FILE)
} else {
    set.seed(1234)
    Alon_hid <- Hidalgo(
        X = (Xalon),
        K = 15,
        a0_d = 1,
        b0_d = 1,
        alpha_Dirichlet = .05,
        nsim = 10000,
        burn_in = 100000,
        thin = 5)
}
Alon_hid

## plot chains - takes some time
autoplot(Alon_hid)
## Summary and Figure 17 ------------------------------------------------------
Alon_psm       <- clustering(Alon_hid, K = 4)
Alon_psm$clust <- case_when(
  Alon_psm$clust == 1 ~ 3,
  Alon_psm$clust == 2 ~ 2,
  Alon_psm$clust == 3 ~ 1,
  Alon_psm$clust == 4 ~ 4)
dd     <- hclust(as.dist(1 - Alon_psm$psm))
Status <- status[dd$order]
d1 <- ggdendrogram(dd, rotate = FALSE, theme_dendro = TRUE) +
  geom_point(aes(
    y = rep(0, 62),
    x =  1:62,
    fill = Status),
  pch = 21,
  size = 2.5) +
  scale_color_discrete("status") +
  theme(legend.position = "top", text = element_text(size = 15))
d2 <- autoplot(
  Alon_hid,
  type = "class",
  class = Alon_psm$clust,
  class_plot_type = "boxplot")

## Current version
d1 / (autoplot(
  Alon_hid,
  type = "class_plot",
  class_plot_type = "boxplot",
  class = status) + d2)

clustering_solution <- clustering(Alon_hid)$clust
## other summaries
clustering_solution <- Alon_psm$clust
id_by_class(Alon_hid, class = status)
id_by_class(Alon_hid, class = clustering_solution)

summa <- data.frame(
  cbind(
    table(Alon_psm$clust, status),
    `Proportion of Healthy` =
      table(Alon_psm$clust, status)[, 2] / rowSums(table(Alon_psm$clust, status)),
    `Average posterior id mean` =
      tapply(Alon_hid$id_summary$MEAN, Alon_psm$clust, mean),
    `Average posterior id median` =
      tapply(Alon_hid$id_summary$MEAN, Alon_psm$clust, median),
    `Average posterior id sd` =
      tapply(Alon_hid$id_summary$MEAN, Alon_psm$clust, sd)))

knitr::kable(summa, booktabs = TRUE, digits = 4)

## Random Forest Analysis -----------------------------------------------------
X_OR <- data.frame(Y = status, X = (Xalon))
set.seed(1231)
rfm1 <- randomForest::randomForest(Y ~ .,
                                   data = X_OR,
                                   type = "classification",
                                   ntree = 100)
rfm1
X_ID <- data.frame(Y     = status,
                   X     = summary(Alon_hid),
                   clust = factor(Alon_psm$clust))

set.seed(1231)
rfm2 <- randomForest::randomForest(Y ~ .,
                                   data = X_ID,
                                   type = "classification",
                                   ntree = 100)
rfm2

## extra
importance(rfm2)

## Appendix ---------------------------------------------------------------
set.seed(12211221)
X_highdim <- replicate(50, rnorm(5000))
intrinsicDimension::essLocalDimEst(X_highdim)
summary(twonn(X_highdim))
intrinsicDimension::essLocalDimEst(Swissroll)
summary(twonn(Swissroll, c_trimmed = 0.001))
intrinsicDimension::essLocalDimEst(data = Xalon)

## Session info -----------------------------------------------------------
sessionInfo()
