# (method 1) ON GITHUB
# devtools::install_github("vrunge/gfpop", force = TRUE)

# (method 2) ON CRAN
if (!require("gfpop")) {
  install.packages("gfpop")
  library("gfpop")
}

library("ggplot2")

##################################################################################################################
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
###  2. Constraint graphs and change-points model as a HMM  ###
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###

paperGraph(4)
paperGraph(5)
paperGraph(6)
paperGraph(7)
paperGraph(8)
paperGraph(9)

## Plus figures in Appendix A:

paperGraph(17)
paperGraph(18)
paperGraph(19)
paperGraph(20)
paperGraph(21)
paperGraph(22)
paperGraph(23)

##################################################################################################################
### ### ### ### ### ### ### ###
###  4. The gfpop package   ###
### ### ### ### ### ### ### ###

#-----------------------------------#
## chunk 1 : An example of edge #####
#-----------------------------------#

e1 <- Edge(state1 = "Dw", state2 = "Up", type = "up", penalty = 10, gap = 0.5)
e1
#-----------------------------------#
## chunk 2 : An example of graph ####
#-----------------------------------#

graph(
  Edge(state1 = "mu0", state2 = "mu0", penalty = 0, K = 3),
  Edge(state1 = "mu0", state2 = "Coll", penalty = 10, type = "std"),
  Edge(state1 = "Coll", state2 = "Coll", penalty = 0),
  Edge(state1 = "Coll", state2 = "mu0", penalty = 0, type = "std", K = 3),
  StartEnd(start = "mu0", end = c("mu0", "Coll")),
  Node(state = "mu0", min = 0, max = 0)
)

#-----------------------------------#
## chunk 3 : Some default graphs. ###
#-----------------------------------#

graph(type = "isotonic", penalty = 12)

## chunk 4 : Gaussian model with an up-down graph
set.seed(75)
n <- 1000
myData <- dataGenerator(n, c(0.1, 0.3, 0.5, 0.8, 1), c(1, 2, 1, 3, 1), sigma = 1)
myGraph <- graph(penalty = 2 * log(n), type = "updown")
gfpop(data = myData, mygraph = myGraph, type = "mean")

## chunk 5 : Gaussian Robust biweight model with an up-down graph
n <- 1000
chgtpt <- c(0.1, 0.3, 0.5, 0.8, 1)
mydata <- dataGenerator(n, chgtpt, c(0, 1, 0, 1, 0), sigma = 1)
mydata <- mydata + 5 * (rbinom(n, 1, 0.05)) - 5 * (rbinom(n, 1, 0.05))
beta <- 2 * log(n)
myGraph <- graph(
  Edge("Dw", "Up", type = "up", penalty = beta, gap = 1, K = 3),
  Edge("Up", "Dw", type = "down", penalty = beta, gap = 1, K = 3),
  Edge("Dw", "Dw", type = "null", K = 3),
  Edge("Up", "Up", type = "null", K = 3),
  StartEnd(start = "Dw", end = "Dw"))
gfpop(data = mydata, mygraph = myGraph, type = "mean")

#--------------------------------------------------#
## chunk 6 : Poisson model with isotonic up graph ##
#--------------------------------------------------#

n <- 1000
chgtpt <- c(0.1, 0.3, 0.5, 0.8, 1)
mydata <- dataGenerator(n, chgtpt, c(1, 3, 5, 7, 12), sigma = 1, type = "poisson")
beta <- 2 * log(n)
myGraph <- graph(type = "isotonic", gap = 2)
gfpop(data = mydata, mygraph = myGraph, type = "poisson")

#----------------------------------------------------------#
## chunk 7 : Negative binomial model with 3-segment graph ##
#----------------------------------------------------------#

mygraph <- graph(
  Edge("1", "2", type = "std", penalty = 0),
  Edge("2", "3", type = "std", penalty = 0),
  StartEnd(start = "1", end = "3"),
  all.null.edges = TRUE)
data <- dataGenerator(n = 1000, changepoints = c(0.3, 0.7, 1),
                      parameters = c(0.2, 0.25, 0.3), type = "negbin", sigma = 1)
gfpop(data, mygraph, type = "negbin")

#--------------------------------------------------#
## chunk 8 : A plotting function ###################
#--------------------------------------------------#

####### FIGURE 10 #######
pdf("figure10.pdf")

set.seed(86)
myData <- dataGenerator(1000, c(0.3, 0.4, 0.7, 0.95, 1),
                        c(1, 3, 1, -1, 4), "mean", sigma = 3)
s <- sdDiff(myData)
g <- gfpop(myData,
           graph(type = "relevant", gap = 0.5, penalty = 2 * s ^ 2 * log(1000)),
           type = "mean")
plot(x = g, data = myData, multiple = FALSE)


set.seed(86)
myData <- dataGenerator(1000, c(0.4, 0.8, 1), c(1, 1.3, 2.3), "exp")
s <- sdDiff(myData)
g <- gfpop(myData, type = "exp",
           graph(type = "isotonic", penalty = 2 * s ^ 2 * log(1000)))
plot(x = g, data = myData, multiple = TRUE)

# Close device
dev.off()

##################################################################################################################
#########################################################
######################## SECTION 5 ######################
#########################################################

# devtools::install_github("vrunge/gfpop.data")
if (!require("gfpop.data")) {
  install.packages("gfpop.data_0.1.1.tar.gz")
  library("gfpop.data")
}

data("profile614chr2", package = "gfpop.data")
data("neuroSpike", package = "gfpop.data")
data("ECG", package = "gfpop.data")

library("penaltyLearning")
library("data.table")
library("ggplot2")

## ----------------------------------------------- ##
## Relevant changes model for DNA copy number data ##
## ----------------------------------------------- ##

profile614chr2$probes[, change.after := floor(
  c(diff(position)/2 + position[-.N], NA))]
sngraph <- function(n.segs, type, gap) {
  stopifnot(is.integer(n.segs), length(n.segs) == 1, n.segs >= 1)
  s <- n.segs - 1
  seg.vec <- 1:s
  gfpop::graph(
    gfpop::StartEnd(start = 0, end = s),
    gfpop::Edge(seg.vec - 1, seg.vec, type, gap = gap), all.null.edges = TRUE)
}
select.dt <- rbind(
  data.table(n.segments = c(3, 7, 13), graph.name = "std", gap = 0),
  data.table(n.segments = 13, graph.name = "abs", gap = 1))
seg.dt.list <- list()
for (model.i in 1:nrow(select.dt)) {
  model.info <- select.dt[model.i]
  cat(sprintf("%4d / %4d models\n", model.i, nrow(select.dt)))
  g <- model.info[, sngraph(as.integer(n.segments), graph.name, gap = gap)]
  fit <- gfpop::gfpop(
    profile614chr2$probes$logratio,
    mygraph = g, type = "mean")
  end.i <- fit$changepoints
  change.i <- end.i[-length(end.i)]
  change.pos <- profile614chr2$probes$change.after[change.i]
  seg.dt.list[[model.i]] <- data.table(
    model.info,
    segStart = c(profile614chr2$probes[1, position], change.pos),
    segEnd = c(change.pos, profile614chr2$probes[.N, position]),
    mean = fit$parameters)
}
seg.dt <- do.call(rbind, seg.dt.list)

some.segs <- seg.dt[select.dt, on = list(n.segments, graph.name), allow.cartesian = TRUE]
some.change <- some.segs[min(segStart) < segStart]
some.change[, change := segStart]

err.dt <- some.segs[, {
  change.dt <- .SD[min(segStart) < segStart]
  change.dt[, change := segStart]
  change.dt[, prob := 1]
  change.dt[, nseg := n.segments]
  model.dt <- data.table(nseg = n.segments, prob = 1)
  penaltyLearning::labelError(
    model.dt,
    data.table(prob = 1, profile614chr2$labels),
    change.dt,
    change.var = "change",
    model.vars = "nseg",
    label.vars = c("labelStart", "labelEnd"),
    problem.vars = "prob")$label.errors
}, by = list(graph.name, n.segments)]

err.dt[, list(
  fp = sum(fp),
  fn = sum(fn),
  errors = sum(fp + fn)
), by = list(graph.name, n.segments)]

win <- function(min, max) data.table(windowStart = min * 1e5, windowEnd = max * 1e5)
windows <- rbind(
  win(  65,  71),
  win( 148, 171),
  win( 354, 361),
  win(1059,1065))
mb.fac <- 1e6
wfac <- function(x) {
  factor(x, c("6.5-7.1", "14.8-17.1", "35.4-36.1", "105.9-106.5"))
}
windows[, window := wfac(sprintf(
  "%.1f-%.1f", windowStart/mb.fac, windowEnd/mb.fac))]
setkey(windows, windowStart, windowEnd)
f <- function(dt, key.vec){
  setkeyv(dt, key.vec)
  dt
}
profile614chr2$probes[, pos0 := position]
over.list <- list(
  changes = f(some.change, c("change", "segStart")),
  segments = f(some.segs, c("segStart", "segEnd")),
  labels = f(profile614chr2$labels, c("labelStart", "labelEnd")),
  errors = f(err.dt, c("labelStart", "labelEnd")),
  probes = f(profile614chr2$probes, c("position", "pos0")))
join.list <- lapply(over.list, foverlaps, windows, nomatch = 0L)

show.err <- join.list$errors[, list(
  fp = sum(fp),
  fn = sum(fn),
  errors = sum(fp + fn)
), by = list(graph.name, n.segments)]

br <- c(6.5, 7.0, seq(15, 17, by = 0.5), 35.5, 36, 106, 106.5)
names(br) <- as.character(br)
gg.out <- ggplot() +
  theme_bw() +
  theme(panel.spacing = grid::unit(0, "lines")) +
  coord_cartesian(expand = FALSE) +
  facet_grid(
    graph.name + n.segments ~ window,
    scales = "free", space = "free") +
  penaltyLearning::geom_tallrect(aes(
    xmin = labelStart/mb.fac, xmax = labelEnd/mb.fac, fill = annotation),
    color = "grey",
    data = join.list$labels) +
  scale_fill_manual(
    "label",
    values = penaltyLearning::change.colors) +
  penaltyLearning::geom_tallrect(aes(
    xmin = labelStart/mb.fac, xmax = labelEnd/mb.fac, linetype = status),
    fill = NA,
    size = 1,
    color = "black",
    data = join.list$errors) +
  scale_linetype_manual(
    "error type",
    limits = c("correct",
               "false negative",
               "false positive"),
    values = c(correct = 0,
               "false negative" = 3,
               "false positive" = 1)) +
  geom_point(aes(
    position/mb.fac, logratio),
    shape = 1,
    color = "grey50",
    data = join.list$probes) +
  scale_y_continuous(
    "Logratio (Measure of DNA copy number)",
    breaks = seq(-2, 2, by = 2)
  ) +
  scale_x_continuous("Position on chr2 (mega bases)", breaks = br) +
  scale_color_manual(values = c(
    std = "green",
    abs = "deepskyblue")) +
  geom_segment(aes(
    ifelse(segStart < windowStart, windowStart, segStart)/mb.fac, mean,
    color = graph.name,
    xend = ifelse(windowEnd < segEnd, windowEnd, segEnd)/mb.fac, yend = mean),
    data = join.list$segments,
    size = 1) +
  geom_vline(aes(
    xintercept = change/mb.fac, color = graph.name),
    linetype = "dashed",
    size = 0.75,
    data = join.list$changes) +
  geom_text(aes(
    14.8, -3, label = sprintf(
      " %d label error%s for %d segment %s model",
      errors, ifelse(errors == 1, "", "s"), n.segments, graph.name)),
    hjust = 0,
    vjust = 0,
    data = data.table(show.err, window = wfac("14.8-17.1")))

pdf("figure11.pdf")
print(gg.out)
dev.off()

## ----------------------------------------------------- ##
## Multi-modal regression for neuro spike train data set ##
## ----------------------------------------------------- ##

fps <- 100
seconds.between.data <- 1/fps
sec.w <- seconds.between.data/3
myGraph <- gfpop::graph(
  gfpop::Edge(0, 0, "down", penalty = 0),
  gfpop::Edge(1, 1, "up", penalty = 0),
  gfpop::Edge(0, 1, "up", penalty = 5),
  gfpop::Edge(1, 0, "down", penalty = 0))

fit <- gfpop::gfpop(
  data = neuroSpike$calcium, mygraph = myGraph, type = "mean")

end.i <- fit$changepoints
start.i <- c(1, end.i[-length(end.i)] + 1)
res.dt <- with(neuroSpike, data.table(
  start.seconds = seconds[start.i],
  end.seconds = seconds[end.i],
  state = fit$states,
  mean = fit$parameters))
res.dt[, Multimodal := ifelse(mean < 0.1, 0, mean)]
over.dt <- res.dt[neuroSpike, on = list(
  start.seconds <= seconds, end.seconds >= seconds)]
tall.dt <- melt(
  over.dt,
  measure.vars = c("Multimodal", "AR1"),
  variable.name = "model")
tall.dt[, change.after := c(diff(value), NA)]
tall.dt[, seconds.after := c(start.seconds[-1], NA)]
tall.dt[, spike.i := cumsum(change.after < 0)]
tall.dt[, thresh := ifelse(model == "Multimodal", 0, 0)]
m <- function(model) {
  factor(
    model,
    c("AR1", "Multimodal"),
    c("Previous model:
AR1 changepoint
Jewell et al 2017", "Proposed model:
changepoint with
graph constraints"))
}
tall.dt[, model.fac := m(model)]
spike.dt <- tall.dt[0 < change.after & thresh < value, list(
  start.seconds = min(start.seconds),
  end.seconds = max(start.seconds)
), by = list(spike.i, model.fac)]
spike.dt[, mid.seconds := (start.seconds+end.seconds)/2]

lab <- function(xmin, xmax) {
  data.table(xmin, xmax, label = "oneSpike", annotation = "1change", problem = 1)
}
label.dt <- rbind(
  lab(166.5, 166.75),
  lab(169.7, 169.9))
ann.colors <- c(oneSpike = "#ffafaf")
xmin <- 166
xmax <- 171
spike.dt[, problem := 1]
models.dt <- spike.dt[, list(
  models = .N
), by = list(model.fac, problem)]
err.list <- penaltyLearning::labelError(
  models.dt, label.dt, spike.dt,
  change.var = "mid.seconds",
  problem.var = "problem",
  label.vars = c("xmin", "xmax"),
  model.vars = "model.fac")
type.colors <- c(
  data = "grey50",
  model = "blue")
show.spikes <- spike.dt[xmin < mid.seconds & mid.seconds < xmax]
show.data <- neuroSpike[xmin < seconds & seconds < xmax]
spike.y <- -1.5
gg.out <- ggplot() +
  theme_bw() +
  theme(panel.spacing = grid::unit(0, "lines")) +
  facet_grid(model.fac ~ .) +
  penaltyLearning::geom_tallrect(aes(
    xmin = xmin, xmax = xmax, fill = label),
    color = NA,
    data = label.dt) +
  penaltyLearning::geom_tallrect(aes(
    xmin = xmin, xmax = xmax, linetype = status),
    fill = NA,
    color = "black",
    data = err.list$label.errors) +
  scale_linetype_manual(
    "error type",
    limits = c("correct",
               "false negative",
               "false positive"),
    values = c(correct = 0,
               "false negative" = 3,
               "false positive" = 1)) +
  geom_point(aes(
    seconds, calcium, color = type),
    shape = 1,
    data = data.table(type = "data", show.data)) +
  geom_line(aes(
    start.seconds, value, color = type),
    data = data.table(
      type = "model",
      tall.dt[xmin < start.seconds & start.seconds < xmax]),
    size = 0.5) +
  geom_point(aes(
    mid.seconds, spike.y, color = type),
    shape = 1,
    data = data.table(type = "model", show.spikes)) +
  scale_y_continuous(
    "Fluorescence intensity
(Measure of neural activity)",
    breaks = seq(0, 10, by = 2),
    limits = c(-2, 10)
  ) +
  scale_x_continuous("Time (seconds)") +
  guides(color = "none") +
  scale_color_manual(values = type.colors) +
  geom_text(aes(
    x,y,label = label, color = type, hjust = hjust),
    size = 3,
    data = data.table(model.fac = m("AR1"), rbind(
      data.table(
        hjust = 0, x = 167, y = 2, label = "Noisy activity data", type = "data"),
      data.table(
        hjust = 1, x = 169.5, y = 5, label = "Mean model", type = "model"),
      data.table(
        hjust = 1, x = 169.4, y = spike.y, label = "Predicted spikes", type = "model"))))

pdf("figure12.pdf")
print(gg.out)
dev.off()


## -------------------------------------------------------------------------- ##
## Nine-state model for QRS complex detection in electrocardiogram (ECG) data ##
## -------------------------------------------------------------------------- ##

myGraph <- gfpop::graph(
  gfpop::Edge(0, 1, "down", penalty = 8e7, gap = 0),
  gfpop::Edge(1, 2, "up", penalty = 0, gap = 2000),
  gfpop::Edge(2, 3, "down", penalty = 0, gap = 5000),
  gfpop::Edge(3, 4, "up", penalty = 0, gap = 2000),
  gfpop::Edge(4, 5, "up", penalty = 0, gap = 1000),
  gfpop::Edge(5, 6, "up", penalty = 0, gap = 0),
  gfpop::Edge(6, 7, "down", penalty = 0, gap = 0),
  gfpop::Edge(7, 8, "down", penalty = 0, gap = 0),
  gfpop::Edge(8, 0, "up", penalty = 0, gap = 0),
  all.null.edges = TRUE)

fit <- gfpop::gfpop(
  data = ECG$data$millivolts, mygraph = myGraph, type = "mean")

end.i <- fit$changepoints
start.i <- c(1, end.i[-length(end.i)]+1)
segments.dt <- with(ECG$data, data.table(
  timeStart = time[start.i],
  timeEnd = time[end.i],
  state = as.numeric(fit$states),
  mean = fit$parameters))

segments.dt[, letter := c("beforeQ", "Q", "R", "S", "S1", "S2", "peak", "afterPeak", "foo")[state+1]]
mean.dt <- segments.dt[, data.table(
  time = as.numeric(rbind(timeStart-0.5, timeEnd+0.5)),
  mean = as.numeric(rbind(mean, mean)))]

m <- function(x){
  factor(x, c("previous", "changepoint"), c(
    "Previous model", "Proposed model"))
}
model.dt <- rbind(
  segments.dt[, data.table(
    model = m("changepoint"),
    time = ifelse(letter == "Q", timeEnd, (timeStart+timeEnd)/2),
    millivolts = mean,
    letter)],
  data.table(
    model = m("previous"),
    ECG$PanTompkins)
)[letter %in% c("Q", "R", "S")]

samples.per.second <- 250
truth.dt <- segments.dt[letter == "R", list(time = (timeStart+timeEnd)/2)]

gg <- ggplot() +
  geom_vline(aes(
    xintercept = time/samples.per.second),
    color = "red",
    data = truth.dt) +
  geom_text(aes(
    x, y, hjust = hjust, label = "True R"),
    color = "red",
    size = 3,
    data = data.table(
      x = 208.5, y = 6500, hjust = 1, label = "True R", model = m("changepoint"))) +
  theme_bw() +
  theme(panel.spacing = grid::unit(0, "lines")) +
  facet_grid(model ~ .) +
  geom_line(aes(
    time/samples.per.second, millivolts),
    color = "grey50",
    data = ECG$data) +
  geom_line(aes(
    time/samples.per.second, mean),
    data = data.table(model = m("changepoint"), mean.dt),
    color = "blue") +
  geom_label(aes(
    time/samples.per.second, millivolts,
    label = letter),
    color = "blue",
    size = 3,
    label.padding = grid::unit(0.1, "lines"),
    alpha = 0.6,
    data = model.dt) +
  coord_cartesian(xlim = c(52000, 52900)/samples.per.second, expand = FALSE) +
  xlab("Time (seconds)") +
  ylab("Electrocardiogram activity (mV)")

pdf("figure14.pdf")
print(gg)
dev.off()


#########################################################
######################## SECTION 6 ######################
#########################################################

# number of simulations reduced to 20 to reduce time execution of this script
# nbSimu <- 20
nbSimu <- 100 # number of simulations

# data length reduced to 1000 to reduce time execution of this script
# my_n <- 10^3  # time-series data length
my_n <- 10^4  # time-series data length

############################################################################
## (SECTION 6. JSS PAPER) Utility of penalized isotonic regression models ##
############################################################################

#-----------------------------------------------------#
## DATA SIMULATION FUNCTION for ISOTONIC SIMULATIONS ##
#-----------------------------------------------------#

simuData <- function(n = 10^3,
                     D = 10,
                     type = "isostep",
                     noise = "student",
                     dfree = 3,
                     pcorr = 0.3,
                     sigma = 10,
                     min.length = 10)
{
  # n = nb points
  # k = nb steps
  # type = isostep OR isolinear
  # noise = student OR corrupted
  # dfree = for student distribution : degree of freedom
  # pcorr = percent of corrupted data
  ###
  ### ALL signals between 0 and 100 !
  ###

  if (type == "isostep") {
    data <- list()
    data$signal <- (floor(seq(from = 0, to = D-0.000001, length.out = n)))*100/D
    data$signal <- data$signal - mean(data$signal)
    data$y <- data$signal ## modified later
    data$w <- rep(1, length(data$y))
    data$D <- D
  }
  if (type == "isolinear") {
    data <- list()
    data$signal <- (0:(n-1)) * (100/n)
    data$signal <- data$signal - mean(data$signal)
    data$y <- data$signal ## modified later
    data$w <- rep(1, length(data$y))
    data$D <- D
  }

  ### noise
  if (noise == "gauss") {
    data$y <- data$y + sigma * rnorm(length(data$y)) # gaussian distribution
  }
  if (noise == "student") {
    data$y <- data$y + sigma * sqrt((dfree-2)/dfree) * rt(length(data$y), df = dfree) # Student t distribution (!= gauss; heavy tails)
  }
  if (noise == "corrupted") {
    data$y <- data$y + sigma*rnorm(length(data$y)) # gaussian distribution
    isp <- sample(1:n, trunc(pcorr * n))
    data$y[isp] <- -data$y[isp] # opposition for data indices in isp
  }

  return(data)
}

#-----------------#
## ESTIMATE FIT  ##
#-----------------#

estimateAllSeg <- function(simuData, type = "mean", K = 2, pen = 2) {
  response <- list()

  response[[1]] <- isoreg(simuData$y)$yf
  response[[2]] <- reg_1d(simuData$y, simuData$w, metric = 2, unimodal = FALSE, decreasing = FALSE)
  response[[3]] <- reg_1d(simuData$y, simuData$w, metric = 1, unimodal = FALSE, decreasing = FALSE)

  isoFpop4 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = 0, type = "isotonic"), type = type)
  isoFpop5 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = 0, type = "isotonic", K = K), type = type)
  isoFpop6 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = pen, type = "isotonic"), type = type)
  isoFpop7 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = pen, type = "isotonic", K = K), type = type)

  response[[4]] <- rep(isoFpop4$parameters, diff(c(0, isoFpop4$changepoints)))
  response[[5]] <- rep(isoFpop5$parameters, diff(c(0, isoFpop5$changepoints)))
  response[[6]] <- rep(isoFpop6$parameters, diff(c(0, isoFpop6$changepoints)))
  response[[7]] <- rep(isoFpop7$parameters, diff(c(0, isoFpop7$changepoints)))

  return(response)
}

#----------------------------#
## Compute MSE of 7 methods ##
#----------------------------#

MSEall <- function(data, estimate) {
  n <- length(data$signal)
  res <- data.frame(matrix(0, nrow = 1, ncol = 7))

  for (i in 1:7) {
    u <- data$signal - estimate[[i]]
    res[1, i] <- (1/n) * (t(u) %*% u)
  }
  return(res)
}

#-----------------#
## MSEdataSignal ##
#-----------------#

MSEdataSignal <- function(data, signal, method = "NONE") {
  if (method == "NONE") {
    n <- length(data)
    u <- data - signal
    res <- (1/n) * (t(u) %*% u)
  }
  if (method == "linear") {
    n <- length(data)
    x <- 1:n
    linearMod <- lm(data ~ x)
    line <- linearMod$coefficients[1] + linearMod$coefficients[2] * x
    u <- line - signal
    res <- (1/n) * (t(u) %*% u)
  }
  return(res)
}

####################################################################################################

#--------------------------#
## get number of segments ##
#--------------------------#

getD <- function(estimate) {
  res <- sapply(estimate, FUN = function(x) sum(diff(x) != 0))
  return(res + 1)
}

#-----------#
## varDiff ##
#-----------#

varDiff <- function(x, method = "HALL") {
  if (method == "SD") {
    return(sd(diff(x)/sqrt(2)))
  }
  if (method == "MAD") {
    return(mad(diff(x)/sqrt(2)))
  }
  if (method == "HALL") {
    n <- length(x)
    wei <- c(0.1942, 0.2809, 0.3832, -0.8582)
    mat <- wei %*% t(x)

    mat[2, -n] <- mat[2, -1]
    mat[3, -c(n-1, n)] <- mat[3, -c(1, 2)]
    mat[4, -c(n-2, n-1, n)] <- mat[4, -c(1, 2, 3)]
    return(sqrt(sum(apply(mat[, -c(n-2, n-1, n)], 2, sum)^2) / (n-3)))
  }
}

#--------------#
## plotAllSeg ##
#--------------#

plotAllSeg <- function(data, estimate) {
  plot(data$y, pch = "+", cex = 1.8) # cex = 1.2
  lty <- c(1, 2, 3, 4, 5, 6, 7)
  col <- c("black", "blue", "red", "yellow", "orange", "magenta", "green")
  for (i in 1:7) lines(estimate[[i]], lty = lty[i], lwd = 2, col = col[i])
}

#--------------#
##  plotSeg   ##
#--------------#

plotSeg <- function(data, estimate) {
  plot(data$y, pch = ".", cex = 1) # cex = 1.2
  lty <- c(1, 3, 7)
  col <- c("red", "magenta", "blue")
  ttype <- c(4, 1, 5)
  lines(data$signal, lwd = 1.9, col = "black")
  for (i in 1:3) lines(estimate[[lty[i]]], lwd = 2, lty = ttype[i], col = col[i])
  legend("bottomright", legend = c("true signal", "isoreg", "reg_1d L1", "gfpop4"),
         col = c("black", col), lty = c(1, ttype), lwd = 2, cex = 1.5)
}

#############
############# one.simu
#############

one.simu <- function(i,
                     n = 10^2,
                     D = 10,
                     type = "isolinear",
                     noise = "gauss",
                     sigma = 10,
                     df = 3,
                     pcorr = 0.3,
                     estimating = "MSE",
                     sigma.estimate = FALSE)
{

  # SIGMA ESTIMATION
  if (sigma.estimate == TRUE)
  {
    sigma <- sdDiff(data$y)
  }

  # PENALTY + K
  myK <- 3 * (sigma) ^ 2
  myPen <- 2 * sigma * sigma * log(n)

  # DATA
  data <- simuData(n, D = D, type = type, noise = noise,
                   pcorr = pcorr, df = df, sigma = sigma)

  # Estimation signal
  estim <- estimateAllSeg(data, K = myK, pen = myPen)

  #----------------------------------#
  if (estimating == "MSE")
  {
    # all MSE (x9)
    ref <- MSEdataSignal(data$y, data$signal)
    ref_lin <- MSEdataSignal(data$y, data$signal, method = "linear")
    mseAll <- MSEall(data, estim)
    # ind, method, mse, beta, K
    df <- data.frame(numeric(0), character(), numeric(0), numeric(0), numeric(0), stringsAsFactors = FALSE)
    colnames(df) <- c("index", "method", "MSE", "penalty", "K")
    df[1, ] <- c(i, "MSE", ref, 0, Inf)
    df[2, ] <- c(i, "MSE_linearFit", ref_lin, 0, Inf)
    df[3, ] <- c(i, "isoreg", mseAll[1], 0, Inf)
    df[4, ] <- c(i, "reg_1d_L2", mseAll[2], 0, Inf)
    df[5, ] <- c(i, "reg_1d_L1", mseAll[3], 0, Inf)
    df[6, ] <- c(i, "gfpop", mseAll[4], 0, Inf)
    df[7, ] <- c(i, "gfpop", mseAll[5], 0, myK)
    df[8, ] <- c(i, "gfpop", mseAll[6], myPen, Inf)
    df[9, ] <- c(i, "gfpop", mseAll[7], myPen, myK)
    return(df)
  }

  #----------------------------------#
  if (estimating == "D")
  {
    # all D (x7)
    myD <- getD(estim)
    # ind, method, mse, beta, K
    df <- data.frame(numeric(0), character(), numeric(0), numeric(0), numeric(0), stringsAsFactors = FALSE)
    colnames(df) <- c("index", "method", "D", "penalty", "K")
    df[1, ] <- c(i, "isoreg", myD[1], 0, Inf)
    df[2, ] <- c(i, "reg_1d_L2", myD[2], 0, Inf)
    df[3, ] <- c(i, "reg_1d_L1", myD[3], 0, Inf)
    df[4, ] <- c(i, "gfpop", myD[4], 0, Inf)
    df[5, ] <- c(i, "gfpop", myD[5], 0, myK)
    df[6, ] <- c(i, "gfpop", myD[6], myPen, Inf)
    df[7, ] <- c(i, "gfpop", myD[7], myPen, myK)
    return(df)
  }
}

#------------------------------------------------------
##### ISOTONIC REGRESSION #####

library("isotone") # slow but more general
library("UniIsoRegression")
library("stats")
## https://www.sciencedirect.com/science/article/pii/S0167947308003964?via%3Dihub
## noise = corrupted simular to figure in https://arxiv.org/pdf/1707.09157.pdf
## reg_1d -> package UniIsoRegression
## isoreg -> package stats


#----------------------------------#
##            Figure 15           ##
#----------------------------------#

my_n = 10^4  #time-series data length
isolinearG <- simuData(n = my_n,
                       type = "isolinear",
                       noise = "gauss")$y
isolinearS <- simuData(n = my_n,
                       type = "isolinear",
                       noise = "student")$y
isolinearC <- simuData(n = my_n,
                       type = "isolinear",
                       noise = "corrupted")$y
isostepG <- simuData(n = my_n,
                     type = "isostep",
                     noise = "gauss")$y
isostepS <- simuData(n = my_n,
                     type = "isostep",
                     noise = "student")$y
isostepC <- simuData(n = my_n,
                     type = "isostep",
                     noise = "corrupted")$y

pdf("figure15.pdf")

mini <- min(isostepG)
maxi <- max(isostepG)
par(mfrow=c(3,2))
par(mar = c(2,2, 1, 1)) # Set the margin on all sides to 2
plot(isolinearG, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "")
plot(isostepG, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "")
plot(isolinearS, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "")
plot(isostepS, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "")
plot(isolinearC, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "")
plot(isostepC, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "")


sum(isolinearG > maxi)
sum(isolinearG < mini)

sum(isostepS > maxi)
sum(isostepS < mini)

sum(isolinearS > maxi)
sum(isolinearS < mini)

sum(isolinearC > maxi)
sum(isolinearC < mini)

sum(isostepC > maxi)
sum(isostepC < mini)


dev.off()

#----------------------------------#
##            Figure 16           ##
#----------------------------------#
set.seed(1)
n <- 10^4
sigma <- 10
myK <- (2 * sigma) ^ 2 # OR (sqrt(3) * sigma) ^ 2
myPen <- 2 * sigma * sigma * log(n)
# DATA
data <- simuData(n,
                 D = 10,
                 type = "isostep",
                 noise = "corrupted",
                 pcorr = 0.3,
                 sigma = sigma)

estim <- estimateAllSeg(data, K = myK, pen = myPen)

pdf("figure16.pdf")

plot(data$y, pch = ".", cex = 1) # cex = 1.2
lty <- c(1, 3, 7)
col <- c("red", "magenta", "blue")
ttype <- c(4, 1, 5)
lines(data$signal, lwd = 1.9, col = "black")
for (i in 1:3) {
    lines(estim[[lty[i]]], lwd = 2, lty = ttype[i], col = col[i])
}

legend("bottomright",
       legend = c("true signal", "isoreg", "reg_1d L1", "gfpop4"),
       col = c("black", col),
       lty = c(1, ttype),
       lwd = 2,
       cex = 1.5)

dev.off()

#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################

library("parallel")
cores <- detectCores()
# cores <- 1 # for windows user only
cores <- 97
RNGkind("L'Ecuyer-CMRG")

#########################################################################
nbSimu <- 100 # number of simulations
my_n <- 10^4  # time-series data length
########################################################################

#############################
### SIMULATION FOR TABLE 1

set.seed(50)
lres1_lin <- mclapply(1:nbSimu, FUN = one.simu,
                      n = my_n,
                      type = "isolinear",
                      noise = "gauss",
                      sigma = 10,
                      estimating = "MSE",
                      mc.cores = cores)

lres2_lin <- mclapply(1:nbSimu, FUN = one.simu,
                      n = my_n,
                      type = "isolinear",
                      noise = "student",
                      sigma = 10,
                      df = 3,
                      estimating = "MSE",
                      mc.cores = cores)

lres3_lin <- mclapply(1:nbSimu, FUN = one.simu,
                      n = my_n,
                      type = "isolinear",
                      noise = "corrupted",
                      sigma = 10,
                      pcorr = 0.3,
                      estimating = "MSE",
                      mc.cores = cores)

df_linear_gauss <- do.call(rbind, lres1_lin)
df_linear_student <- do.call(rbind, lres2_lin)
df_linear_corrupted <- do.call(rbind, lres3_lin)

#############################
### SIMULATION FOR TABLE 2
set.seed(25)
lres1_step <- mclapply(1:nbSimu, FUN = one.simu,
                       n = my_n,
                       type = "isostep",
                       noise = "gauss",
                       sigma = 10,
                       estimating = "MSE",
                       mc.cores = cores)

lres2_step <- mclapply(1:nbSimu, FUN = one.simu,
                       n = my_n,
                       type = "isostep",
                       noise = "student",
                       sigma = 10,
                       df = 3,
                       estimating = "MSE",
                       mc.cores = cores)

lres3_step <- mclapply(1:nbSimu, FUN = one.simu,
                       n = my_n,
                       type = "isostep",
                       noise = "corrupted",
                       sigma = 10,
                       pcorr = 0.3,
                       estimating = "MSE",
                       mc.cores = cores)


df_step_gauss <- do.call(rbind, lres1_step)
df_step_student <- do.call(rbind, lres2_step)
df_step_corrupted <- do.call(rbind, lres3_step)


#############################
### SIMULATION FOR TABLE 3
set.seed(60)
lres1_stepD <- mclapply(1:nbSimu, FUN = one.simu,
                        n = my_n,
                        D = 10,
                        type = "isostep",
                        noise = "gauss",
                        sigma = 10,
                        estimating = "D",
                        mc.cores = cores)

lres2_stepD <- mclapply(1:nbSimu, FUN = one.simu,
                        n = my_n,
                        D = 10,
                        type = "isostep",
                        noise = "student",
                        sigma = 10,
                        df = 3,
                        estimating = "D",
                        mc.cores = cores)

lres3_stepD <- mclapply(1:nbSimu, FUN = one.simu,
                        n = my_n,
                        D = 10,
                        type = "isostep",
                        noise = "corrupted",
                        sigma = 10,
                        pcorr = 0.3,
                        estimating = "D",
                        mc.cores = cores)

df_D_gauss <- do.call(rbind, lres1_stepD)
df_D_student <- do.call(rbind, lres2_stepD)
df_D_corrupted <- do.call(rbind, lres3_stepD)

##########################################
#----------------------#
## TABLE 1: isolinear ##
#----------------------#

meth <- c(df_linear_gauss[1:5, 2], "gfpop1", "gfpop2", "gfpop3", "gfpop4")
df_linear_gauss[, 2] <- rep(meth, nbSimu)
df_linear_student[, 2] <- rep(meth, nbSimu)
df_linear_corrupted[, 2] <- rep(meth, nbSimu)

df_linear_gauss$MSE <- as.numeric(df_linear_gauss$MSE)
df_linear_student$MSE <- as.numeric(df_linear_student$MSE)
df_linear_corrupted$MSE <- as.numeric(df_linear_corrupted$MSE)


## One Inf to remove !! on corrupted fpop2, instable due to beta = 0

# df_linear_student <- df_linear_student[!(df_linear_student$MSE == Inf), ]
# df_linear_corrupted <- df_linear_corrupted[!(df_linear_corrupted$MSE == Inf), ]

with(df_linear_gauss, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)]
with(df_linear_gauss, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)]

with(df_linear_student, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)]
with(df_linear_student, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)]

with(df_linear_corrupted, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)]
with(df_linear_corrupted, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)]

#--------------------#
## TABLE 2: isostep ##
#--------------------#

meth <- c(df_step_gauss[1:5, 2], "gfpop1", "gfpop2", "gfpop3", "gfpop4")
df_step_gauss[, 2] <- rep(meth, nbSimu)
df_step_student[, 2] <- rep(meth, nbSimu)
df_step_corrupted[, 2] <- rep(meth, nbSimu)

df_step_gauss$MSE <- as.numeric(df_step_gauss$MSE)
df_step_student$MSE <- as.numeric(df_step_student$MSE)
df_step_corrupted$MSE <- as.numeric(df_step_corrupted$MSE)

with(df_step_gauss, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)]
with(df_step_gauss, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)]

with(df_step_student, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)]
with(df_step_student, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)]

with(df_step_corrupted, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)]
with(df_step_corrupted, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)]

#------------------#
## TABLE 3: D HAT ##
#------------------#

meth <- c(df_D_gauss[1:3, 2], "gfpop1", "gfpop2", "gfpop3", "gfpop4")
df_D_gauss[, 2] <- rep(meth, nbSimu)
df_D_student[, 2] <- rep(meth, nbSimu)
df_D_corrupted[, 2] <- rep(meth, nbSimu)

df_D_gauss$D <- as.numeric(df_D_gauss$D)
df_D_student$D <- as.numeric(df_D_student$D)
df_D_corrupted$D <- as.numeric(df_D_corrupted$D)

with(df_D_gauss, tapply(D, method, mean))[c(5:7, 1:4)]
with(df_D_gauss, tapply(D, method, sd))[c(5:7, 1:4)]

with(df_D_student, tapply(D, method, mean))[c(5:7, 1:4)]
with(df_D_student, tapply(D, method, sd))[c(5:7, 1:4)]

with(df_D_corrupted, tapply(D, method, mean))[c(5:7, 1:4)]
with(df_D_corrupted, tapply(D, method, sd))[c(5:7, 1:4)]

#---------------------------------------------------------------#
## Figure 17: Violin plots of the MSE for iso-step simulations ##
#---------------------------------------------------------------#

# create factor
# force numeric values
df_step_student$method <- as.factor(df_step_student$method)
df_step_student$MSE <- as.numeric(df_step_student$MSE)

# partial plot
me <- c("isoreg", "reg_1d_L1", "gfpop2", "gfpop3", "gfpop4")
df <- df_step_student[which(df_step_student$method %in% me), ]

# create factor
# force numeric values
df$method <- factor(df$method, levels = me)
df$MSE <- as.numeric(df$MSE)

# ggplot(df, aes(x = method, y = MSE, color = method)) + geom_violin()
p <- ggplot(df, aes(x = method, y = MSE, color = method)) + geom_boxplot()

pdf("figure24.pdf")
print(p)
dev.off()

##################################################################################################################
### ### ### ### ### ###
###  7. Conclusion  ###
### ### ### ### ### ###

## time gfpop
set.seed(1)
myData <- rnorm(10^5)
myGraph <- graph(penalty = 2 * log(n), type = "std")

system.time(
  gfpop(data = myData, mygraph = myGraph, type = "mean")
)

## time fpop

## Install if necessary
if (!require("fpopw")) {
  install.packages("fpopw")
  library("fpopw")
}

library("fpopw")
system.time(
  Fpop(x = myData, lambda = 2 * log(length(myData)))
)