# (method 1) ON GITHUB # devtools::install_github("vrunge/gfpop", force = TRUE) # (method 2) ON CRAN if (!require("gfpop")) { install.packages("gfpop") library("gfpop") } library("ggplot2") ################################################################################################################## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### 2. Constraint graphs and change-points model as a HMM ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### paperGraph(4) paperGraph(5) paperGraph(6) paperGraph(7) paperGraph(8) paperGraph(9) ## Plus figures in Appendix A: paperGraph(17) paperGraph(18) paperGraph(19) paperGraph(20) paperGraph(21) paperGraph(22) paperGraph(23) ################################################################################################################## ### ### ### ### ### ### ### ### ### 4. The gfpop package ### ### ### ### ### ### ### ### ### #-----------------------------------# ## chunk 1 : An example of edge ##### #-----------------------------------# e1 <- Edge(state1 = "Dw", state2 = "Up", type = "up", penalty = 10, gap = 0.5) e1 #-----------------------------------# ## chunk 2 : An example of graph #### #-----------------------------------# graph( Edge(state1 = "mu0", state2 = "mu0", penalty = 0, K = 3), Edge(state1 = "mu0", state2 = "Coll", penalty = 10, type = "std"), Edge(state1 = "Coll", state2 = "Coll", penalty = 0), Edge(state1 = "Coll", state2 = "mu0", penalty = 0, type = "std", K = 3), StartEnd(start = "mu0", end = c("mu0", "Coll")), Node(state = "mu0", min = 0, max = 0) ) #-----------------------------------# ## chunk 3 : Some default graphs. ### #-----------------------------------# graph(type = "isotonic", penalty = 12) ## chunk 4 : Gaussian model with an up-down graph set.seed(75) n <- 1000 myData <- dataGenerator(n, c(0.1, 0.3, 0.5, 0.8, 1), c(1, 2, 1, 3, 1), sigma = 1) myGraph <- graph(penalty = 2 * log(n), type = "updown") gfpop(data = myData, mygraph = myGraph, type = "mean") ## chunk 5 : Gaussian Robust biweight model with an up-down graph n <- 1000 chgtpt <- c(0.1, 0.3, 0.5, 0.8, 1) mydata <- dataGenerator(n, chgtpt, c(0, 1, 0, 1, 0), sigma = 1) mydata <- mydata + 5 * (rbinom(n, 1, 0.05)) - 5 * (rbinom(n, 1, 0.05)) beta <- 2 * log(n) myGraph <- graph( Edge("Dw", "Up", type = "up", penalty = beta, gap = 1, K = 3), Edge("Up", "Dw", type = "down", penalty = beta, gap = 1, K = 3), Edge("Dw", "Dw", type = "null", K = 3), Edge("Up", "Up", type = "null", K = 3), StartEnd(start = "Dw", end = "Dw")) gfpop(data = mydata, mygraph = myGraph, type = "mean") #--------------------------------------------------# ## chunk 6 : Poisson model with isotonic up graph ## #--------------------------------------------------# n <- 1000 chgtpt <- c(0.1, 0.3, 0.5, 0.8, 1) mydata <- dataGenerator(n, chgtpt, c(1, 3, 5, 7, 12), sigma = 1, type = "poisson") beta <- 2 * log(n) myGraph <- graph(type = "isotonic", gap = 2) gfpop(data = mydata, mygraph = myGraph, type = "poisson") #----------------------------------------------------------# ## chunk 7 : Negative binomial model with 3-segment graph ## #----------------------------------------------------------# mygraph <- graph( Edge("1", "2", type = "std", penalty = 0), Edge("2", "3", type = "std", penalty = 0), StartEnd(start = "1", end = "3"), all.null.edges = TRUE) data <- dataGenerator(n = 1000, changepoints = c(0.3, 0.7, 1), parameters = c(0.2, 0.25, 0.3), type = "negbin", sigma = 1) gfpop(data, mygraph, type = "negbin") #--------------------------------------------------# ## chunk 8 : A plotting function ################### #--------------------------------------------------# ####### FIGURE 10 ####### pdf("figure10.pdf") set.seed(86) myData <- dataGenerator(1000, c(0.3, 0.4, 0.7, 0.95, 1), c(1, 3, 1, -1, 4), "mean", sigma = 3) s <- sdDiff(myData) g <- gfpop(myData, graph(type = "relevant", gap = 0.5, penalty = 2 * s ^ 2 * log(1000)), type = "mean") plot(x = g, data = myData, multiple = FALSE) set.seed(86) myData <- dataGenerator(1000, c(0.4, 0.8, 1), c(1, 1.3, 2.3), "exp") s <- sdDiff(myData) g <- gfpop(myData, type = "exp", graph(type = "isotonic", penalty = 2 * s ^ 2 * log(1000))) plot(x = g, data = myData, multiple = TRUE) # Close device dev.off() ################################################################################################################## ######################################################### ######################## SECTION 5 ###################### ######################################################### # devtools::install_github("vrunge/gfpop.data") if (!require("gfpop.data")) { install.packages("gfpop.data_0.1.1.tar.gz") library("gfpop.data") } data("profile614chr2", package = "gfpop.data") data("neuroSpike", package = "gfpop.data") data("ECG", package = "gfpop.data") library("penaltyLearning") library("data.table") library("ggplot2") ## ----------------------------------------------- ## ## Relevant changes model for DNA copy number data ## ## ----------------------------------------------- ## profile614chr2$probes[, change.after := floor( c(diff(position)/2 + position[-.N], NA))] sngraph <- function(n.segs, type, gap) { stopifnot(is.integer(n.segs), length(n.segs) == 1, n.segs >= 1) s <- n.segs - 1 seg.vec <- 1:s gfpop::graph( gfpop::StartEnd(start = 0, end = s), gfpop::Edge(seg.vec - 1, seg.vec, type, gap = gap), all.null.edges = TRUE) } select.dt <- rbind( data.table(n.segments = c(3, 7, 13), graph.name = "std", gap = 0), data.table(n.segments = 13, graph.name = "abs", gap = 1)) seg.dt.list <- list() for (model.i in 1:nrow(select.dt)) { model.info <- select.dt[model.i] cat(sprintf("%4d / %4d models\n", model.i, nrow(select.dt))) g <- model.info[, sngraph(as.integer(n.segments), graph.name, gap = gap)] fit <- gfpop::gfpop( profile614chr2$probes$logratio, mygraph = g, type = "mean") end.i <- fit$changepoints change.i <- end.i[-length(end.i)] change.pos <- profile614chr2$probes$change.after[change.i] seg.dt.list[[model.i]] <- data.table( model.info, segStart = c(profile614chr2$probes[1, position], change.pos), segEnd = c(change.pos, profile614chr2$probes[.N, position]), mean = fit$parameters) } seg.dt <- do.call(rbind, seg.dt.list) some.segs <- seg.dt[select.dt, on = list(n.segments, graph.name), allow.cartesian = TRUE] some.change <- some.segs[min(segStart) < segStart] some.change[, change := segStart] err.dt <- some.segs[, { change.dt <- .SD[min(segStart) < segStart] change.dt[, change := segStart] change.dt[, prob := 1] change.dt[, nseg := n.segments] model.dt <- data.table(nseg = n.segments, prob = 1) penaltyLearning::labelError( model.dt, data.table(prob = 1, profile614chr2$labels), change.dt, change.var = "change", model.vars = "nseg", label.vars = c("labelStart", "labelEnd"), problem.vars = "prob")$label.errors }, by = list(graph.name, n.segments)] err.dt[, list( fp = sum(fp), fn = sum(fn), errors = sum(fp + fn) ), by = list(graph.name, n.segments)] win <- function(min, max) data.table(windowStart = min * 1e5, windowEnd = max * 1e5) windows <- rbind( win( 65, 71), win( 148, 171), win( 354, 361), win(1059,1065)) mb.fac <- 1e6 wfac <- function(x) { factor(x, c("6.5-7.1", "14.8-17.1", "35.4-36.1", "105.9-106.5")) } windows[, window := wfac(sprintf( "%.1f-%.1f", windowStart/mb.fac, windowEnd/mb.fac))] setkey(windows, windowStart, windowEnd) f <- function(dt, key.vec){ setkeyv(dt, key.vec) dt } profile614chr2$probes[, pos0 := position] over.list <- list( changes = f(some.change, c("change", "segStart")), segments = f(some.segs, c("segStart", "segEnd")), labels = f(profile614chr2$labels, c("labelStart", "labelEnd")), errors = f(err.dt, c("labelStart", "labelEnd")), probes = f(profile614chr2$probes, c("position", "pos0"))) join.list <- lapply(over.list, foverlaps, windows, nomatch = 0L) show.err <- join.list$errors[, list( fp = sum(fp), fn = sum(fn), errors = sum(fp + fn) ), by = list(graph.name, n.segments)] br <- c(6.5, 7.0, seq(15, 17, by = 0.5), 35.5, 36, 106, 106.5) names(br) <- as.character(br) gg.out <- ggplot() + theme_bw() + theme(panel.spacing = grid::unit(0, "lines")) + coord_cartesian(expand = FALSE) + facet_grid( graph.name + n.segments ~ window, scales = "free", space = "free") + penaltyLearning::geom_tallrect(aes( xmin = labelStart/mb.fac, xmax = labelEnd/mb.fac, fill = annotation), color = "grey", data = join.list$labels) + scale_fill_manual( "label", values = penaltyLearning::change.colors) + penaltyLearning::geom_tallrect(aes( xmin = labelStart/mb.fac, xmax = labelEnd/mb.fac, linetype = status), fill = NA, size = 1, color = "black", data = join.list$errors) + scale_linetype_manual( "error type", limits = c("correct", "false negative", "false positive"), values = c(correct = 0, "false negative" = 3, "false positive" = 1)) + geom_point(aes( position/mb.fac, logratio), shape = 1, color = "grey50", data = join.list$probes) + scale_y_continuous( "Logratio (Measure of DNA copy number)", breaks = seq(-2, 2, by = 2) ) + scale_x_continuous("Position on chr2 (mega bases)", breaks = br) + scale_color_manual(values = c( std = "green", abs = "deepskyblue")) + geom_segment(aes( ifelse(segStart < windowStart, windowStart, segStart)/mb.fac, mean, color = graph.name, xend = ifelse(windowEnd < segEnd, windowEnd, segEnd)/mb.fac, yend = mean), data = join.list$segments, size = 1) + geom_vline(aes( xintercept = change/mb.fac, color = graph.name), linetype = "dashed", size = 0.75, data = join.list$changes) + geom_text(aes( 14.8, -3, label = sprintf( " %d label error%s for %d segment %s model", errors, ifelse(errors == 1, "", "s"), n.segments, graph.name)), hjust = 0, vjust = 0, data = data.table(show.err, window = wfac("14.8-17.1"))) pdf("figure11.pdf") print(gg.out) dev.off() ## ----------------------------------------------------- ## ## Multi-modal regression for neuro spike train data set ## ## ----------------------------------------------------- ## fps <- 100 seconds.between.data <- 1/fps sec.w <- seconds.between.data/3 myGraph <- gfpop::graph( gfpop::Edge(0, 0, "down", penalty = 0), gfpop::Edge(1, 1, "up", penalty = 0), gfpop::Edge(0, 1, "up", penalty = 5), gfpop::Edge(1, 0, "down", penalty = 0)) fit <- gfpop::gfpop( data = neuroSpike$calcium, mygraph = myGraph, type = "mean") end.i <- fit$changepoints start.i <- c(1, end.i[-length(end.i)] + 1) res.dt <- with(neuroSpike, data.table( start.seconds = seconds[start.i], end.seconds = seconds[end.i], state = fit$states, mean = fit$parameters)) res.dt[, Multimodal := ifelse(mean < 0.1, 0, mean)] over.dt <- res.dt[neuroSpike, on = list( start.seconds <= seconds, end.seconds >= seconds)] tall.dt <- melt( over.dt, measure.vars = c("Multimodal", "AR1"), variable.name = "model") tall.dt[, change.after := c(diff(value), NA)] tall.dt[, seconds.after := c(start.seconds[-1], NA)] tall.dt[, spike.i := cumsum(change.after < 0)] tall.dt[, thresh := ifelse(model == "Multimodal", 0, 0)] m <- function(model) { factor( model, c("AR1", "Multimodal"), c("Previous model: AR1 changepoint Jewell et al 2017", "Proposed model: changepoint with graph constraints")) } tall.dt[, model.fac := m(model)] spike.dt <- tall.dt[0 < change.after & thresh < value, list( start.seconds = min(start.seconds), end.seconds = max(start.seconds) ), by = list(spike.i, model.fac)] spike.dt[, mid.seconds := (start.seconds+end.seconds)/2] lab <- function(xmin, xmax) { data.table(xmin, xmax, label = "oneSpike", annotation = "1change", problem = 1) } label.dt <- rbind( lab(166.5, 166.75), lab(169.7, 169.9)) ann.colors <- c(oneSpike = "#ffafaf") xmin <- 166 xmax <- 171 spike.dt[, problem := 1] models.dt <- spike.dt[, list( models = .N ), by = list(model.fac, problem)] err.list <- penaltyLearning::labelError( models.dt, label.dt, spike.dt, change.var = "mid.seconds", problem.var = "problem", label.vars = c("xmin", "xmax"), model.vars = "model.fac") type.colors <- c( data = "grey50", model = "blue") show.spikes <- spike.dt[xmin < mid.seconds & mid.seconds < xmax] show.data <- neuroSpike[xmin < seconds & seconds < xmax] spike.y <- -1.5 gg.out <- ggplot() + theme_bw() + theme(panel.spacing = grid::unit(0, "lines")) + facet_grid(model.fac ~ .) + penaltyLearning::geom_tallrect(aes( xmin = xmin, xmax = xmax, fill = label), color = NA, data = label.dt) + penaltyLearning::geom_tallrect(aes( xmin = xmin, xmax = xmax, linetype = status), fill = NA, color = "black", data = err.list$label.errors) + scale_linetype_manual( "error type", limits = c("correct", "false negative", "false positive"), values = c(correct = 0, "false negative" = 3, "false positive" = 1)) + geom_point(aes( seconds, calcium, color = type), shape = 1, data = data.table(type = "data", show.data)) + geom_line(aes( start.seconds, value, color = type), data = data.table( type = "model", tall.dt[xmin < start.seconds & start.seconds < xmax]), size = 0.5) + geom_point(aes( mid.seconds, spike.y, color = type), shape = 1, data = data.table(type = "model", show.spikes)) + scale_y_continuous( "Fluorescence intensity (Measure of neural activity)", breaks = seq(0, 10, by = 2), limits = c(-2, 10) ) + scale_x_continuous("Time (seconds)") + guides(color = "none") + scale_color_manual(values = type.colors) + geom_text(aes( x,y,label = label, color = type, hjust = hjust), size = 3, data = data.table(model.fac = m("AR1"), rbind( data.table( hjust = 0, x = 167, y = 2, label = "Noisy activity data", type = "data"), data.table( hjust = 1, x = 169.5, y = 5, label = "Mean model", type = "model"), data.table( hjust = 1, x = 169.4, y = spike.y, label = "Predicted spikes", type = "model")))) pdf("figure12.pdf") print(gg.out) dev.off() ## -------------------------------------------------------------------------- ## ## Nine-state model for QRS complex detection in electrocardiogram (ECG) data ## ## -------------------------------------------------------------------------- ## myGraph <- gfpop::graph( gfpop::Edge(0, 1, "down", penalty = 8e7, gap = 0), gfpop::Edge(1, 2, "up", penalty = 0, gap = 2000), gfpop::Edge(2, 3, "down", penalty = 0, gap = 5000), gfpop::Edge(3, 4, "up", penalty = 0, gap = 2000), gfpop::Edge(4, 5, "up", penalty = 0, gap = 1000), gfpop::Edge(5, 6, "up", penalty = 0, gap = 0), gfpop::Edge(6, 7, "down", penalty = 0, gap = 0), gfpop::Edge(7, 8, "down", penalty = 0, gap = 0), gfpop::Edge(8, 0, "up", penalty = 0, gap = 0), all.null.edges = TRUE) fit <- gfpop::gfpop( data = ECG$data$millivolts, mygraph = myGraph, type = "mean") end.i <- fit$changepoints start.i <- c(1, end.i[-length(end.i)]+1) segments.dt <- with(ECG$data, data.table( timeStart = time[start.i], timeEnd = time[end.i], state = as.numeric(fit$states), mean = fit$parameters)) segments.dt[, letter := c("beforeQ", "Q", "R", "S", "S1", "S2", "peak", "afterPeak", "foo")[state+1]] mean.dt <- segments.dt[, data.table( time = as.numeric(rbind(timeStart-0.5, timeEnd+0.5)), mean = as.numeric(rbind(mean, mean)))] m <- function(x){ factor(x, c("previous", "changepoint"), c( "Previous model", "Proposed model")) } model.dt <- rbind( segments.dt[, data.table( model = m("changepoint"), time = ifelse(letter == "Q", timeEnd, (timeStart+timeEnd)/2), millivolts = mean, letter)], data.table( model = m("previous"), ECG$PanTompkins) )[letter %in% c("Q", "R", "S")] samples.per.second <- 250 truth.dt <- segments.dt[letter == "R", list(time = (timeStart+timeEnd)/2)] gg <- ggplot() + geom_vline(aes( xintercept = time/samples.per.second), color = "red", data = truth.dt) + geom_text(aes( x, y, hjust = hjust, label = "True R"), color = "red", size = 3, data = data.table( x = 208.5, y = 6500, hjust = 1, label = "True R", model = m("changepoint"))) + theme_bw() + theme(panel.spacing = grid::unit(0, "lines")) + facet_grid(model ~ .) + geom_line(aes( time/samples.per.second, millivolts), color = "grey50", data = ECG$data) + geom_line(aes( time/samples.per.second, mean), data = data.table(model = m("changepoint"), mean.dt), color = "blue") + geom_label(aes( time/samples.per.second, millivolts, label = letter), color = "blue", size = 3, label.padding = grid::unit(0.1, "lines"), alpha = 0.6, data = model.dt) + coord_cartesian(xlim = c(52000, 52900)/samples.per.second, expand = FALSE) + xlab("Time (seconds)") + ylab("Electrocardiogram activity (mV)") pdf("figure14.pdf") print(gg) dev.off() ######################################################### ######################## SECTION 6 ###################### ######################################################### # number of simulations reduced to 20 to reduce time execution of this script # nbSimu <- 20 nbSimu <- 100 # number of simulations # data length reduced to 1000 to reduce time execution of this script # my_n <- 10^3 # time-series data length my_n <- 10^4 # time-series data length ############################################################################ ## (SECTION 6. JSS PAPER) Utility of penalized isotonic regression models ## ############################################################################ #-----------------------------------------------------# ## DATA SIMULATION FUNCTION for ISOTONIC SIMULATIONS ## #-----------------------------------------------------# simuData <- function(n = 10^3, D = 10, type = "isostep", noise = "student", dfree = 3, pcorr = 0.3, sigma = 10, min.length = 10) { # n = nb points # k = nb steps # type = isostep OR isolinear # noise = student OR corrupted # dfree = for student distribution : degree of freedom # pcorr = percent of corrupted data ### ### ALL signals between 0 and 100 ! ### if (type == "isostep") { data <- list() data$signal <- (floor(seq(from = 0, to = D-0.000001, length.out = n)))*100/D data$signal <- data$signal - mean(data$signal) data$y <- data$signal ## modified later data$w <- rep(1, length(data$y)) data$D <- D } if (type == "isolinear") { data <- list() data$signal <- (0:(n-1)) * (100/n) data$signal <- data$signal - mean(data$signal) data$y <- data$signal ## modified later data$w <- rep(1, length(data$y)) data$D <- D } ### noise if (noise == "gauss") { data$y <- data$y + sigma * rnorm(length(data$y)) # gaussian distribution } if (noise == "student") { data$y <- data$y + sigma * sqrt((dfree-2)/dfree) * rt(length(data$y), df = dfree) # Student t distribution (!= gauss; heavy tails) } if (noise == "corrupted") { data$y <- data$y + sigma*rnorm(length(data$y)) # gaussian distribution isp <- sample(1:n, trunc(pcorr * n)) data$y[isp] <- -data$y[isp] # opposition for data indices in isp } return(data) } #-----------------# ## ESTIMATE FIT ## #-----------------# estimateAllSeg <- function(simuData, type = "mean", K = 2, pen = 2) { response <- list() response[[1]] <- isoreg(simuData$y)$yf response[[2]] <- reg_1d(simuData$y, simuData$w, metric = 2, unimodal = FALSE, decreasing = FALSE) response[[3]] <- reg_1d(simuData$y, simuData$w, metric = 1, unimodal = FALSE, decreasing = FALSE) isoFpop4 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = 0, type = "isotonic"), type = type) isoFpop5 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = 0, type = "isotonic", K = K), type = type) isoFpop6 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = pen, type = "isotonic"), type = type) isoFpop7 <- gfpop(data = simuData$y, weights = simuData$w, mygraph = graph(penalty = pen, type = "isotonic", K = K), type = type) response[[4]] <- rep(isoFpop4$parameters, diff(c(0, isoFpop4$changepoints))) response[[5]] <- rep(isoFpop5$parameters, diff(c(0, isoFpop5$changepoints))) response[[6]] <- rep(isoFpop6$parameters, diff(c(0, isoFpop6$changepoints))) response[[7]] <- rep(isoFpop7$parameters, diff(c(0, isoFpop7$changepoints))) return(response) } #----------------------------# ## Compute MSE of 7 methods ## #----------------------------# MSEall <- function(data, estimate) { n <- length(data$signal) res <- data.frame(matrix(0, nrow = 1, ncol = 7)) for (i in 1:7) { u <- data$signal - estimate[[i]] res[1, i] <- (1/n) * (t(u) %*% u) } return(res) } #-----------------# ## MSEdataSignal ## #-----------------# MSEdataSignal <- function(data, signal, method = "NONE") { if (method == "NONE") { n <- length(data) u <- data - signal res <- (1/n) * (t(u) %*% u) } if (method == "linear") { n <- length(data) x <- 1:n linearMod <- lm(data ~ x) line <- linearMod$coefficients[1] + linearMod$coefficients[2] * x u <- line - signal res <- (1/n) * (t(u) %*% u) } return(res) } #################################################################################################### #--------------------------# ## get number of segments ## #--------------------------# getD <- function(estimate) { res <- sapply(estimate, FUN = function(x) sum(diff(x) != 0)) return(res + 1) } #-----------# ## varDiff ## #-----------# varDiff <- function(x, method = "HALL") { if (method == "SD") { return(sd(diff(x)/sqrt(2))) } if (method == "MAD") { return(mad(diff(x)/sqrt(2))) } if (method == "HALL") { n <- length(x) wei <- c(0.1942, 0.2809, 0.3832, -0.8582) mat <- wei %*% t(x) mat[2, -n] <- mat[2, -1] mat[3, -c(n-1, n)] <- mat[3, -c(1, 2)] mat[4, -c(n-2, n-1, n)] <- mat[4, -c(1, 2, 3)] return(sqrt(sum(apply(mat[, -c(n-2, n-1, n)], 2, sum)^2) / (n-3))) } } #--------------# ## plotAllSeg ## #--------------# plotAllSeg <- function(data, estimate) { plot(data$y, pch = "+", cex = 1.8) # cex = 1.2 lty <- c(1, 2, 3, 4, 5, 6, 7) col <- c("black", "blue", "red", "yellow", "orange", "magenta", "green") for (i in 1:7) lines(estimate[[i]], lty = lty[i], lwd = 2, col = col[i]) } #--------------# ## plotSeg ## #--------------# plotSeg <- function(data, estimate) { plot(data$y, pch = ".", cex = 1) # cex = 1.2 lty <- c(1, 3, 7) col <- c("red", "magenta", "blue") ttype <- c(4, 1, 5) lines(data$signal, lwd = 1.9, col = "black") for (i in 1:3) lines(estimate[[lty[i]]], lwd = 2, lty = ttype[i], col = col[i]) legend("bottomright", legend = c("true signal", "isoreg", "reg_1d L1", "gfpop4"), col = c("black", col), lty = c(1, ttype), lwd = 2, cex = 1.5) } ############# ############# one.simu ############# one.simu <- function(i, n = 10^2, D = 10, type = "isolinear", noise = "gauss", sigma = 10, df = 3, pcorr = 0.3, estimating = "MSE", sigma.estimate = FALSE) { # SIGMA ESTIMATION if (sigma.estimate == TRUE) { sigma <- sdDiff(data$y) } # PENALTY + K myK <- 3 * (sigma) ^ 2 myPen <- 2 * sigma * sigma * log(n) # DATA data <- simuData(n, D = D, type = type, noise = noise, pcorr = pcorr, df = df, sigma = sigma) # Estimation signal estim <- estimateAllSeg(data, K = myK, pen = myPen) #----------------------------------# if (estimating == "MSE") { # all MSE (x9) ref <- MSEdataSignal(data$y, data$signal) ref_lin <- MSEdataSignal(data$y, data$signal, method = "linear") mseAll <- MSEall(data, estim) # ind, method, mse, beta, K df <- data.frame(numeric(0), character(), numeric(0), numeric(0), numeric(0), stringsAsFactors = FALSE) colnames(df) <- c("index", "method", "MSE", "penalty", "K") df[1, ] <- c(i, "MSE", ref, 0, Inf) df[2, ] <- c(i, "MSE_linearFit", ref_lin, 0, Inf) df[3, ] <- c(i, "isoreg", mseAll[1], 0, Inf) df[4, ] <- c(i, "reg_1d_L2", mseAll[2], 0, Inf) df[5, ] <- c(i, "reg_1d_L1", mseAll[3], 0, Inf) df[6, ] <- c(i, "gfpop", mseAll[4], 0, Inf) df[7, ] <- c(i, "gfpop", mseAll[5], 0, myK) df[8, ] <- c(i, "gfpop", mseAll[6], myPen, Inf) df[9, ] <- c(i, "gfpop", mseAll[7], myPen, myK) return(df) } #----------------------------------# if (estimating == "D") { # all D (x7) myD <- getD(estim) # ind, method, mse, beta, K df <- data.frame(numeric(0), character(), numeric(0), numeric(0), numeric(0), stringsAsFactors = FALSE) colnames(df) <- c("index", "method", "D", "penalty", "K") df[1, ] <- c(i, "isoreg", myD[1], 0, Inf) df[2, ] <- c(i, "reg_1d_L2", myD[2], 0, Inf) df[3, ] <- c(i, "reg_1d_L1", myD[3], 0, Inf) df[4, ] <- c(i, "gfpop", myD[4], 0, Inf) df[5, ] <- c(i, "gfpop", myD[5], 0, myK) df[6, ] <- c(i, "gfpop", myD[6], myPen, Inf) df[7, ] <- c(i, "gfpop", myD[7], myPen, myK) return(df) } } #------------------------------------------------------ ##### ISOTONIC REGRESSION ##### library("isotone") # slow but more general library("UniIsoRegression") library("stats") ## https://www.sciencedirect.com/science/article/pii/S0167947308003964?via%3Dihub ## noise = corrupted simular to figure in https://arxiv.org/pdf/1707.09157.pdf ## reg_1d -> package UniIsoRegression ## isoreg -> package stats #----------------------------------# ## Figure 15 ## #----------------------------------# my_n = 10^4 #time-series data length isolinearG <- simuData(n = my_n, type = "isolinear", noise = "gauss")$y isolinearS <- simuData(n = my_n, type = "isolinear", noise = "student")$y isolinearC <- simuData(n = my_n, type = "isolinear", noise = "corrupted")$y isostepG <- simuData(n = my_n, type = "isostep", noise = "gauss")$y isostepS <- simuData(n = my_n, type = "isostep", noise = "student")$y isostepC <- simuData(n = my_n, type = "isostep", noise = "corrupted")$y pdf("figure15.pdf") mini <- min(isostepG) maxi <- max(isostepG) par(mfrow=c(3,2)) par(mar = c(2,2, 1, 1)) # Set the margin on all sides to 2 plot(isolinearG, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "") plot(isostepG, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "") plot(isolinearS, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "") plot(isostepS, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "") plot(isolinearC, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "") plot(isostepC, pch = '+', cex = 0.3, ylim = c(mini,maxi), xlab = "", ylab = "") sum(isolinearG > maxi) sum(isolinearG < mini) sum(isostepS > maxi) sum(isostepS < mini) sum(isolinearS > maxi) sum(isolinearS < mini) sum(isolinearC > maxi) sum(isolinearC < mini) sum(isostepC > maxi) sum(isostepC < mini) dev.off() #----------------------------------# ## Figure 16 ## #----------------------------------# set.seed(1) n <- 10^4 sigma <- 10 myK <- (2 * sigma) ^ 2 # OR (sqrt(3) * sigma) ^ 2 myPen <- 2 * sigma * sigma * log(n) # DATA data <- simuData(n, D = 10, type = "isostep", noise = "corrupted", pcorr = 0.3, sigma = sigma) estim <- estimateAllSeg(data, K = myK, pen = myPen) pdf("figure16.pdf") plot(data$y, pch = ".", cex = 1) # cex = 1.2 lty <- c(1, 3, 7) col <- c("red", "magenta", "blue") ttype <- c(4, 1, 5) lines(data$signal, lwd = 1.9, col = "black") for (i in 1:3) { lines(estim[[lty[i]]], lwd = 2, lty = ttype[i], col = col[i]) } legend("bottomright", legend = c("true signal", "isoreg", "reg_1d L1", "gfpop4"), col = c("black", col), lty = c(1, ttype), lwd = 2, cex = 1.5) dev.off() ######################################################################### ######################################################################### ######################################################################### ######################################################################### ######################################################################### library("parallel") cores <- detectCores() # cores <- 1 # for windows user only cores <- 97 RNGkind("L'Ecuyer-CMRG") ######################################################################### nbSimu <- 100 # number of simulations my_n <- 10^4 # time-series data length ######################################################################## ############################# ### SIMULATION FOR TABLE 1 set.seed(50) lres1_lin <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, type = "isolinear", noise = "gauss", sigma = 10, estimating = "MSE", mc.cores = cores) lres2_lin <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, type = "isolinear", noise = "student", sigma = 10, df = 3, estimating = "MSE", mc.cores = cores) lres3_lin <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, type = "isolinear", noise = "corrupted", sigma = 10, pcorr = 0.3, estimating = "MSE", mc.cores = cores) df_linear_gauss <- do.call(rbind, lres1_lin) df_linear_student <- do.call(rbind, lres2_lin) df_linear_corrupted <- do.call(rbind, lres3_lin) ############################# ### SIMULATION FOR TABLE 2 set.seed(25) lres1_step <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, type = "isostep", noise = "gauss", sigma = 10, estimating = "MSE", mc.cores = cores) lres2_step <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, type = "isostep", noise = "student", sigma = 10, df = 3, estimating = "MSE", mc.cores = cores) lres3_step <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, type = "isostep", noise = "corrupted", sigma = 10, pcorr = 0.3, estimating = "MSE", mc.cores = cores) df_step_gauss <- do.call(rbind, lres1_step) df_step_student <- do.call(rbind, lres2_step) df_step_corrupted <- do.call(rbind, lres3_step) ############################# ### SIMULATION FOR TABLE 3 set.seed(60) lres1_stepD <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, D = 10, type = "isostep", noise = "gauss", sigma = 10, estimating = "D", mc.cores = cores) lres2_stepD <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, D = 10, type = "isostep", noise = "student", sigma = 10, df = 3, estimating = "D", mc.cores = cores) lres3_stepD <- mclapply(1:nbSimu, FUN = one.simu, n = my_n, D = 10, type = "isostep", noise = "corrupted", sigma = 10, pcorr = 0.3, estimating = "D", mc.cores = cores) df_D_gauss <- do.call(rbind, lres1_stepD) df_D_student <- do.call(rbind, lres2_stepD) df_D_corrupted <- do.call(rbind, lres3_stepD) ########################################## #----------------------# ## TABLE 1: isolinear ## #----------------------# meth <- c(df_linear_gauss[1:5, 2], "gfpop1", "gfpop2", "gfpop3", "gfpop4") df_linear_gauss[, 2] <- rep(meth, nbSimu) df_linear_student[, 2] <- rep(meth, nbSimu) df_linear_corrupted[, 2] <- rep(meth, nbSimu) df_linear_gauss$MSE <- as.numeric(df_linear_gauss$MSE) df_linear_student$MSE <- as.numeric(df_linear_student$MSE) df_linear_corrupted$MSE <- as.numeric(df_linear_corrupted$MSE) ## One Inf to remove !! on corrupted fpop2, instable due to beta = 0 # df_linear_student <- df_linear_student[!(df_linear_student$MSE == Inf), ] # df_linear_corrupted <- df_linear_corrupted[!(df_linear_corrupted$MSE == Inf), ] with(df_linear_gauss, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)] with(df_linear_gauss, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)] with(df_linear_student, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)] with(df_linear_student, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)] with(df_linear_corrupted, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)] with(df_linear_corrupted, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)] #--------------------# ## TABLE 2: isostep ## #--------------------# meth <- c(df_step_gauss[1:5, 2], "gfpop1", "gfpop2", "gfpop3", "gfpop4") df_step_gauss[, 2] <- rep(meth, nbSimu) df_step_student[, 2] <- rep(meth, nbSimu) df_step_corrupted[, 2] <- rep(meth, nbSimu) df_step_gauss$MSE <- as.numeric(df_step_gauss$MSE) df_step_student$MSE <- as.numeric(df_step_student$MSE) df_step_corrupted$MSE <- as.numeric(df_step_corrupted$MSE) with(df_step_gauss, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)] with(df_step_gauss, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)] with(df_step_student, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)] with(df_step_student, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)] with(df_step_corrupted, tapply(MSE, method, mean))[c(7, 5, 8, 9, 1:4)] with(df_step_corrupted, tapply(MSE, method, sd))[c(7, 5, 8, 9, 1:4)] #------------------# ## TABLE 3: D HAT ## #------------------# meth <- c(df_D_gauss[1:3, 2], "gfpop1", "gfpop2", "gfpop3", "gfpop4") df_D_gauss[, 2] <- rep(meth, nbSimu) df_D_student[, 2] <- rep(meth, nbSimu) df_D_corrupted[, 2] <- rep(meth, nbSimu) df_D_gauss$D <- as.numeric(df_D_gauss$D) df_D_student$D <- as.numeric(df_D_student$D) df_D_corrupted$D <- as.numeric(df_D_corrupted$D) with(df_D_gauss, tapply(D, method, mean))[c(5:7, 1:4)] with(df_D_gauss, tapply(D, method, sd))[c(5:7, 1:4)] with(df_D_student, tapply(D, method, mean))[c(5:7, 1:4)] with(df_D_student, tapply(D, method, sd))[c(5:7, 1:4)] with(df_D_corrupted, tapply(D, method, mean))[c(5:7, 1:4)] with(df_D_corrupted, tapply(D, method, sd))[c(5:7, 1:4)] #---------------------------------------------------------------# ## Figure 17: Violin plots of the MSE for iso-step simulations ## #---------------------------------------------------------------# # create factor # force numeric values df_step_student$method <- as.factor(df_step_student$method) df_step_student$MSE <- as.numeric(df_step_student$MSE) # partial plot me <- c("isoreg", "reg_1d_L1", "gfpop2", "gfpop3", "gfpop4") df <- df_step_student[which(df_step_student$method %in% me), ] # create factor # force numeric values df$method <- factor(df$method, levels = me) df$MSE <- as.numeric(df$MSE) # ggplot(df, aes(x = method, y = MSE, color = method)) + geom_violin() p <- ggplot(df, aes(x = method, y = MSE, color = method)) + geom_boxplot() pdf("figure24.pdf") print(p) dev.off() ################################################################################################################## ### ### ### ### ### ### ### 7. Conclusion ### ### ### ### ### ### ### ## time gfpop set.seed(1) myData <- rnorm(10^5) myGraph <- graph(penalty = 2 * log(n), type = "std") system.time( gfpop(data = myData, mygraph = myGraph, type = "mean") ) ## time fpop ## Install if necessary if (!require("fpopw")) { install.packages("fpopw") library("fpopw") } library("fpopw") system.time( Fpop(x = myData, lambda = 2 * log(length(myData))) )