## =========================================
## The file contains the function definitions available at
## https://github.com/pfryz/wild-binary-segmentation-2.0/blob/ebdeca927c7b148cefbe5e373acbfa5411578fa5/WBS2_SDLL_github_v5.R
## https://github.com/pfryz/wild-binary-segmentation-2.0/blob/ebdeca927c7b148cefbe5e373acbfa5411578fa5/utils.R
## =========================================


wbs.sdll.cpt <- function(x, sigma = stats::mad(diff(x)/sqrt(2)), universal = TRUE, M = NULL, th.const = NULL, th.const.min.mult = 0.3, lambda = 0.9, cusums = "systematic") {
	
	n <- length(x)
    if (n <= 1) {
        no.of.cpt <- 0
        cpt <- integer(0)
    }
    else {
		if (sigma == 0) stop("Noise level estimated at zero; therefore no change-points to estimate.")
		if (universal) {
        	u <- universal.M.th.v3(n, lambda)
        	th.const <- u$th.const
        	M <- u$M
    	}
    	else if (is.null(M) || is.null(th.const)) stop("If universal is FALSE, then M and th.const must be specified.")
    	th.const.min <- th.const * th.const.min.mult
    	th <- th.const * sqrt(2 * log(n)) * sigma
    	th.min <- th.const.min * sqrt(2 * log(n)) * sigma

		if (cusums == "random") cusum.sampling <- random.cusums else if (cusums == "systematic") cusum.sampling <- systematic.cusums

 		rc <- t(wbs.K.int(x, M, cusum.sampling))
 		if (max(abs(rc[,4])) < th) {
    	    no.of.cpt <- 0
        	cpt <- integer(0)

 		}
		else {
			indices <- which(abs(rc[,4]) > th.min)
			if (length(indices) == 1) {
				cpt <- rc[indices, 3]
				no.of.cpt <- 1
			}
			else {
				rc.sel <- rc[indices,,drop=F]
				ord <- order(abs(rc.sel[,4]), decreasing=T)
				z <- abs(rc.sel[ord,4])
				z.l <- length(z)
				dif <- -diff(log(z))
				dif.ord <- order(dif, decreasing=T)
				j <- 1
				while ((j < z.l) & (z[dif.ord[j]+1] > th)) j <- j+1
				if (j < z.l) no.of.cpt <- dif.ord[j] else no.of.cpt <- z.l
				cpt <- sort((rc.sel[ord,3])[1:no.of.cpt])			
			}
		} 
    }
    est <- mean.from.cpt(x, cpt)
	list(est=est, no.of.cpt=no.of.cpt, cpt=cpt)
}


wbs.sdll.cpt.rep <- function(x, sigma = stats::mad(diff(x)/sqrt(2)), universal = TRUE, M = NULL, th.const = NULL, th.const.min.mult = 0.3, lambda = 0.9, repeats = 9) {

	res <- vector("list", repeats)
	
	cpt.combined <- integer(0)
	
	nos.of.cpts <- rep(0, repeats)
	
	for (i in 1:repeats) {
		
		res[[i]] <- wbs.sdll.cpt(x, sigma, universal, M, th.const, th.const.min.mult, lambda, "random")
		cpt.combined <- c(cpt.combined, res[[i]]$cpt)
		nos.of.cpts[i] <- res[[i]]$no.of.cpt				
		
	}

	med.no.of.cpt <- median(nos.of.cpts)
	
	med.index <- which.min(abs(nos.of.cpts - med.no.of.cpt))
	
	med.run <- res[[med.index]]
	
	list(med.run = med.run, cpt.combined = sort(cpt.combined))

}


wbs.K.int <- function(x, M, cusum.sampling) {
	
	n <- length(x)
	if (n == 1) return(matrix(NA, 4, 0))
	else {
		cpt <- t(cusum.sampling(x, M)$max.val)
		return(cbind(cpt, wbs.K.int(x[1:cpt[3]], M, cusum.sampling), wbs.K.int(x[(cpt[3]+1):n], M, cusum.sampling) + c(rep(cpt[3], 3), 0)            ))
	}
	
}


random.cusums <- function(x, M) {

	y <- c(0, cumsum(x))

	n <- length(x)
	
	M <- min(M, (n-1)*n/2)
		
	res <- matrix(0, M, 4)
	
	if (n==2) ind <- matrix(c(1, 2), 2, 1)
	else if (M == (n-1)*n/2) {
		ind <- matrix(0, 2, M)
		ind[1,] <- rep(1:(n-1), (n-1):1)
		ind[2,] <- 2:(M+1) - rep(cumsum(c(0, (n-2):1)), (n-1):1)
	}
	else {
		ind <- ind2 <- matrix(floor(runif(2*M) * (n-1)), nrow=2)
		ind2[1,] <- apply(ind, 2, min)
		ind2[2,] <- apply(ind, 2, max)
		ind <- ind2 + c(1, 2)
	}

	res[,1:2] <- t(ind)
	res[,3:4] <- t(apply(ind, 2, max.cusum, y))

	max.ind <- which.max(abs(res[,4]))

	max.val <- res[max.ind,,drop=F]

	list(res=res, max.val=max.val, M.eff=M)

}


systematic.cusums <- function(x, M) {
	
	y <- c(0, cumsum(x))

	n <- length(x)
	
	M <- min(M, (n-1)*n/2)
			
	ind <- grid.intervals(n, M)

	M <- dim(ind)[2]

	res <- matrix(0, M, 4)

	res[,1:2] <- t(ind)
	res[,3:4] <- t(apply(ind, 2, max.cusum, y))

	max.ind <- which.max(abs(res[,4]))

	max.val <- res[max.ind,,drop=F]

	list(res=res, max.val=max.val, M.eff=M)
		
}


max.cusum <- function(ind, y) {
	
		z <- y[(ind[1]+1):(ind[2]+1)] - y[ind[1]]
		m <- ind[2]-ind[1]+1
		ip <- sqrt(((m-1):1) / m / (1:(m-1))) * z[1:(m-1)] - sqrt((1:(m-1)) / m / ((m-1):1)) * (z[m] - z[1:(m-1)])
		ip.max <- which.max(abs(ip))
		
		c(ip.max + ind[1] - 1, ip[ip.max])

}


## =========================================
## Contents of utils.R
## =========================================
mean.from.cpt <- function(x, cpt) {


	n <- length(x)

	len.cpt <- length(cpt)

	if (len.cpt) cpt <- sort(cpt)

	beg <- endd <- rep(0, len.cpt+1)

	beg[1] <- 1

	endd[len.cpt+1] <- n

	if (len.cpt) {

		beg[2:(len.cpt+1)] <- cpt+1

		endd[1:len.cpt] <- cpt

	}

	means <- rep(0, len.cpt+1)

	for (i in 1:(len.cpt+1)) means[i] <- mean(x[beg[i]:endd[i]])

	rep(means, endd-beg+1)

}


universal.M.th.v3 <- function(n, lambda = 0.9) {


	mat.90 <- matrix(0, 24, 3)

	mat.90[,1] <- c(10, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1500, 2000, 2500, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000)

	mat.90[,2] <- c(1.420, 1.310, 1.280, 1.270, 1.250, 1.220, 1.205, 1.205, 1.200, 1.200, 1.200, 1.185, 1.185, 1.170, 1.170, 1.160, 1.150, 1.150, 1.150, 1.150, 1.145, 1.145, 1.135, 1.135)

	mat.90[,3] <- rep(100, 24)


	mat.95 <- matrix(0, 24, 3)

	mat.95[,1] <- c(10, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1500, 2000, 2500, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000)

	mat.95[,2] <- c(1.550, 1.370, 1.340, 1.320, 1.300, 1.290, 1.265, 1.265, 1.247, 1.247, 1.247, 1.225, 1.225, 1.220, 1.210, 1.190, 1.190, 1.190, 1.190, 1.190, 1.190, 1.180, 1.170, 1.170)

	mat.95[,3] <- rep(100, 24)


	if (lambda == 0.9) A <- mat.90 else A <- mat.95


	d <- dim(A)

	if (n < A[1,1]) {

		th <- A[1,2]

		M <- A[1,3]

	}

	else if (n > A[d[1],1]) {

		th <- A[d[1],2]

		M <- A[d[1],3]

	}

	else {

		ind <- order(abs(n - A[,1]))[1:2]

		s <- min(ind)

		e <- max(ind)

		th <- A[s,2] * (A[e,1] - n)/(A[e,1] - A[s,1]) + A[e,2] * (n - A[s,1])/(A[e,1] - A[s,1])

		M <- A[s,3] * (A[e,1] - n)/(A[e,1] - A[s,1]) + A[e,3] * (n - A[s,1])/(A[e,1] - A[s,1])

	}


	list(th.const=th, M=M)

}


all.intervals.flat <- function(n) {

	if (n == 2) ind <- matrix(1:2, 2, 1) else {
		M <- (n-1)*n/2
		ind <- matrix(0, 2, M)
		ind[1,] <- rep(1:(n-1), (n-1):1)
		ind[2,] <- 2:(M+1) - rep(cumsum(c(0, (n-2):1)), (n-1):1)
	}
	ind

}

grid.intervals <- function(n, M) {

	if (n==2) ind <- matrix(c(1, 2), 2, 1)

	else if (M >= (n-1)*n/2) ind <- all.intervals.flat(n)

	else {
		k <- 1
		while (k*(k-1)/2 < M) k <- k+1
		ind2 <- all.intervals.flat(k)
		ind2.mx <- max(ind2)
		ind <- round((ind2 - 1) * ((n-1) / (ind2.mx-1)) + 1)
	}

	ind
}