library("copulaedas")


###############################################################################
# 2.1. General procedure of an EDA
###############################################################################

# Save the information to create Figure 1.

saveData <- function (eda, gen, fEvals, model, pop, popEval) {
  dataDir <- file.path(getwd(), paste(gen))
  dir.create(dataDir)
  setwd(dataDir)
  
  writeData <- function (data, file)
    write.table(data, file, row.names = FALSE, col.names = FALSE)
  writeData(pop, "pop.txt")
  writeData(popEval, "popEval.txt")
  s <- edaSelect(eda, gen, pop, popEval)
  selectedPop <- pop[s, ]
  selectedEval <- popEval[s]
  writeData(selectedPop, "selectedPop.txt")
  writeData(selectedEval, "selectedEval.txt")
  if (gen > 1) {
    writeData(t(sapply(model$margins, function (x) x)), "model.txt")
  }
  
  setwd(file.path(getwd(), ".."))
}

setMethod("edaReport", "CEDA", edaReportCombined(edaReportSimple, saveData))
setMethod("edaTerminate", "CEDA", edaTerminateEval)
umda <- CEDA(copula = "indep", margin = "norm", popSize = 30, 
  fEval = 0, fEvalTol = 1e-2)

set.seed(12345)
edaRun(umda, fSphere, rep(-10, 2), rep(10, 2))

# Plot Figure 1.

library("lattice")
library("tikzDevice")

x1mean <- numeric(3)
x2mean <- numeric(3)
x1sd <- numeric(3)
x2sd <- numeric(3)
for (i in seq(length = 3)) {
  model <- read.table(paste(i+1, "/model.txt", sep = ""))
  x1mean[i] <- model[1, 1]
  x2mean[i] <- model[2, 1]
  x1sd[i] <- model[1, 2]
  x2sd[i] <- model[2, 2]
}
lengths <- c(40, 45, 50)

f <- function (x, y, x1mean, x1sd, x2mean, x2sd) {
  dnorm(x, mean = x1mean, sd = x1sd) * dnorm(y, mean = x2mean, sd = x2sd)
}

for (i in seq(along = x1mean)) {
  tikz(file = paste(i+1, "/model.tex", sep = ""), width = 3.5, height = 3.5)
  grid <- expand.grid(x = seq(from = -10, to = 10, length = lengths[i]),
    y = seq(from = -10, to = 10, length = lengths[i]))
  grid$z <- with(grid, f(x, y, x1mean[i], x1sd[i], x2mean[i], x2sd[i]))
  plot(wireframe(z ~ x * y, grid, zlab = "", xlab = "$x_1$", ylab = "$x_2$",
    par.settings = list(axis.line = list(col = "transparent"),
      box.3d = list(col = "transparent")),
    border = "transparent", scales = list(x = list(arrows = FALSE),
    y = list(arrows = FALSE), z = list(draw = FALSE))))
  dev.off()
}

removeMethod("edaReport", "CEDA")
removeMethod("edaTerminate", "CEDA")

###############################################################################
# 4.1 Running the EDAs implemented in the package
###############################################################################

setMethod("edaTerminate", "EDA",
  edaTerminateCombined(edaTerminateEval, edaTerminateMaxGen))

setMethod("edaReport", "EDA", edaReportSimple)

gceda <- CEDA(copula = "normal", margin = "kernel",
  popSize = 200, fEval = 0, fEvalTol = 1e-6, maxGen = 50)
gceda@name <- "Gaussian Copula Estimation of Distribution Algorithm"

set.seed(12345)
result <- edaRun(gceda, fSphere, rep(-300, 5), rep(900, 5))

show(result)

setMethod("edaReport", "EDA", edaReportDisabled)
set.seed(12345)
results <- edaIndepRuns(gceda, fSphere, rep(-300, 5), rep(900, 5), 30)

show(results)

summary(results)


###############################################################################
# 4.2 Implementation of a new EDA based on copulas
###############################################################################

setClass("CopulaMIMIC", contains = "EDA",
  prototype = prototype(name = "Copula MIMIC"))

CopulaMIMIC <- function (...) {
  new("CopulaMIMIC", parameters = list(...))
}

fbetamargin <- function (x, lower, upper) {
  x <- (x - lower) / (upper - lower)
  loglik <- function (s) sum(dbeta(x, s[1], s[2], log = TRUE))
  s <- optim(c(1, 1), loglik, control = list(fnscale = -1))$par
  list(lower = lower, upper = upper, a = s[1], b = s[2])
}

pbetamargin <- function (q, lower, upper, a, b) {
  q <- (q - lower) / (upper - lower)
  pbeta(q, a, b)
}

qbetamargin <- function (p, lower, upper, a, b) {
  q <- qbeta(p, a, b)
  lower + q * (upper - lower)
}

edaLearnCopulaMIMIC <- function (eda, gen, previousModel,
    selectedPop, selectedEval, lower, upper) {
  margin <- eda@parameters$margin
  copula <- eda@parameters$copula

  if (is.null(margin)) margin <- "betamargin"
  if (is.null(copula)) copula <- "normal"

  fmargin <- get(paste("f", margin, sep = ""))
  pmargin <- get(paste("p", margin, sep = ""))
  copula <- switch(copula,
    normal = copula::normalCopula(0), frank = copula::frankCopula(0))

  n <- ncol(selectedPop)

  # Estimate the parameters of the marginal distributions.
  margins <- lapply(seq(length = n),
    function (i) fmargin(selectedPop[ , i], lower[i], upper[i]))
  uniformPop <- sapply(seq(length = n),
    function (i) do.call(pmargin,
      c(list(selectedPop[ , i]), margins[[i]])))

  # Calculate pairwise mutual information by using copula entropy.
  C <- matrix(list(NULL), nrow = n, ncol = n)
  I <- matrix(0, nrow = n, ncol = n)
  for (i in seq(from = 2, to = n)) {
    for (j in seq(from = 1, to = i - 1)) {
      # Estimate the parameters of the copula.
      data <- cbind(uniformPop[ , i], uniformPop[ , j])
      startCopula <- copula::fitCopula(copula, data, method = "itau",
        estimate.variance = FALSE)@copula
      C[[i, j]] <- tryCatch(
        copula::fitCopula(startCopula, data, method = "ml",
          start = startCopula@parameters,
          estimate.variance = FALSE)@copula,
        error = function (error) startCopula)
      # Calculate mutual information.
      if (is(C[[i, j]], "normalCopula")) {
  	    I[i, j] <- -0.5 * log(1 - C[[i, j]]@parameters^2)
      } else {
        u <- rcopula(C[[i, j]], 100)
        I[i, j] <- sum(log(dcopula(C[[i, j]], u))) / 100
      }
      C[[j, i]] <- C[[i, j]]; I[j, i] <- I[i, j]
    }
  }

  # Select a permutation of the variables.
  perm <- as.vector(arrayInd(which.max(I), dim(I)))
  copulas <- C[perm[1], perm[2]]
  I[perm, ] <- -Inf
  for (k in seq(length = n - 2)) {
    ik <- which.max(I[ , perm[1]])
    perm <- c(ik, perm)
    copulas <- c(C[perm[1], perm[2]], copulas)
    I[ik, ] <- -Inf
  }

  list(margins = margins, perm = perm, copulas = copulas)
}

setMethod("edaLearn", "CopulaMIMIC", edaLearnCopulaMIMIC)

edaSampleCopulaMIMIC <- function (eda, gen, model, lower, upper) {
  popSize <- eda@parameters$popSize
  margin <- eda@parameters$margin

  if (is.null(popSize)) popSize <- 100
  if (is.null(margin)) margin <- "betamargin"

  qmargin <- get(paste("q", margin, sep = ""))

  n <- length(model$margins)
  perm <- model$perm
  copulas <- model$copulas

  # Simulate the chain structure with the copulas.
  uniformPop <- matrix(0, nrow = popSize, ncol = n)
  uniformPop[ , perm[n]] <- runif(popSize)
  for (k in seq(from = n - 1, to = 1)) {
    u <- runif(popSize)
    v <- uniformPop[ , perm[k + 1]]
    uniformPop[ , perm[k]] <- vines::hinverse(copulas[[k]], u, v)
  }

  # Evaluate the inverse of the marginal distributions. 
  pop <- sapply(seq(length = n),
    function (i) do.call(qmargin, 
      c(list(uniformPop[ , i]), model$margins[[i]])))

  pop
}

setMethod("edaSample", "CopulaMIMIC", edaSampleCopulaMIMIC)


###############################################################################
# 4.3 Performing an empirical study on benchmark problems
###############################################################################

umda <- CEDA(copula = "indep", margin = "norm")
umda@name <- "UMDA"

gceda <- CEDA(copula = "normal", margin = "norm")
gceda@name <- "GCEDA"

cveda <- VEDA(vine = "CVine", indepTestSigLevel = 0.01,
  copulas = c("normal"), margin = "norm")
cveda@name <- "CVEDA"

dveda <- VEDA(vine = "DVine", indepTestSigLevel = 0.01,
  copulas = c("normal"), margin = "norm")
dveda@name <- "DVEDA"

copulamimic <- CopulaMIMIC(copula = "normal", margin = "norm")
copulamimic@name <- "CopulaMIMIC"

edas <- list(umda, gceda, cveda, dveda, copulamimic)
fNames <- c("Sphere", "SummationCancellation")
experiments <- list()
for (eda in edas) {
  for (fName in fNames) {
    experiment <- list(eda = eda, fName = fName)
    experiments <- c(experiments, list(experiment))
  }
}

runExperiment <- function (experiment) {
  eda <- experiment$eda
  fName <- experiment$fName

  # Objective function parameters.
  fInfo <- list(
    Sphere = list(lower = -600, upper = 600, fEval = 0),
    SummationCancellation = list(lower = -0.16, upper = 0.16, 
        fEval = -1e5))
  lower <- rep(fInfo[[fName]]$lower, 10)
  upper <- rep(fInfo[[fName]]$upper, 10)
  f <- get(paste("f", fName, sep = ""))

  # Configure termination criteria and disable reporting.
  eda@parameters$fEval <- fInfo[[fName]]$fEval
  eda@parameters$fEvalTol <- 1e-6
  eda@parameters$fEvalStdDev <- 1e-8
  eda@parameters$maxEvals <- 300000
  setMethod("edaTerminate", "EDA",
    edaTerminateCombined(edaTerminateEval, edaTerminateMaxEvals,
      edaTerminateEvalStdDev))
  setMethod("edaReport", "EDA", edaReportDisabled)

  sink(paste(eda@name, "_", fName, ".txt", sep = ""))

  # Determine the critical population size.
  set.seed(12345)
  results <- edaCriticalPopSize(eda, f, lower, upper,
    eda@parameters$fEval, eda@parameters$fEvalTol, lowerPop = 50,
    upperPop = 2000, totalRuns = 30, successRuns = 30,
    stopPercent = 10, verbose = TRUE)
  if (is.null(results)) {
    # Run the experiment with the largest population size, if the
    # critical population size was not found.
    eda@parameters$popSize <- 2000
    set.seed(12345)
    edaIndepRuns(eda, f, lower, upper, runs = 30, verbose = TRUE)
  }
  sink(NULL)
}

for (experiment in experiments) runExperiment(experiment)

###############################################################################
# 4.4 Solution of the molecular docking problem
###############################################################################

system("R CMD SHLIB docking.c")
dyn.load(paste("docking", .Platform$dynlib.ext, sep = ""))
.C("docking_load", as.character("2z5u.dat"))

lower <- c(.C("docking_xlo", out=as.double(0))$out,
  .C("docking_ylo", out=as.double(0))$out,
  .C("docking_zlo", out=as.double(0))$out,
  0, -pi/2, -pi,
  rep(-pi, .C("docking_ntor", out=as.integer(0))$out))

upper <- c(.C("docking_xhi", out=as.double(0))$out,
  .C("docking_yhi", out=as.double(0))$out,
  .C("docking_zhi", out=as.double(0))$out,
  2*pi, pi/2, pi, 
  rep(pi, .C("docking_ntor", out=as.integer(0))$out))

fDocking <- function (sol) {
  .C("docking_score", sol=as.double(sol), out=as.double(0))$out
}

setMethod("edaTerminate", "EDA", edaTerminateMaxGen)

# Auxiliar function to print information about the vines (to generate 
# Figure 2 and the statistics about the number of trees in the vine).

setMethod("edaReport", "VEDA",
function (eda, gen, fEvals, model, pop, popEval) {
  sink(sprintf("%s.csv", eda@parameters$vine), append = TRUE)
  if (is.null(model)) {
    cat(gen, 0, 0, sep = ",")
  } else {
    normalCopulas <- sum(sapply(model$vine@copulas,
            function (copula) is(copula, "normalCopula")))
    cat(gen, model$vine@trees, normalCopulas, sep = ",")
  }
  cat("\n")
  sink(NULL)
})

cveda <- VEDA(vine = "CVine", indepTestSigLevel = 0.01,
  copulas = c("normal"), margin = "truncnorm",
  popSize = 1400, maxGen = 100)

dveda <- VEDA(vine = "DVine", indepTestSigLevel = 0.01,
  copulas = c("normal"), margin = "truncnorm",
  popSize = 1200, maxGen = 100)

set.seed(12345)
cvedaResults <- edaIndepRuns(cveda, fDocking, lower, upper, runs = 30)
summary(cvedaResults)

set.seed(12345)
dvedaResults <- edaIndepRuns(dveda, fDocking, lower, upper, runs = 30)
summary(dvedaResults)

# Results given in Table 4.

printMeanResults <- function (edaResults) {
  bestEval <- numeric(length(edaResults))
  rmsd <- numeric(length(edaResults))
  for (i in seq(along = edaResults)) {
    bestEval[i] <- edaResults[[i]]@bestEval
    rmsd[i] <- .C("docking_rmsd", sol=as.double(edaResults[[i]]@bestSol),
                  out=as.double(0))$out
  }
  cat(mean(bestEval), sd(bestEval), mean(rmsd), sd(rmsd), "\n")
}

printMeanResults(cvedaResults)
printMeanResults(dvedaResults)

# Plot Figure 2.

library("lattice")
library("tikzDevice")

fig2data <- data.frame(
    Algorithm = c(rep("CVEDA", 100), rep("DVEDA", 100)), 
    Generation = rep(seq(from = 1, to = 100), 2), 
    Normal = numeric(200))

cvineData <- read.csv("CVine.csv")
dvineData <- read.csv("DVine.csv")
for (gen in seq(from = 1, to = 100)) {
  fig2data$Normal[gen] <- mean(cvineData[cvineData[ , 1] == gen, 3])
  fig2data$Normal[100 + gen] <- mean(dvineData[dvineData[ , 1] == gen, 3])
}

tikz(file = "normal-copulas.tex", width = 5, height = 5)

xyplot(Normal ~ Generation, fig2data, groups = Algorithm,
  col.symbol = "black", pch = c(2, 1),
  key = list(space = "top", columns = 2, text = list(c("DVEDA", "CVEDA")),
    points = list(pch = c(1, 2), col = "black")),
  scales = list(x = list(at = c(0, 20, 40, 60, 80, 100),
    labels = c("1", "20", "40", "60", "80", "100"))),
  xlab = "Generation", ylab = "Average Number of Normal Copulas")

dev.off()

# Median number of vine trees fitted in the 30 independent runs.

cvineData <- read.csv("CVine.csv")
cat(median(cvineData[ , 2]))

dvineData <- read.csv("DVine.csv")
cat(median(dvineData[ , 2]))