library("copulaedas") ############################################################################### # 2.1. General procedure of an EDA ############################################################################### # Save the information to create Figure 1. saveData <- function (eda, gen, fEvals, model, pop, popEval) { dataDir <- file.path(getwd(), paste(gen)) dir.create(dataDir) setwd(dataDir) writeData <- function (data, file) write.table(data, file, row.names = FALSE, col.names = FALSE) writeData(pop, "pop.txt") writeData(popEval, "popEval.txt") s <- edaSelect(eda, gen, pop, popEval) selectedPop <- pop[s, ] selectedEval <- popEval[s] writeData(selectedPop, "selectedPop.txt") writeData(selectedEval, "selectedEval.txt") if (gen > 1) { writeData(t(sapply(model$margins, function (x) x)), "model.txt") } setwd(file.path(getwd(), "..")) } setMethod("edaReport", "CEDA", edaReportCombined(edaReportSimple, saveData)) setMethod("edaTerminate", "CEDA", edaTerminateEval) umda <- CEDA(copula = "indep", margin = "norm", popSize = 30, fEval = 0, fEvalTol = 1e-2) set.seed(12345) edaRun(umda, fSphere, rep(-10, 2), rep(10, 2)) # Plot Figure 1. library("lattice") library("tikzDevice") x1mean <- numeric(3) x2mean <- numeric(3) x1sd <- numeric(3) x2sd <- numeric(3) for (i in seq(length = 3)) { model <- read.table(paste(i+1, "/model.txt", sep = "")) x1mean[i] <- model[1, 1] x2mean[i] <- model[2, 1] x1sd[i] <- model[1, 2] x2sd[i] <- model[2, 2] } lengths <- c(40, 45, 50) f <- function (x, y, x1mean, x1sd, x2mean, x2sd) { dnorm(x, mean = x1mean, sd = x1sd) * dnorm(y, mean = x2mean, sd = x2sd) } for (i in seq(along = x1mean)) { tikz(file = paste(i+1, "/model.tex", sep = ""), width = 3.5, height = 3.5) grid <- expand.grid(x = seq(from = -10, to = 10, length = lengths[i]), y = seq(from = -10, to = 10, length = lengths[i])) grid$z <- with(grid, f(x, y, x1mean[i], x1sd[i], x2mean[i], x2sd[i])) plot(wireframe(z ~ x * y, grid, zlab = "", xlab = "$x_1$", ylab = "$x_2$", par.settings = list(axis.line = list(col = "transparent"), box.3d = list(col = "transparent")), border = "transparent", scales = list(x = list(arrows = FALSE), y = list(arrows = FALSE), z = list(draw = FALSE)))) dev.off() } removeMethod("edaReport", "CEDA") removeMethod("edaTerminate", "CEDA") ############################################################################### # 4.1 Running the EDAs implemented in the package ############################################################################### setMethod("edaTerminate", "EDA", edaTerminateCombined(edaTerminateEval, edaTerminateMaxGen)) setMethod("edaReport", "EDA", edaReportSimple) gceda <- CEDA(copula = "normal", margin = "kernel", popSize = 200, fEval = 0, fEvalTol = 1e-6, maxGen = 50) gceda@name <- "Gaussian Copula Estimation of Distribution Algorithm" set.seed(12345) result <- edaRun(gceda, fSphere, rep(-300, 5), rep(900, 5)) show(result) setMethod("edaReport", "EDA", edaReportDisabled) set.seed(12345) results <- edaIndepRuns(gceda, fSphere, rep(-300, 5), rep(900, 5), 30) show(results) summary(results) ############################################################################### # 4.2 Implementation of a new EDA based on copulas ############################################################################### setClass("CopulaMIMIC", contains = "EDA", prototype = prototype(name = "Copula MIMIC")) CopulaMIMIC <- function (...) { new("CopulaMIMIC", parameters = list(...)) } fbetamargin <- function (x, lower, upper) { x <- (x - lower) / (upper - lower) loglik <- function (s) sum(dbeta(x, s[1], s[2], log = TRUE)) s <- optim(c(1, 1), loglik, control = list(fnscale = -1))$par list(lower = lower, upper = upper, a = s[1], b = s[2]) } pbetamargin <- function (q, lower, upper, a, b) { q <- (q - lower) / (upper - lower) pbeta(q, a, b) } qbetamargin <- function (p, lower, upper, a, b) { q <- qbeta(p, a, b) lower + q * (upper - lower) } edaLearnCopulaMIMIC <- function (eda, gen, previousModel, selectedPop, selectedEval, lower, upper) { margin <- eda@parameters$margin copula <- eda@parameters$copula if (is.null(margin)) margin <- "betamargin" if (is.null(copula)) copula <- "normal" fmargin <- get(paste("f", margin, sep = "")) pmargin <- get(paste("p", margin, sep = "")) copula <- switch(copula, normal = copula::normalCopula(0), frank = copula::frankCopula(0)) n <- ncol(selectedPop) # Estimate the parameters of the marginal distributions. margins <- lapply(seq(length = n), function (i) fmargin(selectedPop[ , i], lower[i], upper[i])) uniformPop <- sapply(seq(length = n), function (i) do.call(pmargin, c(list(selectedPop[ , i]), margins[[i]]))) # Calculate pairwise mutual information by using copula entropy. C <- matrix(list(NULL), nrow = n, ncol = n) I <- matrix(0, nrow = n, ncol = n) for (i in seq(from = 2, to = n)) { for (j in seq(from = 1, to = i - 1)) { # Estimate the parameters of the copula. data <- cbind(uniformPop[ , i], uniformPop[ , j]) startCopula <- copula::fitCopula(copula, data, method = "itau", estimate.variance = FALSE)@copula C[[i, j]] <- tryCatch( copula::fitCopula(startCopula, data, method = "ml", start = startCopula@parameters, estimate.variance = FALSE)@copula, error = function (error) startCopula) # Calculate mutual information. if (is(C[[i, j]], "normalCopula")) { I[i, j] <- -0.5 * log(1 - C[[i, j]]@parameters^2) } else { u <- rcopula(C[[i, j]], 100) I[i, j] <- sum(log(dcopula(C[[i, j]], u))) / 100 } C[[j, i]] <- C[[i, j]]; I[j, i] <- I[i, j] } } # Select a permutation of the variables. perm <- as.vector(arrayInd(which.max(I), dim(I))) copulas <- C[perm[1], perm[2]] I[perm, ] <- -Inf for (k in seq(length = n - 2)) { ik <- which.max(I[ , perm[1]]) perm <- c(ik, perm) copulas <- c(C[perm[1], perm[2]], copulas) I[ik, ] <- -Inf } list(margins = margins, perm = perm, copulas = copulas) } setMethod("edaLearn", "CopulaMIMIC", edaLearnCopulaMIMIC) edaSampleCopulaMIMIC <- function (eda, gen, model, lower, upper) { popSize <- eda@parameters$popSize margin <- eda@parameters$margin if (is.null(popSize)) popSize <- 100 if (is.null(margin)) margin <- "betamargin" qmargin <- get(paste("q", margin, sep = "")) n <- length(model$margins) perm <- model$perm copulas <- model$copulas # Simulate the chain structure with the copulas. uniformPop <- matrix(0, nrow = popSize, ncol = n) uniformPop[ , perm[n]] <- runif(popSize) for (k in seq(from = n - 1, to = 1)) { u <- runif(popSize) v <- uniformPop[ , perm[k + 1]] uniformPop[ , perm[k]] <- vines::hinverse(copulas[[k]], u, v) } # Evaluate the inverse of the marginal distributions. pop <- sapply(seq(length = n), function (i) do.call(qmargin, c(list(uniformPop[ , i]), model$margins[[i]]))) pop } setMethod("edaSample", "CopulaMIMIC", edaSampleCopulaMIMIC) ############################################################################### # 4.3 Performing an empirical study on benchmark problems ############################################################################### umda <- CEDA(copula = "indep", margin = "norm") umda@name <- "UMDA" gceda <- CEDA(copula = "normal", margin = "norm") gceda@name <- "GCEDA" cveda <- VEDA(vine = "CVine", indepTestSigLevel = 0.01, copulas = c("normal"), margin = "norm") cveda@name <- "CVEDA" dveda <- VEDA(vine = "DVine", indepTestSigLevel = 0.01, copulas = c("normal"), margin = "norm") dveda@name <- "DVEDA" copulamimic <- CopulaMIMIC(copula = "normal", margin = "norm") copulamimic@name <- "CopulaMIMIC" edas <- list(umda, gceda, cveda, dveda, copulamimic) fNames <- c("Sphere", "SummationCancellation") experiments <- list() for (eda in edas) { for (fName in fNames) { experiment <- list(eda = eda, fName = fName) experiments <- c(experiments, list(experiment)) } } runExperiment <- function (experiment) { eda <- experiment$eda fName <- experiment$fName # Objective function parameters. fInfo <- list( Sphere = list(lower = -600, upper = 600, fEval = 0), SummationCancellation = list(lower = -0.16, upper = 0.16, fEval = -1e5)) lower <- rep(fInfo[[fName]]$lower, 10) upper <- rep(fInfo[[fName]]$upper, 10) f <- get(paste("f", fName, sep = "")) # Configure termination criteria and disable reporting. eda@parameters$fEval <- fInfo[[fName]]$fEval eda@parameters$fEvalTol <- 1e-6 eda@parameters$fEvalStdDev <- 1e-8 eda@parameters$maxEvals <- 300000 setMethod("edaTerminate", "EDA", edaTerminateCombined(edaTerminateEval, edaTerminateMaxEvals, edaTerminateEvalStdDev)) setMethod("edaReport", "EDA", edaReportDisabled) sink(paste(eda@name, "_", fName, ".txt", sep = "")) # Determine the critical population size. set.seed(12345) results <- edaCriticalPopSize(eda, f, lower, upper, eda@parameters$fEval, eda@parameters$fEvalTol, lowerPop = 50, upperPop = 2000, totalRuns = 30, successRuns = 30, stopPercent = 10, verbose = TRUE) if (is.null(results)) { # Run the experiment with the largest population size, if the # critical population size was not found. eda@parameters$popSize <- 2000 set.seed(12345) edaIndepRuns(eda, f, lower, upper, runs = 30, verbose = TRUE) } sink(NULL) } for (experiment in experiments) runExperiment(experiment) ############################################################################### # 4.4 Solution of the molecular docking problem ############################################################################### system("R CMD SHLIB docking.c") dyn.load(paste("docking", .Platform$dynlib.ext, sep = "")) .C("docking_load", as.character("2z5u.dat")) lower <- c(.C("docking_xlo", out=as.double(0))$out, .C("docking_ylo", out=as.double(0))$out, .C("docking_zlo", out=as.double(0))$out, 0, -pi/2, -pi, rep(-pi, .C("docking_ntor", out=as.integer(0))$out)) upper <- c(.C("docking_xhi", out=as.double(0))$out, .C("docking_yhi", out=as.double(0))$out, .C("docking_zhi", out=as.double(0))$out, 2*pi, pi/2, pi, rep(pi, .C("docking_ntor", out=as.integer(0))$out)) fDocking <- function (sol) { .C("docking_score", sol=as.double(sol), out=as.double(0))$out } setMethod("edaTerminate", "EDA", edaTerminateMaxGen) # Auxiliar function to print information about the vines (to generate # Figure 2 and the statistics about the number of trees in the vine). setMethod("edaReport", "VEDA", function (eda, gen, fEvals, model, pop, popEval) { sink(sprintf("%s.csv", eda@parameters$vine), append = TRUE) if (is.null(model)) { cat(gen, 0, 0, sep = ",") } else { normalCopulas <- sum(sapply(model$vine@copulas, function (copula) is(copula, "normalCopula"))) cat(gen, model$vine@trees, normalCopulas, sep = ",") } cat("\n") sink(NULL) }) cveda <- VEDA(vine = "CVine", indepTestSigLevel = 0.01, copulas = c("normal"), margin = "truncnorm", popSize = 1400, maxGen = 100) dveda <- VEDA(vine = "DVine", indepTestSigLevel = 0.01, copulas = c("normal"), margin = "truncnorm", popSize = 1200, maxGen = 100) set.seed(12345) cvedaResults <- edaIndepRuns(cveda, fDocking, lower, upper, runs = 30) summary(cvedaResults) set.seed(12345) dvedaResults <- edaIndepRuns(dveda, fDocking, lower, upper, runs = 30) summary(dvedaResults) # Results given in Table 4. printMeanResults <- function (edaResults) { bestEval <- numeric(length(edaResults)) rmsd <- numeric(length(edaResults)) for (i in seq(along = edaResults)) { bestEval[i] <- edaResults[[i]]@bestEval rmsd[i] <- .C("docking_rmsd", sol=as.double(edaResults[[i]]@bestSol), out=as.double(0))$out } cat(mean(bestEval), sd(bestEval), mean(rmsd), sd(rmsd), "\n") } printMeanResults(cvedaResults) printMeanResults(dvedaResults) # Plot Figure 2. library("lattice") library("tikzDevice") fig2data <- data.frame( Algorithm = c(rep("CVEDA", 100), rep("DVEDA", 100)), Generation = rep(seq(from = 1, to = 100), 2), Normal = numeric(200)) cvineData <- read.csv("CVine.csv") dvineData <- read.csv("DVine.csv") for (gen in seq(from = 1, to = 100)) { fig2data$Normal[gen] <- mean(cvineData[cvineData[ , 1] == gen, 3]) fig2data$Normal[100 + gen] <- mean(dvineData[dvineData[ , 1] == gen, 3]) } tikz(file = "normal-copulas.tex", width = 5, height = 5) xyplot(Normal ~ Generation, fig2data, groups = Algorithm, col.symbol = "black", pch = c(2, 1), key = list(space = "top", columns = 2, text = list(c("DVEDA", "CVEDA")), points = list(pch = c(1, 2), col = "black")), scales = list(x = list(at = c(0, 20, 40, 60, 80, 100), labels = c("1", "20", "40", "60", "80", "100"))), xlab = "Generation", ylab = "Average Number of Normal Copulas") dev.off() # Median number of vine trees fitted in the 30 independent runs. cvineData <- read.csv("CVine.csv") cat(median(cvineData[ , 2])) dvineData <- read.csv("DVine.csv") cat(median(dvineData[ , 2]))