###############################################################################
### BatchJobs
###############################################################################

# Create and switch to a temporary directory
# -> Do not clutter the file system with files generated by these examples
# -> Note that for batch execution (not using the interactive default mode),
#    you need to adjust the working directory to a shared directory accessible
#    on all computational nodes
path <- tempfile("example-")
dir.create(path)
setwd(path)

###############################################################################
### Introducing example
###############################################################################

library("BatchJobs")
library("soobench")

# Create registry object
reg <- makeRegistry(id = "optimexample", file.dir = "optimexample-files",
  packages = "soobench")

# Definition of function to apply and its parameters:
# run a SANN optimization of the 5-d Rosenbrock function from a given start
starts <- replicate(10, runif(5, min = -5, max = 5), simplify = FALSE)
myoptim <- function(start) {
  optim(par = start, fn = rosenbrock_function(5), method = "SANN")
}

# Define jobs using batchMap: one job per start point
batchMap(reg, myoptim, starts)

# Submit jobs, per default all jobs not yet submitted
submitJobs(reg)

# Wait for jobs to terminate
waitForJobs(reg)

# Reduce results into a vector of achieved objective values
reduceResultsVector(reg, fun = function(job, res) res$value)

###############################################################################
### Status overview
###############################################################################

showStatus(reg)

###############################################################################
### Reduction
###############################################################################

# Create new registry
reg <- makeRegistry(id = "reduceexample")

# Define jobs by mapping an anonymous function over the vector 1:3
batchMap(reg, function(x) rnorm(2), 1:3)

# Submit jobs and wait for their termination
submitJobs(reg)
waitForJobs(reg)

# Reduce results as element-wise sums of partial results
reduceResults(reg, fun = function(aggr, job, res) aggr + res)

# cbind results into a matrix
reduceResults(reg, fun = function(aggr, job, res) cbind(aggr, res))
reduceResultsMatrix(reg, rows = FALSE)

###############################################################################
### Debugging example
###############################################################################

# Squares the input or throws an error (for inputs 2 and 3)
flakeyFunction <- function(value) {
  if (value %in% 2:3) {
    stop("Ooops.")
  }
  value^2
}

# Create new registry
reg <- makeRegistry(id = "error")

# Define jobs by mapping over the vector 1:4
batchMap(reg, flakeyFunction, 1:4)

# Submit jobs
submitJobs(reg)

# Wait for jobs, then print the status summary
waitForJobs(reg)
showStatus(reg)

# Extract job IDs of jobs which terminated with an error
failed <- findErrors(reg)

# This would open a pager displaying the log file of the first failed job
# showLog(reg, failed[1])

# Patch / overwrite function with corrected version only for failed IDs
setJobFunction(reg, failed, fun = function(value) value^2)

# Resubmit failed jobs
submitJobs(reg, failed)

# print status summary
showStatus(reg)

###############################################################################
### Template for .BatchJobs.R configuration file
###############################################################################

# This will not work without customization
if (FALSE) {
  cluster.functions <- makeClusterFunctionsTorque("~/torque.tmpl")
  mail.start <- "first"
  mail.done <- "last"
  mail.error <- "all"
  mail.from <- ""
  mail.to <- ""
  mail.control <- list(smtpServer = "mx.uni.edu")
}

###############################################################################
### BatchExperiments iris example
###############################################################################

# Load package and create a experiment registry
library("BatchExperiments")
reg <- makeExperimentRegistry(id = "iris_example")

# Dynamic problem function (subsampling):
# draw a train set of size floor(n * ratio), the rest is the test set
subsample <- function(static, ratio) {
  n <- nrow(static)
  train <- sample(n, floor(n * ratio))
  test <- setdiff(seq(n), train)
  list(test = test, train = train)
}

# Attach data (used as static problem part)
data("iris", package = "datasets")

# Add problem to the registry with id "iris"
addProblem(reg, id = "iris", static = iris, dynamic = subsample, seed = 123)

# First algorithm: classification tree
# Returns a confusion matrix (true species vs. predicted species on test set)
tree.wrapper <- function(static, dynamic, ...) {
  library("rpart")
  mod <- rpart(Species ~ ., data = static[dynamic$train, ], ...)
  pred <- predict(mod, newdata = static[dynamic$test, ], type = "class")
  table(static$Species[dynamic$test], pred)
}

# Add algorithm to the registry with id "tree"
addAlgorithm(reg, id = "tree", fun = tree.wrapper)

# Second algorithm: classification random forest
# Returns a confusion matrix
forest.wrapper <- function(static, dynamic, ...) {
  library("randomForest")
  mod <- randomForest(Species ~ ., data = static, subset = dynamic$train, ...)
  pred <- predict(mod, newdata = static[dynamic$test, ])
  table(static$Species[dynamic$test], pred)
}

# Add algorithm to the registry with id "forest"
addAlgorithm(reg, id = "forest", fun = forest.wrapper)

# Generate design for problem "iris"
pars <- list(ratio = c(0.67, 0.9))
iris.design <- makeDesign("iris", exhaustive = pars)

# Generate design for algorithm "tree"
pars <- list(minsplit = c(5, 10, 20), cp = c(0.01, 0.1))
tree.design <- makeDesign("tree", exhaustive = pars)

# Generate design for algorithm "forest"
pars <- list(ntree = c(100, 500, 1000))
forest.design <- makeDesign("forest", exhaustive = pars)

# Define and add experiments using problem and algorithm designs
# Each experiment will get replicated 50 times
addExperiments(reg, repls = 50, prob.designs = iris.design,
  algo.designs = list(tree.design, forest.design))

# Print a quick summary of defined experiments
summarizeExperiments(reg)

# Extract two experiment IDs for testing:
# (1) The first experiment where the algorithm "tree" is used
# (2) The first experiment where the algorithm "forest" is used and the
#     parameter "ntree" equals 1000
id1 <- findExperiments(reg, algo.pattern = "tree")[1]
id2 <- findExperiments(reg, algo.pattern = "forest",
  algo.pars = (ntree == 1000))[1]

# Test both jobs in a spawned R process,
# All output is directed to the current R session
testJob(reg, id1)
testJob(reg, id2)

# Example on how to query experiment IDs and filter them
# depending on their computational state
# (we have not submitted anything yet, ids2 will be an empty integer vector)
ids1 <- findExperiments(reg, algo.pattern = "tree")
ids2 <- findRunning(reg)
# Kill all currently running "tree" experiments (here: none).
# NOTE: fixed to use the ID vectors ids1/ids2 queried above; the previous
# code intersected the two scalar test IDs id1/id2, which is always empty.
killJobs(reg, intersect(ids1, ids2), FALSE)

# We will now submit the jobs, note that this may take some time
submitJobs(reg)

# Reduction function which calculates the misclassification rate from the
# confusion matrices
reduce <- function(job, res) {
  n <- sum(res)
  list(mcr = (n - sum(diag(res))) / n)
}

# Run the reduction
res <- reduceResultsExperiments(reg, fun = reduce)

# Print head and tail of the resulting data frame
print(res[c(1:2, 899:900), ])

# Aggregate over replications for sets of problem, algorithm and parameters
library("plyr")
vars <- setdiff(names(res), c("id", "repl", "mcr"))
print(head(ddply(res, vars, summarise, mean.mcr = mean(mcr))))