# Replication Script

## Python setup

### Install NUBO and other packages

# It is recommend to create a virtual environment before installing the following
# packages. However, it is not required.

# for version from the JSS paper
# pip install nubopy==1.2.1

# for latest version
!pip install nubopy

# (in case needed) also install botorch, gpytorch, torch and pygpgo
!pip install botorch gpytorch torch
!pip install ConfigSpace smac
!pip install bayesian-optimization
!pip install pygpgo


### List of all used packages

!pip freeze


### Parameters for plotting

import matplotlib.pyplot as plt

def add_annotation(ax, fig, x, y, label):
    """Add annotations such as A), B) and C) to subplots of a figure."""
    ax.text(x, y, label, transform = fig.transFigure,
            size = 12, weight = "bold", ha = "left", va = "top")

# specify font used in plotting
plt.rcParams["font.family"] = "sans-serif"
plt.rcParams["font.sans-serif"] = "Arial"
plt.rc("font", size = 9)          # controls default text sizes
plt.rc("axes", titlesize = 9)     # fontsize of the axes title
plt.rc("axes", labelsize = 9)     # fontsize of the x and y labels
plt.rc("xtick", labelsize = 9)    # fontsize of the tick labels
plt.rc("ytick", labelsize = 9)    # fontsize of the tick labels
plt.rc("legend", fontsize = 9)    # legend fontsize
plt.rc("figure", titlesize = 9)   # fontsize of the figure title


## 1. Introduction

### Comparison to other packages

# NUBO (single-point)

def nubo(N, n0, f, lower_bounds, upper_bounds):

    import torch
    import numpy as np
    import time
    from nubo.acquisition import UpperConfidenceBound
    from nubo.models import GaussianProcess, fit_gp
    from nubo.optimisation import single
    from nubo.utils import gen_inputs
    from gpytorch.likelihoods import GaussianLikelihood

    start_time = time.time()

    bounds = np.vstack([lower_bounds, upper_bounds])
    bounds = torch.from_numpy(bounds)

    x_train = gen_inputs(num_points=n0,
                         num_dims=bounds.size(1),
                         bounds=bounds)

    y_train = torch.zeros(n0, dtype=torch.double)
    for i in range(n0):
        y_train[i] = f(*x_train[i, :].numpy())

    for iter in range(N-n0):

        # specify Gaussian process
        likelihood = GaussianLikelihood()
        gp = GaussianProcess(x_train, y_train, likelihood=likelihood)

        # fit Gaussian process
        fit_gp(x_train, y_train, gp = gp, likelihood = likelihood,
               lr = 0.1, steps = 200)

        # specify acquisition function
        acq = UpperConfidenceBound(gp = gp, beta = 4)

        # optimise acquisition function
        x_new, _ = single(func = acq,
                          method = "L-BFGS-B",
                          bounds = bounds,
                          num_starts = 10,
                          num_samples = 100)

        # evaluate new point
        y_new = f(*x_new.reshape(-1).detach().numpy())

        # add to data
        x_train = torch.vstack((x_train, x_new))
        y_train = torch.hstack((y_train, torch.tensor(y_new)))

        elapsed_time = time.time() - start_time

    return torch.hstack([x_train, y_train.reshape(-1, 1)]).numpy(), elapsed_time


# NUBO (multi-point)

def nubo_multi(N, n0, batch_size, f, lower_bounds, upper_bounds):

    import torch
    import numpy as np
    import time
    from nubo.acquisition import MCUpperConfidenceBound
    from nubo.models import GaussianProcess, fit_gp
    from nubo.optimisation import multi_sequential
    from nubo.utils import gen_inputs
    from gpytorch.likelihoods import GaussianLikelihood

    start_time = time.time()

    bounds = np.vstack([lower_bounds, upper_bounds])
    bounds = torch.from_numpy(bounds)

    x_train = gen_inputs(num_points = n0,
                         num_dims = bounds.size(1),
                         bounds = bounds)

    y_train = torch.zeros(n0, dtype = torch.double)
    for i in range(n0):
        y_train[i] = f(*x_train[i, :].numpy())

    for iter in range(int((N-n0)/batch_size)):
      
        # specify Gaussian process
        likelihood = GaussianLikelihood()
        gp = GaussianProcess(x_train, y_train, likelihood = likelihood)

        # fit Gaussian process
        fit_gp(x_train, y_train, gp = gp, likelihood = likelihood,
               lr = 0.1, steps = 200)

        # specify acquisition function
        acq = MCUpperConfidenceBound(gp = gp, beta = 4, samples = 64,
                                     fix_base_samples = True)

        # optimise acquisition function
        x_new, _ = multi_sequential(func = acq,
                                    method = "L-BFGS-B",
                                    batch_size = batch_size,
                                    bounds = bounds,
                                    num_starts = 10,
                                    num_samples = 100)

        # evaluate new points
        y_new = torch.zeros(batch_size)
        for i in range(batch_size):
          y_new[i] = f(*x_new[i, :].detach().numpy())

        # add to data
        x_train = torch.vstack((x_train, x_new))
        y_train = torch.hstack((y_train, y_new))

        elapsed_time = time.time() - start_time

    return torch.hstack([x_train, y_train.reshape(-1, 1)]).numpy(), elapsed_time


# BoTorch (single-point)

def botorch(N, n0, f, lower_bounds, upper_bounds):

    import torch
    import numpy as np
    import time
    from botorch.fit import fit_gpytorch_mll
    from botorch.utils.transforms import unnormalize, standardize
    from gpytorch.mlls import ExactMarginalLogLikelihood
    from botorch.optim import optimize_acqf
    from botorch.acquisition import UpperConfidenceBound
    from botorch.models.gp_regression import SingleTaskGP

    start_time = time.time()

    dims = len(lower_bounds)

    bounds = np.vstack([lower_bounds, upper_bounds])
    bounds = torch.from_numpy(bounds)
    opt_bounds = torch.vstack([torch.zeros((1, dims)),
                               torch.ones((1, dims))])

    x_train = torch.rand(n0, dims, dtype = torch.double)

    y_train = torch.zeros(n0, dtype = torch.double)
    x_temp = unnormalize(x_train, bounds)
    for i in range(x_train.size(0)):
        y_train[i] = f(*x_temp.numpy()[i, :])

    for i in range(N-n0):

        gp = SingleTaskGP(x_train, standardize(y_train).unsqueeze(-1))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_mll(mll)

        ucb = UpperConfidenceBound(gp, beta = 4)

        x_new, acq_value = optimize_acqf(
            ucb, bounds = opt_bounds, q = 1, num_restarts = 10, raw_samples = 100,
        )

        # evaluate new point
        x_temp = unnormalize(x_new, bounds)
        y_new = f(*x_temp.reshape(-1).detach().numpy())

        # add to data
        x_train = torch.vstack((x_train, x_new))
        y_train = torch.hstack((y_train, torch.tensor(y_new)))

        elapsed_time = time.time() - start_time

    return torch.hstack([x_train, y_train.reshape(-1, 1)]).numpy(), elapsed_time


# BoTorch (multi-point)

def botorch_multi(N, n0, batch_size, f, lower_bounds, upper_bounds):

    import torch
    import numpy as np
    import time
    from botorch.fit import fit_gpytorch_mll
    from botorch.utils.transforms import unnormalize, standardize
    from gpytorch.mlls import ExactMarginalLogLikelihood
    from botorch.optim import optimize_acqf
    from botorch.models.gp_regression import SingleTaskGP
    from botorch.acquisition import qUpperConfidenceBound
    from botorch.sampling import  SobolQMCNormalSampler

    start_time = time.time()

    dims = len(lower_bounds)

    bounds = np.vstack([lower_bounds, upper_bounds])
    bounds = torch.from_numpy(bounds)
    opt_bounds = torch.vstack([torch.zeros((1, dims)),
                               torch.ones((1, dims))])

    x_train = torch.rand(n0, dims, dtype = torch.double)

    y_train = torch.zeros(n0, dtype = torch.double)
    x_temp = unnormalize(x_train, bounds)
    for i in range(n0):
        y_train[i] = f(*x_temp.numpy()[i, :])

    for i in range(int((N-n0)/batch_size)):

        gp = SingleTaskGP(x_train, standardize(y_train).unsqueeze(-1))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_mll(mll)

        sampler = SobolQMCNormalSampler(sample_shape = torch.Size([64]))
        ei = qUpperConfidenceBound(gp, beta = 4, sampler = sampler)

        x_new, acq_value = optimize_acqf(
            ei, bounds = opt_bounds, q = batch_size, num_restarts = 10,
            raw_samples = 100, sequential = True
        )

        # evaluate new point
        x_temp = unnormalize(x_new, bounds)
        y_new = torch.zeros(batch_size)
        for i in range(batch_size):
            y_new[i] = f(*x_temp[i, :].detach().numpy())

        # add to data
        x_train = torch.vstack((x_train, x_new))
        y_train = torch.hstack((y_train, y_new))

        elapsed_time = time.time() - start_time

    return torch.hstack([x_train, y_train.reshape(-1, 1)]).numpy(), elapsed_time


# SMAC3

def smac3(N, n0, f, lower_bounds, upper_bounds):

    import time
    import numpy as np
    import shutil
    from ConfigSpace import ConfigurationSpace
    from smac.facade import BlackBoxFacade
    from smac.scenario import Scenario
    from smac.acquisition.function.confidence_bound import LCB
    from smac.initial_design import LatinHypercubeInitialDesign

    start_time = time.time()

    dims = len(lower_bounds)

    # target function
    def target(config, seed) -> float:
        if dims == 2:
            return -f(config["x1"], config["x2"])
        elif dims == 6:
            return -f(config["x1"], config["x2"], config["x3"], config["x4"],
                      config["x5"], config["x6"])

    # make parameter bounds
    bounds = {}
    for d in range(dims):
        bounds[f"x{d+1}"] = (lower_bounds[d], upper_bounds[d])
    configspace = ConfigurationSpace(bounds)

    # Scenario object specifying the optimization environment
    scenario = Scenario(configspace, deterministic = True, n_trials = N)

    # Use SMAC to find the best configuration/hyperparameters
    seed = np.random.randint(0, 1000000, 1)
    inits = LatinHypercubeInitialDesign(scenario, n_configs = n0, max_ratio = 0.5,
                                        seed = int(seed))
    lcb = LCB(beta = 4)
    smac = BlackBoxFacade(scenario,
                          target,
                          initial_design = inits,
                          acquisition_function = lcb,
                          logging_level = 50)
    incumbent = smac.optimize()

    res = np.zeros((N, dims+1))
    for n in range(N):
        config = smac._optimizer._runhistory.get_config(n+1)
        for d in range(dims):
            res[n, d] = config._values[f"x{d+1}"]
        res[n, -1] = -smac._optimizer._runhistory.get_cost(config)

    elapsed_time = time.time() - start_time

    # Remove SMAC3 outputs
    shutil.rmtree("smac3_output/")

    return res, elapsed_time


# bayes_opt (bayesian-optimization)

def bayes_opt(N, n0, f, lower_bounds, upper_bounds):

    import numpy as np
    import time
    from bayes_opt import BayesianOptimization
    from bayes_opt import acquisition

    start_time = time.time()

    dims = len(lower_bounds)

    # make parameter bounds
    bounds = {}
    for d in range(dims):
        bounds[f"x{d+1}"] = (lower_bounds[d], upper_bounds[d])

    # Bayesian optimisation
    seed = np.random.randint(0, 1000000, 1)
    # acquisition function (bayes_opt version 2.0.4 as of 18-may-2025)
    acquisition_function = acquisition.UpperConfidenceBound(kappa = np.sqrt(4))
    # acquisition function (bayes_opt version < 2.0.4)
    # acquisition_function = acquisition.UtilityFunction(kind = "ucb",
    #                        kappa = np.sqrt(4))
    optimizer = BayesianOptimization(f = f,
                                     pbounds = bounds,
                                     verbose = 0,
                                     random_state = int(seed),
                                     acquisition_function = acquisition_function)

    # optimise (bayes_opt version 2.0.4 as of 18-may-2025)
    optimizer.maximize(init_points = n0, n_iter = N-n0)
    # optimise (bayes_opt version < 2.0.4)
    # optimizer.maximize(init_points = n0, n_iter = N-n0,
    #                    acquisition_function = acquisition_function)
    # !!! Note: acquisition function is no longer in maximize in version 2.0.4
    # and has to be passed to the optimizer instead

    res = np.zeros((N, len(lower_bounds) + 1))
    for i in range(N):
        for d in range(dims):
            res[i, d] = optimizer.res[i]["params"][f"x{d+1}"]
        res[i, -1] = optimizer.res[i]["target"]

    elapsed_time = time.time() - start_time

    return res, elapsed_time


# pyGPGO

def pygpgo(N, n0, f, lower_bounds, upper_bounds):

    import numpy as np
    import time
    from pyGPGO.covfunc import matern52
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO

    start_time = time.time()

    dims = len(lower_bounds)

    cov = matern52()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode = "UCB", beta = np.sqrt(4))

    # make parameter bounds
    bounds = {}
    for d in range(dims):
        bounds[f"x{d+1}"] = ("cont", [lower_bounds[d], upper_bounds[d]])

    gpgo = GPGO(gp, acq, f, bounds)
    gpgo.run(init_evals = n0, max_iter = N-n0)

    res = np.hstack([gpgo.GP.X, gpgo.GP.y.reshape((-1, 1))])

    elapsed_time = time.time() - start_time

    return res, elapsed_time


#### Simulations on test functions

# Function to compute performance line

import torch
import numpy as np

def perf_line(filename, N, runs):
    """Make performance line for plotting."""
    res = np.zeros((runs, N))
    for run in range(runs):
        res[run, :] = np.loadtxt(f"{filename}_{run}.csv",
                                 skiprows = 1, delimiter = ",")[:, -1]
    res = np.maximum.accumulate(res, axis = 1)
    means = np.mean(res, axis = 0)
    stds = np.std(res, axis = 0)
    return means, stds


# Uncomment to create the directory if it doesn't exist
# import os
# os.makedirs("../Tables/", exist_ok = True)


# 2D Levy (single-point)

# pyGPGO and smac3 are suppressed from the replication codes.
# In case, pyrfr can be successfully build simply uncomment the respective lines
# see also:
# https://open-box.readthedocs.io/en/latest/installation/install_pyrfr.html

# set seeds for reproducibility
np.random.seed(1)
torch.manual_seed(1)
 
# def levy(x1, x2):
#     w1 = 1.0 + (x1 - 1.0)/4.0
#     w2 = 1.0 + (x2 - 1.0)/4.0
#     term_1 = np.sin(np.pi * w1)**2
#     term_2 = np.sum((w1 - 1.0)**2 * (1.0 + 10.0 * np.sin(np.pi * w1 + 1.0)**2),
#                     axis = -1)
#     term_3 = (w2 - 1.0)**2 * (1.0 + np.sin(2.0 * np.pi * w2)**2)
#     y = term_1 + term_2 + term_3
#     return -y
# 
# # specify problem
# N = 30
# n0 = 10
# f = levy
# lower_bounds = np.array([-10.0, -10.0])
# upper_bounds = np.array([10.0, 10.0])
# runs = 2
# 
# times = np.zeros((10, 5))
# for run in range(runs):
#     # run comparisons
#     res_nubo, times[run, 0] = nubo(N = N, n0 = n0, f=f,
#                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
#     res_botorch, times[run, 1] = botorch(N = N, n0 = n0, f = f,
#                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
#     res_bayes_opt, times[run, 3] = bayes_opt(N = N, n0 = n0, f = f,
#                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
#     #res_smac3, times[run, 2] = smac3(N = N, n0 = n0, f = f,
#     #                 lower_bounds = lower_bounds, upper_bounds = upper_bounds)
#     #res_pygpgo, times[run, 4] = pygpgo(N = N, n0 = n0, f = f,
#     #                 lower_bounds = lower_bounds, upper_bounds = upper_bounds)
# 
#     # save results
#     np.savetxt(f"../Tables/levy_times.csv", times, delimiter = ",",
#                header = "NUBO,BoTorch,bayes_opt", comments="")
#     #np.savetxt(f"../Tables/levy_times.csv", times, delimiter = ",",
#     #          header = "NUBO,BoTorch,SMAC3,bayes_opt,pyGPGO", comments="")
#     np.savetxt(f"../Tables/levy_nubo_{run}.csv", res_nubo, delimiter = ",",
#                header = "x1,x2,y", comments = "")
#     np.savetxt(f"../Tables/levy_botorch_{run}.csv", res_botorch, delimiter = ",",
#                header = "x1,x2,y", comments = "")
#     np.savetxt(f"../Tables/levy_bayes_opt_{run}.csv", res_bayes_opt, delimiter = ",",
#                header = "x1,x2,y", comments = "")
#     #np.savetxt(f"../Tables/levy_smac3_{run}.csv", res_smac3, delimiter = ",",
#     #          header = "x1,x2,y", comments = "")
#     #np.savetxt(f"../Tables/levy_pygpgo_{run}.csv", res_pygpgo, delimiter = ",",
#     #          header = "x1,x2,y", comments = "")
# 
# # performance
# nubo_means, nubo_stds = perf_line("../Tables/levy_nubo", N, runs)
# botorch_means, botorch_stds = perf_line("../Tables/levy_botorch", N, runs)
# bayes_opt_means, bayes_opt_stds = perf_line("../Tables/levy_bayes_opt", N, runs)
# #smac3_means, smac3_stds = perf_line("../Tables/levy_smac3", N, runs)
# #pygpgo_means, pygpgo_stds = perf_line("../Tables/levy_pygpgo", N, runs)
# 
# # tables
# means = np.transpose(np.vstack([nubo_means, botorch_means, bayes_opt_means]))
# stds = np.transpose(np.vstack([nubo_stds, botorch_stds, bayes_opt_stds]))
# np.savetxt("../Tables/levy_means.csv", means, delimiter = ",",
#           header = "NUBO,BoTorch,bayes_opt", comments = "")
# np.savetxt("../Tables/levy_stds.csv", stds, delimiter = ",",
#           header = "NUBO,BoTorch,bayes_opt", comments = "")
# #means = np.transpose(np.vstack([nubo_means, botorch_means, smac3_means,
# #         bayes_opt_means, pygpgo_means]))
# #stds = np.transpose(np.vstack([nubo_stds, botorch_stds, smac3_stds,
# #         bayes_opt_stds, pygpgo_stds]))
# #np.savetxt("../Tables/levy_means.csv", means, delimiter = ",",
# #         header = "NUBO,BoTorch,SMAC3,bayes_opt,pyGPGO", comments = "")
# #np.savetxt("../Tables/levy_stds.csv", stds, delimiter = ",",
# #         header = "NUBO,BoTorch,SMAC3,bayes_opt,pyGPGO", comments = "")


# Without pyGPGO and smac3

# set seeds for reproducibility
np.random.seed(1)
torch.manual_seed(1)
# 
# def levy(x1, x2):
#     w1 = 1.0 + (x1 - 1.0)/4.0
#     w2 = 1.0 + (x2 - 1.0)/4.0
#     term_1 = np.sin(np.pi * w1)**2
#     term_2 = np.sum((w1 - 1.0)**2 * (1.0 + 10.0 * np.sin(np.pi * w1 + 1.0)**2),
#                     axis=-1)
#     term_3 = (w2 - 1.0)**2 * (1.0 + np.sin(2.0 * np.pi * w2)**2)
#     y = term_1 + term_2 + term_3
#     return -y
# 
# # specify problem
# N = 30
# n0 = 10
# f = levy
# lower_bounds = np.array([-10.0, -10.0])
# upper_bounds = np.array([10.0, 10.0])
# runs = 10
# 
# times = np.zeros((10, 3))
# for run in range(runs):
#     # run comparisons
#     res_nubo, times[run, 0] = nubo(N = N, n0 = n0, f = f,
#                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
#     res_botorch, times[run, 1] = botorch(N = N, n0 = n0, f = f,
#                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
#     res_bayes_opt, times[run, 2] = bayes_opt(N = N, n0 = n0, f = f,
#                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
# 
#     # save results
#     np.savetxt(f"../Tables/levy_times.csv", times, delimiter = ",",
#                header = "NUBO,BoTorch,bayes_opt", comments = "")
#     np.savetxt(f"../Tables/levy_nubo_{run}.csv", res_nubo, delimiter = ",",
#                header = "x1,x2,y", comments = "")
#     np.savetxt(f"../Tables/levy_botorch_{run}.csv", res_botorch,
#                delimiter = ",", header = "x1,x2,y", comments = "")
#     np.savetxt(f"../Tables/levy_bayes_opt_{run}.csv", res_bayes_opt,
#                delimiter = ",", header = "x1,x2,y", comments = "")
# 
# # performance
# nubo_means, nubo_stds = perf_line("../Tables/levy_nubo", N, runs)
# botorch_means, botorch_stds = perf_line("../Tables/levy_botorch", N, runs)
# bayes_opt_means, bayes_opt_stds = perf_line("../Tables/levy_bayes_opt", N, runs)
# 
# # tables
# means = np.transpose(np.vstack([nubo_means, botorch_means, bayes_opt_means]))
# stds = np.transpose(np.vstack([nubo_stds, botorch_stds, bayes_opt_stds]))
# np.savetxt("../Tables/levy_means.csv", means,
#            delimiter = ",", header = "NUBO,BoTorch,bayes_opt", comments = "")
# np.savetxt("../Tables/levy_stds.csv", stds,
#            delimiter = ",", header = "NUBO,BoTorch,bayes_opt", comments = "")


# 6D Hartmann (single-point)

# Suppressed pyGPGO and smac3 codes

# set seeds for reproducibility
np.random.seed(1)
torch.manual_seed(1)

def hartmann(x1, x2, x3, x4, x5, x6):

     a = np.array([1.0, 1.2, 3.0, 3.2])
     A = np.array([[10.0,  3.0, 17.0,  3.5,  1.7,  8.0],
                   [0.05, 10.0, 17.0,  0.1,  8.0, 14.0],
                   [ 3.0,  3.5,  1.7, 10.0, 17.0,  8.0],
                   [17.0,  8.0, 0.05, 10.0,  0.1, 14.0]])
     P = 10**-4 * np.array([[1312.0, 1696.0, 5569.0,  124.0, 8283.0, 5886.0],
                            [2329.0, 4135.0, 8307.0, 3736.0, 1004.0, 9991.0],
                            [2348.0, 1451.0, 3522.0, 2883.0, 3047.0, 6650.0],
                            [4047.0, 8828.0, 8732.0, 5743.0, 1091.0,  381.0]])
 
     x = np.array([x1, x2, x3, x4, x5, x6]).reshape((1, 6))
 
     y = -np.sum(a * np.exp(-np.sum(A * (x - P)**2, axis = -1)), axis = -1)
 
     return -y
 
# specify problem
N = 60
n0 = 10
f = hartmann
lower_bounds = np.zeros(6)
upper_bounds = np.ones(6)
runs = 10

times = np.zeros((10, 5))
for run in range(runs):
     # run comparisons
     res_nubo, times[run, 0] = nubo(N = N, n0 = n0, f = f,
                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
     res_botorch, times[run, 1] = botorch(N = N, n0 = n0, f = f,
                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
     res_bayes_opt, times[run, 3] = bayes_opt(N = N, n0 = n0, f = f,
                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
     #res_smac3, times[run, 2] = smac3(N = N, n0 = n0, f = f,
     #                 lower_bounds = lower_bounds, upper_bounds = upper_bounds)
     #res_pygpgo, times[run, 4] = pygpgo(N = N, n0 = n0, f = f,
     #                 lower_bounds = lower_bounds, upper_bounds = upper_bounds)
 
     # save results
     np.savetxt(f"../Tables/hartmann_times.csv", times, delimiter = ",",
                header="NUBO,BoTorch,bayes_opt", comments = "")
     #np.savetxt(f"../Tables/hartmann_times.csv", times, delimiter = ",",
     #          header="NUBO,BoTorch,SMAC3,bayes_opt,pyGPGO", comments = "")
     np.savetxt(f"../Tables/hartmann_nubo_{run}.csv", res_nubo, delimiter = ",",
                header="x1,x2,x3,x4,x5,x6,y", comments = "")
     np.savetxt(f"../Tables/hartmann_botorch_{run}.csv", res_botorch, delimiter = ",",
                header="x1,x2,x3,x4,x5,x6,y", comments = "")
     np.savetxt(f"../Tables/hartmann_bayes_opt_{run}.csv", res_bayes_opt, delimiter = ",",
                header="x1,x2,x3,x4,x5,x6,y", comments = "")
     #np.savetxt(f"../Tables/hartmann_smac3_{run}.csv", res_smac3, delimiter = ",",
     #          header="x1,x2,x3,x4,x5,x6,y", comments = "")
     #np.savetxt(f"../Tables/hartmann_pygpgo_{run}.csv", res_pygpgo, delimiter = ",",
     #          header="x1,x2,x3,x4,x5,x6,y", comments = "")
 
# performance
nubo_means, nubo_stds = perf_line("../Tables/hartmann_nubo", N, runs)
botorch_means, botorch_stds = perf_line("../Tables/hartmann_botorch", N, runs)
bayes_opt_means, bayes_opt_stds = perf_line("../Tables/hartmann_bayes_opt", N, runs)
#smac3_means, smac3_stds = perf_line("../Tables/hartmann_smac3", N, runs)
#pygpgo_means, pygpgo_stds = perf_line("../Tables/hartmann_pygpgo", N, runs)
 
# tables
means = np.transpose(np.vstack([nubo_means, botorch_means, bayes_opt_means]))
stds = np.transpose(np.vstack([nubo_stds, botorch_stds, bayes_opt_stds]))
np.savetxt("../Tables/hartmann_means.csv", means, delimiter = ",",
         header = "NUBO,BoTorch,bayes_opt", comments = "")
np.savetxt("../Tables/hartmann_stds.csv", stds, delimiter = ",",
         header = "NUBO,BoTorch,bayes_opt", comments = "")
#means = np.transpose(np.vstack([nubo_means, botorch_means, smac3_means,
#         bayes_opt_means, pygpgo_means]))
#stds = np.transpose(np.vstack([nubo_stds, botorch_stds, smac3_stds,
#         bayes_opt_stds, pygpgo_stds]))
#np.savetxt("../Tables/hartmann_means.csv", means, delimiter = ",",
#         header = "NUBO,BoTorch,SMAC3,bayes_opt,pyGPGO", comments = "")
#np.savetxt("../Tables/hartmann_stds.csv", stds, delimiter = ",",
#         header = "NUBO,BoTorch,SMAC3,bayes_opt,pyGPGO", comments = "")


# Without pyGPGO and smac3

# set seeds for reproducibility
np.random.seed(1)
torch.manual_seed(1)
 
def hartmann(x1, x2, x3, x4, x5, x6):
 
     a = np.array([1.0, 1.2, 3.0, 3.2])
     A = np.array([[10.0,  3.0, 17.0,  3.5,  1.7,  8.0],
                   [0.05, 10.0, 17.0,  0.1,  8.0, 14.0],
                   [ 3.0,  3.5,  1.7, 10.0, 17.0,  8.0],
                   [17.0,  8.0, 0.05, 10.0,  0.1, 14.0]])
     P = 10**-4 * np.array([[1312.0, 1696.0, 5569.0,  124.0, 8283.0, 5886.0],
                            [2329.0, 4135.0, 8307.0, 3736.0, 1004.0, 9991.0],
                            [2348.0, 1451.0, 3522.0, 2883.0, 3047.0, 6650.0],
                            [4047.0, 8828.0, 8732.0, 5743.0, 1091.0,  381.0]])
 
     x = np.array([x1, x2, x3, x4, x5, x6]).reshape((1, 6))
 
     y = -np.sum(a * np.exp(-np.sum(A * (x - P)**2, axis = -1)), axis = -1)
 
     return -y
 
# specify problem
N = 60
n0 = 10
f = hartmann
lower_bounds = np.zeros(6)
upper_bounds = np.ones(6)
runs = 10
 
times = np.zeros((10, 3))
for run in range(runs):
     # run comparisons
     res_nubo, times[run, 0] = nubo(N = N, n0 = n0, f = f,
                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
     res_botorch, times[run, 1] = botorch(N = N, n0 = n0, f = f,
                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
     res_bayes_opt, times[run, 2] = bayes_opt(N = N, n0 = n0, f = f,
                       lower_bounds = lower_bounds, upper_bounds = upper_bounds)
 
     # save results
     np.savetxt(f"../Tables/hartmann_times.csv", times, delimiter = ",",
                header = "NUBO,BoTorch,bayes_opt", comments = "")
     np.savetxt(f"../Tables/hartmann_nubo_{run}.csv", res_nubo, delimiter = ",",
                header = "x1,x2,x3,x4,x5,x6,y", comments = "")
     np.savetxt(f"../Tables/hartmann_botorch_{run}.csv", res_botorch, delimiter = ",",
                header = "x1,x2,x3,x4,x5,x6,y", comments = "")
     np.savetxt(f"../Tables/hartmann_bayes_opt_{run}.csv", res_bayes_opt, delimiter = ",",
                header = "x1,x2,x3,x4,x5,x6,y", comments = "")
 
# performance
nubo_means, nubo_stds = perf_line("../Tables/hartmann_nubo", N, runs)
botorch_means, botorch_stds = perf_line("../Tables/hartmann_botorch", N, runs)
bayes_opt_means, bayes_opt_stds = perf_line("../Tables/hartmann_bayes_opt", N, runs)
 
# tables
means = np.transpose(np.vstack([nubo_means, botorch_means, bayes_opt_means]))
stds = np.transpose(np.vstack([nubo_stds, botorch_stds, bayes_opt_stds]))
np.savetxt("../Tables/hartmann_means.csv", means, delimiter = ",",
           header = "NUBO,BoTorch,bayes_opt", comments = "")
np.savetxt("../Tables/hartmann_stds.csv", stds, delimiter = ",",
           header = "NUBO,BoTorch,bayes_opt", comments = "")


# 2D Levy (multi-point)

# set seeds for reproducibility
np.random.seed(1)
torch.manual_seed(1)

def levy(x1, x2):
    w1 = 1.0 + (x1 - 1.0)/4.0
    w2 = 1.0 + (x2 - 1.0)/4.0
    term_1 = np.sin(np.pi * w1)**2
    term_2 = np.sum((w1 - 1.0)**2 * (1.0 + 10.0 * np.sin(np.pi * w1 + 1.0)**2),
                    axis = -1)
    term_3 = (w2 - 1.0)**2 * (1.0 + np.sin(2.0 * np.pi * w2)**2)
    y = term_1 + term_2 + term_3
    # cast back xi to float64 (from float32)
    # this is not an issue in the hartmann multi-point algorithm
    # since np arrays are already float64
    return np.double(-y)

# specify problem
N = 30
n0 = 10
batch_size = 4
f = levy
lower_bounds = np.array([-10.0, -10.0])
upper_bounds = np.array([10.0, 10.0])
runs = 10

times = np.zeros((10, 2))
for run in range(runs):
    # run comparisons
    res_nubo, times[run, 0] = nubo_multi(N = N, n0 = n0, f = f,
                    batch_size = batch_size, lower_bounds = lower_bounds,
                    upper_bounds = upper_bounds)
    res_botorch, times[run, 0] = botorch_multi(N = N, n0 = n0, f = f,
                    batch_size = batch_size, lower_bounds = lower_bounds,
                    upper_bounds = upper_bounds)

    # save results
    np.savetxt(f"../Tables/levy_parallel_times.csv", times, delimiter = ",",
               header = "NUBO,BoTorch", comments = "")
    np.savetxt(f"../Tables/levy_parallel_nubo_{run}.csv", res_nubo, delimiter = ",",
               header = "x1,x2,y", comments = "")
    np.savetxt(f"../Tables/levy_parallel_botorch_{run}.csv", res_botorch, delimiter = ",",
               header = "x1,x2,y", comments = "")

# performance
nubo_means, nubo_stds = perf_line("../Tables/levy_parallel_nubo", N, runs)
botorch_means, botorch_stds = perf_line("../Tables/levy_parallel_botorch", N, runs)

# tables
means = np.transpose(np.vstack([nubo_means, botorch_means]))
stds = np.transpose(np.vstack([nubo_stds, botorch_stds]))
np.savetxt("../Tables/levy_parallel_means.csv", means, delimiter = ",",
           header = "NUBO,BoTorch", comments = "")
np.savetxt("../Tables/levy_parallel_stds.csv", stds, delimiter = ",",
           header = "NUBO,BoTorch", comments = "")


# 6D Hartmann (multi-point)

# set seeds for reproducibility
np.random.seed(1)
torch.manual_seed(1)

def hartmann(x1, x2, x3, x4, x5, x6):

    a = np.array([1.0, 1.2, 3.0, 3.2])
    A = np.array([[10.0,  3.0, 17.0,  3.5,  1.7,  8.0],
                  [0.05, 10.0, 17.0,  0.1,  8.0, 14.0],
                  [ 3.0,  3.5,  1.7, 10.0, 17.0,  8.0],
                  [17.0,  8.0, 0.05, 10.0,  0.1, 14.0]])
    P = 10**-4 * np.array([[1312.0, 1696.0, 5569.0,  124.0, 8283.0, 5886.0],
                           [2329.0, 4135.0, 8307.0, 3736.0, 1004.0, 9991.0],
                           [2348.0, 1451.0, 3522.0, 2883.0, 3047.0, 6650.0],
                           [4047.0, 8828.0, 8732.0, 5743.0, 1091.0,  381.0]])

    x = np.array([x1, x2, x3, x4, x5, x6]).reshape((1, 6))
    y = -np.sum(a * np.exp(-np.sum(A * (x - P)**2, axis = -1)), axis = -1)

    return -y

# specify problem
N = 100
n0 = 20
batch_size = 4
f = hartmann
lower_bounds = np.zeros(6)
upper_bounds = np.ones(6)
runs = 10

times = np.zeros((10, 2))
for run in range(runs):
    # run comparisons
    res_nubo, times[run, 0] = nubo_multi(N = N, n0 = n0, f = f,
                          batch_size = batch_size, lower_bounds = lower_bounds,
                          upper_bounds = upper_bounds)
    res_botorch, times[run, 1] = botorch_multi(N = N, n0 = n0, f = f,
                          batch_size = batch_size, lower_bounds = lower_bounds,
                          upper_bounds = upper_bounds)

    # save results
    np.savetxt(f"../Tables/hartmann_parallel_times.csv", times, delimiter = ",",
               header = "NUBO,BoTorch", comments = "")
    np.savetxt(f"../Tables/hartmann_parallel_nubo_{run}.csv", res_nubo,
               delimiter = ",", header = "x1,x2,x3,x4,x5,x6,y", comments = "")
    np.savetxt(f"../Tables/hartmann_parallel_botorch_{run}.csv", res_botorch,
               delimiter = ",", header = "x1,x2,x3,x4,x5,x6,y", comments = "")

# performance
nubo_means, nubo_stds = perf_line("../Tables/hartmann_parallel_nubo", N, runs)
botorch_means, botorch_stds = perf_line("../Tables/hartmann_parallel_botorch", N, runs)

# tables
means = np.transpose(np.vstack([nubo_means, botorch_means]))
stds = np.transpose(np.vstack([nubo_stds, botorch_stds]))
np.savetxt("../Tables/hartmann_parallel_means.csv", means, delimiter = ",",
           header = "NUBO,BoTorch", comments = "")
np.savetxt("../Tables/hartmann_parallel_stds.csv", stds, delimiter = ",",
           header = "NUBO,BoTorch", comments = "")


#### Figure 1

# Uncomment to create the directory if it doesn't exist
# import os
# os.makedirs("../Figures/", exist_ok=True)

# make figure
fig, axs = plt.subplots(2, 2, figsize = (6.125, 5))

# plot A
res_A = np.loadtxt("../Tables/levy_means.csv",
                   delimiter = ",", skiprows = 1)
xs = np.linspace(1, res_A.shape[0], res_A.shape[0])
axs[0, 0].plot(xs, res_A[:, 0], c = "tab:blue", label = "NUBO")
axs[0, 0].plot(xs, res_A[:, 1], c = "tab:orange", label = "BoTorch")
axs[0, 0].plot(xs, res_A[:, 2], c = "tab:red", label = "bayes_opt")
#axs[0, 0].plot(xs, res_A[:, 3], c = "tab:green", label = "SMAC3")
#axs[0, 0].plot(xs, res_A[:, 4], c = "tab:purple", label = "pyGPGO")
axs[0, 0].set_xlabel("Evaluation")
axs[0, 0].set_ylabel("Output")

# plot B
res_B = np.loadtxt("../Tables/hartmann_means.csv",
                   delimiter = ",", skiprows = 1)
xs = np.linspace(1, res_B.shape[0], res_B.shape[0])
axs[0, 1].plot(xs, res_B[:, 0], c = "tab:blue")
axs[0, 1].plot(xs, res_B[:, 1], c = "tab:orange")
axs[0, 1].plot(xs, res_B[:, 2], c = "tab:red")
#axs[0, 1].plot(xs, res_B[:, 3], c = "tab:green")
#axs[0, 1].plot(xs, res_B[:, 4], c = "tab:purple")
axs[0, 1].set_xlabel("Evaluation")
axs[0, 1].set_ylabel("Output")

# plot C
res_C = np.loadtxt("../Tables/levy_parallel_means.csv",
                   delimiter = ",", skiprows = 1)
xs = np.linspace(1, res_C.shape[0], res_C.shape[0])
axs[1, 0].plot(xs, res_C[:, 0], c = "tab:blue")
axs[1, 0].plot(xs, res_C[:, 1], c = "tab:orange")
axs[1, 0].set_xlabel("Evaluation")
axs[1, 0].set_ylabel("Output")

# plot D
res_D = np.loadtxt("../Tables/hartmann_parallel_means.csv",
                   delimiter = ",", skiprows = 1)
xs = np.linspace(1, res_D.shape[0], res_D.shape[0])
axs[1, 1].plot(xs, res_D[:, 0], c = "tab:blue")
axs[1, 1].plot(xs, res_D[:, 1], c = "tab:orange")
axs[1, 1].set_xlabel("Evaluation")
axs[1, 1].set_ylabel("Output")

# legend
fig.legend(ncol = 5, loc = "lower center")
plt.tight_layout()
plt.subplots_adjust(bottom = 0.15)

# annotations
x1 = 0.02
x2 = 0.51
y1 = 0.52
y2 = 0.99

add_annotation(axs[0, 0], fig, x1, y2, "A)")
add_annotation(axs[0, 1], fig, x2, y2, "B)")
add_annotation(axs[1, 0], fig, x1, y1, "C)")
add_annotation(axs[1, 1], fig, x2, y1, "D)")

# save figure 
plt.savefig("../Figures/Figure1.pdf")
plt.show()
plt.clf()


#### Comparison tables

import numpy as np

np.set_printoptions(suppress=True)

# means
means_A = np.loadtxt("../Tables/levy_means.csv",
                     delimiter = ",", skiprows = 1)[-1, :]
means_B = np.loadtxt("../Tables/hartmann_means.csv",
                     delimiter = ",", skiprows = 1)[-1, :]
means_C = np.loadtxt("../Tables/levy_parallel_means.csv",
                     delimiter = ",", skiprows = 1)[-1, :]
means_D = np.loadtxt("../Tables/hartmann_parallel_means.csv",
                     delimiter = ",", skiprows = 1)[-1, :]

print("A) means: ", np.round(means_A, 2))
print("B) means: ", np.round(means_B, 2))
print("C) means: ", np.round(means_C, 2))
print("D) means: ", np.round(means_D, 2))

# standard errors
stds_A = np.loadtxt("../Tables/levy_stds.csv",
                    delimiter = ",", skiprows = 1)[-1, :]
stds_B = np.loadtxt("../Tables/hartmann_stds.csv",
                    delimiter = ",", skiprows = 1)[-1, :]
stds_C = np.loadtxt("../Tables/levy_parallel_stds.csv",
                    delimiter = ",", skiprows = 1)[-1, :]
stds_D = np.loadtxt("../Tables/hartmann_parallel_stds.csv",
                    delimiter = ",", skiprows = 1)[-1, :]

print("A) stds: ", np.round(stds_A, 2))
print("B) stds: ", np.round(stds_B, 2))
print("C) stds: ", np.round(stds_C, 2))
print("D) stds: ", np.round(stds_D, 2))

# times
times_A = np.loadtxt("../Tables/levy_times.csv",
                     delimiter = ",", skiprows = 1)
times_B = np.loadtxt("../Tables/hartmann_times.csv",
                     delimiter = ",", skiprows = 1)
times_C = np.loadtxt("../Tables/levy_parallel_times.csv",
                     delimiter = ",", skiprows = 1)
times_D = np.loadtxt("../Tables/hartmann_parallel_times.csv",
                     delimiter = ",", skiprows = 1)

print("A) times: ", np.round(np.mean(times_A/20, axis = 0), 2))
print("B) times: ", np.round(np.mean(times_B/50, axis = 0), 2))
print("C) times: ", np.round(np.mean(times_C/20, axis = 0), 2))
print("D) times: ", np.round(np.mean(times_D/80, axis = 0), 2))


## 2. Bayesian optimisation

### Figure 2

# imports for Bayesian optimisation
import torch
from nubo.acquisition import UpperConfidenceBound
from nubo.models import GaussianProcess, fit_gp
from nubo.optimisation import lbfgsb
from nubo.utils import gen_inputs
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.constraints import Interval

# imports for plotting
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt


# Uncomment to create the directory if it doesn't exist
# import os
# os.makedirs("../Figures/", exist_ok = True)


# set seed for reproducibility
torch.manual_seed(1)

# objective function
def func(x):
    y = x * torch.sin(x)
    return y.squeeze()

# input space
dims = 1
bounds = torch.tensor([[0.], [10.]])

# training data
x_train = gen_inputs(num_points = 2,
                     num_dims = 1,
                     bounds = bounds,
                     num_lhd = 2)
y_train = func(x_train)

# Bayesian optimisation loop
iters = 8

# make figure
fig, axs = plt.subplots(4, 2, figsize = (6.125, 9))

for iter in range(iters):

    ##### BAYESIAN OPTIMISATION

    # specify Gaussian process
    likelihood = GaussianLikelihood(noise_constraint = Interval(0.0, 1e-10))
    gp = GaussianProcess(x_train, y_train, likelihood = likelihood)

    # fit Gaussian process
    fit_gp(x_train, y_train, gp = gp, likelihood = likelihood, lr = 0.1, steps = 200)

    # specify acquisition function
    acq = UpperConfidenceBound(gp = gp, beta = 16)

    # optimise acquisition function
    x_new, _ = lbfgsb(func = acq, bounds = bounds, num_starts = 5, num_samples = 1000)

    # evaluate new point
    y_new = func(x_new)

    # add to data
    x_train = torch.vstack((x_train, x_new))
    y_train = torch.hstack((y_train, y_new))

    ##### PLOTS

    # compute GP mean and variance for plotting
    x_plot = torch.linspace(start = -0.5, end = 10.5, steps = 1001, dtype = torch.double)
    gp.eval()
    pred = gp(x_plot)
    gp_means = pred.mean
    gp_vars = pred.variance.clamp_min(1e-10)
    gp_ci_upper = gp_means + torch.sqrt(gp_vars)*1.96
    gp_ci_lower = gp_means - torch.sqrt(gp_vars)*1.96

    # true objective function
    truth = func(x_plot)

    # acquisition function
    acq_func = torch.zeros(x_plot.size(0))
    for i in range(x_plot.size(0)):
        acq_func[i] = -acq(torch.reshape(x_plot[i], (1, 1)))

    # torch to numpy for plotting with matplotlib
    x_plot = x_plot.detach().numpy()
    gp_means = gp_means.detach().numpy()
    gp_ci_upper = gp_ci_upper.detach().numpy()
    gp_ci_lower = gp_ci_lower.detach().numpy()
    acq_func = acq_func.detach().numpy()

    row = int(iter/2)
    col = int(iter%2)

    # make plot
    axs[row, col].plot(x_train[:-1], y_train[:-1], marker = "o", linewidth = 0,
                       color = "navy", label = "Observations", zorder = 6)
    axs[row, col].axvline(float(x_train[-1]), color = "red", linestyle = "dotted",
                       linewidth = 1, label = "Candidate", zorder = 5)
    axs[row, col].plot(x_plot, gp_means, color = "firebrick",
                       label = "Prediction", zorder = 4 )
    axs[row, col].plot(x_plot, truth, color = "black", linestyle = "dashed",
                       label = "Truth", zorder = 2)
    axs[row, col].fill_between(x_plot, gp_ci_upper, gp_ci_lower, color = "skyblue",
                       label = "Uncertainty", zorder = 1)
    axs[row, col].plot(x_plot, acq_func, color = "darkorange",
                       label = "UCB", zorder = 3)


    # set title and labels
    axs[row, col].set_title(f"Iteration {iter+1}")
    axs[row, col].set_ylabel("Output")
    axs[row, col].set_xlabel("Input")

    # format axes
    axs[row, col].set_xlim((-0.5, 10.5))
    axs[row, col].set_ylim((-10., 12.))

# make custom legend
legend_elements = [Line2D([0], [0], marker = "o", linewidth = 0, color = "navy",
                          label = "Observations"),
                   Line2D([0], [0], color = "darkorange", label = "Acquisition"),
                   Line2D([0], [0], color = "firebrick", label = "Prediction"),
                   Line2D([0], [0], color = "red", linestyle = "dotted",
                          linewidth = 1, label = "Candidate"),
                   Patch(color = "skyblue", label = "Uncertainty"),
                   Line2D([0], [0], color = "black", linestyle = "dashed",
                          label = "Truth")]

# legend
fig.legend(handles = legend_elements, ncol = 3, loc = "lower center")
plt.tight_layout()
plt.subplots_adjust(bottom = 0.105)

# save figure 
plt.savefig("../Figures/Figure2.pdf")
plt.show()
plt.clf()


## 3. NUBO

# Create dummy training data to allow running the script

x_train = torch.rand((10, 6), dtype = torch.double)
y_train = torch.rand((10,), dtype = torch.double)

### 3.1 Gaussian processes

# With estimated noise:
from nubo.models import GaussianProcess, fit_gp
from gpytorch.likelihoods import GaussianLikelihood

likelihood = GaussianLikelihood()
gp = GaussianProcess(x_train, y_train, likelihood = likelihood)
fit_gp(x_train, y_train, gp = gp, likelihood = likelihood)

# With known noise level:
from nubo.models import GaussianProcess, fit_gp
from gpytorch.likelihoods import FixedNoiseGaussianLikelihood

noise = torch.ones(x_train.size(0)) * 0.025
likelihood = FixedNoiseGaussianLikelihood(noise = noise,
                                          learn_additional_noise = True)
gp = GaussianProcess(x_train, y_train, likelihood = likelihood)
fit_gp(x_train, y_train, gp = gp, likelihood = likelihood)


### 3.2 Bayesian optimisation

# Bayesian optimisation step:

import torch
from nubo.acquisition import UpperConfidenceBound
from nubo.models import GaussianProcess, fit_gp
from nubo.optimisation import single
from gpytorch.likelihoods import GaussianLikelihood

bounds = torch.tensor([[0., 0., 0., 0., 0., 0.],
                       [1., 1., 1., 1., 1., 1.]])

# x_train = load inputs as torch.Tensor
# y_train = load outputs as torch.Tensor

likelihood = GaussianLikelihood()
gp = GaussianProcess(x_train, y_train, likelihood = likelihood)
fit_gp(x_train, y_train, gp = gp, likelihood = likelihood)

acq = UpperConfidenceBound(gp = gp, beta = 4)
x_new, _ = single(func = acq, method = "L-BFGS-B", bounds = bounds)

# Sequential single-point optimisation:

from nubo.acquisition import ExpectedImprovement, UpperConfidenceBound
from nubo.optimisation import single

acq = ExpectedImprovement(gp = gp, y_best = torch.max(y_train))
acq = UpperConfidenceBound(gp = gp, beta = 4)
x_new, _ = single(func = acq, method = "L-BFGS-B", bounds = bounds,
                  num_starts = 5, num_samples = 50)

# Parallel mutli-point optimisation:

from nubo.acquisition import MCExpectedImprovement, MCUpperConfidenceBound
from nubo.optimisation import multi_joint, multi_sequential

acq = MCExpectedImprovement(gp = gp, y_best = torch.max(y_train), samples = 256)
acq = MCUpperConfidenceBound(gp = gp, beta = 4, samples = 256)

x_new, _ = multi_joint(func = acq, method = "Adam", lr = 0.1, steps = 100,
                       batch_size = 4, bounds = bounds)
x_new, _ = multi_sequential(func = acq, method = "Adam", lr = 0.1, steps = 100,
                       batch_size = 4, bounds = bounds)

from nubo.acquisition import MCUpperConfidenceBound
from nubo.optimisation import multi_joint, multi_sequential

acq = MCUpperConfidenceBound(gp = gp, beta = 4, fix_base_samples = True)
x_new, _ = multi_joint(func = acq, method = "L-BFGS-B",
                       batch_size = 4, bounds = bounds)

acq = MCUpperConfidenceBound(gp = gp, beta = 4, fix_base_samples = True)
x_new, _ = multi_sequential(func = acq, method = "L-BFGS-B",
                       batch_size = 4, bounds = bounds)

# Asynchronouse optimisation:

import torch
from nubo.acquisition import MCUpperConfidenceBound
from nubo.optimisation import multi_joint, multi_sequential

x_pend = torch.tensor([[0.2, 0.9, 0.8, 0.4, 0.4, 0.1],
                       [0.1, 0.3, 0.7, 0.2, 0.1, 0.2]])

acq = MCUpperConfidenceBound(gp = gp, beta = 4, x_pending = x_pend)

x_new, _ = multi_joint(func = acq, method = "Adam",
                       batch_size = 4, bounds = bounds)
x_new, _ = multi_sequential(func = acq, method = "Adam",
                       batch_size = 4, bounds = bounds)

# Constrained optimisation:

import torch

bounds = torch.tensor([[0., 0., 0., 0., 0., 0.], [1., 1., 1., 1., 1., 1.]])
cons = [{"type": "ineq", "fun": lambda x: 0.5 - x[0] - x[1]},
        {"type": "eq", "fun": lambda x: 1.2442 - x[3] - x[4] - x[5]}]

from nubo.acquisition import UpperConfidenceBound
from nubo.optimisation import single

acq = UpperConfidenceBound(gp = gp, beta = 4)
x_new, _ = single(func = acq, method = "SLSQP",
                  bounds = bounds, constraints = cons)

# Mixed optimisation:

import torch

bounds = torch.tensor([[0., 0., 0., 0., 0., 0.], [1., 1., 1., 1., 1., 1.]])
disc = {0: [0.2, 0.4, 0.6, 0.8], 4: [0.3, 0.6, 0.9]}

from nubo.acquisition import UpperConfidenceBound
from nubo.optimisation import single

acq = UpperConfidenceBound(gp = gp, beta = 4)
x_new, _ = single(func = acq, method = "L-BFGS-B",
                  bounds = bounds, discrete = disc)


### 3.3 Test functions and utilities

from nubo.test_functions import Ackley, Hartmann6D

func = Ackley(dims = 5, noise_std = 0.1, minimise = False)
func = Hartmann6D(minimise = False)
dims = func.dims
bounds = func.bounds

from nubo.utils import gen_inputs

x_train = gen_inputs(num_points = dims * 5, num_dims = dims, bounds = bounds)
y_train = func(x_train)

from nubo.utils import standardise, normalise, unnormalise

x_norm = normalise(x_train, bounds = bounds)
x_train = unnormalise(x_norm, bounds = bounds)
y_stand = standardise(y_train)


#### Figure 4

import torch
from nubo.utils import gen_inputs
import matplotlib.pyplot as plt

# set seed for reproducibility
torch.manual_seed(1)

# generate random and Latin hypercube design
x_random = torch.rand((10, 2))
x_lhs = gen_inputs(10, 2)

# plot
fig, ax = plt.subplots(figsize=(4, 4))

plt.scatter(x_random[:, 0], x_random[:, 1], label = "Random", marker = "^")
plt.scatter(x_lhs[:, 0], x_lhs[:, 1], label = "LHS", marker = "x")
plt.hlines(torch.linspace(0., 1., 11), 0, 1, colors = "grey",
           linestyles = "dashed", linewidth = 1)
plt.vlines(torch.linspace(0., 1., 11), 0, 1, colors = "grey",
           linestyles = "dashed", linewidth = 1)
plt.xlabel("Input 1")
plt.ylabel("Input 2")
plt.xlim(0, 1)
plt.ylim(0, 1)
fig.legend(loc="lower center", ncol = 2)
ax.set_aspect("equal")
plt.tight_layout()
plt.subplots_adjust(bottom = 0.1875)

# save figure
plt.savefig("../Figures/Figure4.pdf")
plt.show()
plt.clf()


## 4. Case study

# Set manual seed for reproducibility and format print output.

import torch

torch.manual_seed(123)
torch.set_printoptions(precision = 4, sci_mode = False)

# Set-up $6$-dimensional Hartmann function as a substitute for a black box 
# function, that is a computer simulation or a physical experiment.

from nubo.test_functions import Hartmann6D

black_box = Hartmann6D(noise_std = 0.1, minimise = False)

# Specify input parameter space.

dims = 6
bounds = torch.tensor([[0., 0., 0., 0., 0., 0.], [1., 1., 1., 1., 1., 1.]])
discrete = {0: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}

# Generate initial data.

from nubo.utils import gen_inputs


x_train = gen_inputs(num_points = dims*5,
                     num_dims = dims,
                     bounds = bounds)
x_train[:, 0] = torch.round(x_train[:, 0], decimals = 1)
y_train = black_box(x_train)

# Define optimisation loop.

from nubo.acquisition import MCUpperConfidenceBound
from nubo.models import GaussianProcess, fit_gp
from nubo.optimisation import multi_sequential
from gpytorch.likelihoods import GaussianLikelihood

def bo(x_train, y_train):

    # specify Gaussian process
    likelihood = GaussianLikelihood()
    gp = GaussianProcess(x_train, y_train, likelihood = likelihood)

    # fit Gaussian process
    fit_gp(x_train, y_train, gp = gp, likelihood = likelihood,
           lr = 0.1, steps = 200)

    # specify acquisition function
    acq = MCUpperConfidenceBound(gp = gp, beta = 4, samples = 128)

    # optimise acquisition function
    x_new, _ = multi_sequential(func = acq,
                                method = "Adam",
                                batch_size = 4,
                                bounds = bounds,
                                discrete = discrete,
                                lr = 0.1,
                                steps = 200,
                                num_starts = 2,
                                num_samples = 100)

    return x_new

# Run optimisation loop for $10$ iterations. This can take a few minutes.

# Bayesian optimisation loop
iters = 10

for iter in range(iters):

    # compute new point
    x_new = bo(x_train, y_train)

    # evaluate new point
    y_new = black_box(x_new)

    # add to data
    x_train = torch.vstack((x_train, x_new))
    y_train = torch.hstack((y_train, y_new))

# Look at results."""

print(torch.hstack([x_train, y_train.reshape(-1, 1)]))

# Approximate solution:

best_iter = int(torch.argmax(y_train))

print("Approximate solution")
print("--------------------")
print(f"Evaluation: {best_iter+1}")
print(f"Inputs: {x_train[best_iter]}")
print(f"Output: {y_train[best_iter]:.4f}")


### Figure 5

import matplotlib.pyplot as plt
import os
import numpy as np

# set seed for reproducibility
torch.manual_seed(123)

# generate random and Latin hypercube designs
random = black_box(torch.rand((70, dims)))
lhs = black_box(gen_inputs(num_points = 70,
                           num_dims = dims,
                           bounds = bounds))

# plot
fig, ax = plt.subplots(figsize = (4, 3))

plt.plot(range(1, 71), np.maximum.accumulate(random), label = "Random")
plt.plot(range(1, 71), np.maximum.accumulate(lhs), label = "LHS")
plt.plot(range(1, 71), np.maximum.accumulate(y_train), label = "NUBO")
plt.hlines(3.32237, 0, 71, colors = "red", linestyles = "dashed", label = "Maximum")
plt.xlabel("Evaluation")
plt.ylabel("Output")
plt.xlim(0, 71)
fig.legend(loc = "lower center", ncol = 4)
plt.tight_layout()
plt.subplots_adjust(bottom = 0.25)

# save figure 
plt.savefig("../Figures/Figure5.pdf")
plt.show()
plt.clf()
