# This script runs all of the demos and examples given in # GaussianProcesses.jl: A Nonparametric Bayes package for the Julia Language using Pkg; Pkg.activate(@__DIR__); Pkg.instantiate() @info "Section 2" using GaussianProcesses # Reproduce figure 1 from the paper # import Pkg; Pkg.add("Plots") # import Pkg; Pkg.add("PyPlot") using GaussianProcesses, Plots, Random pyplot() Random.seed!(16042018) #Select covariance function kern = Array{Kernel}(undef,6) kern[1] = SE(0.5,0.0) #Sqaured exponential kernel kern[2] = Periodic(0.5,0.0,1.0) #Periodic kernel kern[3] = Lin(0.0) #Linear kernel kern[4] = SE(0.5,0.0)+Periodic(0.5,0.0,1.0) #SE+Per kernel kern[5] = Periodic(0.5,0.0,1.0)*Lin(0.0) #Per x Lin kernel kern[6] = SE(0.5,0.0)*Lin(0.0)+SE(0.5,0.0)*Periodic(0.5,0.0,1.0) #SE*Lin + SE*Per kernel gp = Array{GPE}(undef,6) for i in 1:6 gp[i] = GPE(mean=MeanZero(),kernel=kern[i]) end x_path = range(-5,stop=5,length=50); p1 = plot(x_path,rand(gp[1],x_path, 5),legend=false,linewidth=1.5,title="SE") p2 = plot(x_path,rand(gp[2],x_path, 5),legend=false,linewidth=1.5,title="Per") p3 = plot(x_path,rand(gp[3],x_path, 5),legend=false,linewidth=1.5,title="Lin") p4 = plot(x_path,rand(gp[4],x_path, 5),legend=false,linewidth=1.5,title="SE+Per") p5 = plot(x_path,rand(gp[5],x_path, 5),legend=false,linewidth=1.5,title="Per × Lin") p6 = plot(x_path,rand(gp[6],x_path, 5),legend=false,linewidth=1.5,title="SE×Lin+SE×Per") p_combined = plot(p1,p2,p3,p4,p5,p6, xlabel="Input - x", ylabel="Output - k(x,x*)",fmt=:png, ylims=(-7, 7)) savefig(p_combined,"figure_1.png") #—————————————————————————————————————————————————————————— @info "Section 3: The package" #Script to reproduce Figure 2 using GaussianProcesses, Random Random.seed!(13579) # Set the seed using the 'Random' package n = 10; # number of training points x = 2π * rand(n); # predictors y = sin.(x) + 0.05*randn(n); # regressors # Select mean and covariance function mZero = MeanZero() # Zero mean function kern = SE(0.0,0.0) # Sqaured exponential kernel logObsNoise = -1.0 # log standard deviation of observation noise gp = GP(x,y,mZero,kern,logObsNoise) # Fit the GP x = 0:0.1:2π # a sequence between 0 and 2π with 0.1 spacing μ, Σ = predict_y(gp,x); using Plots #pyplot() # Optionally select a plotting backend # Plot the GP p1 = plot(gp; xlabel="Input", ylabel="Output", title="Gaussian process", legend=false, fmt=:pdf); savefig(p1,"figure_2-left.png") optimize!(gp) #Optimise the parameters p2 = plot(gp; xlabel="Input", ylabel="Output", title="Parameter optimised Gaussian process", legend=false, fmt=:pdf); savefig(p2,"figure_2-right.png") #Script to produce figure 3 # import Pkg; Pkg.add("Distributions") using Distributions # Uniform priors are used as default if priors are not specified set_priors!(kern, [Normal(0,1), Normal(0,1)]) chain = mcmc(gp) p = plot(chain', xlabel="Iterations", ylabel="Parameter values", label=["Noise", "SE log length", "SE log scale"]); savefig(p,"figure_3.png") # Script to recreate figure 4 # Simulate data for a 2D Gaussian process n = 10 # number of data points X = 2π * rand(2, n) # inputs y = sin.(X[1,:]) .* cos.(X[2,:]) + 0.5 * rand(n) # outputs kern = Matern(5/2,[0.0,0.0],0.0) + SE(0.0,0.0) # sum of two kernels gp2 = GP(X,y,MeanZero(),kern) # Plot mean and variance p1 = plot(gp2; xlabel="Input", ylabel="Input", title="Mean of GP") p2 = plot(gp2; xlabel="Input", ylabel="Input", var=true, title="Variance of GP", fill=true) p = plot(p1, p2; fmt=:pdf); savefig(p,"figure_4.png") #Script to recreate figure 5 gr() # use GR backend to allow wireframe plot p1 = contour(gp2) p2 = surface(gp2) p3 = heatmap(gp2) p4 = wireframe(gp2) p = plot(p1, p2, p3, p4; fmt=:pdf); savefig(p,"figure_5.png") #—————————————————————————————————————————————————————————— @info "Section 4: Demos" @info "Section 4.1: Binary classification" # Script to recreate figure 6 # import Pkg; Pkg.add("RDatasets") using GaussianProcesses, RDatasets, Random import Distributions:Normal # import only the Normal distribution # Load the data Random.seed!(113355) crabs = dataset("MASS","crabs"); # load the data crabs = crabs[shuffle(1:size(crabs)[1]), :]; # shuffle the data train = crabs[1:div(end,2),:]; # training data y = Array{Bool}(undef,size(train)[1]); # response y[train[:,:Sp].=="B"].=0; # convert characters to booleans y[train[:,:Sp].=="O"].=1; # X = convert(Matrix,train[:,4:end]); # predictors X = Matrix(train[:, 4:end]) #Select mean, kernel and likelihood function mZero = MeanZero(); # Zero mean function kern = Matern(3/2,zeros(5),0.0); # Matern 3/2 ARD kernel lik = BernLik(); # Bernoulli likelihood for binary data {0,1} gp = GP(X',y,mZero,kern,lik) # Fit the Gaussian process model set_priors!(gp.kernel,[Normal(0.0,2.0) for i in 1:6]) # MCMC sampling samples = mcmc(gp; ε=0.01, nIter=10000, burn=1000, thin=10); # Test data test = crabs[div(end,2)+1:end,:]; # select test data yTest = Array{Bool}(undef,size(test)[1]); # test response data yTest[test[:,:Sp].=="B"].=0; # convert characters to booleans yTest[test[:,:Sp].=="O"].=1; # xTest = convert(Matrix,test[:,4:end]) xTest = Matrix(test[:,4:end]) # Sample from the predictive ymean = Array{Float64}(undef,size(samples,2),size(xTest,1)); for i in 1:size(samples,2) set_params!(gp,samples[:,i]) # Set the GP parameters to the posterior values update_target!(gp) # Update the GP function with the new parameters ymean[i,:] = predict_y(gp,xTest')[1] # Store the predictive mean end #Display the plots using Plots gr() p = plot(ymean', xlabel="Inputs", ylabel="Predicted output", leg=false); scatter!(yTest); savefig(p,"figure_6.png") #——————————————————————————————————————————————————————————— @info "Section 4.2: Time series" #Script to recreate figure 7 using GaussianProcesses, CSV, DataFrames # Location of data within the package data_dir = joinpath(dirname(dirname(pathof(GaussianProcesses))),"notebooks/data") # Load the data (the data is in the package directory) data = CSV.read(joinpath(data_dir, "CO2_data.csv"), DataFrame, header=0) year = data[:,1]; co2 = data[:,2]; # Split the data into training and testing data xtrain = year[year.<2004]; ytrain = co2[year.<2004]; xtest = year[year.>=2004]; ytest = co2[year.>=2004]; # Kernel is represented as a sum of kernels kernel = SE(4.0,4.0) + Periodic(0.0,1.0,0.0) * SE(4.0,0.0) + RQ(0.0,0.0,-1.0) + SE(-2.0,-2.0); gp = GP(xtrain,ytrain,MeanZero(),kernel,-2.0) #Fit the GP optimize!(gp) #Estimate the parameters through maximum likelihood estimation μ, Σ = predict_y(gp,xtest); #Predict observations at test values using Plots #Load the Plots.jl package with the pyplot backend # pyplot() p = plot(xtest,μ,ribbon=Σ, xlabel="Time (years)", ylabel="Concentration", title="Time series prediction", label="95% predictive confidence region",fmt=:pdf); scatter!(xtest,ytest,label="Observations"); savefig(p,"figure_7.png") #———————————————————————————————————————————————————————— @info "Section 4.3: Count data" #Script to recreate figure 8 from the paper using GaussianProcesses, Distributions, Plots, Random #pyplot() #Select the plotting backend Random.seed!(203617) n = 20 X = collect(range(-3,stop=3,length=n)); F = 2*cos.(2*X); Y = [rand(Poisson(exp.(F[i]))) for i in 1:n]; # Define model params. k = Matern(3/2,0.0,0.0) # Matern 3/2 kernel l = PoisLik() # Poisson likelihood # Define each GP gpmc = GP(X, vec(Y), MeanZero(), k, l) # Set the priors and sample from the posterior set_priors!(gpmc.kernel,[Normal(-2.0,4.0),Normal(-2.0,4.0)]) ############ # MCMC ############ #Sample predicted values @time samples = mcmc(gpmc; nIter=10000); xtest = range(minimum(gpmc.x),stop=maximum(gpmc.x),length=50); ymean = []; fsamples = Array{Float64}(undef,size(samples,2), length(xtest)); for i in 1:size(samples,2) set_params!(gpmc,samples[:,i]) update_target!(gpmc) push!(ymean, predict_y(gpmc,xtest)[1]) fsamples[i,:] = rand(gpmc, xtest) end #Predictive plots q10 = [quantile(fsamples[:,i], 0.1) for i in 1:length(xtest)]; q50 = [quantile(fsamples[:,i], 0.5) for i in 1:length(xtest)]; q90 = [quantile(fsamples[:,i], 0.9) for i in 1:length(xtest)]; p1 = plot(xtest,exp.(q50),ribbon=(exp.(q10), exp.(q90)),leg=true, fmt=:png, label="quantiles", title = "MCMC Inference", xlabel="Input feature", ylabel="Posterior samples") plot!(xtest,mean(ymean), label="posterior mean") xx = range(-3,stop=3,length=1000); f_xx = 2*cos.(2*xx); plot!(xx, exp.(f_xx), label="truth") scatter!(X,Y, label="data") savefig(p1,"figure_8-left.png") ########################## # VI ########################## gpvi = GP(X, vec(Y), MeanZero(), k, l); Q = vi(gpvi); # Run twice to remove Zygote compilation time penalty @time Q = vi(gpvi); # Perform variational inference xtest = range(minimum(gpmc.x),stop=maximum(gpmc.x),length=50); nsamps = 500 ymean = []; visamples = Array{Float64}(undef, nsamps, size(xtest, 1)); for i in 1:nsamps visamples[i, :] = rand(gpvi, xtest, Q) push!(ymean, predict_y(gpvi, xtest)[1]) end q10 = [quantile(visamples[:, i], 0.1) for i in 1:length(xtest)]; q50 = [quantile(visamples[:, i], 0.5) for i in 1:length(xtest)]; q90 = [quantile(visamples[:, i], 0.9) for i in 1:length(xtest)]; p2 = plot(xtest, exp.(q50), ribbon=(exp.(q10), exp.(q90)), leg=true, fmt=:png, label="quantiles", xlabel="Input feature", ylabel="Posterior samples") plot!(xtest, mean(ymean), label="posterior mean", w=2) xx = range(-3,stop=3,length=1000); f_xx = 2*cos.(2*xx); plot!(xx, exp.(f_xx), label="truth") scatter!(X,Y, label="data") savefig(p2,"figure_8-right.png") #—————————————————————————————————————————————————————— @info "Section 4.4: Bayesian optimisation" #This script recreates figure 9 # import Pkg; Pkg.add("BayesianOptimization") using GaussianProcesses, Random gp = ElasticGPE(2, # two input dimensions mean = MeanConst(0.), kernel = SEArd([0., 0.], 5.), logNoise = 0., capacity = 3000, stepsize = 1000) append!(gp, [1., 2.], 0.4) # append observation x = [1., 2.] and y = 0.4 using BayesianOptimization, GaussianProcesses f(x) = 0.1*(x[] - 2)^2 + cos(pi/2*x[]) + randn() # noisy function to minimize # Choose as a model an elastic GP with input dimensions 1. model = ElasticGPE(1, mean = MeanConst(0.), kernel = SEArd([0.], 5.), logNoise = 0.) # Optimize the hyperparameters of the GP using maximum likelihood (ML) # estimates every 50 steps woth bounds on the logNoise and # bounds for the two parameters GaussianProcesses.get_param_names(model.kernel) modeloptimizer = MAPGPOptimizer(every = 50, noisebounds = [-2., 3], kernbounds = [[-1, 0], [4, 10]], maxeval = 40) opt = BOpt(f, model, ExpectedImprovement(), # type of acquisition function modeloptimizer, [-5.], [5.], # lower- and upperbounds repetitions = 5, # evaluate the function 5 times maxiterations = 200, # evaluate at 200 input positions sense = Min, # minimize the objective function acquisitionoptions = (maxeval = 4000, restarts = 50), verbosity = Silent) result = boptimize!(opt) #Plots using PGFPlotsX push!(PGFPlotsX.CUSTOM_PREAMBLE, "\\usepgfplotslibrary{fillbetween}") Random.seed!(13) f(x, noisevariance = 1) = .1*sum((x .- 2).^2) + cos(sum(π/2 * x)) + noisevariance*randn() model = ElasticGPE(1, mean = MeanConst(0.), kernel = SEArd([0.], 5.), logNoise = 0.) modeloptimizer = MAPGPOptimizer(every = 50, noisebounds = [-2., 3], kernbounds = [[-1, 0], [4, 10]], maxeval = 40) opt = BOpt(f, model, ExpectedImprovement(), modeloptimizer, [-5.], [5.], initializer = ScaledLHSIterator([-5.], [5.], 5), maxiterations = 5, sense = Min, repetitions = 5, acquisitionoptions = (maxeval = 4000, restarts = 50), verbosity = Progress) result = boptimize!(opt) BayesianOptimization.setparams!(opt.acquisition, model) acqfunc = BayesianOptimization.acquisitionfunction(opt.acquisition, model) xs = -5:.02:5 ms, var = predict_f(model, xs) sig = sqrt.(var) fmax, xmax = BayesianOptimization.acquire_max(opt.opt, opt.lowerbounds, opt.upperbounds, opt.acquisitionoptions.restarts) @pgf p = GroupPlot({group_style = {group_size = "1 by 2", vertical_sep = "4mm"}, height = "4cm", width = "8cm", legend_pos = "outer north east", legend_style = {draw = "none"}}, {legend_columns = 1, xticklabels = ""}, Plot({only_marks}, Coordinates(model.x[:], -model.y[:])), "\\addlegendentry{observations}", Plot({no_marks, "red!20", name_path = "A", forget_plot}, Coordinates(xs, -ms .+ sig)), Plot({no_marks, "red!20", name_path = "B", forget_plot}, Coordinates(xs, -ms .- sig)), "\\addplot[red!20] fill between [of=A and B];", "\\addlegendentry{model std}", Plot({no_marks, blue, very_thick}, Coordinates(xs, -ms)), "\\addlegendentry{model mean}", Plot({no_marks, "green!80!black", very_thick}, Coordinates(xs, f.(xs, 0))), "\\addlegendentry{noisefree target}", {height = "3cm", ytick=[0, .05, .1], yticklabels = [0, "0.05", "0.1"]}, Plot({only_marks, red, mark="triangle*", mark_size = "5pt", mark_options = {rotate = "0"}}, Coordinates(xmax, [fmax])), Plot({no_marks, very_thick}, Coordinates(xs, (x -> acqfunc([x])).(xs))), "\\addlegendentry{next acquisition}", "\\addlegendentry{acquisition function}") PGFPlotsX.save("figure_9.png",p) #——————————————————————————————————————————————————————— @info "Section 4.5: Sparse GPs" #This script recreates figure 10 # imports and set up plots using Distributions using LinearAlgebra: diag using Random using GaussianProcesses import Statistics import PyPlot; plt=PyPlot using LaTeXStrings cbbPalette = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]; """ The true function we will be simulating from. """ function fstar(x::Float64) return abs(x-5)*cos(2*x) end σy = 10.0 n=5000 Random.seed!(1) # for reproducibility Xdistr = Beta(7,7) ϵdistr = Normal(0,σy) x = rand(Xdistr, n)*10; X = Matrix(x'); Y = fstar.(x) .+ rand(ϵdistr,n); xx = range(0, stop=10, length=200); plt.plot(x, Y, ".", color=cbbPalette[1], label="simulated data", alpha=0.1); plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2); plt.xlabel(L"X"); plt.ylabel(L"Y"); plt.title("Simulated Data"); plt.legend(loc="lower center", fontsize="small"); plt.close() #Exact GP k = SEIso(log(0.3), log(5.0)) GPE(X, Y, MeanConst(mean(Y)), k, log(σy)) # warm-up # time the second run (so the time doesn't include compilation): @time gp_full = GPE(X, Y, MeanConst(mean(Y)), k, log(σy)) # extract predictions predict_f(gp_full, xx; full_cov=true) # warm-up @time μ_exact, Σ_exact = predict_f(gp_full, xx; full_cov=true); #Plot exact GP xx = range(0, stop=10, length=200) plt.plot(x,Y, ".", color="black", markeredgecolor="None", alpha=0.1, label="simulated data"); plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2); plt.plot(xx, μ_exact, color=cbbPalette[3], label=L"$f(x) \mid Y, \hat\theta$"); plt.fill_between(xx, μ_exact.-sqrt.(diag(Σ_exact)), μ_exact.+sqrt.(diag(Σ_exact)), color=cbbPalette[3], alpha=0.5); plt.xlabel(L"X"); plt.ylabel(L"Y"); plt.title("Exact GP"); plt.legend(loc="lower center", fontsize="small"); plt.ylim(-7.5,7.5); plt.close() # Inducing points Xu = Matrix(quantile(x, [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.98])') # Subset of Regressors gp_SOR = GaussianProcesses.SoR(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); @time gp_SOR = GaussianProcesses.SoR(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); predict_f(gp_SOR, xx; full_cov=true) @time predict_f(gp_SOR, xx; full_cov=true); # Generic plot for approximate methods function plot_approximation(gp_exact, gp_approx, approx_label) μ_exact, Σ_exact = predict_f(gp_full, xx; full_cov=true) μapprox, Σapprox = predict_f(gp_approx, xx; full_cov=true) plt.plot(x,Y, ".", color="black", markeredgecolor="None", alpha=0.1, label="simulated data"); plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2); plt.plot(xx, μ_exact, color=cbbPalette[3], label=L"$f(x) \mid Y, \hat\theta$"); plt.fill_between(xx, μ_exact.-sqrt.(diag(Σ_exact)), μ_exact.+sqrt.(diag(Σ_exact)), color=cbbPalette[3], alpha=0.3); plt.plot(xx, μapprox, color=cbbPalette[6], label=L"$f(x) \mid Y, \hat\theta$"*approx_label); y_err = sqrt.(diag(Σapprox)) plt.fill_between(xx, μapprox.-y_err, μapprox.+y_err, color=cbbPalette[6], alpha=0.5); plt.xlabel(L"Input"); plt.ylabel(L"Output"); plt.plot(vec(Xu), fill(0.0, length(Xu)).-5, linestyle="None", marker=6, markersize=12, color="black", label=L"inducing points $X_\mathbf{u}$"); plt.legend(loc="upper center", fontsize="small"); plt.ylim(-10, 10); end # Plot for subset of regressors plot_approximation(gp_full, gp_SOR, "SoR"); plt.title("Subset of Regressors"); plt.savefig("figure_10-top-left.png"); plt.close() # Deterministic training conditionals gp_DTC = GaussianProcesses.DTC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); @time gp_DTC = GaussianProcesses.DTC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); μDTCgp, ΣDTCgp = predict_f(gp_DTC, xx; full_cov=true) @time predict_f(gp_DTC, xx; full_cov=true); # Plot for DTC plot_approximation(gp_full, gp_DTC, "DTC"); plt.title("Deterministic Training Conditionals"); plt.savefig("figure_10-top-right.png"); plt.close() # Fully independent training conditionals gp_FITC = GaussianProcesses.FITC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); @time gp_FITC = GaussianProcesses.FITC(X, Xu, Y, MeanConst(mean(Y)), k, log(σy)); predict_f(gp_FITC, xx; full_cov=true) @time predict_f(gp_FITC, xx; full_cov=true); # Plot for FITC plot_approximation(gp_full, gp_FITC, "FITC"); plt.title("Fully Independent Training Conditionals"); plt.savefig("figure_10-bottom-left.png"); plt.close() # Full-scale approximation inearest = [argmin(abs.(xi.-Xu[1,:])) for xi in x] blockindices = [findall(isequal(i), inearest) for i in 1:size(Xu,2)] GaussianProcesses.FSA(X, Xu, blockindices, Y, MeanConst(mean(Y)), k, log(σy)); @time gp_FSA = GaussianProcesses.FSA(X, Xu, blockindices, Y, MeanConst(mean(Y)), k, log(σy)); iprednearest = [argmin(abs.(xi.-Xu[1,:])) for xi in xx] blockindpred = [findall(isequal(i), iprednearest) for i in 1:size(Xu,2)] predict_f(gp_FSA, xx, blockindpred; full_cov=true) @time predict_f(gp_FSA, xx, blockindpred; full_cov=true); # Plot function plot_approximation(gp_exact, gp_approx, approx_label, blockindpred) μ_exact, Σ_exact = predict_f(gp_full, xx; full_cov=true) μapprox, Σapprox = predict_f(gp_approx, xx, blockindpred; full_cov=true) plt.plot(x,Y, ".", color="black", markeredgecolor="None", alpha=0.1, label="simulated data"); plt.plot(xx, fstar.(xx), color=cbbPalette[2], label=L"f^\star(X)", linewidth=2); plt.plot(xx, μ_exact, color=cbbPalette[3], label=L"$f(x) \mid Y, \hat\theta$"); plt.fill_between(xx, μ_exact.-sqrt.(diag(Σ_exact)), μ_exact.+sqrt.(diag(Σ_exact)), color=cbbPalette[3], alpha=0.3); plt.plot(xx, μapprox, color=cbbPalette[6], label=L"$f(x) \mid Y, \hat\theta$"*approx_label); y_err = sqrt.(diag(Σapprox)) plt.fill_between(xx, μapprox.-y_err, μapprox.+y_err, color=cbbPalette[6], alpha=0.5); plt.xlabel(L"Input"); plt.ylabel(L"Output"); plt.plot(vec(Xu), fill(0.0, length(Xu)).-5, linestyle="None", marker=6, markersize=12, color="black", label=L"inducing points $X_\mathbf{u}$"); plt.legend(loc="upper left", fontsize="small"); plt.ylim(-10, 10); end # from StatsBase.jl: midpoints(v::AbstractVector) = [Statistics.middle(v[i - 1], v[i]) for i in 2:length(v)] plt.axvline.(midpoints(vec(Xu)), alpha=0.9, color=cbbPalette[7], zorder=-1) plot_approximation(gp_full, gp_FSA, "FSA", blockindpred); plt.title("Full Scale Approximation"); plt.savefig("figure_10-bottom-right.png"); plt.close()