gpOptimizer: Single-Task Acquisition Functions#
#!pip install gpcam==8.3.7
#!pip install matplotlib
Setup#
import numpy as np
import matplotlib.pyplot as plt
from gpcam import GPOptimizer
import time
from loguru import logger
from distributed import Client
# Dask distributed client created with default arguments; it is handed to the
# HGDL-based train()/ask() calls below via their `dask_client` argument.
client = Client()
# IPython magics (notebook only, not plain Python): enable the autoreload
# extension so edited modules are presumably re-imported before each cell runs.
%load_ext autoreload
%autoreload 2
from itertools import product
# 1000 equally spaced prediction points on [0, 1], stored as a (1000, 1) column.
x_pred1D = np.linspace(start=0, stop=1, num=1000).reshape(1000, 1)
Data Preparation#
# 1000 equally spaced points on [0, 600].
# NOTE(review): not referenced by any other visible cell - confirm it is still needed.
x = np.linspace(start=0, stop=600, num=1000)
def f1(x):
    """Evaluate the 1-D test function that the GP is asked to learn.

    Computes ``sin(5x) + cos(10x) + 2 (x - 0.4)^2 cos(100x)`` element-wise.

    Parameters
    ----------
    x : float or np.ndarray
        Input position(s); any shape NumPy can broadcast over.

    Returns
    -------
    float or np.ndarray
        Function value(s), same shape as ``x``.
    """
    slow_part = np.sin(5. * x) + np.cos(10. * x)
    # Fast oscillation whose amplitude grows with the distance from x = 0.4.
    fast_part = (2. * (x - 0.4) ** 2) * np.cos(100. * x)
    return slow_part + fast_part
# 50 uniformly random input positions in [0, 1), stored as a (50, 1) column.
x_data = np.random.rand(50).reshape((-1, 1))
# Noisy observations: f1 at those positions plus uniform noise in [-0.25, 0.25).
y_data = f1(x_data[:, 0]) + (np.random.rand(x_data.shape[0]) - 0.5) * 0.5
# Plot the noise-free function (orange line) together with the noisy samples (black dots).
plt.figure(figsize=(15, 5))
# Fix the tick positions first, then enlarge the tick labels.
plt.yticks([-2, -1, 0., 1])
plt.xticks([0., 0.5, 1.0])
plt.yticks(fontsize=20)
plt.xticks(fontsize=20)
plt.plot(x_pred1D, f1(x_pred1D), linewidth=4, color='orange')
plt.scatter(x_data[:, 0], y_data, color='black')
<matplotlib.collections.PathCollection at 0x7fdd046b0c10>
Customizing the Gaussian Process#
def my_noise(x, hps):
    """Return the same noise value ``hps[2]`` for every data point.

    This is a simple noise function but can be made arbitrarily complex using
    many hyperparameters. The noise function can return a matrix or a vector;
    this one returns a vector.

    Parameters
    ----------
    x : np.ndarray
        Input points; only ``len(x)`` is used.
    hps : np.ndarray
        Hyperparameter vector; index 2 holds the noise value.

    Returns
    -------
    np.ndarray
        1-D array of length ``len(x)`` with every entry equal to ``hps[2]``.
    """
    return np.zeros(len(x)) + hps[2]
#stationary
from gpcam.kernels import *
def skernel(x1, x2, hps):
    """Stationary kernel: ``hps[0]`` times ``matern_kernel_diff1`` of the distances.

    The kernel follows the mathematical definition of a kernel. This means
    there is no limit to the variety of kernels you can define.

    Parameters
    ----------
    x1, x2 : np.ndarray
        The two point sets between which the kernel is evaluated.
    hps : np.ndarray
        Hyperparameter vector; ``hps[0]`` scales the kernel (signal variance)
        and ``hps[1]`` is passed to ``matern_kernel_diff1`` (length scale).

    Returns
    -------
    np.ndarray
        Kernel values; presumably shaped (len(x1), len(x2)) - confirm against
        ``get_distance_matrix``.
    """
    # Both helpers presumably come from the `gpcam.kernels` star import - confirm.
    d = get_distance_matrix(x1, x2)
    return hps[0] * matern_kernel_diff1(d, hps[1])
def meanf(x, hps):
    """Prior mean function ``1 - sin(hps[3] * x)`` on the first input column.

    This is a simple mean function but it can be arbitrarily complex using
    many hyperparameters.

    Parameters
    ----------
    x : np.ndarray
        2-D array of input points; only column 0 is used.
    hps : np.ndarray
        Hyperparameter vector; index 3 is the frequency of the sine term.

    Returns
    -------
    np.ndarray
        1-D array with one prior-mean value per row of ``x``.
    """
    return 1. - np.sin(hps[3] * x[:, 0])
# Sanity check: plot the prior mean function for one hand-picked
# hyperparameter vector to make sure it looks as intended.
plt.figure(figsize=(15, 5))
plt.plot(
    x_pred1D,
    meanf(x_pred1D, np.array([1., 1., 5.0, 2.])),
    label='task1',
    color='orange',
)
[<matplotlib.lines.Line2D at 0x7fdd0403b150>]
Initialization and Different Training Options#
# Build the optimizer with the custom kernel, prior mean and noise functions defined above.
my_gpo = GPOptimizer(
    x_data,
    y_data,
    # One entry per hyperparameter used by the kernel (hps[0], hps[1]),
    # the noise (hps[2]) and the prior mean (hps[3]).
    init_hyperparameters=np.full(4, 0.1),
    compute_device='cpu',
    kernel_function=skernel,
    kernel_function_grad=None,
    prior_mean_function=meanf,
    prior_mean_function_grad=None,
    noise_function=my_noise,
    # Alternative to a noise function:
    # noise_variances=np.zeros(y_data.shape) + 0.1,
    gp2Scale=False,
    calc_inv=False,
    ram_economy=False,
    args={'a': 1.5, 'b': 2.},
)
# One [lower, upper] row per hyperparameter, in the order used by
# skernel (hps[0], hps[1]), my_noise (hps[2]) and meanf (hps[3]).
hps_bounds = np.array(
    [
        [1e-2, 10.],  # signal variance for the kernel
        [1e-2, 10.],  # length scale for the kernel
        [1e-5, 0.1],  # noise
        [1e-5, 1.],  # mean
    ]
)
# tell() demo: append=True adds the points to the stored data (every entry is
# now present twice); append=False overwrites it, so we are back to normal.
my_gpo.tell(x_data, y_data, append=True)
my_gpo.tell(x_data, y_data, append=False)

# Each training variant below stores the returned hyperparameters in `hps` and
# restarts the timer, so the printed result and duration belong to that call
# (previously the last three calls printed the stale ADAM result and all
# timings were cumulative).
st = time.time()
print("Standard Training (MCMC)")
hps = my_gpo.train(hyperparameter_bounds=hps_bounds, info=True, max_iter=100)
print("Result=", hps, "after ", time.time() - st, " seconds")
print("")

st = time.time()
print("ADAM")
hps = my_gpo.train(hyperparameter_bounds=hps_bounds, info=True, max_iter=100, method="adam")
print("Result=", hps, "after ", time.time() - st, " seconds")
print("")

st = time.time()
print("Global Training")
hps = my_gpo.train(hyperparameter_bounds=hps_bounds, method='global', max_iter=20)
print("Result=", hps, "after ", time.time() - st, " seconds")
print("")

st = time.time()
print("Local Training")
hps = my_gpo.train(hyperparameter_bounds=hps_bounds, method='local')
print("Result=", hps, "after ", time.time() - st, " seconds")
print("")

st = time.time()
print("HGDL Training")
hps = my_gpo.train(hyperparameter_bounds=hps_bounds, method='hgdl', max_iter=2, dask_client=client)
print("Result=", hps, "after ", time.time() - st, " seconds")
print("")
Standard Training (MCMC)
Starting likelihood. f(x)= -59.62790989873237
Finished 10 out of 100 iterations. f(x)= -59.62790989873237
Finished 20 out of 100 iterations. f(x)= -59.62790989873237
Finished 30 out of 100 iterations. f(x)= -22.835961632240394
Finished 40 out of 100 iterations. f(x)= -24.11234514086804
Finished 50 out of 100 iterations. f(x)= -22.836496232670125
Finished 60 out of 100 iterations. f(x)= -22.60068057616417
Finished 70 out of 100 iterations. f(x)= -22.589393847343082
Finished 80 out of 100 iterations. f(x)= -22.153114512863567
Finished 90 out of 100 iterations. f(x)= -22.33957978921792
Result= [3.48678521 0.70967143 0.08293631 0.36468778] after 0.04512739181518555 seconds
ADAM
Result= [3.53830638 0.41045395 0.06829701 1.12341544] after 0.14817237854003906 seconds
Global Training
Result= [3.53830638 0.41045395 0.06829701 1.12341544] after 0.5272433757781982 seconds
Local Training
Result= [3.53830638 0.41045395 0.06829701 1.12341544] after 0.5312037467956543 seconds
HGDL Training
Result= [3.53830638 0.41045395 0.06829701 1.12341544] after 1.328965425491333 seconds
Asynchronous Training#
Train asynchronously on a remote server or locally. You can also start a bunch of different trainings on different computers. This training will continue without any signs of life until you call `my_gpo.stop_training(opt_obj)`.
# Reset the hyperparameters so the effect of the asynchronous training is visible.
my_gpo.set_hyperparameters(np.ones((4))/10.)
# With asynchronous=True, train() returns an optimization object that is
# polled and finally stopped below.
opt_obj = my_gpo.train(hyperparameter_bounds=hps_bounds, dask_client=client, asynchronous=True, method="hgdl")
print(my_gpo.hyperparameters)
# Poll ten times, pulling the current state of the running optimization into the GP.
for i in range(10):
    time.sleep(0.1)
    my_gpo.update_hyperparameters(opt_obj)
    print(my_gpo.hyperparameters)
time.sleep(0.1)
# The training keeps running until it is stopped explicitly.
my_gpo.stop_training(opt_obj)
[0.1 0.1 0.1 0.1]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
[1.12721582 0.25765029 0.06791184 1. ]
Visualizing the Results#
# Predict the posterior mean and (noise-including) variance on a dense 1-D grid.
x_pred = np.linspace(0, 1, 1000)
x_pred_column = x_pred.reshape(-1, 1)
mean1 = my_gpo.posterior_mean(x_pred_column)["m(x)"]
var1 = my_gpo.posterior_covariance(x_pred_column, variance_only=False, add_noise=True)["v(x)"]

# The grey band labelled "var" spans mean +/- three standard deviations.
band = 3. * np.sqrt(var1)
plt.figure(figsize=(16, 10))
plt.plot(x_pred, mean1, linewidth=4, label="posterior mean")
plt.plot(x_pred1D, f1(x_pred1D), linewidth=4, label="latent function")
plt.fill_between(x_pred, mean1 - band, mean1 + band, color="grey", alpha=0.5, label="var")
plt.scatter(x_data, y_data, color='black')

# Validation metrics of the GP against the noise-free function values.
truth = f1(x_pred1D).flatten()
print(my_gpo.rmse(x_pred1D, truth))
print(my_gpo.crps(x_pred1D, truth))
0.20756352542692824
(np.float64(0.10876856683529505), np.float64(0.12839879859312844))
# Available acquisition functions for the single-task case:
acquisition_functions = ["variance", "relative information entropy", "relative information entropy set",
                         "ucb", "lcb", "maximum", "minimum", "gradient", "expected improvement",
                         "probability of improvement", "target probability", "total correlation"]
plt.figure(figsize=(16, 10))
for acq_func in acquisition_functions:
    print("Acquisition function ", acq_func)
    res = my_gpo.evaluate_acquisition_function(x_pred, acquisition_function=acq_func)
    if len(res) == len(x_pred):
        # Rescale every curve to [0, 1] so all of them fit into one plot.
        res = res - np.min(res)
        peak = np.max(res)
        # A constant curve has peak == 0 after the shift; skip the division
        # so it is drawn as a flat zero line instead of NaNs.
        if peak > 0:
            res = res / peak
        plt.plot(x_pred, res, label=acq_func, linewidth=2)
    else:
        print("Some acquisition function return a scalar score for the entirety of points. Here: ", acq_func)
plt.legend()
plt.show()
Acquisition function variance
Acquisition function relative information entropy
Some acquisition function return a scalar score for the entirety of points. Here: relative information entropy
Acquisition function relative information entropy set
Acquisition function ucb
Acquisition function lcb
Acquisition function maximum
Acquisition function minimum
Acquisition function gradient
Acquisition function expected improvement
Acquisition function probability of improvement
Acquisition function target probability
Acquisition function total correlation
Some acquisition function return a scalar score for the entirety of points. Here: total correlation
ask()ing for Optimal Evaluations#
with several optimization methods and acquisition functions
# Let's test ask() for every combination of acquisition function and optimization method.
bounds = np.array([[0.0, 1.0]])
# product() iterates in the same order as the nested loops:
# all methods for the first acquisition function, then the next one, ...
for acq_func, method in product(acquisition_functions, ["global", "local", "hgdl"]):
    print("Acquisition function ", acq_func, " and method ", method)
    new_suggestion = my_gpo.ask(bounds, acquisition_function=acq_func,
                                method=method, max_iter=2, dask_client=client)
    print("led to new suggestion: \n", new_suggestion)
    print("")
Acquisition function variance and method global
led to new suggestion:
{'x': array([[0.99384818]]), 'f_a(x)': array([0.17934384]), 'opt_obj': None}
Acquisition function variance and method local
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([0.19472791]), 'opt_obj': None}
Acquisition function variance and method hgdl
[[0.]
[0.]
[0.]] [0]
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([0.19472791]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc52bad0>}
Acquisition function relative information entropy and method global
led to new suggestion:
{'x': array([[0.70827186]]), 'f_a(x)': array([-21.07334569]), 'opt_obj': None}
Acquisition function relative information entropy and method local
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([-13.31418797]), 'opt_obj': None}
Acquisition function relative information entropy and method hgdl
[[0.57945528]
[0.57945519]
[1. ]] [0 2]
led to new suggestion:
{'x': array([[0.57945528]]), 'f_a(x)': array([-37.56828919]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc2f9850>}
Acquisition function relative information entropy set and method global
led to new suggestion:
{'x': array([[0.00876069]]), 'f_a(x)': array([-16.70001273]), 'opt_obj': None}
Acquisition function relative information entropy set and method local
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([-13.31418797]), 'opt_obj': None}
Acquisition function relative information entropy set and method hgdl
/home/marcus/Coding/gpCAM/gpcam/gp_optimizer_base.py:433: UserWarning: I set vectorized=False for total corr. or rel. inf. entropy.
warnings.warn("I set vectorized=False for total corr. or rel. inf. entropy.")
[[0.]
[0.]
[1.]] [0 2]
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([-13.31418797]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc4668d0>}
Acquisition function ucb and method global
led to new suggestion:
{'x': array([[0.01092604]]), 'f_a(x)': array([1.71312063]), 'opt_obj': None}
Acquisition function ucb and method local
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([1.80564862]), 'opt_obj': None}
Acquisition function ucb and method hgdl
[[0. ]
[0. ]
[0.58714818]] [0 2]
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([1.80564862]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc54f090>}
Acquisition function lcb and method global
led to new suggestion:
{'x': array([[0.91488328]]), 'f_a(x)': array([2.33527251]), 'opt_obj': None}
Acquisition function lcb and method local
led to new suggestion:
{'x': array([[0.97750523]]), 'f_a(x)': array([2.09408102]), 'opt_obj': None}
Acquisition function lcb and method hgdl
[[0.91392672]
[0. ]
[0. ]] [0 1]
led to new suggestion:
{'x': array([[0.91392672]]), 'f_a(x)': array([2.33538924]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc4808d0>}
Acquisition function maximum and method global
led to new suggestion:
{'x': array([[0.01055686]]), 'f_a(x)': array([1.19842269]), 'opt_obj': None}
Acquisition function maximum and method local
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([1.22146488]), 'opt_obj': None}
Acquisition function maximum and method hgdl
[[0. ]
[0. ]
[0.59214898]] [0 2]
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([1.22146488]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc3ba750>}
Acquisition function minimum and method global
led to new suggestion:
{'x': array([[0.91764303]]), 'f_a(x)': array([1.97259604]), 'opt_obj': None}
Acquisition function minimum and method local
led to new suggestion:
{'x': array([[0.4958421]]), 'f_a(x)': array([-0.90223865]), 'opt_obj': None}
Acquisition function minimum and method hgdl
[[0.91987756]
[0.91987266]
[0.29612331]] [0 2]
led to new suggestion:
{'x': array([[0.91987756]]), 'f_a(x)': array([1.97330232]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc26f990>}
Acquisition function gradient and method global
led to new suggestion:
{'x': array([[0.75620085]]), 'f_a(x)': array([2.11345523]), 'opt_obj': None}
Acquisition function gradient and method local
led to new suggestion:
{'x': array([[0.22307024]]), 'f_a(x)': array([0.57545033]), 'opt_obj': None}
Acquisition function gradient and method hgdl
[[1. ]
[0.44676935]
[0. ]] [0 1 2]
led to new suggestion:
{'x': array([[1.]]), 'f_a(x)': array([1.898231]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc26ef50>}
Acquisition function expected improvement and method global
led to new suggestion:
{'x': array([[0.00768814]]), 'f_a(x)': array([0.07085537]), 'opt_obj': None}
Acquisition function expected improvement and method local
led to new suggestion:
{'x': array([[0.]]), 'f_a(x)': array([0.0776852]), 'opt_obj': None}
Acquisition function expected improvement and method hgdl
[[0.70688653]] [0]
led to new suggestion:
{'x': array([[0.70688653]]), 'f_a(x)': array([0.06185754]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdd046575d0>}
Acquisition function probability of improvement and method global
led to new suggestion:
{'x': array([[0.00607915]]), 'f_a(x)': array([0.06767992]), 'opt_obj': None}
Acquisition function probability of improvement and method local
led to new suggestion:
{'x': array([[0.99540587]]), 'f_a(x)': array([6.55517465e-59]), 'opt_obj': None}
Acquisition function probability of improvement and method hgdl
[[0.46537302]
[0.36041773]
[0.94710946]] [0 1 2]
led to new suggestion:
{'x': array([[0.46537302]]), 'f_a(x)': array([2.35029894e-10]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc24ac50>}
Acquisition function target probability and method global
led to new suggestion:
{'x': array([[0.00609515]]), 'f_a(x)': array([-0.39361315]), 'opt_obj': None}
Acquisition function target probability and method local
led to new suggestion:
{'x': array([[0.52489165]]), 'f_a(x)': array([-0.49987099]), 'opt_obj': None}
Acquisition function target probability and method hgdl
[[0.17431864]
[0.38788133]
[0.22415321]] [0 1 2]
led to new suggestion:
{'x': array([[0.17431864]]), 'f_a(x)': array([-0.5]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc29ab10>}
Acquisition function total correlation and method global
led to new suggestion:
{'x': array([[0.5930533]]), 'f_a(x)': array([-3.35837001]), 'opt_obj': None}
Acquisition function total correlation and method local
led to new suggestion:
{'x': array([[1.]]), 'f_a(x)': array([-5.01228809]), 'opt_obj': None}
Acquisition function total correlation and method hgdl
[[0.59387004]
[0.04525782]
[0.94852289]] [0 1 2]
led to new suggestion:
{'x': array([[0.59387004]]), 'f_a(x)': array([-3.35710405]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc487190>}
# Here we can test other options of the ask() command.
bounds = np.array([[0.0, 1.0]])
# The name must match the entry in `acquisition_functions` ("total correlation",
# with a space); the previous "total_correlation" is not one of the listed names.
new_suggestion = my_gpo.ask(bounds, acquisition_function="total correlation", method="global",
                            max_iter=10, n=5, info=True)
# The remaining calls only exercise different argument combinations; their results are discarded.
my_gpo.ask(bounds, n=5, acquisition_function="variance", vectorized=True, method='global')
my_gpo.ask(bounds, n=1, acquisition_function="relative information entropy", vectorized=True, method='global')
my_gpo.ask(bounds, n=2, acquisition_function="expected improvement", vectorized=True, method='global')
my_gpo.ask(bounds, n=1, acquisition_function="variance", vectorized=True, method='global')
my_gpo.ask(bounds, n=3, acquisition_function="variance", vectorized=True, method='hgdl', dask_client=client)
print(new_suggestion)
differential_evolution step 1: f(x)= 19.28897594723182
differential_evolution step 2: f(x)= 19.28897594723182
differential_evolution step 3: f(x)= 18.97703000490162
differential_evolution step 4: f(x)= 18.97703000490162
/home/marcus/Coding/gpCAM/gpcam/gp_optimizer_base.py:429: UserWarning: You specified n>1 and method != 'hgdl' in ask(). The acquisition function has therefore been changed to 'total correlation'.
warnings.warn("You specified n>1 and method != 'hgdl' in ask(). The acquisition function "
differential_evolution step 5: f(x)= 18.97703000490162
differential_evolution step 6: f(x)= 18.97703000490162
differential_evolution step 7: f(x)= 18.104923873417363
differential_evolution step 8: f(x)= 18.104923873417363
differential_evolution step 9: f(x)= 18.104923873417363
differential_evolution step 10: f(x)= 18.104923873417363
[[1. ]
[0. ]
[0. ]
[0.70688654]] [0 1 3]
{'x': array([[0.31445645],
[0.37910438],
[0.01420872],
[0.99200963],
[0.71480817]]), 'f_a(x)': array([-18.10492387]), 'opt_obj': None}
# We can evaluate the acquisition function on batches of candidates in parallel:
candidates = np.random.uniform(low=bounds[:, 0], high=bounds[:, 1], size=(30, 1))
candidate_list = list(candidates)
# Arguments shared by all four variants below.
variance_ask = {"n": 30, "acquisition_function": "variance"}
# ask sequentially
print("suggestions=", my_gpo.ask(candidate_list, vectorized=False, **variance_ask)["x"][0])
# ask in parallel on DASK workers, but sequentially on each worker:
print("suggestions=", my_gpo.ask(candidate_list, vectorized=False, batch_size=10, dask_client=client, **variance_ask)["x"][0])
# ask in parallel on DASK workers, and vectorized (if possible) on each worker:
print("suggestions=", my_gpo.ask(candidate_list, vectorized=True, batch_size=10, dask_client=client, **variance_ask)["x"][0])
# ask vectorized (if possible):
print("suggestions=", my_gpo.ask(candidate_list, vectorized=True, **variance_ask)["x"][0])
print("They should be the same!")
suggestions= [0.9934295]
suggestions= [0.9934295]
suggestions= [0.9934295]
suggestions= [0.9934295]
They should be the same!
bounds = np.array([[0.0, 1.0]])
# An ask() search can also be started asynchronously ("hgdlAsync") and checked later;
# the returned dict carries the running optimizer under "opt_obj".
new_suggestion = my_gpo.ask(
    bounds,
    acquisition_function=acquisition_functions[0],
    method="hgdlAsync",
    dask_client=client,
)
# Give the background search some time before looking at the result.
time.sleep(10)
print(new_suggestion)
# Shut the asynchronous search down; in a notebook the return value of this
# last expression is displayed as the cell output.
new_suggestion["opt_obj"].kill_client()
{'x': array([[0.]]), 'f_a(x)': array([-0.]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fdcfc40e790>}
[{'x': array([1.]),
'f(x)': np.float64(-0.19684370076146326),
'classifier': 'zero curvature',
'Hessian eigvals': array([0.]),
'df/dx': array([-3.0522374]),
'|df/dx|': np.float64(3.0522373994923413),
'radius': np.float64(0.0)},
{'x': array([1.]),
'f(x)': np.float64(-0.19684370076146326),
'classifier': 'zero curvature',
'Hessian eigvals': array([0.]),
'df/dx': array([-3.0522374]),
'|df/dx|': np.float64(3.0522373994923413),
'radius': np.float64(0.0)},
{'x': array([0.]),
'f(x)': np.float64(-0.1947279134038548),
'classifier': 'degenerate',
'Hessian eigvals': array([0.]),
'df/dx': array([2.55496837]),
'|df/dx|': np.float64(2.554968372731059),
'radius': np.float64(0.0)},
{'x': array([0.70688652]),
'f(x)': np.float64(-0.15505385384237302),
'classifier': 'minimum',
'Hessian eigvals': array([38.66676423]),
'df/dx': array([-7.31081862e-07]),
'|df/dx|': np.float64(7.310818617156656e-07),
'radius': np.float64(0.025862003707487573)}]