gpOptimizer Multi-Task Test#
At first we have to install the newest version of gpCAM
## First, install the newest version of gpcam and some plotting tools
#!pip install gpCAM==8.3.5
#!pip install matplotlib
#!pip install plotly
Setup#
import numpy as np
import matplotlib.pyplot as plt
from gpcam import fvGPOptimizer
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "png"
from itertools import product
from distributed import Client
client = Client()
%load_ext autoreload
%autoreload 2
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/distributed/node.py:187: UserWarning:
Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 45409 instead
Simple 1d Example#
Data#
def f1(x): return 0.5 * x
def f2(x): return (-.25 * x) - 1.
x_pred1d = np.linspace(0,1,50)
plt.plot(x_pred1d,f1(x_pred1d))
plt.plot(x_pred1d,f2(x_pred1d))
x_data = np.random.rand(15)
y_data1 = f1(x_data) + np.random.uniform(low = -0.01, high = 0.01, size =len(x_data))
y_data2 = f2(x_data) + np.random.uniform(low = -0.01, high = 0.01, size =len(x_data))
plt.scatter(x_data,y_data1)
plt.scatter(x_data,y_data2)
plt.show()
GP initialization#
my_gp2 = fvGPOptimizer(x_data.reshape(len(x_data),1), np.column_stack([y_data1, y_data2]))
print("Hybrid Training in progress")
my_gp2.train(max_iter = 20, method = "hgdl", dask_client=client)
print("MCMC Training in progress")
my_gp2.train(max_iter = 20, method = "mcmc")
print("Local Training in progress")
my_gp2.train(max_iter = 20, method = "local")
print("Global Training in progress")
my_gp2.train(max_iter = 20, method="global")
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:276: UserWarning:
Hyperparameters initialized to a vector of ones.
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:310: UserWarning:
No noise function or measurement noise provided. Noise variances will be set to (0.01 * mean(|y_data|))^2.
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:611: UserWarning:
Default hyperparameter_bounds initialized because none were provided. This will fail for custom kernel, mean, or noise functions
Hybrid Training in progress
MCMC Training in progress
Local Training in progress
Global Training in progress
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:611: UserWarning:
Default hyperparameter_bounds initialized because none were provided. This will fail for custom kernel, mean, or noise functions
array([1.42934675, 9.37176519, 0.08434703])
Predictions#
#mean and standard deviation
mean = my_gp2.posterior_mean(x_pred=x_pred1d.reshape(50,1), x_out=np.array([0,1]))["m(x)"]
std = np.sqrt(my_gp2.posterior_covariance(x_pred=x_pred1d.reshape(50,1), x_out=np.array([0,1]))["v(x)"])
plt.plot(x_pred1d.reshape(50,1),mean[:,0], label = "mean task 1")
plt.plot(x_pred1d.reshape(50,1),mean[:,1], label = "mean task 2")
plt.scatter(x_data,y_data1)
plt.scatter(x_data,y_data2)
plt.plot(x_pred1d,f1(x_pred1d), label = "task 1 ground truth")
plt.plot(x_pred1d,f2(x_pred1d), label = "task 2 ground truth")
plt.fill_between(x_pred1d, mean[:,0] - 3. * std[:,0], mean[:,0] + 3. * std[:,0], alpha = 0.5, color = "grey")
plt.fill_between(x_pred1d, mean[:,1] - 3. * std[:,1], mean[:,1] + 3. * std[:,1], alpha = 0.5, color = "grey")
plt.legend()
plt.show()
ask()ing for new actions#
print("asking for the best 2 candidates out of 3:")
print(my_gp2.ask([np.array([0.0]), np.array([0.2]), np.array([1.])], n=2))
print("asking for the best new suggestion across the space [0,1], vectorized=True (default):")
print(my_gp2.ask(np.array([[0.,1.]]), n=1))
print("asking for the best new suggestion across the space [0,1], with vectorization for the global optimizer:")
print(my_gp2.ask(np.array([[0.,1.]]), n=1, vectorized = False))
print("asking for the best new 4 suggestions across the space [0,1] with total correlation:")
print(my_gp2.ask(np.array([[0.,1.]]), n=4, vectorized = False, acquisition_function="total correlation"))
print("asking for an optimized result")
print(my_gp2.ask(np.array([[0.,1.]]), n=30, method = "hgdl", max_iter=100, dask_client=client))
asking for the best 2 candidates out of 3:
{'x': array([[0.],
[1.]]), 'f_a(x)': array([8.5536671e-05, 6.5916102e-05]), 'opt_obj': None}
asking for the best new suggestion across the space [0,1], vectorized=True (default):
{'x': array([[1.65913624e-06]]), 'f_a(x)': array([8.55344253e-05]), 'opt_obj': None}
asking for the best new suggestion across the space [0,1], with vectorization for the global optimizer:
{'x': array([[3.48608531e-06]]), 'f_a(x)': array([8.55319526e-05]), 'opt_obj': None}
asking for the best new 4 suggestions across the space [0,1] with total correlation:
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/gpcam/gp_optimizer_base.py:433: UserWarning:
Set vectorized=False for total corr. or rel. inf. entropy.
{'x': array([[0.18247251],
[0.00364221],
[0.34409772],
[0.39514323]]), 'f_a(x)': array([-56.88813621]), 'opt_obj': None}
asking for an optimized result
[[0.216049 ]
[0.44706669]
[0.13822277]] [0 1 2]
{'x': array([[0.216049 ],
[0.44706669],
[0.13822277]]), 'f_a(x)': array([4.28209191e-05, 3.93887565e-05, 3.09976183e-05]), 'opt_obj': <hgdl.hgdl.HGDL object at 0x7fcb500c49d0>}
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/gpcam/surrogate_model.py:118: UserWarning:
An insufficient number of unique optima identified. Try `total correlation` or the use of candidates by providing them as a list to ask().
plt.plot(x_pred1d, my_gp2.evaluate_acquisition_function(x_pred1d))
[<matplotlib.lines.Line2D at 0x7fcb18edbed0>]
#more ask()s to test:
my_gp2.ask(np.array([[0.,1.]]), n = 1, acquisition_function = 'relative information entropy set', x_out = np.array([0.,1.,2.]), vectorized = True)
my_gp2.ask(np.array([[0.,1.]]), n = 1, acquisition_function = 'relative information entropy', x_out = np.array([0.,1.,2.]), vectorized = True)
my_gp2.ask(np.array([[0.,1.]]), n = 1, acquisition_function = 'variance', x_out = np.array([0.,1.,2.]), vectorized = True)
my_gp2.ask(np.array([[0.,1.]]), n = 1, acquisition_function = 'total correlation', x_out = np.array([0.,1.,2.]), vectorized = True)
my_gp2.ask(np.array([[0.,1.]]), n = 4, acquisition_function = 'relative information entropy set', x_out = np.array([0.,1.,2.]), vectorized = True)
my_gp2.ask(np.array([[0.,1.]]), n = 5, acquisition_function = 'relative information entropy', x_out = np.array([0.,1.,2.]), vectorized = True)
my_gp2.ask(np.array([[0.,1.]]), n = 6, acquisition_function = 'variance', x_out = np.array([0.,1.,2.]), vectorized = True)
my_gp2.ask(np.array([[0.,1.]]), n = 2, acquisition_function = 'total correlation', x_out = np.array([0.,1.,2.]), vectorized = True)
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/gpcam/gp_optimizer_base.py:429: UserWarning:
You specified n>1 and method != 'hgdl' in ask(). The acquisition function has therefore been changed to 'total correlation'.
{'x': array([[0.35855907],
[0.0012013 ]]),
'f_a(x)': array([-30.16109881]),
'opt_obj': None}
Communicating data points with missing tasks as lists#
y_data = np.column_stack([y_data1, y_data2])
noise_variances = np.zeros(y_data.shape) + 0.01
y_data[2,0] = np.nan
noise_variances[2,0] = np.nan
y_data[6,1] = np.nan
noise_variances[6,1] = np.nan
my_gp2 = fvGPOptimizer(x_data.reshape(len(x_data),1), y_data, noise_variances=noise_variances)
print("Global Training in progress")
my_gp2.train(max_iter = 20)
Global Training in progress
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:276: UserWarning:
Hyperparameters initialized to a vector of ones.
array([0.33991123, 2.6001799 , 0.83184212])
2d Example#
Data#
data = np.load("./data/sim_variable_mod.npy")
sparsification = 4
x_data3 = data[:,5:][::sparsification]
y_data3 = data[:,0:2][::sparsification]
#it is good practice to check the format of the data
print(x_data3.shape)
print(y_data3.shape)
(1583, 3)
(1583, 2)
index = np.where(x_data3[:,2] == 1200.)[0]
x_data3=x_data3[index,0:2]
y_data3=y_data3[index]
for i in range(x_data3.shape[1]):
x_data3[:,i] = x_data3[:,i] - np.min(x_data3[:,i])
x_data3[:,i] = x_data3[:,i] / np.max(x_data3[:,i])
x = np.linspace(0,1,100)
y = np.linspace(0,1,100)
x_pred3D = np.asarray(list(product(x, y)))
Plotting#
def scatter(x,y,z,size=3, color = 1):
#if not color: color = z
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=x, y=y, z=z,mode='markers',marker=dict(color=color, size = size)))
fig.update_layout(autosize=False,
width=800, height=800,
font=dict(size=18,),
margin=dict(l=0, r=0, b=0, t=0))
fig.show()
scatter(x_data3[:,0],x_data3[:,1],y_data3[:,0], size = 5, color = y_data3[:,0])
scatter(x_data3[:,0],x_data3[:,1],y_data3[:,1], size = 5, color = y_data3[:,1])
A simple kernel definition#
It is vital in the multi-task case to think hard about kernel design. The kernel is now a function over X x X x T x T, where X is the input and T is the output space. Print the input into the kernel, it will have the dimensionality of this cartesian product space. The default kernel is just a Matern kernel in input and output directions.
#As imple kernel, that won't lead to good performance because its stationary
def mkernel(x1,x2,hps,obj):
d = obj.get_distance_matrix(x1,x2)
return hps[0] * obj.matern_kernel_diff1(d,hps[1])
Initialization#
(a) Default behavior#
from fvgp import fvGP
my_gp2 = fvGPOptimizer(x_data3,y_data3,)
print("Global Training in progress")
my_gp2.train(max_iter = 2)
Global Training in progress
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:276: UserWarning:
Hyperparameters initialized to a vector of ones.
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:310: UserWarning:
No noise function or measurement noise provided. Noise variances will be set to (0.01 * mean(|y_data|))^2.
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:611: UserWarning:
Default hyperparameter_bounds initialized because none were provided. This will fail for custom kernel, mean, or noise functions
array([0.17501628, 7.09465931, 7.45394069, 0.32026355])
(b) Simple custom kernel#
#A simple kernel, that won't lead to good performance because it's stationary
from gpcam.kernels import *
def mkernel(x1,x2,hps):
d = get_distance_matrix(x1,x2)
return hps[0] * matern_kernel_diff1(d,hps[1])
my_gp2 = fvGPOptimizer(x_data3,y_data3,
init_hyperparameters=np.ones((2)), kernel_function=mkernel
)
print("Global Training in progress")
bounds = np.array([[0.01,1.],[0.01,1.]])
my_gp2.train(hyperparameter_bounds=bounds,max_iter = 2)
Global Training in progress
array([0.95560859, 0.97877691])
(c) Deep kernel#
from gpcam.deep_kernel_network import *
iset_dim = 3
gp_deep_kernel_layer_width = 5
n = Network(iset_dim, gp_deep_kernel_layer_width)
print(n.number_of_hps)
def deep_multi_task_kernel(x1, x2, hps): # pragma: no cover
signal_var = hps[0]
length_scale = hps[1]
hps_nn = hps[2:]
w1_indices = np.arange(0, gp_deep_kernel_layer_width * iset_dim)
last = gp_deep_kernel_layer_width * iset_dim
w2_indices = np.arange(last, last + gp_deep_kernel_layer_width ** 2)
last = last + gp_deep_kernel_layer_width ** 2
w3_indices = np.arange(last, last + gp_deep_kernel_layer_width * iset_dim)
last = last + gp_deep_kernel_layer_width * iset_dim
b1_indices = np.arange(last, last + gp_deep_kernel_layer_width)
last = last + gp_deep_kernel_layer_width
b2_indices = np.arange(last, last + gp_deep_kernel_layer_width)
last = last + gp_deep_kernel_layer_width
b3_indices = np.arange(last, last + iset_dim)
n.set_weights(hps_nn[w1_indices].reshape(gp_deep_kernel_layer_width, iset_dim),
hps_nn[w2_indices].reshape(gp_deep_kernel_layer_width, gp_deep_kernel_layer_width),
hps_nn[w3_indices].reshape(iset_dim, gp_deep_kernel_layer_width))
n.set_biases(hps_nn[b1_indices].reshape(gp_deep_kernel_layer_width),
hps_nn[b2_indices].reshape(gp_deep_kernel_layer_width),
hps_nn[b3_indices].reshape(iset_dim))
x1_nn = n.forward(x1)
x2_nn = n.forward(x2)
d = get_distance_matrix(x1_nn, x2_nn)
k = signal_var * matern_kernel_diff1(d, length_scale)
return k
my_gp2 = fvGPOptimizer(x_data3,y_data3,
init_hyperparameters=np.ones((n.number_of_hps+2))*0.1, kernel_function=deep_multi_task_kernel
)
print("Global Training in progress")
bounds = np.zeros((n.number_of_hps+2,2))
bounds[0] = np.array([0.01,1.])
bounds[1] = np.array([0.1,1.])
bounds[2:] = np.array([-1,1])
my_gp2.train(hyperparameter_bounds=bounds,max_iter = 1000, method = "mcmc")
68
Global Training in progress
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp.py:310: UserWarning:
No noise function or measurement noise provided. Noise variances will be set to (0.01 * mean(|y_data|))^2.
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/fvgp/gp_mcmc.py:197: RuntimeWarning:
overflow encountered in exp
array([ 0.41790741, 0.2185018 , 0.12443581, -0.15450857, 0.14636568,
0.13243972, -0.33540748, 0.18862517, 0.4687587 , 0.19025292,
-0.08167761, 0.55493528, 0.64305148, 0.1127721 , -0.05096201,
-0.1545291 , 0.12360014, 0.20304214, -0.24424625, -0.22703699,
0.18183372, 0.02061596, -0.03412911, 0.29741794, -0.14908624,
0.30905425, -0.87057986, 0.1399316 , 0.77424055, 0.79463019,
0.52448388, 0.04086568, 0.81137773, 0.6840117 , -0.00098482,
-0.24615794, 0.47835083, -0.81512937, -0.08126196, -0.21573686,
-0.31345572, 0.0412614 , 0.24389149, 0.11930985, 0.07498446,
0.40086911, 0.14493775, 0.01227921, 0.06623487, 0.73356747,
0.17783797, 0.13492328, 0.23448295, -0.15410022, 0.50363847,
-0.6786774 , 0.81082115, 0.51388268, 0.43202967, 0.10978704,
0.18260347, 0.48750375, 0.27970894, 0.43627055, 0.24939099,
-0.01554257, 0.44839753, 0.61384258, 0.04020806, 0.17561932])
Prediction#
mean = my_gp2.posterior_mean(x_pred3D)["m(x)"]
var = my_gp2.posterior_covariance(x_pred3D)["v(x)"]
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=x_pred3D[:,0],y=x_pred3D[:,1], z=mean[:,0],
mode='markers',marker=dict(color=mean[:,0], size = 5)))
fig.add_trace(go.Scatter3d(x=x_data3[:,0], y=x_data3[:,1] , z=y_data3[:,0],
mode='markers',marker=dict(color=y_data3[:,0], size = 5)))
fig.update_layout(autosize=False,
width=800, height=800,
font=dict(size=18,),
margin=dict(l=0, r=0, b=0, t=0))
fig.show()
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=x_pred3D[:,0],y=x_pred3D[:,1], z=mean[:,1],
mode='markers',marker=dict(color=mean[:,0], size = 5)))
fig.add_trace(go.Scatter3d(x=x_data3[:,0], y=x_data3[:,1] , z=y_data3[:,1],
mode='markers',marker=dict(color=y_data3[:,1], size = 5)))
fig.update_layout(autosize=False,
width=800, height=800,
font=dict(size=18,),
margin=dict(l=0, r=0, b=0, t=0))
fig.show()
print("n=1")
my_gp2.ask(np.array([[0,1],[0,1]]), n = 1, max_iter=2,pop_size=2, info = True,
acquisition_function = 'relative information entropy set', x_out = np.array([0.,1.]), vectorized = True)
my_gp2.ask(np.array([[0,1],[0,1]]), n = 1, max_iter=2,pop_size=2, info = True,
acquisition_function = 'relative information entropy', x_out = np.array([0.,1.]), vectorized = True)
my_gp2.ask(np.array([[0,1],[0,1]]), n = 1, max_iter=2,pop_size=2,info = True,
acquisition_function = 'variance', x_out = np.array([0.,1.]), vectorized = True)
my_gp2.ask(np.array([[0,1],[0,1]]), n = 1, max_iter=2,pop_size=2,info = True,
acquisition_function = 'total correlation', x_out = np.array([0.,1.]), vectorized = True)
print("n>1")
my_gp2.ask(np.array([[0,1],[0,1]]), n = 4, max_iter=2,pop_size=2,info = True,
acquisition_function = 'relative information entropy set', x_out = np.array([0.,1.]), vectorized = True)
my_gp2.ask(np.array([[0,1],[0,1]]), n = 5, max_iter=2,pop_size=2,info = True,
acquisition_function = 'relative information entropy', x_out = np.array([0.]), vectorized = True)
my_gp2.ask(np.array([[0,1],[0,1]]), n = 3, max_iter=2,pop_size=2,info = True,
acquisition_function = 'variance', x_out = np.array([0.,1.]), vectorized = True)
my_gp2.ask(np.array([[0,1],[0,1]]), n = 2, max_iter=2,pop_size=2,info = True,
acquisition_function = 'total correlation', vectorized = True)
n=1
differential_evolution step 1: f(x)= 63.42387620216348
differential_evolution step 2: f(x)= 25.17070877959375
differential_evolution step 1: f(x)= 44.74877986332019
differential_evolution step 2: f(x)= 44.74877986332019
differential_evolution step 1: f(x)= -0.00909680733164231
differential_evolution step 2: f(x)= -0.00909680733164231
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/gpcam/gp_optimizer_base.py:433: UserWarning:
Set vectorized=False for total corr. or rel. inf. entropy.
differential_evolution step 1: f(x)= 2.0248602475180633
differential_evolution step 2: f(x)= 2.0248602475180633
n>1
/home/marcus/VirtualEnvironments/gpcam_dev/lib/python3.11/site-packages/gpcam/gp_optimizer_base.py:429: UserWarning:
You specified n>1 and method != 'hgdl' in ask(). The acquisition function has therefore been changed to 'total correlation'.
differential_evolution step 1: f(x)= 19.856044975582677
differential_evolution step 2: f(x)= 17.823313161809324
differential_evolution step 1: f(x)= 1426.1306144189923
differential_evolution step 2: f(x)= 516.6978539918208
differential_evolution step 1: f(x)= 13.882637130581998
differential_evolution step 2: f(x)= 13.882637130581998
differential_evolution step 1: f(x)= 7.040204821890029
differential_evolution step 2: f(x)= 6.112501761958583
{'x': array([[0.04489424, 0.97791073],
[0.06776132, 0.11414868]]),
'f_a(x)': array([-6.11250176]),
'opt_obj': None}