gpCAM Advanced Application

In this notebook, we will go through many features of gpCAM. Work through it and you are ready for your own autonomous experiment. This notebook uses gpCAM version 8.0.3!

####install gpcam here if you do not have already done so
#!pip install gpcam==8.0.3

Preparations

%load_ext autoreload
%autoreload 2
import plotly.graph_objects as go
import numpy as np
def plot(x,y,z,data = None):
    fig = go.Figure()
    fig.add_trace(go.Surface(x = x, y = y,z=z))
    if data is not None: 
        fig.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2],
                                   mode='markers'))

    fig.update_layout(title='Posterior Mean', autosize=True,
                  width=800, height=800,
                  margin=dict(l=65, r=50, b=65, t=90))


    fig.show()

Defining Prediction Points

x_pred = np.zeros((10000,2))
x = np.linspace(0,10,100)
y = np.linspace(0,10,100)
x,y = np.meshgrid(x,y)
counter = 0
for i in  range(100):
    for j in range(100):
        x_pred[counter] = np.array([x[i,j],y[i,j]])
        counter += 1

Definition of Optional Customization Functions

def optional_acq_func(x,obj):
    #this acquisition function makes the autonomous experiment a Bayesian optimization
    #but is just here as an example. 'acq_funciton="ucb"' will give you the same result
    a = 3.0 #3.0 for 95 percent confidence interval
    mean = obj.posterior_mean(x)["f(x)"]
    cov = obj.posterior_covariance(x)["v(x)"]
    return mean + a * np.sqrt(cov)

def optional_mean_func(x,hyperparameters,gp_obj):
    #the prior mean function should return a vector: a mean function evaluation for every x
    return np.zeros((len(x)))

def optional_cost_function(origin,x,arguments = None):
    #cost pf l1 motion in the input space
    offset = arguments["offset"]
    slope = arguments["slope"]
    d = np.abs(np.subtract(origin,x))
    c = (d * slope) + offset
    n = np.sum(c)
    return n

def optional_cost_update_function(costs, parameters):
    ###defining a cost update function might look tricky but just needs a bit
    ###of tenacity. And remember, this is optional, if you have a great guess for your costs you
    ###don't need to update the costs. Also, if you don't account for costs, this function is not needed.
    #In this example we just return the old parameters, but print the costs. 
    #I hope it is clear how the parameters can be fitted to the recorded costs.
    print("recorded costs (from,to,costs): ", costs)
    
    return parameters

AutonomousExperimenter Initialization

import time
from gpcam import AutonomousExperimenterGP

def instrument(data):
    print("Suggested by gpCAM: ")
    for entry in data:
        print("suggested:", entry["x_data"])
        entry["y_data"] = np.sin(np.linalg.norm(entry["x_data"]))
        entry["cost"]  = [np.array([0,0]),entry["x_data"],np.sum(entry["x_data"])]
        print("received: ", entry["y_data"])
    print("")
    return data

#initialization
#feel free to try different acquisition functions, e.g. optional_acq_func, "covariance", "shannon_ig"
#note how costs are defined in for the autonomous experimenter
my_ae = AutonomousExperimenterGP(np.array([[0,10],[0,10]]),
                                 np.ones((3)), np.array([[0.001,100.],[0.001,100.],[0.001,100.]]),
                                 init_dataset_size= 20, instrument_function = instrument,
                                 acquisition_function = optional_acq_func, 
                                 cost_function = optional_cost_function, 
                                 cost_update_function = optional_cost_update_function,
                                 cost_function_parameters={"offset": 5.0,"slope":10.0},
                                 kernel_function = None, store_inv = True,
                                 prior_mean_function = optional_mean_func,
                                 communicate_full_dataset = False, ram_economy = True)#, info = False, prior_mean_func = optional_mean_func)


print("length of the dataset: ",len(my_ae.x_data))


#my_ae.train_async()                 #train asynchronously
my_ae.train(method = "global")       #or not, or both, choose between "global","local" and "hgdl"
#update hyperparameters in case they are optimized asynchronously
my_ae.update_hps()
print(my_ae.gp_optimizer.hyperparameters)
#training and client can be killed if desired and in case they are optimized asynchronously
my_ae.kill_training()

Initial Model Vizualization

f = my_ae.gp_optimizer.posterior_mean(x_pred)["f(x)"]
f_re = f.reshape(100,100)

plot(x,y,f_re, data = np.column_stack([my_ae.x_data,my_ae.y_data]))

The go() Command

#run the autonomous loop
my_ae.go(N = 100, 
            retrain_async_at=[30,40,50,60,70,80,90],
            retrain_globally_at = [20,22,24,26,28,30,40,50,60,70],
            retrain_locally_at = [21,22,56,78],
            acq_func_opt_setting = lambda obj: "global" if len(obj.data.dataset) % 2 == 0 else "local",
            update_cost_func_at = (50,),
            training_opt_max_iter = 20,
            training_opt_pop_size = 10,
            training_opt_tol      = 1e-6,
            acq_func_opt_max_iter = 20,
            acq_func_opt_pop_size = 20,
            acq_func_opt_tol      = 1e-6,
            number_of_suggested_measurements = 1,
            acq_func_opt_tol_adjust = 0.1)

Visualization of the Resulting Model

res = my_ae.gp_optimizer.posterior_mean(x_pred)
f = res["f(x)"]
f = f.reshape(100,100)

plot(x,y,f, data = np.column_stack([my_ae.x_data,my_ae.y_data]))

Running a Multi-Task GP Autonomous Data Acquisition

This example uses 21 (!) dim robot data and 7 tasks, which you can all use or pick a subset of them

##prepare some data
import numpy as np
from scipy.interpolate import griddata
data = np.load("./data/sarcos.npy")
print(data.shape)
x = data[:,0:21]
y = data[:,21:23]
from gpcam import AutonomousExperimenterFvGP

def instrument(data, instrument_dict = {}):
    for entry in data:
        print("Suggested by gpCAM: ", entry["x_data"])
        entry["y_data"] = griddata(x,y,entry["x_data"],method = "nearest", fill_value = 0)[0]
        entry["output positions"] = np.array([[0],[1]])
        print("received: ", entry["y_data"])
    print("")
    return data

def acq_func(x,obj):
    #multi-tast autonomous experiments should make use of a user-defined acquisition function to
    #make full use of the surrogate and the uncertainty in all tasks.
    a = 3.0 #3.0 for ~95 percent confidence interval
    x = np.block([[x,np.zeros((len(x))).reshape(-1,1)],[x,np.ones((len(x))).reshape(-1,1)]]) #for task 0 and 1
    mean = obj.posterior_mean(x)["f(x)"]
    cov = obj.posterior_covariance(x)["v(x)"]
    #it takes a little bit of wiggling to get the tasks seperated and then merged again...
    task0index = np.where(x[:,21] == 0.)[0]
    task1index = np.where(x[:,21] == 1.)[0]
    mean_task0 = mean[task0index]
    mean_task1 = mean[task1index]
    cov_task0 = cov[task0index]
    cov_task1 = cov[task1index]
    mean = np.column_stack([mean_task0,mean_task1])
    cov  = np.column_stack([cov_task0 ,cov_task1 ])
    #and now we are interested in the l2 norm of the mean and variance at each input location.
    return np.linalg.norm(mean, axis = 1) + a * np.linalg.norm(cov,axis = 1)


input_s = np.array([np.array([np.min(x[:,i]),np.max(x[:,i])]) for i in range(len(x[0]))])
print("index set (input space) bounds:")
print(input_s)
print("hps bounds:")
hps_bounds = np.empty((23,2))
hps_bounds[:,0] = 0.0001
hps_bounds[:,1] = 100.0
hps_bounds[0] = np.array([0.0001, 10000])
print(hps_bounds)
print("shape of y: ")
print(y.shape)

my_fvae = AutonomousExperimenterFvGP(input_s,2,1,
                                     init_dataset_size= 10, instrument_function = instrument, \
                                     acquisition_function=acq_func)


my_fvae.train()
my_fvae.go(N = 50, retrain_async_at=(22,), retrain_globally_at=(50,90,120), retrain_locally_at=(25,))

Plotting the 0th task in a 2d slice

x_pred = np.zeros((10000,22))
x = np.linspace(input_s[0,0],input_s[0,1],100)
y = np.linspace(input_s[1,0],input_s[1,1],100)
x,y = np.meshgrid(x,y)
counter = 0
for i in  range(100):
    for j in range(100):
        x_pred[counter] = np.zeros((22))
        x_pred[counter,[0,1,-1]] = np.array([x[i,j],y[i,j],0.])
        counter += 1
res = my_fvae.gp_optimizer.posterior_mean(x_pred)
f = res["f(x)"]
f = f.reshape(100,100)

print(f)

plot(x,y,f)