gpCAM Advanced Application

In this notebook, we will go through many features of gpCAM. Work through it and you are ready for your own autonomous experiment. This notebook uses gpCAM version 8.0.3!

####install gpcam here if you do not have already done so
#!pip install gpcam==8.0.3

Preparations

%load_ext autoreload
%autoreload 2

import plotly.graph_objects as go
import numpy as np
def plot(x,y,z,data = None):
    fig = go.Figure()
    fig.add_trace(go.Surface(x = x, y = y,z=z))
    if data is not None: 
        fig.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2],
                                   mode='markers'))

    fig.update_layout(title='Posterior Mean', autosize=True,
                  width=800, height=800,
                  margin=dict(l=65, r=50, b=65, t=90))


    fig.show()

Defining Prediction Points

x_pred = np.zeros((10000,2))
x = np.linspace(0,10,100)
y = np.linspace(0,10,100)
x,y = np.meshgrid(x,y)
counter = 0
for i in  range(100):
    for j in range(100):
        x_pred[counter] = np.array([x[i,j],y[i,j]])
        counter += 1

Definition of Optional Customization Functions

def optional_acq_func(x,obj):
    #this acquisition function makes the autonomous experiment a Bayesian optimization
    #but is just here as an example. 'acq_funciton="ucb"' will give you the same result
    a = 3.0 #3.0 for 95 percent confidence interval
    mean = obj.posterior_mean(x)["f(x)"]
    cov = obj.posterior_covariance(x)["v(x)"]
    return mean + a * np.sqrt(cov)

def optional_mean_func(x,hyperparameters,gp_obj):
    #the prior mean function should return a vector: a mean function evaluation for every x
    return np.zeros((len(x)))

def optional_cost_function(origin,x,arguments = None):
    #cost pf l1 motion in the input space
    offset = arguments["offset"]
    slope = arguments["slope"]
    d = np.abs(np.subtract(origin,x))
    c = (d * slope) + offset
    n = np.sum(c)
    return n

def optional_cost_update_function(costs, parameters):
    ###defining a cost update function might look tricky but just needs a bit
    ###of tenacity. And remember, this is optional, if you have a great guess for your costs you
    ###don't need to update the costs. Also, if you don't account for costs, this function is not needed.
    #In this example we just return the old parameters, but print the costs. 
    #I hope it is clear how the parameters can be fitted to the recorded costs.
    print("recorded costs (from,to,costs): ", costs)
    
    return parameters

AutonomousExperimenter Initialization

import time
from gpcam import AutonomousExperimenterGP

def instrument(data):
    print("Suggested by gpCAM: ")
    for entry in data:
        print("suggested:", entry["x_data"])
        entry["y_data"] = np.sin(np.linalg.norm(entry["x_data"]))
        entry["cost"]  = [np.array([0,0]),entry["x_data"],np.sum(entry["x_data"])]
        print("received: ", entry["y_data"])
    print("")
    return data

#initialization
#feel free to try different acquisition functions, e.g. optional_acq_func, "covariance", "shannon_ig"
#note how costs are defined in for the autonomous experimenter
my_ae = AutonomousExperimenterGP(np.array([[0,10],[0,10]]),
                                 np.ones((3)), np.array([[0.001,100.],[0.001,100.],[0.001,100.]]),
                                 init_dataset_size= 20, instrument_function = instrument,
                                 acquisition_function = optional_acq_func, 
                                 cost_function = optional_cost_function, 
                                 cost_update_function = optional_cost_update_function,
                                 cost_function_parameters={"offset": 5.0,"slope":10.0},
                                 kernel_function = None, store_inv = True,
                                 prior_mean_function = optional_mean_func,
                                 communicate_full_dataset = False, ram_economy = True)#, info = False, prior_mean_func = optional_mean_func)


print("length of the dataset: ",len(my_ae.x_data))


#my_ae.train_async()                 #train asynchronously
my_ae.train(method = "global")       #or not, or both, choose between "global","local" and "hgdl"

#update hyperparameters in case they are optimized asynchronously
my_ae.update_hps()
print(my_ae.gp_optimizer.hyperparameters)

#training and client can be killed if desired and in case they are optimized asynchronously
my_ae.kill_training()

Initial Model Vizualization

f = my_ae.gp_optimizer.posterior_mean(x_pred)["f(x)"]
f_re = f.reshape(100,100)

plot(x,y,f_re, data = np.column_stack([my_ae.x_data,my_ae.y_data]))

The go() Command

#run the autonomous loop
my_ae.go(N = 100, 
            retrain_async_at=[30,40,50,60,70,80,90],
            retrain_globally_at = [20,22,24,26,28,30,40,50,60,70],
            retrain_locally_at = [21,22,56,78],
            acq_func_opt_setting = lambda obj: "global" if len(obj.data.dataset) % 2 == 0 else "local",
            update_cost_func_at = (50,),
            training_opt_max_iter = 20,
            training_opt_pop_size = 10,
            training_opt_tol      = 1e-6,
            acq_func_opt_max_iter = 20,
            acq_func_opt_pop_size = 20,
            acq_func_opt_tol      = 1e-6,
            number_of_suggested_measurements = 1,
            acq_func_opt_tol_adjust = 0.1)

Visualization of the Resulting Model

res = my_ae.gp_optimizer.posterior_mean(x_pred)
f = res["f(x)"]
f = f.reshape(100,100)

plot(x,y,f, data = np.column_stack([my_ae.x_data,my_ae.y_data]))

Running a Multi-Task GP Autonomous Data Acquisition

This example uses 21 (!) dim robot data and 7 tasks, which you can all use or pick a subset of them

##prepare some data
import numpy as np
from scipy.interpolate import griddata
data = np.load("./data/sarcos.npy")
print(data.shape)
x = data[:,0:21]
y = data[:,21:23]

from gpcam import AutonomousExperimenterFvGP

def instrument(data, instrument_dict = {}):
    for entry in data:
        print("Suggested by gpCAM: ", entry["x_data"])
        entry["y_data"] = griddata(x,y,entry["x_data"],method = "nearest", fill_value = 0)[0]
        entry["output positions"] = np.array([[0],[1]])
        print("received: ", entry["y_data"])
    print("")
    return data

def acq_func(x,obj):
    #multi-tast autonomous experiments should make use of a user-defined acquisition function to
    #make full use of the surrogate and the uncertainty in all tasks.
    a = 3.0 #3.0 for ~95 percent confidence interval
    x = np.block([[x,np.zeros((len(x))).reshape(-1,1)],[x,np.ones((len(x))).reshape(-1,1)]]) #for task 0 and 1
    mean = obj.posterior_mean(x)["f(x)"]
    cov = obj.posterior_covariance(x)["v(x)"]
    #it takes a little bit of wiggling to get the tasks seperated and then merged again...
    task0index = np.where(x[:,21] == 0.)[0]
    task1index = np.where(x[:,21] == 1.)[0]
    mean_task0 = mean[task0index]
    mean_task1 = mean[task1index]
    cov_task0 = cov[task0index]
    cov_task1 = cov[task1index]
    mean = np.column_stack([mean_task0,mean_task1])
    cov  = np.column_stack([cov_task0 ,cov_task1 ])
    #and now we are interested in the l2 norm of the mean and variance at each input location.
    return np.linalg.norm(mean, axis = 1) + a * np.linalg.norm(cov,axis = 1)


input_s = np.array([np.array([np.min(x[:,i]),np.max(x[:,i])]) for i in range(len(x[0]))])
print("index set (input space) bounds:")
print(input_s)
print("hps bounds:")
hps_bounds = np.empty((23,2))
hps_bounds[:,0] = 0.0001
hps_bounds[:,1] = 100.0
hps_bounds[0] = np.array([0.0001, 10000])
print(hps_bounds)
print("shape of y: ")
print(y.shape)

my_fvae = AutonomousExperimenterFvGP(input_s,2,1,
                                     init_dataset_size= 10, instrument_function = instrument, \
                                     acquisition_function=acq_func)


my_fvae.train()
my_fvae.go(N = 50, retrain_async_at=(22,), retrain_globally_at=(50,90,120), retrain_locally_at=(25,))

Plotting the 0th task in a 2d slice

x_pred = np.zeros((10000,22))
x = np.linspace(input_s[0,0],input_s[0,1],100)
y = np.linspace(input_s[1,0],input_s[1,1],100)
x,y = np.meshgrid(x,y)
counter = 0
for i in  range(100):
    for j in range(100):
        x_pred[counter] = np.zeros((22))
        x_pred[counter,[0,1,-1]] = np.array([x[i,j],y[i,j],0.])
        counter += 1
res = my_fvae.gp_optimizer.posterior_mean(x_pred)
f = res["f(x)"]
f = f.reshape(100,100)

print(f)

plot(x,y,f)