import numpy as np
import scipy.sparse as sparse
from sklearn.gaussian_process.kernels import ConstantKernel, RBF
from sklearn.gaussian_process import GaussianProcessRegressor
from scipy.linalg import block_diag, cho_solve, cholesky
from scipy.optimize import minimize
from .mlfm import BaseMLFM, Dimensions
from collections import namedtuple
from . import util
MLFMSuccApproxFitResults = namedtuple('MLFMSuccApproxFitResults',
'g, beta, logpsi, logphi, loggamma, logtau, logalpha, \
optimres')
class BaseMLFMSuccApprox(BaseMLFM):
is_tt_aug = False
def __init__(self, *args, order=1, **kwargs):
super(BaseMLFMSuccApprox, self).__init__(*args, **kwargs)
if isinstance(order, int) and order > 0:
self.order = order
else:
raise ValueError("The approximation order must be a"
" positive integer")
def _setup_latentstates(self, kernels=None, **kwargs):
"""
Create the initial GP approximation
"""
if kernels is None:
ls_kernels = [ConstantKernel()*RBF() for k in range(self.dim.K)]
self.latentstates = [GaussianProcessRegressor(kern)
for kern in ls_kernels]
else:
self.latentstates = [GaussianProcessRegressor(kern) for kern in ls_kernels]
def _setup_times(self, tt, tt_aug=None, ifix=0, **kwargs):
if tt_aug is not None:
self.ttc = tt_aug.copy()
self._tt_data = tt.copy()
self.data_inds = kwargs['data_inds']
self._Ndata = len(self.data_inds)
self.is_tt_aug = True
else:
self.ttc = tt
self.ifix = ifix
self.dim = Dimensions(self.ttc.size, self.dim.K, self.dim.R, self.dim.D)
self._weight_matrix = weight_matrix(self.ttc, ifix)
def _setup(self, times, **kwargs):
"""Prepares the model for fitting
"""
if not hasattr(self, 'ttc'):
self._setup_times(times, **kwargs)
if not hasattr(self, 'latentstates'):
# no latent states supplied so use default setting
self._setup_latentstates(**kwargs)
def _K(self, g, beta, ifix=None):
""" Returns the discretised integral operator
"""
if ifix is None:
ifix = self.ifix
g = g.reshape(self.dim.R, self.dim.N).T # set g as g_{nr}, n=1..N, r=1..R
g = np.column_stack((np.ones(self.dim.N), g)) # append a col. of 1s to g
# matrices A[r] = sum(b_rd * Ld
struct_mats = np.array([sum(brd*Ld for brd, Ld in zip(br, self.basis_mats))
for br in beta])
# Add an axis for n=0,...,N-1
At = struct_mats[None, :, :, :] * g[..., None, None]
At = At.sum(1)
W = weight_matrix(self.ttc, ifix) #self._weight_matrix
K = sum(sparse.kron(sparse.kron(W[:, i][:, None],
sparse.eye(1, self.dim.N, i)),
At[i, ...])
for i in range(self.dim.N))
I = sparse.eye(self.dim.K)
# because x[ifix] is fixed by the integral transform
# we need to add a column of identity matrices to K
# - could do this by assignment
eifix = sparse.eye(1, self.dim.N, ifix)
K += sum(sparse.kron(sparse.kron(sparse.eye(1, self.dim.N, i).transpose(),
eifix),
I)
for i in range(self.dim.N))
return K
def _K2(self, g, beta, ifix=None):
""" K acting on vec(X) rather than ravel(X)
"""
K = self._K(g, beta, ifix)
T = util.T_xrav_tovecx(self.dim.N, self.dim.K)
Tt = T.transpose()
return T.dot(K.dot(Tt))
def get_weight_matrix(self, tt, i):
return weight_matrix(self.ttc, i)
def _K3(self, g, beta, ifix, printit=False):
W = weight_matrix(self.ttc, ifix)
g = g.reshape(self.dim.R, self.dim.N).T
g = np.column_stack((np.ones(self.dim.N), g))
# matrices A[r] = sum(b_rd * Ld
struct_mats = np.array([sum(brd*Ld for brd, Ld in zip(br, self.basis_mats))
for br in beta])
# Add an axis for n=0,...,N-1
At = struct_mats[None, :, :, :] * g[..., None, None]
At = At.sum(1)
eifix = np.eye(1, self.dim.N, ifix)
K = sum(np.kron(At[i,...],
np.kron(W[:, i][:, None], np.eye(1, self.dim.N, i)))
for i in range(self.dim.N))
I = np.eye(self.dim.K)
eifix = np.eye(1, self.dim.N, ifix)
K += sum(np.kron(I,
np.kron(np.eye(1, self.dim.N, i).T, eifix))
for i in range(self.dim.N))
return K
def sparse_vecK_aff_rep(self, beta):
"""Returns the vectorisation of the discretised integral
operator
"""
V0 = self._K(np.zeros(self.dim.N*self.dim.R), beta) # lazy way of finding K[0]
W = self._weight_matrix
# A[r] = sum_{brd} * Ld
struct_mats = np.array([sum(brd*Lrd for brd, Lrd in zip(br, self.basis_mats))
for br in beta])
# columns of dK.dot(g) + vec(V0) = vec(K[g, beta])
NK = self.dim.N*self.dim.K
K = self.dim.K
cols = []
for r in range(self.dim.R):
for n in range(self.dim.N):
v1 = sparse.csr_matrix((n*K*NK, 1)) # Padding of zeros either
v3 = sparse.csr_matrix(((NK-(n+1)*K)*NK,1)) # side of the vector
v2 = np.kron(W[:, n][:, None], struct_mats[r+1]).T.ravel()[:, None]
cols.append(sparse.vstack((v1, v2, v3)))
V = sparse.hstack(cols)
v0 = sparse.coo_matrix(V0.todense().T.ravel()).transpose() # ugly step
# convert V, v0 to sparse csr (quick for .dot( some_dense_arr )
return V.tocsr(), v0.tocsr()
@property
def Ndata(self):
if self.is_tt_aug:
return len(self.data_inds)
else:
return self.dim.N
class MLFMSAFitMixin:
"""
Utility mixin for initialising fit functions
"""
def _fit_kwarg_parser(self, **kwargs):
"""
Parses kwargs to determine which var. to be kept fixed during optimisation.
"""
# possibly makes these properties of a class
var_names = ['g', 'beta', 'logpsi', 'logphi', 'loggamma', 'logtau', 'logalpha']
# controls the default fixing of parameters
default = {'g': False, 'beta': True, 'logpsi': True,
'logphi': True, 'loggamma': False, 'logtau': True, 'logalpha': True}
# check **kwargs to see if any variables have been kept fixed
is_fixed_vars = [kwargs.pop("".join((vn, "_is_fixed")),
default[vn])
for vn in var_names]
return is_fixed_vars
def _fit_init(self, is_fixed_vars, **kwargs):
"""
Handles initialisation of optimisation
"""
init_strategies = {
'g': lambda : np.zeros(self.dim.N*self.dim.R),
'logpsi': lambda : np.concatenate([gp.kernel.theta
for gp in self.latentforces]),
'beta': lambda : np.row_stack((np.zeros(self.dim.D),
np.eye(self.dim.R, self.dim.D))).ravel(),
'logphi': lambda : np.concatenate([gp.kernel.theta
for gp in self.latentstates]),
'loggamma': lambda : np.log(1e-4*np.ones(self.dim.K)),
'logtau': lambda : np.log(1e4*np.ones(self.dim.K)),
'logalpha': lambda : np.zeros(self.dim.K),
}
var_names = ['g', 'beta', 'logpsi', 'logphi', 'loggamma', 'logtau', 'logalpha']
full_init = [kwargs.pop("".join((vn, '0')),
init_strategies[vn]())
for vn in var_names]
full_init_shape = [item.size for item in full_init]
free_vars, fixed_vars = ([], [])
for item, boolean in zip(full_init, is_fixed_vars):
if boolean:
fixed_vars.append(item)
else:
free_vars.append(item)
free_vars_shape = [item.size for item in free_vars]
return np.concatenate(free_vars), free_vars_shape, fixed_vars
def _fit_find_priors(self, **kwargs):
priors = {}
for vn in ['logpsi', 'beta', 'logtau', 'loggamma', 'logalpha']:
key = '_'.join((vn, 'prior'))
try:
priors[vn] = kwargs[key].logpdf
except KeyError:
pass
return priors
def _var_mixer(self, free_vars, free_vars_shape, fixed_vars, is_fixed_vars):
""" Utility function to mix the free vars and the fixed vars
"""
free_vars = util._unpack_vector(free_vars, free_vars_shape)
if is_fixed_vars is None:
return free_vars
else:
full_vars = []
ifree, ifixed = (0, 0)
for b in is_fixed_vars:
if b:
full_vars.append(fixed_vars[ifixed])
ifixed += 1
else:
full_vars.append(free_vars[ifree])
ifree += 1
return full_vars
class PredictMixin:
"""
Extends the capabilties to behave like the mutliout GP
"""
def predict_lf(self, tt, return_std=False):
Cff_ = block_diag(*(gp.kernel_(tt[:, None], self.X_train_)
for gp in self.latentforces))
lf_mean = Cff_.dot(self.g_alpha_)
lf_mean = lf_mean.reshape(self.dim.R, tt.size).T
if return_std:
A = Cff_.dot(cho_solve((self.g_L_, True), np.eye(self.dim.N*self.dim.R)))
K = A.dot(self.g_laplace_cov.dot(A.T))
Cp = []
for gp in self.latentforces:
C22 = gp.kernel_(tt[:, None])
C21 = gp.kernel_(tt[:, None], self.X_train_)
Cp.append(C22 - C21.dot(cho_solve((gp.L_, True), C21.T)))
K += block_diag(*Cp)
return lf_mean, np.sqrt(np.diag(K))
return lf_mean
[docs]class MLFMSuccApprox(PredictMixin, MLFMSAFitMixin, BaseMLFMSuccApprox):
[docs] def __init__(self, *args, **kwargs):
super(MLFMSuccApprox, self).__init__(*args, **kwargs)
[docs] def fit(self, times, Y, **kwargs):
""" Carries out MAP optimisation of the model parameters.
"""
def objfunc(arg, free_vars_shape, fixed_vars):
g, beta, logpsi, logphi, loggamma, logtau, logalpha = \
self._var_mixer(arg, free_vars_shape, fixed_vars, is_fixed_vars)
# reshape beta
beta = beta.reshape(self.dim.R+1, self.dim.D)
try:
ll, ll_g_grad, ll_lgam_grad, ll_lalf_grad = \
self.log_likelihood(Y, g, beta,
logphi, loggamma, logtau, logalpha,
eval_gradient=True)
lp, lp_g_grad = \
self.prior_logpdf(g, logpsi,
eval_gradient=True)
grad = [ll_g_grad + lp_g_grad,
None,
None,
None,
ll_lgam_grad,
None,
ll_lalf_grad]
# add contributions from various priors
for vn, indx in zip(['loggamma'],
[(4, loggamma),]):
try:
prior_logpdf = priors[vn]
ind, x = indx # unpack the value of var. name
vn_lp, vn_lp_grad = prior_logpdf(x, True)
lp += vn_lp
grad[ind] += vn_lp_grad
except KeyError:
pass
grad = np.concatenate([item
for item, b in zip(grad, is_fixed_vars)
if not b])
return -(ll+lp), -grad
except:
return np.inf, np.zeros(arg.shape)
# make sure the model is ready for fitting by calling _setup
self._setup(times, **kwargs)
# parse kwargs to see if any args to be kept fixed
is_fixed_vars = self._fit_kwarg_parser(**kwargs)
# Check for any priors
priors = self._fit_find_priors(**kwargs)
init, free_vars_shape, fixed_vars = \
self._fit_init(is_fixed_vars, **kwargs)
res = minimize(objfunc, init, jac=True,
args=(free_vars_shape, fixed_vars))
# unpack the results
g_, beta_, logpsi_, logphi_, loggamma_, logtau_, logalpha_ = \
self._var_mixer(res.x, free_vars_shape, fixed_vars, is_fixed_vars)
logpsi_ = util._unpack_vector(logpsi_,
util._get_gp_theta_shape(self.latentforces))
for r, gp in enumerate(self.latentforces):
gp.kernel_ = gp.kernel.clone_with_theta(logpsi_[r])
Cr = gp.kernel_(self.ttc[:, None])
Cr[np.diag_indices_from(Cr)] += 1e-5
gp.L_ = cholesky(Cr, True)
# store some variables for later use
self.X_train_ = self.ttc[:, None]
if not is_fixed_vars[0]:
# Optimised g - so get Laplace approx to from optim
self.g_laplace_cov = res.hess_inv[:self.dim.N*self.dim.R,
:self.dim.N*self.dim.R]
Cff = block_diag(*(gp.kernel_(self.X_train_)
for gp in self.latentforces))
Cff[np.diag_indices_from(Cff)] += 1e-4
self.g_L_ = cholesky(Cff, lower=True)
self.g_alpha_ = cho_solve((self.g_L_, True), g_)
return MLFMSuccApproxFitResults(g_.reshape(self.dim.R, self.dim.N),
beta_, logpsi_, logphi_, loggamma_, logtau_, logalpha_,
res)
[docs] def marginal_covar_matrix(self, g, beta, gamma, logphi, alpha, eval_gradient=False):
"""
Covariance matrix K^n C0 K^nT
"""
# reshape g
g = g.reshape(self.dim.R, self.dim.N).T # reshape
g = np.column_stack((np.ones(self.dim.N), g)) # and append a col. of 1s
# get the struct matrices
struct_mats = np.array([sum(brd*Ld for brd, Ld in zip(br, self.basis_mats))
for br in beta])
# Add an axis for n=0,...,N-1
At = struct_mats[None, :, :, :] * g[..., None, None]
At = At.sum(1)
# Get the additive noise error term
#Gamma = np.diag(np.concatenate([gk*np.ones(self.dim.N) for gk in gamma]))
Gamma = np.kron(np.eye(self.dim.N), np.diag(gamma))
# Get the initial state covariance matrix
C = np.kron(np.ones((self.dim.N, self.dim.N)),
np.diag(alpha))
####
# Construct the transformation matrix K
W = self._weight_matrix
K = sum(sparse.kron(sparse.kron(W[:, i][:, None],
sparse.eye(1, self.dim.N, i)),
At[i, ...])
for i in range(self.dim.N))
I = sparse.eye(self.dim.K)
eifix = sparse.eye(1, self.dim.N, self.ifix)
K += sum(sparse.kron(sparse.kron(sparse.eye(1, self.dim.N, i).transpose(),
eifix),
I)
for i in range(self.dim.N))
#_K = np.array(K.todense())
if eval_gradient:
dCdg = [np.zeros((self.dim.N*self.dim.K, self.dim.N*self.dim.K))]*self.dim.N*self.dim.R
#_dCdg = np.zeros((self.dim.N*self.dim.R,
# self.dim.N*self.dim.K,
# self.dim.N*self.dim.K))
# Gradient of the operator K wrt g_rn
dK = [sparse.kron(sparse.kron(W[:, n][:, None],
sparse.eye(1, self.dim.N, n)),
struct_mats[r+1])
for r in range(self.dim.R)
for n in range(self.dim.N)]
#_dK = np.array([np.kron(W[:, n][:, None], struct_mats[r+1])
# for r in range(self.dim.R)
# for n in range(self.dim.N)])
dCdgamma = np.zeros((self.dim.N*self.dim.K,
self.dim.N*self.dim.K,
self.dim.K))
dCdalpha = [np.kron(np.ones((self.dim.N, self.dim.N)),
np.diag(np.eye(N=1, M=self.dim.K, k=k).ravel()))
for k in range(self.dim.K)]
for m in range(self.order):
dCdg = np.array([K.dot(K.dot(dC_grn).T) for dC_grn in dCdg])
for i, dK_grn in enumerate(dK):
kcdk = K.dot(dK_grn.dot(C).T)
dCdg[i] += kcdk + kcdk.T
# update gradient of C wrt to g_{rn}
#CKt = K.dot(C).T
#print(K.shape, _dCdg.shape)
#expr = _K.dot(_dCdg)
#print(expr.shape)
#assert(False)
#expr = dK.dot(CKt.reshape(self.dim.N, self.dim.K, self.dim.N*self.dim.K)).sum(2)
#expr += expr.transpose(0, 2, 1)
#expr2 = K.dot(K.dot(dCdg).T)
#dCdg = expr + expr2
#dCdg = [K.dot(dK_grn.dot(C).T) + \
# dK_grn.dot(K.dot(C).T) + \
# K.dot(K.dot(dC_grn).T)
# for dK_grn, dC_grn in zip(dK, dCdg)]
# update gradient of C wrt to Gamma
dCdgamma = np.dstack((K.dot(K.dot(dCdgamma[..., i]).T)
for i in range(dCdgamma.shape[-1])))
diag_ek = np.zeros((self.dim.K, self.dim.K))
for k in range(self.dim.K):
diag_ek[k, k] = 1.
dCdgamma[..., k] += np.kron(np.eye(self.dim.N), diag_ek)
diag_ek[k, k] = 0.
# update gradient of C wrt to alpha
dCdalpha = [K.dot(K.dot(dCdak).T) for dCdak in dCdalpha]
C = K.dot(K.dot(C).T) + Gamma
#print("...done.")
dCdg = np.dstack(dCdg)
return (C,
dCdg,
dCdgamma,
np.dstack(dCdalpha))
else:
for m in range(self.order):
C = K.dot(K.dot(C).T) + Gamma
return C
[docs] def marginal_covar_matrix2(self, g, beta, gamma, logphi, alpha, eval_gradient=False):
"""
Rewrite to prevent some of the looping
"""
g = g.reshape(self.dim.R, self.dim.N).T # reshape
g = np.column_stack((np.ones(self.dim.N), g)) # and append a col. of 1s
# get the struct matrices
struct_mats = np.array([sum(brd*Ld for brd, Ld in zip(br, self.basis_mats))
for br in beta])
# Add an axis for n=0,...,N-1
At = struct_mats[None, :, :, :] * g[..., None, None]
At = At.sum(1)
# Get the additive noise error term
Gamma = np.kron(np.eye(self.dim.N), np.diag(gamma))
# Get the initial state covariance matrix
C = np.kron(np.ones((self.dim.N, self.dim.N)),
np.diag(alpha))
####
# Construct the transformation matrix K
W = self._weight_matrix
K = sum(sparse.kron(sparse.kron(W[:, i][:, None],
sparse.eye(1, self.dim.N, i)),
At[i, ...])
for i in range(self.dim.N))
I = sparse.eye(self.dim.K)
eifix = sparse.eye(1, self.dim.N, self.ifix)
K += sum(sparse.kron(sparse.kron(sparse.eye(1, self.dim.N, i).transpose(),
eifix),
I)
for i in range(self.dim.N))
if eval_gradient:
# dK[r, n, ...] = wn (x) Ar
dK = [sparse.kron(sparse.kron(W[:, n][:, None],
sparse.eye(1, self.dim.N, n)),
struct_mats[r+1])
for r in range(self.dim.R)
for n in range(self.dim.N)]
dCdg = np.zeros((self.dim.N*self.dim.R,
self.dim.N*self.dim.K,
self.dim.N*self.dim.K))
dCdalpha = [np.kron(np.ones((self.dim.N, self.dim.N)),
np.diag(np.eye(N=1, M=self.dim.K, k=k).ravel()))
for k in range(self.dim.K)]
K = K.todense()
Kt = np.array(K.transpose())
for m in range(self.order):
CKt = C.dot(Kt)
# Update gradient of C wrt to g_{rn}
for i, (dK_rn, dC_grn) in enumerate(zip(dK, dCdg)):
a = dK_rn.dot(CKt)
dCdg[i] = a + a.T + K.dot(dC_grn.dot(Kt))
# Update gradient of C wrt to alpha
dCdalpha = [K.dot(dCdak.dot(Kt))
for dCdak in dCdalpha]
# Finally update C
C = K.dot(CKt) + Gamma
return C, dCdg, np.stack(dCdalpha)
def data_cov(self, tau, logphi):
Kdat = np.diag((np.ones((self.Ndata, self.dim.K))*(1/tau)).ravel())
return Kdat
logphi = util._unpack_vector(logphi,
util._get_gp_theta_shape(self.latentstates))
if self.is_tt_aug:
tt = self._tt_data
else:
tt = self.ttc
datacov = [gp.kernel.clone_with_theta(theta)(tt[:, None])
for gp, theta in zip(self.latentstates, logphi)]
for item in datacov:
item /= tau[0]
datacov = block_diag(*datacov)
#for k, C in enumerate(datainvcov):
# C[np.diag_indices_from(C)] += self.latentstates[k].alpha
#datainvcov = [np.linalg.cholesky(C) for C in datainvcov]
#datainvcov = block_diag(*[cho_solve((L, True), np.eye(L.shape[0]))
#for L in datainvcov])
# this is the inverse of Cov{vec(X)}, we need
# Cov{rav(X)} = T Cov{vec(X)} Tt
# ---> InvCov{rav(X)} = Tt InvCov{vec(X)} T
T = util.T_xrav_tovecx(self.Ndata, self.dim.K)
#datainvcov = Tt.dot(Tt.dot(datainvcov).T)
#Kdat += T.dot(T.dot(datacov).T)
Kdat = datacov
return Kdat
def log_likelihood(self, y, g, beta,
logphi, loggamma, logtau, logalpha,
eval_gradient=False, **kwargs):
if eval_gradient:
K, K_g_grad, K_gamma_grad, K_alpha_grad = \
self.marginal_covar_matrix(g, beta, np.exp(loggamma), logphi, np.exp(logalpha), True)
else:
K = self.marginal_covar_matrix(g, beta, np.exp(loggamma), logphi, np.exp(logalpha))
if self.is_tt_aug:
data_inds = np.asarray(self.data_inds)
data_inds = (data_inds*self.dim.K)[:, None] + np.arange(self.dim.K)[None, :]
data_inds_ix_ = np.ix_(data_inds.ravel(), data_inds.ravel())
K = K[data_inds_ix_]
if eval_gradient:
K_g_grad = np.dstack([K_g_grad[..., i][data_inds_ix_]
for i in range(K_g_grad.shape[-1])])
K_gamma_grad = np.dstack([K_gamma_grad[..., i][data_inds_ix_]
for i in range(K_gamma_grad.shape[-1])])
K_alpha_grad = np.dstack([K_alpha_grad[..., i][data_inds_ix_]
for i in range(K_alpha_grad.shape[-1])])
# data covariance
tau = np.exp(logtau)
Kdat = self.data_cov(tau, logphi)
K += Kdat
try:
L = np.linalg.cholesky(K)
except np.linalg.LinAlgError:
return (-np.inf, np.zeros_like(g.size)) \
if eval_gradient else -np.inf
if len(y.shape) == 1:
y_train = y[:, None]
else:
y_train = y
alpha = cho_solve((L, True), y_train)
log_lik_dims = -.5 * np.einsum("ik, ik->k",y_train, alpha)
log_lik_dims -= np.log(np.diag(L)).sum()
log_lik_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
log_lik = log_lik_dims.sum(-1) # sum over output dimensions
if eval_gradient:
tmp = np.einsum("ik,jk->ijk", alpha, alpha)
tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, None]
log_likelihood_gradient_dims = \
0.5 * np.einsum("ijl,ijk->kl", tmp, K_g_grad)
log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)
log_likelihood_gamma_gradient_dims = \
0.5 * np.einsum("ijl,ijk->kl", tmp, K_gamma_grad)
log_likelihood_loggamma_gradient = log_likelihood_gamma_gradient_dims.sum(-1)
# Gamma parameters is on the log scale
log_likelihood_loggamma_gradient = \
log_likelihood_loggamma_gradient * np.exp(loggamma)
log_likelihood_alpha_gradient_dims = \
0.5 * np.einsum('ijl,ijk->kl', tmp, K_alpha_grad)
log_likelihood_logalpha_gradient = log_likelihood_alpha_gradient_dims.sum(-1)
# alpha parameters are on the log scale
log_likelihood_logalpha_gradient = \
log_likelihood_logalpha_gradient * np.exp(logalpha)
return (log_lik,
log_likelihood_gradient,
log_likelihood_loggamma_gradient,
log_likelihood_logalpha_gradient)
else:
return log_lik
[docs] def prior_logpdf(self, g, logpsi, eval_gradient):
"""Logpdf of prior
"""
veclogpsi = logpsi.copy()
logpsi_shape = util._get_gp_theta_shape(self.latentforces)
logpsi = util._unpack_vector(logpsi, logpsi_shape)
# reshapge g
g = g.reshape(self.dim.R, self.dim.N)
ll = 0.
if eval_gradient:
ll_g_grad, ll_logpsi_grad = ([], [])
for r in range(self.dim.R):
gp = self.latentforces[r]
kern = gp.kernel.clone_with_theta(logpsi[r])
if eval_gradient:
K, K_gradient = kern(self.ttc[:, None],
eval_gradient=True)
else:
K = kern(self.ttc[:, None])
K[np.diag_indices_from(K)] += gp.alpha
L = np.linalg.cholesky(K)
alpha = cho_solve((L, True), g[r, :])
tmp = -.5 * g[r, :].dot(alpha)
tmp -= np.log(np.diag(L)).sum()
ll += -.5 * g[r, :].dot(alpha) - \
np.log(np.diag(L)).sum() - \
L.shape[0] / 2 * np.log(2 * np.pi)
if eval_gradient:
ll_g_grad.append(-alpha)
if eval_gradient:
return ll, \
np.concatenate(ll_g_grad)
return ll
class VarMLFMSuccApprox(BaseMLFMSuccApprox):
def varfit(self, times, Y, gtol=1e-3, max_iter=100, **kwargs):
try:
mapres = kwargs.pop('mapres')
self._setup(times, **kwargs)
except KeyError:
raise NotImplementedError("varfit currently requires the results of a MAP fit")
# initalise the distribution of g at the MAP
Eg = mapres.g.ravel()
Covg = np.diag([0.1]*Eg.size)
Eb = mapres.beta.ravel()
Deltag = np.inf
# initalise the distribution of X
# X is of shape[N*K, order+1, n_samples]
Ex, Covx = self._init_x_distrib(times, Y)
# Burn in the distribution of Ex, Covx around g map
for i in range(5):
Ex, Covx = self.x_update_moments(Y, Ex, Covx, Eg, Covg,
mapres.beta, np.exp(mapres.loggamma),
mapres.logphi, mapres.logtau)
nt = 0 # Count number of iterations
while nt < 10:
Eg, Covg = self.g_update_moments(Ex, Covx,
mapres.beta, np.exp(mapres.loggamma),
mapres.logpsi)
Ex, Covx = self.x_update_moments(Y,
Ex, Covx,
Eg, Covg,
mapres.beta, np.exp(mapres.loggamma),
mapres.logphi, mapres.logtau)
nt += 1
return Eg, Covg
def _init_x_distrib(self, times, Y):
from scipy.interpolate import interp1d
if self.is_tt_aug:
# cheap interpolation of the data to the latent variable times
Ex = np.zeros((self.dim.N*self.dim.K, self.order+1, Y.shape[1]))
for m, Ym in enumerate(Y.T):
Ym = Ym.reshape(self.Ndata, self.dim.K)
u = [interp1d(times, ymk,
kind='cubic',
fill_value='extrapolate')
for ymk in Ym.T]
Yinterp = np.column_stack([uk(self.ttc)
for uk in u]).ravel()
Ex[:, :, m] = np.column_stack((Yinterp
for p in range(self.order+1)))
NK = self.dim.N*self.dim.K
Covx = np.dstack([np.diag(0.001*np.ones(NK))
for p in range(min(self.order, 3))])
return Ex, Covx
def g_update_moments(self, EX, CovX, beta, gamma, logpsi):
# dimensionality variables
n_outputs = EX.shape[-1]
NK = self.dim.N*self.dim.K
# struct mats
struct_mats = np.array([sum(brd*Ld for brd, Ld in zip(br, self.basis_mats))
for br in beta])
# vec(K) = v0 + V.dot(g)
V, v0 = self.sparse_vecK_aff_rep(beta)
V = V.todense()
v0 = v0.todense()
GammaInv = np.diag(np.concatenate([(1/gk)*np.ones(self.dim.N)
for gk in gamma]))
# array of outer products
EXXT = np.einsum('ikl,jkl->ijkl', EX[...,:-1,:], EX[...,:-1,:])
# ... adjust with covariance matrices
EXXT[..., 0, :] += CovX[..., 0][..., None]
if EXXT.shape[1] > 2:
EXXT[..., 1:-1, :] += CovX[..., 1][..., None, None]
EXXT[..., -1, :] += CovX[..., 2][..., None]
else:
EXXT[..., -1, :] += CovX[..., 1]
S = np.kron(EXXT.sum(2), GammaInv[..., None])
## E outer(Xm, X_{m+1})
# - we use the fact cov{X_m, X_{m+1}} = 0 under chosen mean field factorisation
EXXpT = np.einsum('ikl,jkl->ijkl', EX[..., 1:, :], EX[..., :-1, :]).sum(2)
# b = vec(sum([Gamma.dot(outer(xm, xm-1)) ]))
b = np.einsum('ij,jkl->ikl', GammaInv, EXXpT)
b = b.transpose(1, 0, 2).reshape(NK*NK, n_outputs)
premean = np.einsum('ijl,jk->ikl', S, v0)
premean = b[:, None, :] - premean
premean = np.einsum('ij,jkl->ikl', V.T, premean)
invcov = np.einsum('ijl,jk->ikl', S, V)
invcov = np.einsum('ij,jkl->ikl', V.T, invcov)
invcov = invcov.sum(-1)
# get the contribution from the prior
logpsi = util._unpack_vector(logpsi,
util._get_gp_theta_shape(self.latentforces))
for r, theta in enumerate(logpsi):
kern = self.latentforces[r].kernel.clone_with_theta(theta)
K = kern(self.ttc[:, None])
K[np.diag_indices_from(K)] += self.latentforces[r].alpha
L = np.linalg.cholesky(K)
invcov[r*self.dim.N:(r+1)*self.dim.N,
r*self.dim.N:(r+1)*self.dim.N] += \
cho_solve((L, True), np.eye(L.shape[0]))
L = np.linalg.cholesky(invcov)
cov = cho_solve((L, True), np.eye(L.shape[0]))
mean = cov.dot(premean.sum(-1))
#mean = np.einsum('ij,jkl->ikl', cov, premean.sum(-1))[:, 0].sum(-1)
return mean.ravel(), cov
def x_update_moments(self, Y, EX, CovX, Eg, Covg,
beta, gamma, logphi, logtau):
# Get the additive noise error term
GammaInv = np.diag(
np.column_stack([1/gk*np.ones(self.dim.N)
for gk in gamma]).ravel())
# x0 cov variance matrix
logphi = util._unpack_vector(logphi,
util._get_gp_theta_shape(self.latentstates))
C0inv = [gp.kernel.clone_with_theta(theta)(self.ttc[:, None])
for gp, theta in zip(self.latentstates, logphi)]
for k, C in enumerate(C0inv):
C[np.diag_indices_from(C)] += self.latentstates[k].alpha
C0inv = [np.linalg.cholesky(C) for C in C0inv]
C0inv = block_diag(*[cho_solve((L, True), np.eye(L.shape[0]))
for L in C0inv])
# this is the inverse of Cov{vec(X)}, we need
# Cov{rav(X)} = T Cov{vec(X)} Tt
# ---> InvCov{rav(X)} = Tt InvCov{vec(X)} T
Tt = util.T_xrav_tovecx(self.dim.N, self.dim.K).transpose()
C0inv = Tt.dot(Tt.dot(C0inv).T)
## Takes the expectation wrt to distribution of G
EK = self._K(Eg, beta)
EKt = EK.transpose()
EKtLK = self._EKtLK(Eg, Covg, beta, GammaInv)
######
# q(X0)
ic0 = C0inv + EKtLK
pm0 = EKt.dot(GammaInv.dot(EX[:, 1, :]))
L0 = np.linalg.cholesky(ic0)
cov0 = cho_solve((L0, True), np.eye(L0.shape[0]))
EX[:, 0, :] = cov0.dot(pm0)
CovX[..., 0] = cov0
# Common cov
cov = GammaInv + EKtLK
L = np.linalg.cholesky(cov)
cov = cho_solve((L, True), np.eye(L.shape[0]))
CovX[..., 1] = cov
for m in range(self.order-1):
pm = EKt.dot(GammaInv.dot(EX[:, m+2, :])) + \
GammaInv.dot(EK.dot(EX[:, m, :]))
EX[:, m+1, :] = cov.dot(pm)
datainvcov = np.diag(
np.column_stack([tau*np.ones(self.Ndata)
for tau in np.exp(logtau)]).ravel())
datainvcov = [gp.kernel.clone_with_theta(theta)(self._tt_data[:, None])
for gp, theta in zip(self.latentstates, logphi)]
for k, C in enumerate(datainvcov):
C[np.diag_indices_from(C)] += self.latentstates[k].alpha
datainvcov = [np.linalg.cholesky(C) for C in datainvcov]
datainvcov = block_diag(*[cho_solve((L, True), np.eye(L.shape[0]))
for L in datainvcov])
# this is the inverse of Cov{vec(X)}, we need
# Cov{rav(X)} = T Cov{vec(X)} Tt
# ---> InvCov{rav(X)} = Tt InvCov{vec(X)} T
Tt = util.T_xrav_tovecx(self.Ndata, self.dim.K).transpose()
datainvcov = Tt.dot(Tt.dot(datainvcov).T)
invcov = GammaInv
if self.is_tt_aug:
data_inds = np.asarray(self.data_inds)
data_inds = (data_inds*self.dim.K)[:, None] + \
np.arange(self.dim.K)[None, :]
data_inds_ix_ = np.ix_(data_inds.ravel(), data_inds.ravel())
invcov[data_inds_ix_] += datainvcov
pm = GammaInv.dot(EK.dot(EX[:, -2, :]))
pm[data_inds.ravel(), :] += datainvcov.dot(Y)
else:
invcov += datainvcov
pm = datainvcov.dot(Y) + GammaInv.dot(EK.dot(EX[:, -2, :]))
cov = np.linalg.inv(invcov) #np.diag(1 / np.diag(invcov))
EX[:, -1, :] = cov.dot(pm)
CovX[..., -1] = cov
return EX, CovX
def _EKtLK(self, Eg, Covg, beta, L):
"""
Computes the expected value of the matrix valued quadratic form.
"""
EggT = np.outer(Eg, Eg) + Covg
# vector v0 and matrix V with vec(K) = v0 + V.dot(g)
V, v0 = self.sparse_vecK_aff_rep(beta)
EVg = V.dot(Eg[:, None]) # E[ V.dot(g) ] = V.dot(Eg)
vEVg = v0.dot(EVg.T) # E[ outer(v, Vg) ] = outer(v, V.dot(Eg))
# E[outer(vec(K), vec(K)]
E_vecK_outer = v0.dot(v0.transpose()) + \
vEVg + vEVg.T + \
V.dot(V.dot(EggT).T)
# reshape so that [n, m, ...] = E[outer(K[:, n], K[:, m])]
NK = self.dim.N * self.dim.K
# numpy matrix to array
E_vecK_outer = np.asarray(E_vecK_outer)
E_vecK_outer = E_vecK_outer.reshape(NK, NK, NK, NK).transpose(0, 2, 1, 3)
# reshape so that [n, m, ...] = vec(E[outer(K[:, m], K[:, m])])
E_vecK_outer = E_vecK_outer.transpose(0, 1, 3, 2).reshape(NK, NK, NK**2)
# final results is [n, m] = Tr(E_vecK_outer[n, m, ...].dot(L))
# i) vectorise L
# ii) multiply and sum for dot product
return (E_vecK_outer * L.T.ravel()[None, None, :]).sum(-1)
"""
Methods for trapezoidal integration
"""
def get_weights(tt):
tt = np.asarray(tt)
W = np.zeros((tt.size, tt.size))
h = np.diff(tt)
for i in range(tt.size):
W[i, :i] += .5*h[:i]
W[i, 1:i+1] += .5*h[:i]
return W
def weight_matrix(tt, ifix):
ttb = tt[:ifix+1]
ttf = tt[ifix:]
Wb = get_weights([t for t in reversed(ttb)])
Wb = np.flip(Wb, (0, 1))
Wf = get_weights(ttf)
W = np.zeros((tt.size, tt.size))
W[:ifix+1, :ifix+1] += Wb
W[ifix:, ifix:] += Wf
return W