Source code for botorch.posteriors.gpytorch

#! /usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Posterior Module to be used with GPyTorch models.
"""

from __future__ import annotations

from typing import Optional

import gpytorch
import torch
from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
from gpytorch.lazy import BlockDiagLazyTensor, LazyTensor, SumLazyTensor
from torch import Tensor

from ..exceptions.errors import BotorchTensorDimensionError
from .posterior import Posterior


[docs]class GPyTorchPosterior(Posterior): r"""A posterior based on GPyTorch's multi-variate Normal distributions.""" def __init__(self, mvn: MultivariateNormal) -> None: r"""A posterior based on GPyTorch's multi-variate Normal distributions. Args: mvn: A GPyTorch MultivariateNormal (single-output case) or MultitaskMultivariateNormal (multi-output case). """ self.mvn = mvn self._is_mt = isinstance(mvn, MultitaskMultivariateNormal) @property def device(self) -> torch.device: r"""The torch device of the posterior.""" return self.mvn.loc.device @property def dtype(self) -> torch.dtype: r"""The torch dtype of the posterior.""" return self.mvn.loc.dtype @property def event_shape(self) -> torch.Size: r"""The event shape (i.e. the shape of a single sample) of the posterior.""" shape = self.mvn.batch_shape + self.mvn.event_shape if not self._is_mt: shape += torch.Size([1]) return shape
[docs] def rsample( self, sample_shape: Optional[torch.Size] = None, base_samples: Optional[Tensor] = None, ) -> Tensor: r"""Sample from the posterior (with gradients). Args: sample_shape: A `torch.Size` object specifying the sample shape. To draw `n` samples, set to `torch.Size([n])`. To draw `b` batches of `n` samples each, set to `torch.Size([b, n])`. base_samples: An (optional) Tensor of `N(0, I)` base samples of appropriate dimension, typically obtained from a `Sampler`. This is used for deterministic optimization. Returns: A `sample_shape x event_shape`-dim Tensor of samples from the posterior. """ if sample_shape is None: sample_shape = torch.Size([1]) if base_samples is not None: if base_samples.shape[: len(sample_shape)] != sample_shape: raise RuntimeError("sample_shape disagrees with shape of base_samples.") # get base_samples to the correct shape base_samples = base_samples.expand(sample_shape + self.event_shape) # remove output dimension in single output case if not self._is_mt: base_samples = base_samples.squeeze(-1) with gpytorch.settings.fast_computations(covar_root_decomposition=False): samples = self.mvn.rsample( sample_shape=sample_shape, base_samples=base_samples ) # make sure there always is an output dimension if not self._is_mt: samples = samples.unsqueeze(-1) return samples
@property def mean(self) -> Tensor: r"""The posterior mean.""" mean = self.mvn.mean if not self._is_mt: mean = mean.unsqueeze(-1) return mean @property def variance(self) -> Tensor: r"""The posterior variance.""" variance = self.mvn.variance if not self._is_mt: variance = variance.unsqueeze(-1) return variance
[docs]def scalarize_posterior( posterior: GPyTorchPosterior, weights: Tensor, offset: float = 0.0 ) -> GPyTorchPosterior: r"""Affine transformation of a multi-output posterior. Args: posterior: The posterior over `m` outcomes to be scalarized. Supports `t`-batching. weights: A tensor of weights of size `m`. offset: The offset of the affine transformation. Returns: The transformed (single-output) posterior. If the input posterior has mean `mu` and covariance matrix `Sigma`, this posterior has mean `weights^T * mu` and variance `weights^T Sigma w`. Example: Example for a model with two outcomes: >>> X = torch.rand(1, 2) >>> posterior = model.posterior(X) >>> weights = torch.tensor([0.5, 0.25]) >>> new_posterior = scalarize_posterior(posterior, weights=weights) """ if weights.ndim > 1: raise BotorchTensorDimensionError("`weights` must be one-dimensional") mean = posterior.mean q, m = mean.shape[-2:] batch_shape = mean.shape[:-2] if m != weights.size(0): raise RuntimeError("Output shape not equal to that of weights") mvn = posterior.mvn cov = mvn.lazy_covariance_matrix if mvn.islazy else mvn.covariance_matrix if m == 1: # just scaling, no scalarization necessary new_mean = offset + (weights[0] * mean).view(*batch_shape, q) new_cov = weights[0] ** 2 * cov new_mvn = MultivariateNormal(new_mean, new_cov) return GPyTorchPosterior(new_mvn) new_mean = offset + (mean @ weights).view(*batch_shape, q) if q == 1: new_cov = ((cov @ weights) @ weights).view(*batch_shape, q, q) else: # we need to handle potentially different representations of the multi-task mvn if mvn._interleaved: w_cov = weights.repeat(q).unsqueeze(0) sum_shape = batch_shape + torch.Size([q, m, q, m]) sum_dims = (-1, -2) else: # special-case the independent setting if isinstance(cov, BlockDiagLazyTensor): new_cov = SumLazyTensor( *[ cov.base_lazy_tensor[..., i, :, :] * weights[i].pow(2) for i in range(cov.base_lazy_tensor.size(-3)) ] ) new_mvn = MultivariateNormal(new_mean, new_cov) return GPyTorchPosterior(new_mvn) w_cov = torch.repeat_interleave(weights, q).unsqueeze(0) sum_shape = batch_shape + torch.Size([m, q, m, q]) sum_dims = (-2, -3) cov_scaled = w_cov * cov * w_cov.transpose(-1, -2) # TODO: Do not instantiate full covariance for lazy tensors (ideally we simplify # this in GPyTorch: https://github.com/cornellius-gp/gpytorch/issues/1055) if isinstance(cov_scaled, LazyTensor): cov_scaled = cov_scaled.evaluate() new_cov = cov_scaled.view(sum_shape).sum(dim=sum_dims[0]).sum(dim=sum_dims[1]) new_mvn = MultivariateNormal(new_mean, new_cov) return GPyTorchPosterior(new_mvn)