#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Abstract model class for all GPyTorch-based botorch models.

To implement your own, simply inherit from both the provided classes and a
GPyTorch Model class such as an ExactGP.
"""

from __future__ import annotations

import itertools
import warnings
from abc import ABC
from copy import deepcopy
from typing import Any, TYPE_CHECKING

import torch
from botorch.acquisition.objective import PosteriorTransform
from botorch.exceptions.errors import (
    BotorchTensorDimensionError,
    InputDataError,
    UnsupportedError,
)
from botorch.exceptions.warnings import (
    _get_single_precision_warning,
    BotorchTensorDimensionWarning,
    InputDataWarning,
)
from botorch.models.model import Model, ModelList
from botorch.models.utils import (
    _make_X_full,
    add_output_dim,
    gpt_posterior_settings,
    mod_batch_shape,
    multioutput_to_batch_mode_transform,
)
from botorch.models.utils.assorted import fantasize as fantasize_flag
from botorch.posteriors.fully_bayesian import GaussianMixturePosterior
from botorch.posteriors.gpytorch import GPyTorchPosterior
from botorch.utils.multitask import separate_mtmvn
from botorch.utils.transforms import is_ensemble
from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
from linear_operator.operators import BlockDiagLinearOperator, CatLinearOperator
from torch import Tensor

if TYPE_CHECKING:
    from botorch.posteriors.posterior_list import PosteriorList  # pragma: no cover
    from botorch.posteriors.transformed import TransformedPosterior  # pragma: no cover
    from gpytorch.likelihoods import Likelihood  # pragma: no cover


class GPyTorchModel(Model, ABC):
    r"""Abstract base class for models based on GPyTorch models.

    The easiest way to use this is to subclass a model from a GPyTorch model
    class (e.g. an `ExactGP`) and this `GPyTorchModel`. See e.g. `SingleTaskGP`.
    """

    likelihood: Likelihood

    @staticmethod
    def _validate_tensor_args(
        X: Tensor, Y: Tensor, Yvar: Tensor | None = None, strict: bool = True
    ) -> None:
        r"""Checks that `Y` and `Yvar` have an explicit output dimension if strict.
        Checks that the dtypes of the inputs match, and warns if using float.

        This also checks that `Yvar` has the same trailing dimensions as `Y`. Note
        we only infer that an explicit output dimension exists when `X` and `Y`
        have the same `batch_shape`.

        Args:
            X: A `batch_shape x n x d`-dim Tensor, where `d` is the dimension of
                the feature space, `n` is the number of points per batch, and
                `batch_shape` is the batch shape (potentially empty).
            Y: A `batch_shape' x n x m`-dim Tensor, where `m` is the number of
                model outputs, `n` is the number of points per batch, and
                `batch_shape'` is the batch shape of the observations.
            Yvar: A `batch_shape' x n x m` tensor of observed measurement noise.
                Note: this will be None when using a model that infers the noise
                level (e.g. a `SingleTaskGP`).
            strict: A boolean indicating whether to check that `Y` and `Yvar`
                have an explicit output dimension.
        """
        if X.dim() != Y.dim():
            if (X.dim() - Y.dim() == 1) and (X.shape[:-1] == Y.shape):
                message = (
                    "An explicit output dimension is required for targets."
                    f" Expected Y with dimension {X.dim()} (got {Y.dim()=})."
                )
            else:
                message = (
                    "Expected X and Y to have the same number of dimensions"
                    f" (got X with dimension {X.dim()} and Y with dimension"
                    f" {Y.dim()})."
                )
            if strict:
                raise BotorchTensorDimensionError(message)
            else:
                warnings.warn(
                    "Non-strict enforcement of botorch tensor conventions. The "
                    "following error would have been raised with strict "
                    f"enforcement: {message}",
                    BotorchTensorDimensionWarning,
                    stacklevel=2,
                )
        # Yvar may not have the same batch dimensions, but the trailing dimensions
        # of Yvar should be the same as the trailing dimensions of Y.
        if Yvar is not None and Y.shape[-(Yvar.dim()) :] != Yvar.shape:
            raise BotorchTensorDimensionError(
                "An explicit output dimension is required for observation noise."
                f" Expected Yvar with shape: {Y.shape[-Yvar.dim() :]} (got"
                f" {Yvar.shape})."
            )
        # Check the dtypes.
        if X.dtype != Y.dtype or (Yvar is not None and Y.dtype != Yvar.dtype):
            raise InputDataError(
                "Expected all inputs to share the same dtype. Got "
                f"{X.dtype} for X, {Y.dtype} for Y, and "
                f"{Yvar.dtype if Yvar is not None else None} for Yvar."
            )
        if X.dtype != torch.float64:
            warnings.warn(
                _get_single_precision_warning(str(X.dtype)),
                InputDataWarning,
                stacklevel=3,  # Warn at model constructor call.
            )

    @property
    def batch_shape(self) -> torch.Size:
        r"""The batch shape of the model.

        This is a batch shape from an I/O perspective, independent of the internal
        representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel).
        For a model with `m` outputs, a `test_batch_shape x q x d`-shaped input `X`
        to the `posterior` method returns a Posterior object over an output of
        shape `broadcast(test_batch_shape, model.batch_shape) x q x m`.
        """
        return self.train_inputs[0].shape[:-2]

    @property
    def num_outputs(self) -> int:
        r"""The number of outputs of the model."""
        return self._num_outputs

    # pyre-fixme[14]: Inconsistent override.
    # `botorch.models.gpytorch.GPyTorchModel.posterior` overrides method defined
    # in `Model` inconsistently. Could not find parameter `output_indices` in
    # overriding signature.
    def posterior(
        self,
        X: Tensor,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
        **kwargs: Any,
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
                of the feature space and `q` is the number of points considered
                jointly.
            observation_noise: If True, add the observation noise from the
                likelihood to the posterior. If a Tensor, use it directly as the
                observation noise (must be of shape `(batch_shape) x q`). It is
                assumed to be in the outcome-transformed space if an outcome
                transform is used.
            posterior_transform: An optional PosteriorTransform.

        Returns:
            A `GPyTorchPosterior` object, representing a batch of `b` joint
            distributions over `q` points. Includes observation noise if
            specified.
        """
        self.eval()  # make sure model is in eval mode
        # input transforms are applied at `posterior` in `eval` mode, and at
        # `model.forward()` at the training time
        X = self.transform_inputs(X)
        with gpt_posterior_settings():
            # NOTE: BoTorch's GPyTorchModels also inherit from GPyTorch's
            # ExactGP, thus self(X) calls GPyTorch's ExactGP's __call__, which
            # computes the posterior, rather than e.g. SingleTaskGP's forward,
            # which computes the prior.
            mvn = self(X)
            if observation_noise is not False:
                if isinstance(observation_noise, torch.Tensor):
                    # TODO: Make sure observation noise is transformed correctly
                    self._validate_tensor_args(X=X, Y=observation_noise)
                    if observation_noise.size(-1) == 1:
                        observation_noise = observation_noise.squeeze(-1)
                    mvn = self.likelihood(mvn, X, noise=observation_noise)
                else:
                    mvn = self.likelihood(mvn, X)
        posterior = GPyTorchPosterior(distribution=mvn)
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        if posterior_transform is not None:
            return posterior_transform(posterior)
        return posterior

    def condition_on_observations(
        self, X: Tensor, Y: Tensor, noise: Tensor | None = None, **kwargs: Any
    ) -> Model:
        r"""Condition the model on new observations.

        Args:
            X: A `batch_shape x n' x d`-dim Tensor, where `d` is the dimension
                of the feature space, `n'` is the number of points per batch,
                and `batch_shape` is the batch shape (must be compatible with
                the batch shape of the model).
            Y: A `batch_shape' x n' x m`-dim Tensor, where `m` is the number of
                model outputs, `n'` is the number of points per batch, and
                `batch_shape'` is the batch shape of the observations.
                `batch_shape'` must be broadcastable to `batch_shape` using
                standard broadcasting semantics. If `Y` has fewer batch
                dimensions than `X`, it is assumed that the missing batch
                dimensions are the same for all `Y`.
            noise: If not `None`, a tensor of the same shape as `Y` representing
                the associated noise variance.
            kwargs: Passed to `self.get_fantasy_model`.

        Returns:
            A `Model` object of the same type, representing the original model
            conditioned on the new observations `(X, Y)` (and possibly noise
            observations passed in via kwargs).

        Example:
            >>> train_X = torch.rand(20, 2)
            >>> train_Y = (
            >>>     torch.sin(train_X[:, 0]) + torch.cos(train_X[:, 1])
            >>> ).unsqueeze(-1)
            >>> model = SingleTaskGP(train_X, train_Y)
            >>> new_X = torch.rand(5, 2)
            >>> new_Y = (
            >>>     torch.sin(new_X[:, 0]) + torch.cos(new_X[:, 1])
            >>> ).unsqueeze(-1)
            >>> model = model.condition_on_observations(X=new_X, Y=new_Y)
        """
        Yvar = noise

        if hasattr(self, "outcome_transform"):
            # pass the transformed data to get_fantasy_model below
            # (unless it has already been transformed, as in
            # BatchedMultiOutputGPyTorchModel)
            if not isinstance(self, BatchedMultiOutputGPyTorchModel):
                # `noise` is assumed to already be outcome-transformed.
                Y, _ = self.outcome_transform(Y=Y, Yvar=Yvar)
        # Validate using strict=False, since we cannot tell if Y has an explicit
        # output dimension. Do not check shapes when fantasizing as they are
        # not expected to match.
        if fantasize_flag.off():
            self._validate_tensor_args(X=X, Y=Y, Yvar=Yvar, strict=False)
        if Y.size(-1) == 1:
            Y = Y.squeeze(-1)
            if Yvar is not None:
                kwargs.update({"noise": Yvar.squeeze(-1)})
        # get_fantasy_model will properly copy any existing outcome transforms
        # (since it deepcopies the original model)
        return self.get_fantasy_model(inputs=X, targets=Y, **kwargs)
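
# Usage sketch (illustrative comment, not part of the module logic): the
# typical round trip through `posterior` and `condition_on_observations` for a
# concrete subclass such as `SingleTaskGP`. Double precision avoids the
# single-precision warning raised by `_validate_tensor_args`.
#
#     import torch
#     from botorch.models import SingleTaskGP
#
#     train_X = torch.rand(20, 2, dtype=torch.float64)
#     train_Y = train_X.sin().sum(dim=-1, keepdim=True)  # 20 x 1
#     model = SingleTaskGP(train_X, train_Y)
#     post = model.posterior(torch.rand(5, 2, dtype=torch.float64))
#     assert post.mean.shape == torch.Size([5, 1])
#     new_X = torch.rand(5, 2, dtype=torch.float64)
#     new_Y = new_X.sin().sum(dim=-1, keepdim=True)
#     model2 = model.condition_on_observations(X=new_X, Y=new_Y)  # new model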


# pyre-fixme[13]: uninitialized attributes _num_outputs, _input_batch_shape,
# _aug_batch_shape
class BatchedMultiOutputGPyTorchModel(GPyTorchModel):
    r"""Base class for batched multi-output GPyTorch models with independent outputs.

    This model should be used when the same training data is used for all outputs.
    Outputs are modeled independently by using a different batch for each output.
    """

    _num_outputs: int
    _input_batch_shape: torch.Size
    _aug_batch_shape: torch.Size

    @staticmethod
    def get_batch_dimensions(
        train_X: Tensor, train_Y: Tensor
    ) -> tuple[torch.Size, torch.Size]:
        r"""Get the raw batch shape and output-augmented batch shape of the inputs.

        Args:
            train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of
                training features.
            train_Y: A `n x m` or `batch_shape x n x m` (batch mode) tensor of
                training observations.

        Returns:
            2-element tuple containing

            - The `input_batch_shape`
            - The output-augmented batch shape: `input_batch_shape x (m)`
        """
        input_batch_shape = train_X.shape[:-2]
        aug_batch_shape = input_batch_shape
        num_outputs = train_Y.shape[-1]
        if num_outputs > 1:
            aug_batch_shape += torch.Size([num_outputs])
        return input_batch_shape, aug_batch_shape
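
    # Shape sketch (illustrative comment, not executed): for batched
    # multi-output training data, the output dimension is appended to the
    # input batch shape whenever `m > 1`.
    #
    #     X = torch.rand(3, 10, 2)  # batch_shape=[3], n=10, d=2
    #     Y = torch.rand(3, 10, 4)  # m=4 outputs
    #     input_bs, aug_bs = BatchedMultiOutputGPyTorchModel.get_batch_dimensions(
    #         train_X=X, train_Y=Y
    #     )
    #     assert input_bs == torch.Size([3])
    #     assert aug_bs == torch.Size([3, 4])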

    def _set_dimensions(self, train_X: Tensor, train_Y: Tensor) -> None:
        r"""Store the number of outputs and the batch shape.

        Args:
            train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of
                training features.
            train_Y: A `n x m` or `batch_shape x n x m` (batch mode) tensor of
                training observations.
        """
        self._num_outputs = train_Y.shape[-1]
        self._input_batch_shape, self._aug_batch_shape = self.get_batch_dimensions(
            train_X=train_X, train_Y=train_Y
        )

    @property
    def batch_shape(self) -> torch.Size:
        r"""The batch shape of the model.

        This is a batch shape from an I/O perspective, independent of the internal
        representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel).
        For a model with `m` outputs, a `test_batch_shape x q x d`-shaped input `X`
        to the `posterior` method returns a Posterior object over an output of
        shape `broadcast(test_batch_shape, model.batch_shape) x q x m`.
        """
        return self._input_batch_shape

    def _transform_tensor_args(
        self, X: Tensor, Y: Tensor, Yvar: Tensor | None = None
    ) -> tuple[Tensor, Tensor, Tensor | None]:
        r"""Transforms tensor arguments: for single output models, the output
        dimension is squeezed and for multi-output models, the output dimension is
        transformed into the left-most batch dimension.

        Args:
            X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of training
                features.
            Y: A `n x m` or `batch_shape x n x m` (batch mode) tensor of training
                observations.
            Yvar: A `n x m` or `batch_shape x n x m` (batch mode) tensor of
                observed measurement noise. Note: this will be None when using a
                model that infers the noise level (e.g. a `SingleTaskGP`).

        Returns:
            3-element tuple containing

            - A `input_batch_shape x (m) x n x d` tensor of training features.
            - A `target_batch_shape x (m) x n` tensor of training observations.
            - A `target_batch_shape x (m) x n` tensor of observed measurement
              noise (or None).
        """
        if self._num_outputs > 1:
            return multioutput_to_batch_mode_transform(
                train_X=X, train_Y=Y, train_Yvar=Yvar, num_outputs=self._num_outputs
            )
        return X, Y.squeeze(-1), None if Yvar is None else Yvar.squeeze(-1)

    def _apply_noise(
        self,
        X: Tensor,
        mvn: MultivariateNormal,
        observation_noise: bool | Tensor = False,
    ) -> MultivariateNormal:
        """Adds the observation noise to the posterior.

        Args:
            X: A tensor of shape `batch_shape x q x d`.
            mvn: A `MultivariateNormal` object representing the posterior over
                the true latent function.
            observation_noise: If True, add the observation noise from the
                likelihood to the posterior. If a Tensor, use it directly as the
                observation noise (must be of shape `(batch_shape) x q x m`).

        Returns:
            The posterior predictive.
        """
        if observation_noise is False:
            return mvn
        # noise_shape is `broadcast(test_batch_shape, model.batch_shape) x m x q`
        noise_shape = mvn.batch_shape + mvn.event_shape
        if torch.is_tensor(observation_noise):
            # TODO: Validate noise shape
            # make observation_noise's shape match noise_shape
            if self.num_outputs > 1:
                obs_noise = observation_noise.transpose(-1, -2)
            else:
                obs_noise = observation_noise.squeeze(-1)
            mvn = self.likelihood(
                mvn,
                X,
                noise=obs_noise.expand(noise_shape),
            )
        elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            # Use the mean of the previous noise values (TODO: be smarter here).
            observation_noise = self.likelihood.noise.mean(dim=-1, keepdim=True)
            mvn = self.likelihood(
                mvn,
                X,
                noise=observation_noise.expand(noise_shape),
            )
        else:
            mvn = self.likelihood(mvn, X)
        return mvn

    def posterior(
        self,
        X: Tensor,
        output_indices: list[int] | None = None,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
                of the feature space and `q` is the number of points considered
                jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add the observation noise from the
                likelihood to the posterior. If a Tensor, use it directly as the
                observation noise (must be of shape `(batch_shape) x q x m`).
            posterior_transform: An optional PosteriorTransform.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if specified.
        """
        self.eval()  # make sure model is in eval mode
        # input transforms are applied at `posterior` in `eval` mode, and at
        # `model.forward()` at the training time
        X = self.transform_inputs(X)
        with gpt_posterior_settings():
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape
                )
            # NOTE: BoTorch's GPyTorchModels also inherit from GPyTorch's
            # ExactGP, thus self(X) calls GPyTorch's ExactGP's __call__, which
            # computes the posterior, rather than e.g. SingleTaskGP's forward,
            # which computes the prior.
            mvn = self(X)
            mvn = self._apply_noise(X=X, mvn=mvn, observation_noise=observation_noise)
            if self._num_outputs > 1:
                if torch.jit.is_tracing():
                    mvn = MultitaskMultivariateNormal.from_batch_mvn(
                        mvn, task_dim=output_dim_idx
                    )
                else:
                    mean_x = mvn.mean
                    covar_x = mvn.lazy_covariance_matrix
                    output_indices = output_indices or range(self._num_outputs)
                    mvns = [
                        MultivariateNormal(
                            mean_x.select(dim=output_dim_idx, index=t),
                            covar_x[(slice(None),) * output_dim_idx + (t,)],
                        )
                        for t in output_indices
                    ]
                    mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)

        posterior = GPyTorchPosterior(distribution=mvn)
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        if posterior_transform is not None:
            return posterior_transform(posterior)
        return posterior
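
    # Usage sketch (illustrative comment, not executed): for a two-output
    # model, `output_indices` restricts the posterior to a subset of outputs.
    #
    #     X = torch.rand(15, 3, dtype=torch.float64)
    #     Y = torch.stack([X.sum(-1), X.prod(-1)], dim=-1)  # 15 x 2
    #     model = SingleTaskGP(X, Y)  # modeled as a batch over m=2 outputs
    #     post = model.posterior(
    #         torch.rand(5, 3, dtype=torch.float64), output_indices=[0]
    #     )
    #     assert post.mean.shape == torch.Size([5, 1])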

    def condition_on_observations(
        self, X: Tensor, Y: Tensor, **kwargs: Any
    ) -> BatchedMultiOutputGPyTorchModel:
        r"""Condition the model on new observations.

        Args:
            X: A `batch_shape x n' x d`-dim Tensor, where `d` is the dimension
                of the feature space, `n'` is the number of points per batch,
                and `batch_shape` is the batch shape (must be compatible with
                the batch shape of the model).
            Y: A `batch_shape' x n' x m`-dim Tensor, where `m` is the number of
                model outputs, `n'` is the number of points per batch, and
                `batch_shape'` is the batch shape of the observations.
                `batch_shape'` must be broadcastable to `batch_shape` using
                standard broadcasting semantics. If `Y` has fewer batch
                dimensions than `X`, it is assumed that the missing batch
                dimensions are the same for all `Y`.

        Returns:
            A `BatchedMultiOutputGPyTorchModel` object of the same type with
            `n + n'` training examples, representing the original model
            conditioned on the new observations `(X, Y)` (and possibly noise
            observations passed in via kwargs).

        Example:
            >>> train_X = torch.rand(20, 2)
            >>> train_Y = torch.stack(
            >>>     [torch.sin(train_X[:, 0]), torch.cos(train_X[:, 1])], -1
            >>> )
            >>> model = SingleTaskGP(train_X, train_Y)
            >>> new_X = torch.rand(5, 2)
            >>> new_Y = torch.stack(
            >>>     [torch.sin(new_X[:, 0]), torch.cos(new_X[:, 1])], -1
            >>> )
            >>> model = model.condition_on_observations(X=new_X, Y=new_Y)
        """
        noise = kwargs.get("noise")
        if hasattr(self, "outcome_transform"):
            # We need to apply transforms before shifting batch indices around.
            # `noise` is assumed to already be outcome-transformed.
            Y, _ = self.outcome_transform(Y)
        # Do not check shapes when fantasizing as they are not expected to match.
        if fantasize_flag.off():
            self._validate_tensor_args(X=X, Y=Y, Yvar=noise, strict=False)

        if self._num_outputs > 1:
            inputs, targets, noise = multioutput_to_batch_mode_transform(
                train_X=X, train_Y=Y, num_outputs=self._num_outputs, train_Yvar=noise
            )
            # `multioutput_to_batch_mode_transform` removes the output dimension,
            # which is necessary for `condition_on_observations`
            targets = targets.unsqueeze(-1)
            if noise is not None:
                noise = noise.unsqueeze(-1)
        else:
            inputs = X
            targets = Y
        if noise is not None:
            kwargs.update({"noise": noise})
        fantasy_model = super().condition_on_observations(
            X=inputs, Y=targets, **kwargs
        )
        fantasy_model._input_batch_shape = fantasy_model.train_targets.shape[
            : (-1 if self._num_outputs == 1 else -2)
        ]
        if not self._is_fully_bayesian:
            fantasy_model._aug_batch_shape = fantasy_model.train_targets.shape[:-1]
        return fantasy_model

    def subset_output(self, idcs: list[int]) -> BatchedMultiOutputGPyTorchModel:
        r"""Subset the model along the output dimension.

        Args:
            idcs: The output indices to subset the model to.

        Returns:
            The current model, subset to the specified output indices.
        """
        try:
            subset_batch_dict = self._subset_batch_dict
        except AttributeError:
            raise NotImplementedError(
                "`subset_output` requires the model to define a `_subset_batch_dict` "
                "attribute that lists the indices of the output dimensions in each "
                "model parameter that needs to be subset."
            )

        m = len(idcs)
        new_model = deepcopy(self)

        subset_everything = self.num_outputs == m and idcs == list(range(m))
        if subset_everything:
            return new_model

        tidxr = torch.tensor(idcs, device=new_model.train_targets.device)
        idxr = tidxr if m > 1 else idcs[0]
        new_tail_bs = torch.Size([m]) if m > 1 else torch.Size()

        new_model._num_outputs = m
        new_model._aug_batch_shape = new_model._aug_batch_shape[:-1] + new_tail_bs
        new_model.train_inputs = tuple(
            ti[..., idxr, :, :] for ti in new_model.train_inputs
        )
        new_model.train_targets = new_model.train_targets[..., idxr, :]

        # adjust batch shapes of parameters/buffers if necessary
        for full_name, p in itertools.chain(
            new_model.named_parameters(), new_model.named_buffers()
        ):
            if full_name in subset_batch_dict:
                idx = subset_batch_dict[full_name]
                new_data = p.index_select(dim=idx, index=tidxr)
                if m == 1:
                    new_data = new_data.squeeze(idx)
                p.data = new_data
            mod_name = full_name.split(".")[:-1]
            mod_batch_shape(new_model, mod_name, m if m > 1 else 0)

        # subset outcome transform if present
        try:
            subset_octf = new_model.outcome_transform.subset_output(idcs=idcs)
            new_model.outcome_transform = subset_octf
        except AttributeError:
            pass

        # Subset fixed noise likelihood if present.
        if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            full_noise = new_model.likelihood.noise_covar.noise
            new_noise = full_noise[..., idcs if len(idcs) > 1 else idcs[0], :]
            new_model.likelihood.noise_covar.noise = new_noise
        return new_model
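
# Usage sketch (illustrative comment, not executed): subsetting a two-output
# `SingleTaskGP` (which defines `_subset_batch_dict`) to a single output. The
# returned model is a deep copy; the original model is unchanged.
#
#     model = SingleTaskGP(X, Y)  # Y of shape n x 2, as in the sketch above
#     m1 = model.subset_output(idcs=[1])
#     assert m1.num_outputs == 1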


class ModelListGPyTorchModel(ModelList, GPyTorchModel, ABC):
    r"""Abstract base class for models based on multi-output GPyTorch models.

    This is meant to be used with a gpytorch ModelList wrapper for independent
    evaluation of submodels. Those submodels can themselves be multi-output
    models, in which case the task covariances will be ignored.
    """

    @property
    def batch_shape(self) -> torch.Size:
        r"""The batch shape of the model.

        This is a batch shape from an I/O perspective, independent of the internal
        representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel).
        For a model with `m` outputs, a `test_batch_shape x q x d`-shaped input `X`
        to the `posterior` method returns a Posterior object over an output of
        shape `broadcast(test_batch_shape, model.batch_shape) x q x m`.
        """
        batch_shapes = {m.batch_shape for m in self.models}
        if len(batch_shapes) > 1:
            msg = (
                f"Component models of {self.__class__.__name__} have different "
                "batch shapes"
            )
            try:
                broadcast_shape = torch.broadcast_shapes(*batch_shapes)
                warnings.warn(msg + ". Broadcasting batch shapes.", stacklevel=2)
                return broadcast_shape
            except RuntimeError:
                raise NotImplementedError(msg + " that are not broadcastable.")
        return next(iter(batch_shapes))
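
    # Shape sketch (illustrative comment, not executed): component batch
    # shapes are combined with `torch.broadcast_shapes` semantics.
    #
    #     torch.broadcast_shapes(torch.Size([]), torch.Size([2]))   # -> [2]
    #     torch.broadcast_shapes(torch.Size([2]), torch.Size([3]))  # RuntimeError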

    # pyre-fixme[15]: Inconsistent override in return types
    def posterior(
        self,
        X: Tensor,
        output_indices: list[int] | None = None,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
    ) -> GPyTorchPosterior | PosteriorList:
        r"""Computes the posterior over model outputs at the provided points.

        If any model returns a MultitaskMultivariateNormal posterior, then that
        will be split into individual MVNs per task, with inter-task covariance
        ignored.

        Args:
            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
                feature space, `q` is the number of points considered jointly,
                and `b` is the batch dimension.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add the observation noise from the
                respective likelihoods to the posterior. If a Tensor of shape
                `(batch_shape) x q x m`, use it directly as the observation
                noise (with `observation_noise[...,i]` added to the posterior
                of the `i`-th model).
            posterior_transform: An optional PosteriorTransform.

        Returns:
            - If no `posterior_transform` is provided and the component models
              have no `outcome_transform`, or if the component models only use
              linear outcome transforms like `Standardize` (i.e. not `Log`),
              returns a `GPyTorchPosterior` or `GaussianMixturePosterior`
              object, representing `batch_shape` joint distributions over `q`
              points and the outputs selected by `output_indices` each.
              Includes measurement noise if `observation_noise` is specified.
            - If no `posterior_transform` is provided and component models have
              nonlinear transforms like `Log`, returns a `PosteriorList` with
              sub-posteriors of type `TransformedPosterior`.
            - If `posterior_transform` is provided, that posterior transform
              will be applied and will determine the return type. This could
              potentially be any subclass of `Posterior`, but common choices
              give a `GPyTorchPosterior`.
        """
        # Nonlinear transforms untransform to a `TransformedPosterior`,
        # which can't be made into a `GPyTorchPosterior`
        returns_untransformed = any(
            hasattr(mod, "outcome_transform") and (not mod.outcome_transform._is_linear)
            for mod in self.models
        )
        # NOTE: We're not passing in the posterior transform here. We'll apply it
        # later.
        posterior = ModelList.posterior(
            self,
            X=X,
            output_indices=output_indices,
            observation_noise=observation_noise,
        )
        if not returns_untransformed:
            mvns = [p.distribution for p in posterior.posteriors]
            if any(isinstance(m, MultitaskMultivariateNormal) for m in mvns):
                mvn_list = []
                for mvn in mvns:
                    if len(mvn.event_shape) == 2:
                        # We separate MTMVNs into independent-across-task MVNs for
                        # the convenience of using BlockDiagLinearOperator below.
                        # (b x q x m x m) -> list of m (b x q x 1 x 1)
                        mvn_list.extend(separate_mtmvn(mvn))
                    else:
                        mvn_list.append(mvn)
                mean = torch.stack([mvn.mean for mvn in mvn_list], dim=-1)
                covars = CatLinearOperator(
                    *[mvn.lazy_covariance_matrix.unsqueeze(-3) for mvn in mvn_list],
                    dim=-3,
                )  # List of m (b x q x 1 x 1) -> (b x q x m x 1 x 1)
                mvn = MultitaskMultivariateNormal(
                    mean=mean,
                    covariance_matrix=BlockDiagLinearOperator(
                        covars, block_dim=-3
                    ).to(X),  # (b x q x m x 1 x 1) -> (b x q x m x m)
                    interleaved=False,
                )
            else:
                mvn = (
                    mvns[0]
                    if len(mvns) == 1
                    else MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
                )
            # Return the result as a GPyTorchPosterior/GaussianMixturePosterior.
            if any(is_ensemble(m) for m in self.models):
                # Mixing fully Bayesian and other GP models is currently
                # not supported.
                posterior = GaussianMixturePosterior(distribution=mvn)
            else:
                posterior = GPyTorchPosterior(distribution=mvn)
        if posterior_transform is not None:
            return posterior_transform(posterior)
        return posterior
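
    # Usage sketch (illustrative comment, not executed): two independent
    # single-output models wrapped in a `ModelListGP` yield a joint two-output
    # posterior, with X and Y as in the earlier sketches.
    #
    #     from botorch.models import ModelListGP, SingleTaskGP
    #
    #     m1 = SingleTaskGP(X, Y[..., :1])
    #     m2 = SingleTaskGP(X, Y[..., 1:])
    #     model_list = ModelListGP(m1, m2)
    #     post = model_list.posterior(torch.rand(5, 3, dtype=torch.float64))
    #     assert post.mean.shape == torch.Size([5, 2])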

    def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Model:
        raise NotImplementedError()


class MultiTaskGPyTorchModel(GPyTorchModel, ABC):
    r"""Abstract base class for multi-task models based on GPyTorch models.

    This class provides the `posterior` method to models that implement a
    "long-format" multi-task GP in the style of `MultiTaskGP`.
    """

    def _map_tasks(self, task_values: Tensor) -> Tensor:
        """Map raw task values to the task indices used by the model.

        Args:
            task_values: A tensor of task values.

        Returns:
            A tensor of task indices with the same shape as the input tensor.
        """
        if self._task_mapper is None:
            if not (
                torch.all(0 <= task_values) and torch.all(task_values < self.num_tasks)
            ):
                raise ValueError(
                    "Expected all task features in `X` to be between 0 and "
                    f"{self.num_tasks - 1}. Got {task_values}."
                )
        else:
            task_values = task_values.long()
            unexpected_task_values = set(task_values.unique().tolist()).difference(
                self._expected_task_values
            )
            if len(unexpected_task_values) > 0:
                raise ValueError(
                    "Received invalid raw task values. Expected raw value to be in"
                    f" {self._expected_task_values}, but got unexpected task values:"
                    f" {unexpected_task_values}."
                )
            task_values = self._task_mapper[task_values]
        return task_values

    def _apply_noise(
        self,
        X: Tensor,
        mvn: MultivariateNormal,
        num_outputs: int,
        observation_noise: bool | Tensor,
    ) -> MultivariateNormal:
        """Adds the observation noise to the posterior.

        If the likelihood is a `FixedNoiseGaussianLikelihood`, then the average
        noise per task is computed, and a diagonal noise matrix is added to the
        posterior covariance matrix, where the noise per input is the average
        noise for its respective task. If the likelihood is a Gaussian
        likelihood, then there is currently a shared inferred noise level for
        all tasks.

        TODO: implement support for task-specific inferred noise levels.

        Args:
            X: A tensor of shape `batch_shape x q x d + 1`, where `d` is the
                dimension of the feature space and the `+ 1` dimension is the
                task feature / index.
            mvn: A `MultivariateNormal` object representing the posterior over
                the true latent function.
            num_outputs: The number of outputs of the model.
            observation_noise: If True, add observation noise from the
                respective likelihood. Tensor input is currently not supported.

        Returns:
            The posterior predictive.
        """
        if torch.is_tensor(observation_noise):
            raise NotImplementedError(
                "Passing a tensor of observations is not supported by MultiTaskGP."
            )
        elif observation_noise is False:
            return mvn
        elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            # get task features for test points
            test_task_features = X[..., self._task_feature]
            test_task_features = self._map_tasks(test_task_features).long()
            unique_test_task_features = test_task_features.unique()
            # get task features for training points
            train_task_features = self.train_inputs[0][..., self._task_feature]
            train_task_features = self._map_tasks(train_task_features).long()
            noise_by_task = torch.zeros(self.num_tasks, dtype=X.dtype, device=X.device)
            for task_feature in unique_test_task_features:
                mask = train_task_features == task_feature
                noise_by_task[task_feature] = self.likelihood.noise[mask].mean(
                    dim=-1, keepdim=True
                )
            # noise_shape is `broadcast(test_batch_shape, model.batch_shape) x q`
            noise_shape = X.shape[:-1]
            observation_noise = noise_by_task[test_task_features].expand(noise_shape)
            return self.likelihood(
                mvn,
                X,
                noise=observation_noise,
            )
        return self.likelihood(mvn, X)

    def posterior(
        self,
        X: Tensor,
        output_indices: list[int] | None = None,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A tensor of shape `batch_shape x q x d` or `batch_shape x q x
                (d + 1)`, where `d` is the dimension of the feature space (not
                including task indices) and `q` is the number of points
                considered jointly. The `+ 1` dimension is the optional task
                feature / index. If given, the model produces the outputs for
                the given task indices. If omitted, the model produces outputs
                for tasks in `self._output_tasks` (specified as `output_tasks`
                while constructing the model), which can be overwritten using
                `output_indices`.
            output_indices: A list of task values over which to compute the
                posterior. Only used if `X` does not include the task feature.
                If omitted, defaults to `self._output_tasks`.
            observation_noise: If True, add observation noise from the
                respective likelihoods. If a Tensor, specifies the observation
                noise levels to add.
            posterior_transform: An optional PosteriorTransform.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points. If the task features are included in
            `X`, the posterior will be single output. Otherwise, the posterior
            will be single or multi output corresponding to the tasks included
            in either the `output_indices` or `self._output_tasks`.
        """
        includes_task_feature = X.shape[-1] == self.num_non_task_features + 1
        if includes_task_feature:
            if output_indices is not None:
                raise ValueError(
                    "`output_indices` must be None when `X` includes task features."
                )
            task_features = X[..., self._task_feature].unique()
            num_outputs = 1
            X_full = X
        else:
            # Add the task features to construct the full X for evaluation.
            task_features = torch.tensor(
                self._output_tasks if output_indices is None else output_indices,
                dtype=torch.long,
                device=X.device,
            )
            num_outputs = len(task_features)
            X_full = _make_X_full(
                X=X, output_indices=task_features.tolist(), tf=self._task_feature
            )
        # Make sure all task feature values are valid.
        task_features = self._map_tasks(task_values=task_features)
        self.eval()  # make sure model is in eval mode
        # input transforms are applied at `posterior` in `eval` mode, and at
        # `model.forward()` at the training time
        X_full = self.transform_inputs(X_full)
        with gpt_posterior_settings():
            mvn = self(X_full)
            mvn = self._apply_noise(
                X=X_full,
                mvn=mvn,
                num_outputs=num_outputs,
                observation_noise=observation_noise,
            )
        # If single-output, return the posterior of a single-output model
        if num_outputs == 1:
            posterior = GPyTorchPosterior(distribution=mvn)
        else:
            # Otherwise, make a MultitaskMultivariateNormal out of this
            mtmvn = MultitaskMultivariateNormal(
                mean=mvn.mean.view(*mvn.mean.shape[:-1], num_outputs, -1).transpose(
                    -1, -2
                ),
                covariance_matrix=mvn.lazy_covariance_matrix,
                interleaved=False,
            )
            posterior = GPyTorchPosterior(distribution=mtmvn)
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        if posterior_transform is not None:
            return posterior_transform(posterior)
        return posterior
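
    # Usage sketch (illustrative comment, not executed): a `MultiTaskGP`
    # trained on long-format data with the task index in the last column.
    # Omitting the task feature at test time yields one output per task.
    #
    #     from botorch.models import MultiTaskGP
    #
    #     task_idx = torch.arange(20, dtype=torch.float64).remainder(2)
    #     X_long = torch.cat(
    #         [torch.rand(20, 2, dtype=torch.float64), task_idx.unsqueeze(-1)],
    #         dim=-1,
    #     )  # last column holds task indices {0, 1}
    #     Y_long = X_long[:, :1].sin()
    #     mtgp = MultiTaskGP(X_long, Y_long, task_feature=-1)
    #     post = mtgp.posterior(torch.rand(5, 2, dtype=torch.float64))
    #     assert post.mean.shape == torch.Size([5, 2])  # one column per task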

    def subset_output(self, idcs: list[int]) -> MultiTaskGPyTorchModel:
        r"""Returns a new model that only outputs a subset of the outputs.

        Args:
            idcs: A list of output indices, corresponding to the outputs to keep.

        Returns:
            A new model that only outputs the requested outputs.
        """
        raise UnsupportedError(
            "Subsetting outputs is not supported by `MultiTaskGPyTorchModel`."
        )