Source code for botorch.models.converter

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Utilities for converting between different models.
"""

from __future__ import annotations

import warnings
from copy import deepcopy

import torch
from botorch.exceptions import UnsupportedError
from botorch.exceptions.warnings import BotorchWarning
from botorch.models import SingleTaskGP
from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP
from botorch.models.gp_regression_mixed import MixedSingleTaskGP
from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
from torch import Tensor
from torch.nn import Module, ModuleList

DEPRECATION_MESSAGE = (
    "Model converter code is deprecated and will be removed in v0.13 release. "
    "Its correct behavior is dependent on some assumptions about model priors "
    "that do not always hold. Use it at your own risk! See "
    "https://github.com/cornellius-gp/gpytorch/issues/2550."
)


def _get_module(module: Module, name: str) -> Module:
    """Recursively get a sub-module from a module.

    Args:
        module: A `torch.nn.Module`.
        name: The name of the submodule to return, in the form of a period-delinated
            string: `sub_module.subsub_module.[...].leaf_module`.

    Returns:
        The requested sub-module.

    Example:
        >>> gp = SingleTaskGP(train_X, train_Y)
        >>> noise_prior = _get_module(gp, "likelihood.noise_covar.noise_prior")
    """
    current = module
    if name != "":
        for a in name.split("."):
            current = getattr(current, a)
    return current


def _check_compatibility(models: ModuleList) -> None:
    """Check if the submodels of a ModelListGP are compatible with the converter."""
    # Check that all submodules are of the same type.
    for modn, mod in models[0].named_modules():
        mcls = mod.__class__
        if not all(isinstance(_get_module(m, modn), mcls) for m in models[1:]):
            raise UnsupportedError(
                "Sub-modules must be of the same type across models."
            )
        if "prior" in modn and len(mod.state_dict()) == 0:
            warnings.warn(  # pragma no cover -- not tested after GPyTorch 2551.
                "Model converter cannot verify compatibility of GPyTorch priors "
                "that do not register their parameters as buffers. If the prior "
                "is different than the default prior set by the model constructor "
                "this may not work correctly. Use it at your own risk! See "
                "https://github.com/cornellius-gp/gpytorch/issues/2550.",
                BotorchWarning,
                stacklevel=3,
            )

    # Check that each model is a BatchedMultiOutputGPyTorchModel.
    if not all(isinstance(m, BatchedMultiOutputGPyTorchModel) for m in models):
        raise UnsupportedError(
            "All models must be of type BatchedMultiOutputGPyTorchModel."
        )

    # TODO: Add support for custom likelihoods.
    if any(getattr(m, "_is_custom_likelihood", False) for m in models):
        raise NotImplementedError(
            "Conversion of models with custom likelihoods is currently unsupported."
        )

    # TODO: Add support for outcome transforms.
    if any(getattr(m, "outcome_transform", None) is not None for m in models):
        raise UnsupportedError(
            "Conversion of models with outcome transforms is unsupported. "
            "To fix this error, explicitly pass `outcome_transform=None`.",
        )

    # check that each model is single-output
    if not all(m._num_outputs == 1 for m in models):
        raise UnsupportedError("All models must be single-output.")

    # check that training inputs are the same
    if not all(
        torch.equal(ti, tj)
        for m in models[1:]
        for ti, tj in zip(models[0].train_inputs, m.train_inputs)
    ):
        raise UnsupportedError("training inputs must agree for all sub-models.")

    # check that there are no batched input transforms
    default_size = torch.Size([])
    for m in models:
        if hasattr(m, "input_transform"):
            if (
                m.input_transform is not None
                and len(getattr(m.input_transform, "batch_shape", default_size)) != 0
            ):
                raise UnsupportedError("Batched input_transforms are not supported.")

    # check that all models have the same input transforms
    if any(hasattr(m, "input_transform") for m in models):
        if not all(
            m.input_transform.equals(models[0].input_transform) for m in models[1:]
        ):
            raise UnsupportedError("All models must have the same input_transforms.")


[docs] def model_list_to_batched(model_list: ModelListGP) -> BatchedMultiOutputGPyTorchModel: """Convert a ModelListGP to a BatchedMultiOutputGPyTorchModel. Args: model_list: The `ModelListGP` to be converted to the appropriate `BatchedMultiOutputGPyTorchModel`. All sub-models must be of the same type and have the shape (batch shape and number of training inputs). Returns: The model converted into a `BatchedMultiOutputGPyTorchModel`. Example: >>> list_gp = ModelListGP(gp1, gp2) >>> batch_gp = model_list_to_batched(list_gp) """ warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning, stacklevel=2) was_training = model_list.training model_list.train() models = model_list.models _check_compatibility(models) # if the list has only one model, we can just return a copy of that if len(models) == 1: return deepcopy(models[0]) # construct inputs train_X = deepcopy(models[0].train_inputs[0]) train_Y = torch.stack([m.train_targets.clone() for m in models], dim=-1) kwargs = {"train_X": train_X, "train_Y": train_Y} if isinstance(models[0].likelihood, FixedNoiseGaussianLikelihood): kwargs["train_Yvar"] = torch.stack( [m.likelihood.noise_covar.noise.clone() for m in models], dim=-1 ) if isinstance(models[0], SingleTaskMultiFidelityGP): init_args = models[0]._init_args if not all( v == m._init_args[k] for m in models[1:] for k, v in init_args.items() ): raise UnsupportedError("All models must have the same fidelity parameters.") kwargs.update(init_args) # add batched kernel, except if the model type is SingleTaskMultiFidelityGP, # which does not have a `covar_module` if not isinstance(models[0], SingleTaskMultiFidelityGP): batch_length = len(models) covar_module = _batched_kernel(models[0].covar_module, batch_length) kwargs["covar_module"] = covar_module # SingleTaskGP uses a default outcome transforms while this converter doesn't # support outcome transforms. We need to explicitly pass down `None` to make # sure no outcome transform is being used. if isinstance(models[0], SingleTaskGP): kwargs["outcome_transform"] = None # construct the batched GP model input_transform = getattr(models[0], "input_transform", None) batch_gp = models[0].__class__(input_transform=input_transform, **kwargs) adjusted_batch_keys, non_adjusted_batch_keys = _get_adjusted_batch_keys( batch_state_dict=batch_gp.state_dict(), input_transform=input_transform ) input_batch_dims = len(models[0]._input_batch_shape) # ensure scalars agree (TODO: Allow different priors for different outputs) for n in non_adjusted_batch_keys: v0 = _get_module(models[0], n) if not all(torch.equal(_get_module(m, n), v0) for m in models[1:]): raise UnsupportedError("All scalars must have the same value.") # ensure dimensions of all tensors agree for n in adjusted_batch_keys: shape0 = _get_module(models[0], n).shape if not all(_get_module(m, n).shape == shape0 for m in models[1:]): raise UnsupportedError("All tensors must have the same shape.") # now construct the batched state dict non_adjusted_batch_state_dict = { s: p.clone() for s, p in models[0].state_dict().items() if s in non_adjusted_batch_keys } adjusted_batch_state_dict = { t: ( torch.stack( [m.state_dict()[t].clone() for m in models], dim=input_batch_dims ) if "active_dims" not in t else models[0].state_dict()[t].clone() ) for t in adjusted_batch_keys } batch_state_dict = {**non_adjusted_batch_state_dict, **adjusted_batch_state_dict} # load the state dict into the new model batch_gp.load_state_dict(batch_state_dict) return batch_gp.train(mode=was_training)
def _batched_kernel(kernel, batch_length: int): """Adds a batch dimension of size `batch_length` to all non-scalar Tensor parameters that govern the kernel function `kernel`. NOTE: prior or constraint parameters are excluded from batching. """ # copy just in case there are non-tensor parameters that are passed by reference kernel = deepcopy(kernel) search_str = "raw_outputscale" for key, attr in kernel.state_dict().items(): if isinstance(attr, Tensor) and ( attr.ndim > 0 or (search_str == key.rpartition(".")[-1]) ): attr = attr.unsqueeze(0).expand(batch_length, *attr.shape).clone() set_attribute(kernel, key, torch.nn.Parameter(attr)) return kernel # two helper functions for `batched_kernel` # like `setattr` and `getattr` for object hierarchies
[docs] def set_attribute(obj, attr: str, val): """Like `setattr` but works with hierarchical attribute specification. E.g. if obj=Zoo(), and attr="tiger.age", set_attribute(obj, attr, 3), would set the Zoo's tiger's age to three. """ path_to_leaf, _, attr_name = attr.rpartition(".") leaf = get_attribute(obj, path_to_leaf) if path_to_leaf else obj setattr(leaf, attr_name, val)
[docs] def get_attribute(obj, attr: str): """Like `getattr` but works with hierarchical attribute specification. E.g. if obj=Zoo(), and attr="tiger.age", get_attribute(obj, attr), would return the Zoo's tiger's age. """ attr_names = attr.split(".") while attr_names: obj = getattr(obj, attr_names.pop(0)) return obj
[docs] def batched_to_model_list(batch_model: BatchedMultiOutputGPyTorchModel) -> ModelListGP: """Convert a BatchedMultiOutputGPyTorchModel to a ModelListGP. Args: batch_model: The `BatchedMultiOutputGPyTorchModel` to be converted to a `ModelListGP`. Returns: The model converted into a `ModelListGP`. Example: >>> train_X = torch.rand(5, 2) >>> train_Y = torch.rand(5, 2) >>> batch_gp = SingleTaskGP(train_X, train_Y) >>> list_gp = batched_to_model_list(batch_gp) """ warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning, stacklevel=2) was_training = batch_model.training batch_model.train() if isinstance(batch_model, MixedSingleTaskGP): raise NotImplementedError( "Conversion of MixedSingleTaskGP is currently not supported." ) input_transform = getattr(batch_model, "input_transform", None) outcome_transform = getattr(batch_model, "outcome_transform", None) batch_sd = batch_model.state_dict() adjusted_batch_keys, non_adjusted_batch_keys = _get_adjusted_batch_keys( batch_state_dict=batch_sd, input_transform=input_transform, outcome_transform=outcome_transform, ) input_bdims = len(batch_model._input_batch_shape) models = [] for i in range(batch_model._num_outputs): non_adjusted_batch_sd = { s: batch_sd[s].clone() for s in non_adjusted_batch_keys } adjusted_batch_sd = { t: ( batch_sd[t].select(input_bdims, i).clone() if "active_dims" not in t else batch_sd[t].clone() ) for t in adjusted_batch_keys } sd = {**non_adjusted_batch_sd, **adjusted_batch_sd} kwargs = { "train_X": batch_model.train_inputs[0].select(input_bdims, i).clone(), "train_Y": batch_model.train_targets.select(input_bdims, i) .clone() .unsqueeze(-1), } if isinstance(batch_model.likelihood, FixedNoiseGaussianLikelihood): noise_covar = batch_model.likelihood.noise_covar kwargs["train_Yvar"] = ( noise_covar.noise.select(input_bdims, i).clone().unsqueeze(-1) ) if isinstance(batch_model, SingleTaskMultiFidelityGP): kwargs.update(batch_model._init_args) # NOTE: Adding outcome transform to kwargs to avoid the multiple # values for same kwarg issue with SingleTaskMultiFidelityGP. if outcome_transform is not None: octf = outcome_transform.subset_output(idcs=[i]) kwargs["outcome_transform"] = octf # Update the outcome transform state dict entries. sd = { **sd, **{"outcome_transform." + k: v for k, v in octf.state_dict().items()}, } else: kwargs["outcome_transform"] = None model = batch_model.__class__(input_transform=input_transform, **kwargs) model.load_state_dict(sd) models.append(model) return ModelListGP(*models).train(mode=was_training)
[docs] def batched_multi_output_to_single_output( batch_mo_model: BatchedMultiOutputGPyTorchModel, ) -> BatchedMultiOutputGPyTorchModel: """Convert a model from batched multi-output to a batched single-output. Note: the underlying GPyTorch GP does not change. The GPyTorch GP's batch_shape (referred to as `_aug_batch_shape`) is still `_input_batch_shape x num_outputs`. The only things that change are the attributes of the BatchedMultiOutputGPyTorchModel that are responsible the internal accounting of the number of outputs: namely, num_outputs, _input_batch_shape, and _aug_batch_shape. Initially for the batched MO models these are: `num_outputs = m`, `_input_batch_shape = train_X.batch_shape`, and `_aug_batch_shape = train_X.batch_shape + torch.Size([num_outputs])`. In the new SO model, these are: `num_outputs = 1`, `_input_batch_shape = train_X.batch_shape + torch.Size([num_outputs])`, and `_aug_batch_shape = train_X.batch_shape + torch.Size([num_outputs])`. This is a (hopefully) temporary measure until multi-output MVNs with independent outputs have better support in GPyTorch (see https://github.com/cornellius-gp/gpytorch/pull/1083). Args: batched_mo_model: The BatchedMultiOutputGPyTorchModel Returns: The model converted into a batch single-output model. Example: >>> train_X = torch.rand(5, 2) >>> train_Y = torch.rand(5, 2) >>> batch_mo_gp = SingleTaskGP(train_X, train_Y, outcome_transform=None) >>> batch_so_gp = batched_multi_output_to_single_output(batch_mo_gp) """ warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning, stacklevel=2) was_training = batch_mo_model.training batch_mo_model.train() if not isinstance(batch_mo_model, BatchedMultiOutputGPyTorchModel): raise UnsupportedError("Only BatchedMultiOutputGPyTorchModels are supported.") # TODO: Add support for custom likelihoods. elif getattr(batch_mo_model, "_is_custom_likelihood", False): raise NotImplementedError( "Conversion of models with custom likelihoods is currently unsupported." ) input_transform = getattr(batch_mo_model, "input_transform", None) batch_sd = batch_mo_model.state_dict() # TODO: add support for outcome transforms. if hasattr(batch_mo_model, "outcome_transform"): raise NotImplementedError( "Converting batched multi-output models with outcome transforms " "is not currently supported." ) kwargs = { "train_X": batch_mo_model.train_inputs[0].clone(), "train_Y": batch_mo_model.train_targets.clone().unsqueeze(-1), } if isinstance(batch_mo_model.likelihood, FixedNoiseGaussianLikelihood): noise_covar = batch_mo_model.likelihood.noise_covar kwargs["train_Yvar"] = noise_covar.noise.clone().unsqueeze(-1) if isinstance(batch_mo_model, SingleTaskMultiFidelityGP): kwargs.update(batch_mo_model._init_args) # SingleTaskGP uses a default outcome transforms while this converter doesn't # support outcome transforms. We need to explicitly pass down `None` to make # sure no outcome transform is being used. if isinstance(batch_mo_model, SingleTaskGP): kwargs["outcome_transform"] = None single_outcome_model = batch_mo_model.__class__( input_transform=input_transform, **kwargs ) single_outcome_model.load_state_dict(batch_sd) return single_outcome_model.train(mode=was_training)
def _get_adjusted_batch_keys( batch_state_dict: dict[str, Tensor], input_transform: InputTransform | None, outcome_transform: OutcomeTransform | None = None, ) -> tuple[set[str], set[str]]: r"""Group the keys based on whether the value requires batch shape changes. Args: batch_state_dict: The state dict of the batch model. input_transform: The input transform. outcome_transform: The outcome transform. Returns: A two-element tuple containing: - The keys of the parameters/buffers that require a batch shape adjustment. - The keys of the parameters/buffers that do not require a batch shape adjustment. """ # These are the names of the params/buffers that need their batch shape adjusted. adjusted_batch_keys = {n for n, p in batch_state_dict.items() if len(p.shape) > 0} # Don't modify transform buffers, so add them to non-adjusted set and remove # them from tensors. for transform, transform_type in [ (input_transform, "input_transform."), (outcome_transform, "outcome_transform."), ]: if transform is not None: transform_keys = { transform_type + n for n, p in transform.state_dict().items() } adjusted_batch_keys = adjusted_batch_keys - transform_keys # These are the names of the parameters/buffers that don't need their # batch shape adjusted. non_adjusted_batch_keys = set(batch_state_dict) - adjusted_batch_keys return adjusted_batch_keys, non_adjusted_batch_keys