Source code for tfep.nn.flows.maf

#!/usr/bin/env python


# =============================================================================
# MODULE DOCSTRING
# =============================================================================

"""
Masked autoregressive flow layer for PyTorch.
"""


# =============================================================================
# GLOBAL IMPORTS
# =============================================================================

from collections.abc import Sequence
from typing import Optional, Union

import torch

from tfep.nn.conditioners.made import MADE
from tfep.nn.embeddings.mafembed import MAFEmbedding
from tfep.nn.flows.autoregressive import AutoregressiveFlow
from tfep.nn.transformers.affine import AffineTransformer
from tfep.utils.misc import ensure_tensor_sequence


# =============================================================================
# MAF
# =============================================================================


[docs]
class MAF(AutoregressiveFlow):
    """Masked Autoregressive Flow.

    This implements an autoregressive flow in which the :class:`tfep.nn.conditioners.MADE` [1]
    network is used for the conditioner. The class supports arbitrary
    transformers; when none is given, an
    :class:`tfep.nn.transformers.affine.AffineTransformer` is used.

    When the transformer is the
    :class:`~tfep.nn.transformers.affine.AffineTransformer`, this is
    equivalent to MAF and IAF [2-3]. These two differ only in the direction
    of the conditional dependence, which effectively determines whether the
    forward or the inverse evaluation is faster.

    See Also
    --------
    :class:`tfep.nn.conditioners.MADE` : The autoregressive layer used as conditioner.

    References
    ----------
    [1] Germain M, Gregor K, Murray I, Larochelle H. Made: Masked autoencoder
        for distribution estimation. In International Conference on Machine
        Learning 2015 Jun 1 (pp. 881-889).
    [2] Kingma DP, Salimans T, Jozefowicz R, Chen X, Sutskever I, Welling M.
        Improved variational inference with inverse autoregressive flow.
        In Advances in neural information processing systems 2016 (pp. 4743-4751).
    [3] Papamakarios G, Pavlakou T, Murray I. Masked autoregressive flow for
        density estimation. In Advances in Neural Information Processing
        Systems 2017 (pp. 2338-2347).

    Examples
    --------

    A masked autoregressive flow using the default (affine) transformer. Two MAF
    layers with opposite orderings of the input dependencies are stacked. This
    is a standard strategy in autoregressive flows to ensure every output
    depends on every input.

    >>> from tfep.nn.conditioners.made import generate_degrees
    >>> flow = torch.nn.Sequential(
    ...     MAF(degrees_in=generate_degrees(n_features=5, order='ascending')),
    ...     MAF(degrees_in=generate_degrees(n_features=5, order='descending')),
    ... )

    Multiple inputs can be assigned the same degree. Further, inputs assigned
    degree -1 (in this case, the first 3) are treated as "conditioning" inputs:
    they affect all outputs but are not themselves mapped.

    >>> maf = MAF(degrees_in=[-1, -1, -1, 0, 0, 1, 2])

    """


[docs]
    def __init__(
            self,
            degrees_in: Sequence[int],
            transformer: Optional[torch.nn.Module] = None,
            hidden_layers: Union[int, Sequence[int], Sequence[Sequence[int]]] = 2,
            embedding: Optional[MAFEmbedding] = None,
            weight_norm: bool = True,
            initialize_identity: bool = True,
    ):
        """Constructor.

        Parameters
        ----------
        degrees_in : Sequence[int]
            Shape: ``(n_inputs,)``. ``degrees_in[i]`` is the degree assigned to
            the ``i``-th input. The degrees must take consecutive values, the
            smallest of which is either 0 or -1. Input features with degree -1
            are "conditioning" features: they affect the output without being
            mapped.
        transformer : torch.nn.Module or None, optional
            The transformer used to map the input features. If ``None``
            (default), a :class:`tfep.nn.transformers.affine.AffineTransformer`
            is created.
        hidden_layers : Union[int, Sequence[int], Sequence[Sequence[int]]], optional
            If an integer, this is the number of hidden layers. In this case,
            the number of nodes in each layer is set to
            ``max(n_inputs, ceil((n_inputs * n_outputs)**0.5))``
            where ``n_inputs`` is the number of input features that affect the
            output, and ``n_outputs`` is the number of output features.

            If a sequence of integers, ``hidden_layers[l]`` is the number of
            nodes in the ``l``-th hidden layer. The degrees of each node are
            assigned in a round-robin fashion by tiling ``degrees_in`` until
            the requested number of nodes is covered.

            Otherwise (a sequence of sequences), ``hidden_layers[l][i]`` is the
            degree assigned to the ``i``-th node of the ``l``-th hidden layer.

            Default is 2.
        embedding : MAFEmbedding or None, optional
            If present, the conditioner input features are first passed to this
            layer, and its output is then fed to the MADE conditioner.
        weight_norm : bool, optional
            If ``True``, weight normalization is applied to the masked linear
            modules. Default is ``True``.
        initialize_identity : bool, optional
            If ``True``, the parameters are initialized in such a way that
            the flow initially performs the identity function. Default is
            ``True``.

        Raises
        ------
        ValueError
            If the values in ``degrees_in`` are not consecutive or if the
            smallest one is neither 0 nor -1.

        """
        # Fall back to the affine transformer when none is supplied.
        transformer = AffineTransformer() if transformer is None else transformer

        # Work with a tensor from here on so that comparisons and masking
        # below can be expressed uniformly.
        degrees_in = ensure_tensor_sequence(degrees_in)

        # Validate the degrees: no gaps allowed, and the lowest must be 0 or -1.
        lowest_degree = degrees_in.min().tolist()
        highest_degree = degrees_in.max().tolist()
        has_gaps = set(degrees_in.tolist()) != set(range(lowest_degree, highest_degree + 1))
        if has_gaps or lowest_degree not in {-1, 0}:
            raise ValueError('degrees_in must assume consecutive values starting '
                             'from 0 (or -1 for conditioning input features).')

        # Degrees seen by the conditioner: those of the embedding output when
        # an embedding is used, the raw input degrees otherwise.
        conditioner_degrees_in = (
            degrees_in if embedding is None else embedding.get_degrees_out(degrees_in)
        )

        # Indices of the mapped inputs grouped by degree, in the order in which
        # they must be evaluated during the inverse. Starting the range at 0
        # leaves out the conditioning (-1) inputs.
        transformer_indices = [
            (degrees_in == current_degree).nonzero().flatten()
            for current_degree in range(highest_degree + 1)
        ]

        # Output degrees are needed only for the inputs that are transformed.
        is_mapped = degrees_in != -1
        conditioner_degrees_out = transformer.get_degrees_out(degrees_in[is_mapped])

        # MADE conditioner that applies the (optional) embedding to its input.
        conditioner = _EmbeddedMADE(
            embedding=embedding,
            degrees_in=conditioner_degrees_in,
            degrees_out=conditioner_degrees_out,
            hidden_layers=hidden_layers,
            weight_norm=weight_norm,
        )

        # Initialize parent class.
        super().__init__(
            n_features_in=len(degrees_in),
            transformer_indices=transformer_indices,
            conditioner=conditioner,
            transformer=transformer,
            initialize_identity=initialize_identity,
        )

        self._embedding = embedding



[docs]
    def n_parameters(self) -> int:
        """Return the total number of (unmasked) parameters.

        The count is delegated to the conditioner's ``n_parameters()``.
        """
        conditioner = self._conditioner
        return conditioner.n_parameters()




# =============================================================================
# HELPER CLASS FOR EMBEDDINGS
# =============================================================================

class _EmbeddedMADE(MADE):
    """MADE conditioner that optionally embeds its input features first.

    The first constructor argument is the embedding layer (or ``None``); all
    remaining positional and keyword arguments are forwarded unchanged to
    ``MADE.__init__``.
    """

    def __init__(self, embedding, *args, **kwargs):
        # The parent must be initialized before the embedding is attached.
        super().__init__(*args, **kwargs)
        self.embedding = embedding

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run MADE on ``x``, applying the embedding beforehand if one is set."""
        if self.embedding is None:
            return super().forward(x)
        embedded = self.embedding(x)
        return super().forward(embedded)