Source code for tfep.nn.flows.maf

#!/usr/bin/env python


# =============================================================================
# MODULE DOCSTRING
# =============================================================================

"""
Masked autoregressive flow layer for PyTorch.
"""


# =============================================================================
# GLOBAL IMPORTS
# =============================================================================

from collections.abc import Sequence
from typing import Optional, Union

import torch

from tfep.nn.conditioners.made import MADE
from tfep.nn.embeddings.mafembed import MAFEmbedding
from tfep.nn.flows.autoregressive import AutoregressiveFlow
from tfep.nn.transformers.affine import AffineTransformer
from tfep.utils.misc import ensure_tensor_sequence


# =============================================================================
# MAF
# =============================================================================

class MAF(AutoregressiveFlow):
    """Masked Autoregressive Flow.

    An autoregressive flow whose conditioner is a
    :class:`tfep.nn.conditioners.MADE` [1] network. Any transformer can be
    plugged in. With the :class:`AffineTransformer`, the layer is equivalent
    to MAF and IAF [2-3], which differ only in the direction of the
    conditional dependence, i.e., in which of the forward and inverse
    evaluations is the fast one.

    See Also
    --------
    :class:`tfep.nn.conditioners.MADE` : The autoregressive layer used as
        conditioner.

    References
    ----------
    [1] Germain M, Gregor K, Murray I, Larochelle H. Made: Masked autoencoder
        for distribution estimation. In International Conference on Machine
        Learning 2015 Jun 1 (pp. 881-889).
    [2] Kingma DP, Salimans T, Jozefowicz R, Chen X, Sutskever I, Welling M.
        Improved variational inference with inverse autoregressive flow.
        In Advances in neural information processing systems 2016 (pp. 4743-4751).
    [3] Papamakarios G, Pavlakou T, Murray I. Masked autoregressive flow for
        density estimation. In Advances in Neural Information Processing
        Systems 2017 (pp. 2338-2347).

    Examples
    --------

    A masked autoregressive flow using a linear transformer. Two MAF layers
    are stacked with inverted dependencies between the inputs, which is a
    standard strategy in autoregressive flows to ensure every output depends
    on every input.

    >>> from tfep.nn.conditioners.made import generate_degrees
    >>> flow = torch.nn.Sequential(
    ...     MAF(degrees_in=generate_degrees(n_features=5, order='ascending')),
    ...     MAF(degrees_in=generate_degrees(n_features=5, order='descending')),
    ... )

    Several inputs can share the same degree. Inputs can also be marked as
    "conditioning" (here, the first 3) by assigning them degree -1: they
    affect all outputs but are not mapped.

    >>> maf = MAF(degrees_in=[-1, -1, -1, 0, 0, 1, 2])

    """

    def __init__(
            self,
            degrees_in: Sequence[int],
            transformer: Optional[torch.nn.Module] = None,
            hidden_layers: Union[int, Sequence[int], Sequence[Sequence[int]]] = 2,
            embedding: Optional[MAFEmbedding] = None,
            weight_norm: bool = True,
            initialize_identity: bool = True,
    ):
        """Constructor.

        Parameters
        ----------
        degrees_in : Sequence[int]
            Shape: ``(n_inputs,)``. ``degrees_in[i]`` is the degree assigned
            to the ``i``-th input. The degrees must take consecutive values
            starting from 0 or -1. Input features with degree -1 are treated
            as "conditioning": they affect the output without being mapped.
        transformer : torch.nn.Module or None, optional
            The transformer used to map the input features. When ``None``
            (default), an
            :class:`tfep.nn.transformers.affine.AffineTransformer` is created.
        hidden_layers : Union[int, Sequence[int], Sequence[Sequence[int]]], optional
            If an integer, this is the number of hidden layers. In this case,
            the number of nodes in each layer is set to
            ``max(n_inputs, ceil((n_inputs * n_outputs)**0.5))`` where
            ``n_inputs`` is the number of input features that affect the
            output, and ``n_outputs`` is the number of output features.

            If a sequence of integers, ``hidden_layers[l]`` is the number of
            nodes in the ``l``-th hidden layer. The degrees of each node are
            assigned in a round-robin fashion by tiling ``degrees_in`` until
            the requested number of nodes is covered.

            Otherwise, ``hidden_layers[l][i]`` is the degree assigned to the
            ``i``-th node of the ``l``-th hidden layer.

            Default is 2.
        embedding : MAFEmbedding or None, optional
            If given, the conditioner input features are first passed through
            this layer and its output is what is fed to the conditioner.
        weight_norm : bool, optional
            If ``True``, weight normalization is applied to the masked linear
            modules. Default is ``True``.
        initialize_identity : bool, optional
            If ``True``, the parameters are initialized in such a way that
            the flow initially performs the identity function.

        Raises
        ------
        ValueError
            If ``degrees_in`` does not take consecutive values starting from
            0 (or -1).

        """
        # Fall back to an affine transformer when none is provided.
        if transformer is None:
            transformer = AffineTransformer()

        # Work with a tensor from here on so that tensor operations apply.
        degrees_in = ensure_tensor_sequence(degrees_in)

        # Validate the degrees: every value between the smallest and the
        # largest must be present, and the smallest must be 0 or -1.
        lowest_degree = degrees_in.min().item()
        highest_degree = degrees_in.max().item()
        expected_degrees = set(range(lowest_degree, highest_degree + 1))
        is_consecutive = set(degrees_in.tolist()) == expected_degrees
        starts_correctly = lowest_degree in {-1, 0}
        if not (is_consecutive and starts_correctly):
            raise ValueError('degrees_in must assume consecutive values starting '
                             'from 0 (or -1 for conditioning input features).')

        # Degrees of the features as seen by the conditioner, i.e., after the
        # (optional) embedding has been applied.
        degrees_in_embedded = (
            degrees_in if embedding is None
            else embedding.get_degrees_out(degrees_in)
        )

        # Group the input indices by degree, from degree 0 upward. This is
        # the order in which they need to be evaluated during the inverse.
        # Conditioning features (degree -1) are never included.
        transformer_indices = []
        for degree in range(highest_degree + 1):
            transformer_indices.append((degrees_in == degree).nonzero().flatten())

        # Output degrees are needed only for the transformed (non-conditioning)
        # inputs.
        mapped_degrees = degrees_in[degrees_in != -1]
        degrees_out = transformer.get_degrees_out(mapped_degrees)

        # The conditioner applies the embedding (if any) before the MADE.
        conditioner = _EmbeddedMADE(
            embedding=embedding,
            degrees_in=degrees_in_embedded,
            degrees_out=degrees_out,
            hidden_layers=hidden_layers,
            weight_norm=weight_norm,
        )

        # Initialize parent class.
        super().__init__(
            n_features_in=len(degrees_in),
            transformer_indices=transformer_indices,
            conditioner=conditioner,
            transformer=transformer,
            initialize_identity=initialize_identity,
        )

        self._embedding = embedding

    def n_parameters(self) -> int:
        """Return the number of (unmasked) parameters reported by the conditioner."""
        conditioner = self._conditioner
        return conditioner.n_parameters()
# =============================================================================
# HELPER CLASS FOR EMBEDDINGS
# =============================================================================

class _EmbeddedMADE(MADE):
    """A MADE conditioner whose input is optionally passed through an embedding.

    The first constructor argument is the embedding layer (or ``None``); all
    remaining positional and keyword arguments are forwarded unchanged to
    ``MADE.__init__``.

    """

    def __init__(self, embedding, *args, **kwargs):
        """Build the underlying MADE, then store the embedding layer."""
        # The parent constructor runs first; the embedding is attached after.
        super().__init__(*args, **kwargs)
        self.embedding = embedding

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the embedding (when one was given) and then the MADE network."""
        conditioner_input = x if self.embedding is None else self.embedding(x)
        return super().forward(conditioner_input)