Source code for tfep.nn.flows.pca

#!/usr/bin/env python


# =============================================================================
# MODULE DOCSTRING
# =============================================================================

"""
Normalizing flow transforming to and from PCA-whitened space.
"""


# =============================================================================
# GLOBAL IMPORTS
# =============================================================================

import torch

import tfep.utils.math


# =============================================================================
# PCA WHITENED FLOW
# =============================================================================


[docs]
class PCAWhitenedFlow(torch.nn.Module):
    """Normalizing flow transforming to and from PCA-whitened space.

    The layer wraps a normalizing flow, passes to it PCA-whitened coordinates,
    and finally (optionally) blackens the output to return to the original
    space.

    The PCA-whitening matrix is estimated from data that is passed on
    initialization.

    Parameters
    ----------
    flow : torch.nn.Module
        The wrapped normalizing flow. Both calling it and calling its
        ``inverse`` method must return a pair ``(y, log_det_J)``.
    x : torch.Tensor
        A tensor of shape (n_samples, n_features) which is used to estimate mean
        and covariance matrix of the coordinates and compute the PCA matrix.
    blacken : bool, optional
        If ``False``, the output coordinates are not blackened by inverting the
        PCA whitening transformation. In this case ``forward`` returns
        coordinates in the whitened space and ``inverse`` takes its input
        directly in the whitened space (no whitening is applied to it).

    Raises
    ------
    ValueError
        If any eigenvalue of the estimated covariance matrix is not strictly
        positive, in which case the whitening matrix is not well defined.

    """

    def __init__(self, flow, x, blacken=True):
        super().__init__()
        self.flow = flow
        self.blacken = blacken

        # We don't need to keep track of the graph for backpropagation here.
        x = x.detach()

        # Compute mean and covariance.
        cov, mean = tfep.utils.math.cov(x, return_mean=True)

        # Compute eigenvalues/vectors and singular values.
        eigvalues, eigvectors = torch.linalg.eigh(cov)

        # Whitening divides by the square root of the eigenvalues and the
        # log-determinant takes their logarithm, so zero (and NaN) eigenvalues
        # must be rejected as well as negative ones; otherwise the buffers
        # would silently be filled with inf/NaN.
        if not torch.all(eigvalues > 0.0):
            raise ValueError(
                'Cannot determine the PCA whitening matrix since some of the '
                'eigenvalues of the covariance matrix estimate are not '
                'strictly positive. Likely, this is due to an insufficient '
                'number of samples.')
        singular_values = torch.sqrt(eigvalues)

        # Whitening matrix and its inverse (the blackening matrix). Their
        # product is the identity because the eigenvectors are orthonormal.
        whitening_matrix = torch.matmul(eigvectors, torch.diag(1. / singular_values))
        blackening_matrix = torch.matmul(torch.diag(singular_values), eigvectors.t())

        # The jacobian determinant of the whitening transformation
        # is just the product of the inverse singular values.
        whitening_log_det_J = -torch.sum(torch.log(singular_values))

        # Register various tensors as buffers so that PyTorch will automatically
        # save them and restore them with state_dict.
        self.register_buffer('mean', mean)
        self.register_buffer('whitening_matrix', whitening_matrix)
        self.register_buffer('blackening_matrix', blackening_matrix)
        self.register_buffer('whitening_log_det_J', whitening_log_det_J)

    def n_parameters(self):
        """int: The total number of parameters that can be optimized.

        This is delegated to the ``n_parameters()`` method of the wrapped flow.
        """
        return self.flow.n_parameters()

    def forward(self, x):
        """Whiten ``x``, run the wrapped flow, and optionally blacken the result.

        Parameters
        ----------
        x : torch.Tensor
            Input coordinates in the original space. The last dimension must
            be compatible with the whitening matrix.

        Returns
        -------
        y : torch.Tensor
            The transformed coordinates, in the original space if ``blacken``
            is ``True`` and in the whitened space otherwise.
        log_det_J : torch.Tensor
            The log-determinant returned by the wrapped flow, corrected by the
            whitening log-determinant when the output is not blackened.

        """
        return self._pass(x, inverse=False)

    def inverse(self, y):
        """Run the inverse of the wrapped flow and blacken the result.

        Parameters
        ----------
        y : torch.Tensor
            Input coordinates, in the original space if ``blacken`` is ``True``
            (they are whitened first) and in the whitened space otherwise.

        Returns
        -------
        x : torch.Tensor
            The transformed coordinates in the original space.
        log_det_J : torch.Tensor
            The log-determinant returned by the inverse of the wrapped flow,
            corrected by the blackening log-determinant when the input was not
            whitened.

        """
        return self._pass(y, inverse=True)

    def _whiten(self, x):
        """Center ``x`` on the stored mean and apply the whitening matrix."""
        return torch.matmul(x - self.mean, self.whitening_matrix)

    def _blacken(self, x):
        """Apply the blackening matrix to ``x`` and add back the stored mean."""
        return torch.matmul(x, self.blackening_matrix) + self.mean

    def _pass(self, x, inverse):
        """Shared implementation of ``forward`` and ``inverse``.

        Parameters
        ----------
        x : torch.Tensor
            The input coordinates.
        inverse : bool
            If ``True`` the ``inverse`` method of the wrapped flow is used,
            otherwise the wrapped flow is called directly.

        Returns
        -------
        y : torch.Tensor
            The output coordinates.
        log_det_J : torch.Tensor
            The log-determinant of the whole transformation.

        """
        # Check whether we need to whiten and/or blacken the features. The
        # forward pass always whitens and the inverse pass always blackens;
        # the other step is performed only if self.blacken is set.
        whiten = not inverse or self.blacken
        blacken = inverse or self.blacken

        # Whiten before going through the encapsulated flow.
        if whiten:
            x = self._whiten(x)

        # Run the encapsulated flow.
        if inverse:
            y, log_det_J = self.flow.inverse(x)
        else:
            y, log_det_J = self.flow(x)

        # Blacken the feature before returning the output.
        if blacken:
            y = self._blacken(y)

        # If we perform only one between whitening/blackening, the two
        # jacobians don't cancel each other out. At least one of the two
        # steps is always performed, so these are the only unbalanced cases.
        if whiten and not blacken:
            log_det_J = log_det_J + self.whitening_log_det_J
        elif blacken and not whiten:
            log_det_J = log_det_J - self.whitening_log_det_J

        return y, log_det_J