Source code for tfep.potentials.ase

#!/usr/bin/env python

# =============================================================================
# MODULE DOCSTRING
# =============================================================================

"""
Modules and functions to compute QM energies and gradients with ASE.

The function/classes in this module wrap an Atomistic Simulation Environment
(ASE) ``Calculator``s and make them compatible with PyTorch.

See Also
--------
ASE: https://wiki.fysik.dtu.dk/ase/index.html

"""


# =============================================================================
# GLOBAL IMPORTS
# =============================================================================

# DO NOT IMPORT ASE HERE! ASE is an optional dependency of tfep.
import copy
import functools

import numpy as np
import torch

from tfep.utils.misc import (
    atom_to_flattened, flattened_to_atom,
    energies_array_to_tensor, forces_array_to_tensor
)
from tfep.utils.parallel import SerialStrategy
from tfep.potentials.base import PotentialBase


# =============================================================================
# TORCH MODULE API
# =============================================================================

[docs] class ASEPotential(PotentialBase): """Potential energy and forces with ASE. This ``Module`` wraps :class:``.ASEPotentialEnergyFunc`` to provide a differentiable potential energy function for training. .. warning:: Currently double-backpropagation is not supported, which means force matching cannot be performed during training. """ #: The default energy unit. DEFAULT_ENERGY_UNIT : str = 'eV' #: The default positions unit. DEFAULT_POSITIONS_UNIT : str = 'angstrom'
[docs] def __init__( self, calculator, symbols=None, numbers=None, pbc=None, positions_unit=None, energy_unit=None, parallelization_strategy=None, **atoms_kwargs, ): """Constructor. Parameters ---------- calculator : ase.calculators.calculator.Calculator The ASE calculator used to compute energies and forces. symbols : str or List[str] The symbols of the atoms elements used to initialize the ``ase.Atoms`` object. It can be a string formula, a list of symbols, or a list of ``ase.Atom`` objects. Examples: ``'H2O'``, ``'COPt12'``, ``['H', 'H', 'O']``, ``[Atom('Ne', (x, y, z)), ...]``. numbers: List[int] Atomic numbers (use only one between symbols and numbers). pbc: bool or three bool Periodic boundary conditions flags. Examples: ``True``, ``False``, ``0``, ``1``, ``(1, 1, 0)``, ``(True, False, False)``. positions_unit : pint.Unit, optional The unit of the positions passed to the class methods. Since input ``Tensor``s do not have units attached, this is used to appropriately convert ``batch_positions`` to ASE units. If ``None``, no conversion is performed, which assumes that the input positions are in the same units used by ASE. energy_unit : pint.Unit, optional The unit used for the returned energies (and as a consequence forces). Since ``Tensor``s do not have units attached, this is used to appropriately convert ASE energies into the desired units. If ``None`` is performed, which means that energies and forces will be returned in ASE units. parallelization_strategy : tfep.utils.parallel.ParallelizationStrategy, optional The parallelization strategy used to distribute batches of energy and gradient calculations. By default, these are executed serially. **atoms_kwargs Other keyword arguments for ``ase.Atoms``. See Also -------- :class:`.ASEPotentialEnergyFunc` More details on input parameters and implementation details. """ from ase import Atoms super().__init__(positions_unit=positions_unit, energy_unit=energy_unit) # Handle mutable default arguments. if parallelization_strategy is None: parallelization_strategy = SerialStrategy() self.atoms = Atoms( symbols=symbols, numbers=numbers, pbc=pbc, calculator=calculator, **atoms_kwargs, ) self.parallelization_strategy = parallelization_strategy
[docs] def forward(self, batch_positions, batch_cell=None): """Compute a differential potential energy for a batch of configurations. Parameters ---------- batch_positions : torch.Tensor Shape ``(batch_size, 3*n_atoms)``. The atoms positions in units of ``self.positions_unit``. batch_cell : torch.Tensor, optional Shape ``(batch_size, 3, 3)`` or ``(batch_size, 3)`` or ``(batch_size, 6)``. Unit cell vectors. Can also be given as just three numbers for orthorhombic cells, or 6 numbers, where first three are lengths of unit cell vectors (in units of ``self.positions_unit``, and the other three are angles between them (in degrees), in following order: ``[len(a), len(b), len(c), angle(b,c), angle(a,c), angle(a,b)]``. First vector will lie in x-direction, second in xy-plane, and the third one in z-positive subspace. Returns ------- potential_energy : torch.Tensor ``potential_energy[i]`` is the potential energy of configuration ``batch_positions[i]`` in units of ``self.energy_unit``. """ return ase_potential_energy( batch_positions, atoms=self.atoms, batch_cell=batch_cell, positions_unit=self._positions_unit, energy_unit=self._energy_unit, parallelization_strategy=self.parallelization_strategy, )
# ============================================================================= # TORCH FUNCTIONAL API # =============================================================================
[docs] class ASEPotentialEnergyFunc(torch.autograd.Function): """PyTorch-differentiable potential energy using ASE. This wraps an ASE calculator to perform batchwise energy and forces calculation used for the forward pass and backpropagation. .. warning:: Currently double-backpropagation is not supported, which means force matching cannot be performed during training. By default, the perform the batch of energy/gradient calculations serially. This scheme is, however, not embarassingly parallel. Thus, the module supports batch parallelization schemes through :class:``tfep.utils.parallel.ParallelizationStrategy``s. Parameters ---------- ctx : torch.autograd.function._ContextMethodMixin A context to save information for the gradient. batch_positions : torch.Tensor Shape ``(batch_size, 3*n_atoms)``. The atoms positions in units of ``positions_unit`` (or ASE units if ``positions_unit`` is not provided). atoms : ase.Atoms or list[ase.Atoms] The ASE ``Atoms`` object containing the ASE calculator. batch_cell : None or torch.Tensor Shape ``(batch_size, 3, 3)`` or ``(batch_size, 3)`` or ``(batch_size, 6)``. Unit cell vectors. Can also be given as just three numbers for orthorhombic cells, or 6 numbers, where first three are lengths of unit cell vectors (in units of ``positions_unit`` or ASE units if ``positions_unit`` is not provided), and the other three are angles between them (in degrees), in following order: ``[len(a), len(b), len(c), angle(b,c), angle(a,c), angle(a,b)]``. First vector will lie in x-direction, second in xy-plane, and the third one in z-positive subspace. positions_unit : pint.Unit, optional The unit of the positions passed. This is used to appropriately convert ``batch_positions`` to ASE units. If ``None``, no conversion is performed, which assumes that the input positions are in the same units used by ASE. energy_unit : pint.Unit, optional The unit used for the returned energies (and as a consequence forces). This is used to appropriately convert ASE energies into the desired units. If ``None``, no conversion is performed, which means that energies and forces will use ASE units. parallelization_strategy : tfep.utils.parallel.ParallelizationStrategy, optional The parallelization strategy used to distribute batches of energy and gradient calculations. By default, these are executed serially. Returns ------- potentials : torch.Tensor ``potentials[i]`` is the potential energy of configuration ``batch_positions[i]``. See Also -------- :class:`.ASEPotential` ``Module`` API for computing potential energies with ASE. """
[docs] @staticmethod def forward( ctx, batch_positions, atoms, batch_cell=None, positions_unit=None, energy_unit=None, parallelization_strategy=None, ): """Compute the potential energy of the molecule with ASE.""" # Handle mutable default arguments. if parallelization_strategy is None: parallelization_strategy = SerialStrategy() # Convert tensor to numpy array with shape (batch_size, n_atoms, 3) and in ASE units (angstrom). batch_positions_arr_ang = flattened_to_atom(batch_positions.detach().cpu().numpy()) if positions_unit is not None: batch_positions_arr_ang = _to_ase_units(batch_positions_arr_ang, positions_unit) # We need to convert also the cells in Angstroms. if batch_cell is not None: batch_cell_arr_ang = batch_cell.detach().cpu().numpy() if positions_unit is not None: if batch_cell.shape[1:] == (6,): # The last 3 entires of each batch cell are angles, not lengths. batch_cell_arr_ang[:, :3] = _to_ase_units(batch_cell_arr_ang[:, :3], positions_unit) else: # All entries of batch_cells are lengths. batch_cell_arr_ang = _to_ase_units(batch_cell_arr_ang, positions_unit) # We use functools.partial to encode the arguments that are common to all tasks. if batch_cell is None: task = functools.partial( _get_potential_energy_task, atoms, batch_cell) distributed_args = zip(batch_positions_arr_ang) else: task = functools.partial( _get_potential_energy_task, atoms) distributed_args = zip(batch_cell_arr_ang, batch_positions_arr_ang) # Run all batches with the provided parallelization strategy. batch_results = parallelization_strategy.run(task, distributed_args) # Unpack the results. From [(atoms, energy), ...] to ([atoms, ...], [energy, ...]). batch_atoms, energies = list(zip(*batch_results)) # Convert energies to unitless tensors. if energy_unit is None: energies = torch.tensor(energies) else: energies *= ASEPotential.default_energy_unit(energy_unit._REGISTRY) energies = energies_array_to_tensor(energies, energy_unit) energies = energies.to(batch_positions) # Save the Atoms objects with the results of the calculation (including # forces) for backward propagation. ctx.batch_atoms = batch_atoms ctx.energy_unit = energy_unit ctx.positions_unit = positions_unit ctx.parallelization_strategy = parallelization_strategy return energies
[docs] @staticmethod def backward(ctx, grad_output): """Compute the gradient of the potential energy.""" # We still need to return a None gradient for each # input of forward() beside batch_positions. n_input_args = 6 grad_input = [None for _ in range(n_input_args)] # Compute gradient w.r.t. batch_positions. if ctx.needs_input_grad[0]: # The Atoms object should already have precomputed forces stored. distributed_args = zip(ctx.batch_atoms) forces = ctx.parallelization_strategy.run(_get_forces_task, distributed_args) forces = np.stack(forces) # Convert to unitless tensors. if (ctx.energy_unit is None) and (ctx.positions_unit is None): forces = torch.from_numpy(atom_to_flattened(forces)) else: ureg = ctx.energy_unit._REGISTRY default_positions_unit = ASEPotential.default_positions_unit(ureg) default_energy_unit = ASEPotential.default_energy_unit(ureg) forces *= default_energy_unit / default_positions_unit forces = forces_array_to_tensor(forces, ctx.positions_unit, ctx.energy_unit) forces = forces.to(grad_output) # Accumulate gradient grad_input[0] = -forces * grad_output[:, None] return tuple(grad_input)
[docs] def ase_potential_energy( batch_positions, atoms, batch_cell=None, positions_unit=None, energy_unit=None, parallelization_strategy=None, ): """PyTorch-differentiable potential energy using ASE. PyTorch ``Function``s do not accept keyword arguments. This function wraps :func:`.ASEPotentialEnergyFunc.apply` to enable standard functional notation. See the documentation on the original function for the input parameters. See Also -------- :class:`.ASEPotentialEnergyFunc` More details on input parameters and implementation details. """ # apply() does not accept keyword arguments. return ASEPotentialEnergyFunc.apply( batch_positions, atoms, batch_cell, positions_unit, energy_unit, parallelization_strategy, )
# ============================================================================= # RUNNING UTILITY FUNCTIONS # ============================================================================= def _to_ase_units(x, positions_unit): """Convert x from positions_unit to angstroms.""" default_positions_unit = ASEPotential.default_positions_unit(positions_unit._REGISTRY) return (x * positions_unit).to(default_positions_unit).magnitude def _get_potential_energy_task( atoms, cell, positions, ): """Compute potential energy for a single configuration. This function is used as task function for a ParallelStrategy. Both positions and cell are expected to be numpy arrays and in units of Angstrom. The returned energies are in units of eV. """ # Return a copy of the atoms instance which contain all the results (beside return value). atoms = copy.deepcopy(atoms) # Set positions and cell. atoms.set_positions(positions) if cell is not None: atoms.set_cell(cell) # Compute energies. energy = atoms.get_potential_energy() return atoms, energy def _get_forces_task(atoms): """Compute forces for a single configuration. This function is used as task function for a ParallelStrategy. This assumes that atoms already has the positions/cell set. The returned forces are in units of eV/Angstrom. """ return atoms.get_forces()