Source code for tfep.potentials.mimic

#!/usr/bin/env python

# =============================================================================
# MODULE DOCSTRING
# =============================================================================

"""
Modules and functions to compute QM/MM energies and gradients with MiMiC, GROMACS,
and CPMD.

The code interfaces with the molecular dynamics software through the command line.

"""

import warnings
warnings.warn('The potential interface for MiMiC is still experimental and under heavy development.')

# TODO: STANDARDIZE BATCH_CELL FORMAT THROUGHOUT. FORWARD TAKES ALSO ANGLES, ENERGY/FORCE() ONLY LENGTHS.
# TODO: THE PV CONTRIBUTION IS NOT COMPUTED! THE RETURNED ENERGY IS NOT THE REDUCED POTENTIAL.
# TODO: UNDERSTAND KINETIC ENERGY PRINTING IN WAVEFUNCTION OPTIMIZATION
# TODO: USE logging MODULE INSTEAD OF print()


# =============================================================================
# GLOBAL IMPORTS
# =============================================================================

import copy
import functools
import glob
import os
import re
import shutil
import subprocess
from typing import Any, Dict, List, Optional, Union

import numpy as np
import pint
import torch

from tfep.potentials.base import PotentialBase
from tfep.potentials.gromacs import GmxGrompp, GmxMdrun
from tfep.utils.cli import Launcher, CLITool
from tfep.utils.misc import (
    flattened_to_atom, energies_array_to_tensor, forces_array_to_tensor, temporary_cd)
from tfep.utils.parallel import ParallelizationStrategy, SerialStrategy


# =============================================================================
# CPMD COMMAND
# =============================================================================

[docs] class Cpmd(CLITool): """The CPMD command. The CPMD command takes two positional arguments: the path to the input script and the path to the directory including the definitions of the pseudopoentials. If given as relative paths, these must be relative to the working directory at the time of the execution. Parameters ---------- input_file_path : str The CPMD input file path. pseudopotential_dir_path : str, optional The path to the directory including the pseudopotential definitions. executable_path : str, optional The executable path associated to the instance of the command. If this is not specified, the ``EXECUTABLE_PATH`` class variable is used instead. Examples -------- >>> cmd = Cpmd('input.in', 'path/to/pseudopotentials/') >>> cmd.to_subprocess() ['cpmd', 'input.in', 'path/to/pseudopotentials/'] If the executable is called differently, simply specify the executable path as a keyword argument. >>> cmd = Cpmd('input.in', 'path/to/pseudopotentials/', executable_path='cpmd.x') >>> cmd.to_subprocess() ['cpmd.x', 'input.in', 'path/to/pseudopotentials/'] """ EXECUTABLE_PATH = 'cpmd'
# ============================================================================= # TORCH MODULE API # =============================================================================
[docs] class MiMiCPotential(PotentialBase): """Potential energy and forces with MiMiC. This ``Module`` wraps :class:``.MiMiCPotentialEnergyFunc`` to provide a differentiable potential energy function for training. It also provides an API to compute energies and forces with MiMiC from batches of coordinates in ``numpy`` arrays in standard format (i.e., shape ``(n_atoms, 3)``) rather than flattened ``torch.Tensor``s (i.e., shape ``(n_atoms*3,)``). """ #: The default energy unit. DEFAULT_ENERGY_UNIT : str = 'hartree' #: The default positions unit. DEFAULT_POSITIONS_UNIT : str = 'bohr'
[docs] def __init__( self, cpmd_cmd: Cpmd, mdrun_cmd: GmxMdrun, grompp_cmd: GmxGrompp, gromacs_to_cpmd_atom_indices: Dict[int, int], launcher: Optional[Launcher] = None, grompp_launcher: Optional[Launcher] = None, positions_unit: Optional[pint.Unit] = None, energy_unit: Optional[pint.Unit] = None, precompute_gradient: bool = True, working_dir_path: Optional[Union[str, List[str]]]=None, cleanup_working_dir: bool = False, parallelization_strategy: Optional[ParallelizationStrategy] = None, launcher_kwargs: Optional[Dict[str, Any]] = None, grompp_launcher_kwargs: Optional[Dict[str, Any]] = None, n_attempts: int = 1, on_unconverged: str = 'raise', on_local_error: str = 'raise', ): """Constructor. Parameters ---------- cpmd_cmd : tfep.potentials.mimic.Cpmd The CPMD command to be run for MiMiC's execution that encapsulates the path to the CPMD input script and options. The ``&MIMIC.PATHS`` option and atomic coordinates can be placeholders as they are automatically set by this function according to the ``working_dir_path`` and ``batch_positions`` arguments. All other options must be set correctly for the function to run successfully. mdrun_cmd : tfep.potentials.mimic.GmxMdrun The GMX mdrun command to be run for MiMiC's execution that encapsulates the path to the GROMACS input script and running options. The ``mdrun_cmd.tpr_input_file_path`` can be left unset since a new ``.tpr`` file with the correct positions is automatically generated with ``gromp_cmd``. grompp_cmd : tfep.potentials.mimic.GmxGrompp, optional This command is used to generate the ``.tpr`` file with the correct coordinates. To do so, the batch positions are first stored in a ``.trr`` file which is then passed to grompp. Thus, the ``GmxGrompp.tpr_output_file_path`` and ``GmxGrompp.trajectory_input_file_path`` options can be ``None``. gromacs_to_cpmd_atom_indices : Dict[int, int] A dictionary associating atom indices in GROMACS to atom indices in CPMD. launcher : tfep.utils.cli.Launcher, optional The ``Launcher`` to use to run the ``cpmd_cmd`` and ``mdrun_cmd``. If not passed, a new :class:`tfep.utils.cli.Launcher` is created. grompp_launcher : tfep.utils.cli.Launcher, optional The ``Launcher`` to use to run the ``grompp_cmd`` command. If not passed, a new :class:`tfep.utils.cli.Launcher` is created. positions_unit : pint.Unit, optional The unit of the positions passed. This is used to appropriately convert ``batch_positions`` to the units used by MiMiC. If ``None``, no conversion is performed, which assumes that the input positions are in Bohr. energy_unit : pint.Unit, optional The unit used for the returned energies (and as a consequence forces). This is used to appropriately convert MiMiC energies into the desired units. If ``None``, no conversion is performed, which means that energies and forces will be in hartrees and hartrees/bohr respectively. precompute_gradient : bool, optional If ``False``, the ``FTRAJECTORY`` file is not read after executing MiMiC. This might save a small amount of time if backpropagation is not needed. working_dir_path : str or List[str], optional The working directory to be used to run MiMiC and grompp. This must exist. If a list, ``batch_positions[i]`` is evaluated in the directory ``working_dir_path[i]``. cleanup_working_dir : bool, optional If ``True`` and ``working_dir_path`` is passed, all the files inside the working directory are removed after executing MiMiC. The directory(s) itself is not deleted. parallelization_strategy : tfep.utils.parallel.ParallelizationStrategy, optional The parallelization strategy used to distribute batches of energy and gradient calculations. By default, these are executed serially. launcher_kwargs : Dict, optional Other kwargs for ``launcher`` (with the exception of ``cwd`` which is automatically determined based on ``working_dir_path``). grompp_launcher_kwargs : Dict, optional Other kwargs for ``grompp_launcher``. n_attempts : int, optional Number of times MiMiC is restarted before raising a ``RuntimeError`` when MiMiC crashes without creating an error report in the ``LocalError-X-X-X.log`` file. on_unconverged : str, optional Specifies how to handle the case in which the self-consistent calculation did not converge. It can have the following values: - ``'raise'``: Raises a ``RuntimeError`` and halts the execution. - ``'success'``: Treat the calculation as converged and return the latest energy and force values. - ``'nan'``: Return ``float('nan')`` energy and zero forces. If this is set to anything other than ``'success'``, the ``stdout`` keyword argument must be included in ``launcher_kwargs`` and set to ``subprocess.PIPE`` so that Python can intercept and parse the output to detect the convergence warning message. on_local_error : str, optional Specifies how to handle the case in which the calculation ends with an error and CPMD creates an error report in the ``LocalError-X-X-X.log`` file. It can have the following values: - ``'raise'``: Raises a ``RuntimeError`` and halts the execution. - ``'nan'``: Return ``float('nan')`` energy and zero forces. See Also -------- :class:`.MiMiCPotentialEnergyFunc` More details on input parameters and implementation details. """ super().__init__(positions_unit=positions_unit, energy_unit=energy_unit) self.cpmd_cmd = cpmd_cmd self.mdrun_cmd = mdrun_cmd self.grompp_cmd = grompp_cmd self.gromacs_to_cpmd_atom_indices = gromacs_to_cpmd_atom_indices self.launcher = launcher self.grompp_launcher = grompp_launcher self.precompute_gradient = precompute_gradient self.working_dir_path = working_dir_path self.cleanup_working_dir = cleanup_working_dir self.parallelization_strategy = parallelization_strategy self.launcher_kwargs = launcher_kwargs self.grompp_launcher_kwargs = grompp_launcher_kwargs self.n_attempts = n_attempts self.on_unconverged = on_unconverged self.on_local_error = on_local_error
[docs] def forward(self, batch_positions: torch.Tensor, batch_cell: torch.Tensor) -> torch.Tensor: """Compute a differential potential energy for a batch of configurations. Parameters ---------- batch_positions : torch.Tensor A tensor of positions in flattened format (i.e., with shape ``(batch_size, 3*n_atoms)``) in units of ``self.positions_unit``. Note that the order of the atoms is assumed to be that of the GROMACS input files, not the one used internally by CPMD. batch_cell : torch.Tensor Shape ``(batch_size, 6)``. Unitcell dimensions. For each data point, the first 3 elements represent the vector lengths in units of ``self.positions_unit`` and the last 3 their respective angles (in degrees). Returns ------- potential_energy : torch.Tensor ``potential_energy[i]`` is the potential energy of configuration ``batch_positions[i]`` and ``batch_cell[i]`` in units of ``self.energy_unit`` (or MiMiC units if ``energy_unit`` is not provided). """ return mimic_potential_energy( batch_positions=batch_positions, batch_cell=batch_cell, cpmd_cmd=self.cpmd_cmd, mdrun_cmd=self.mdrun_cmd, grompp_cmd=self.grompp_cmd, gromacs_to_cpmd_atom_indices=self.gromacs_to_cpmd_atom_indices, launcher=self.launcher, grompp_launcher=self.grompp_launcher, positions_unit=self._positions_unit, energy_unit=self._energy_unit, precompute_gradient=self.precompute_gradient, working_dir_path=self.working_dir_path, cleanup_working_dir=self.cleanup_working_dir, parallelization_strategy=self.parallelization_strategy, launcher_kwargs=self.launcher_kwargs, grompp_launcher_kwargs=self.grompp_launcher_kwargs, n_attempts=self.n_attempts, on_unconverged=self.on_unconverged, on_local_error=self.on_local_error, )
[docs] def energy(self, batch_positions: pint.Quantity, batch_cell: pint.Quantity) -> pint.Quantity: """Compute a the potential energy of a batch of configurations. Parameters ---------- batch_positions : pint.Quantity An array of positions with units and shape: ``(batch_size, n_atoms, 3)`` or ``(n_atoms, 3)``. If no units are attached to the array, it is assumed the positions are is in ``self.positions_unit`` units (or MiMiC units if ``positions_unit`` was not provided). Note that the order of the atoms is assumed to be that of the GROMACS input files, not the one used internally by CPMD. batch_cell : pint.Quantity An array of box vectors with units and shape: ``(batch_size, 3)`` or ``(3,)`` defining the orthorhombic box side lengths (the only one currently supported in MiMiC). If no units are attached to the array, it is assumed the positions are is in ``self.positions_unit`` units (or MiMiC units if ``positions_unit`` was not provided). Returns ------- potential_energies : pint.Quantity ``potential_energies[i]`` is the potential energy of configuration ``batch_positions[i]`` and ``batch_cell[i]``. """ # Add units. batch_positions = self._ensure_positions_has_units(batch_positions) batch_cell = self._ensure_positions_has_units(batch_cell) return _run_mimic( self.cpmd_cmd, self.mdrun_cmd, self.grompp_cmd, self.gromacs_to_cpmd_atom_indices, batch_positions=batch_positions, batch_cell=batch_cell, launcher=self.launcher, grompp_launcher=self.grompp_launcher, return_energy=True, return_force=False, unit_registry=None, working_dir_path=self.working_dir_path, cleanup_working_dir=self.cleanup_working_dir, parallelization_strategy=self.parallelization_strategy, launcher_kwargs=self.launcher_kwargs, grompp_launcher_kwargs=self.grompp_launcher_kwargs, n_attempts=self.n_attempts, on_unconverged=self.on_unconverged, on_local_error=self.on_local_error, )
[docs] def force(self, batch_positions: pint.Quantity, batch_cell: pint.Quantity) -> pint.Quantity: """Compute the force for a batch of configurations. Parameters ---------- batch_positions : pint.Quantity An array of positions with units and shape: ``(batch_size, n_atoms, 3)`` or ``(n_atoms, 3)``. If no units are attached to the array, it is assumed the positions are is in ``self.positions_unit`` units (or MiMiC units if ``positions_unit`` was not provided). Note that the order of the atoms is assumed to be that of the GROMACS input files, not the one used internally by CPMD. batch_cell : pint.Quantity An array of box vectors with units and shape: ``(batch_size, 3)`` or ``(3,)`` defining the orthorhombic box side lengths (the only one currently supported in MiMiC). If no units are attached to the array, it is assumed the positions are is in ``self.positions_unit`` units (or MiMiC units if ``positions_unit`` was not provided). Returns ------- forces : pint.Quantity ``forces[i]`` is the force of configuration ``batch_positions[i]`` and ``batch_cell[i]``. """ # Add units. batch_positions = self._ensure_positions_has_units(batch_positions) batch_cell = self._ensure_positions_has_units(batch_cell) return _run_mimic( self.cpmd_cmd, self.mdrun_cmd, self.grompp_cmd, self.gromacs_to_cpmd_atom_indices, batch_positions=batch_positions, batch_cell=batch_cell, launcher=self.launcher, grompp_launcher=self.grompp_launcher, return_energy=False, return_force=True, unit_registry=None, working_dir_path=self.working_dir_path, cleanup_working_dir=self.cleanup_working_dir, parallelization_strategy=self.parallelization_strategy, launcher_kwargs=self.launcher_kwargs, grompp_launcher_kwargs=self.grompp_launcher_kwargs, n_attempts=self.n_attempts, on_unconverged=self.on_unconverged, on_local_error=self.on_local_error, )
def _ensure_positions_has_units(self, batch_positions) -> pint.Quantity: """Add units to an array of positions.""" try: batch_positions.units except AttributeError: return batch_positions * self.positions_unit return batch_positions
# ============================================================================= # TORCH FUNCTIONAL API # =============================================================================
[docs] class MiMiCPotentialEnergyFunc(torch.autograd.Function): """PyTorch-differentiable QM/MM potential energy function wrapped around MiMiC. The function calls MiMiC through the command line interface using user-prepared CPMD and GROMACS input files and reads the resulting energies and forces. The function can use the input files as templates and evaluate energies and forces for batches of configurations. To do this, the input files must be setup to generate the ``ENERGIES`` and ``FTRAJECTORY`` trajectory files, which can be generated when both ``&CPMD.MOLECULAR DYNAMICS`` and ``&CPMD.TRAJECTORY FORCES`` options are used in the CPMD input script. The returned energy and force will be read from the first entry in the respective trajectory file. .. note:: With this mechanism, the returned energy and force will be associated to the configuration AFTER the first integration step, not to the configuration passed in ``batch_positions``. However, this is necessary as currently there is no way in MiMiC to save the forces of all the atoms (including the MM atoms) with a single wavefunction optimization calculation. To maximize the efficiency and reduce the error, we suggest setting ``&CPMD.TRAJECTORY FORCES`` and ``&CPMD.MAXSTEP`` to 1 and set a very small ``&CPMD.TIMESTEP`` (e.g., 0.000001 a.u.). .. note:: The order of the atoms typically differs in GROMACS and CPMD. The input and output positions and forces of this function always use the GROMACS atom order. If backpropagation is not necessary, ``&CPMD.TRAJECTORY FORCES`` can be left out but the option ``precompute_gradients`` must be set to ``False`` or the function will expect an output ``FTRAJECTORY`` file. Because GROMACS does not support resuming from a coordinate file (it requires a full checkpoint file), to perform the calculation starting from an arbitrary batch data point with different coordinates than those in the tpr file, we need to regenerate a temporary tpr file with the correct coordinates with GROMPP. Only orthorhombic boxes are currently supported by MiMiC. thus the box vectors are simply specified by the lengths of the box sides. The function supports running each MiMiC execution in a separate working directory to safely support batch parallelization schemes through :class:``tfep.utils.parallel.ParallelizationStrategy`` objects. Sometimes the communication between GROMACS and CPMD can fail causing a crash. In this case, the MiMiC execution is attempted ``n_attempts`` times before raising a ``RuntimeError``. It is possible to handle in different ways the cases when the calculation does not converge the wavefunction within the number of SCF steps specified or when MiMiC terminates with an error, depending on whether the user wants to halt the program or continue with a NaN potential energy value. Parameters ---------- ctx : torch.autograd.function._ContextMethodMixin A context to save information for the gradient. batch_positions : torch.Tensor A tensor of positions in flattened format (i.e., with shape ``(batch_size, 3*n_atoms)``). Note that the order of the atoms is assumed to be that of the GROMACS input files, not the one used internally by CPMD. batch_cell : torch.Tensor, optional Shape ``(batch_size, 6)``. Unitcell dimensions. For each data point, the first 3 elements represent the vector lengths in units of ``self.positions_unit`` and the last 3 their respective angles (in degrees). cpmd_cmd : tfep.potentials.mimic.Cpmd The CPMD command to be run for MiMiC's execution that encapsulates the path to the CPMD input script and options. The ``&MIMIC.PATHS`` option and atomic coordinates can be placeholders as they are automatically set by this function according to the ``working_dir_path`` and ``batch_positions`` arguments. All other options must be set correctly for the function to run successfully. mdrun_cmd : tfep.potentials.mimic.GmxMdrun The GMX mdrun command to be run for MiMiC's execution that encapsulates the path to the GROMACS input script and running options. The ``mdrun_cmd.tpr_input_file_path`` can be left unset since a new ``.tpr`` file with the correct positions is automatically generated with ``gromp_cmd``. gromacs_to_cpmd_atom_indices : Dict[int, int] A dictionary associating atom indices in GROMACS to atom indices in CPMD. grompp_cmd : tfep.potentials.mimic.GmxGrompp, optional This command is used to generate the the ``.tpr`` file with the correct coordinates. To do so, the batch positions are first stored in a ``.trr`` file which is then passed to grompp. Thus, the ``GmxGrompp.tpr_output_file_path`` and ``GmxGrompp.trajectory_input_file_path`` options can be ``None``. launcher : tfep.utils.cli.Launcher, optional The ``Launcher`` to use to run the ``cpmd_cmd`` and ``mdrun_cmd``. If not passed, a new :class:`tfep.utils.cli.Launcher` is created. grompp_launcher : tfep.utils.cli.Launcher, optional The ``Launcher`` to use to run the ``grompp_cmd`` command. If not passed, a new :class:`tfep.utils.cli.Launcher` is created. positions_unit : pint.Unit, optional The unit of the positions passed. This is used to appropriately convert ``batch_positions`` to the units used by MiMiC. If ``None``, no conversion is performed, which assumes that the input positions are in Bohr. energy_unit : pint.Unit, optional The unit used for the returned energies (and as a consequence forces). This is used to appropriately convert MiMiC energies into the desired units. If ``None``, no conversion is performed, which means that energies and forces will be in hartrees and hartrees/bohr respectively. precompute_gradient : bool, optional If ``False``, the ``FTRAJECTORY`` file is not read after executing MiMiC. This might save a small amount of time if backpropagation is not needed. working_dir_path : str or List[str], optional The working directory to be used to run MiMiC and grompp. This must exist. If a list, ``batch_positions[i]`` is evaluated in the directory ``working_dir_path[i]``. cleanup_working_dir : bool, optional If ``True`` and ``working_dir_path`` is passed, all the files inside the working directory are removed after executing MiMiC. The directory(s) itself is not deleted. parallelization_strategy : tfep.utils.parallel.ParallelizationStrategy, optional The parallelization strategy used to distribute batches of energy and gradient calculations. By default, these are executed serially. launcher_kwargs : Dict, optional Other kwargs for ``launcher`` (with the exception of ``cwd`` which is automatically determined based on ``working_dir_path``). grompp_launcher_kwargs : Dict, optional Other kwargs for ``grompp_launcher``. n_attempts : int, optional Number of times MiMiC is restarted before raising a ``RuntimeError`` when MiMiC crashes without creating an error report in the ``LocalError-X-X-X.log`` file. on_unconverged : str, optional Specifies how to handle the case in which the self-consistent calculation did not converge. It can have the following values: - ``'raise'``: Raises a ``RuntimeError`` and halts the execution. - ``'success'``: Treat the calculation as converged and return the latest energy and force values. - ``'nan'``: Return ``float('nan')`` energy and zero forces. If this is set to anything other than ``'success'``, the ``stdout`` keyword argument must be included in ``launcher_kwargs`` and set to ``subprocess.PIPE`` so that Python can intercept and parse the output to detect the convergence warning message. on_local_error : str, optional Specifies how to handle the case in which the calculation ends with an error and CPMD creates an error report in the ``LocalError-X-X-X.log`` file. It can have the following values: - ``'raise'``: Raises a ``RuntimeError`` and halts the execution. - ``'nan'``: Return ``float('nan')`` energy and zero forces. Returns ------- potentials : torch.Tensor ``potentials[i]`` is the potential energy of configuration ``batch_positions[i]``. See Also -------- :class:`.MiMiCPotential` ``Module`` API for computing potential energies with MiMiC. """
[docs] @staticmethod def forward( ctx: torch.autograd.function._ContextMethodMixin, batch_positions: torch.Tensor, batch_cell: torch.Tensor, cpmd_cmd: Cpmd, mdrun_cmd: GmxMdrun, grompp_cmd: GmxGrompp, gromacs_to_cpmd_atom_indices: Dict[int, int], launcher: Optional[Launcher] = None, grompp_launcher: Optional[Launcher] = None, positions_unit: Optional[pint.Unit] = None, energy_unit: Optional[pint.Unit] = None, precompute_gradient: bool = True, working_dir_path: Optional[Union[str, List[str]]]=None, cleanup_working_dir: bool = False, parallelization_strategy: Optional[ParallelizationStrategy] = None, launcher_kwargs: Optional[Dict[str, Any]] = None, grompp_launcher_kwargs: Optional[Dict[str, Any]] = None, n_attempts: int = 1, on_unconverged: str = 'raise', on_local_error: str = 'raise', ): """Compute the potential energy of the molecule with MiMiC.""" # Check for unit registry. if positions_unit is not None: unit_registry = positions_unit._REGISTRY elif energy_unit is not None: unit_registry = energy_unit._REGISTRY else: unit_registry = pint.UnitRegistry() # Convert flattened positions tensor to numpy array of shape # (batch_size, n_atoms, 3) and attach units. if positions_unit is None: positions_unit = MiMiCPotential.default_positions_unit(unit_registry) batch_positions_arr = flattened_to_atom(batch_positions.detach().cpu().numpy()) batch_positions_arr *= positions_unit if batch_cell is None: batch_cell_arr = None else: cell_lengths, cell_angles = batch_cell[:, :3], batch_cell[:, 3:] if not torch.allclose(cell_angles, torch.tensor(90.).to(cell_angles)): raise ValueError('MiMiC supports only orthorombic boxes') batch_cell_arr = cell_lengths.detach().cpu().numpy() batch_cell_arr *= positions_unit # Determine whether we need forces. if precompute_gradient: return_force = True else: return_force = False # Run MiMiC. result = _run_mimic( cpmd_cmd, mdrun_cmd, grompp_cmd, gromacs_to_cpmd_atom_indices, batch_positions=batch_positions_arr, batch_cell=batch_cell_arr, launcher=launcher, grompp_launcher=grompp_launcher, return_energy=True, return_force=return_force, unit_registry=unit_registry, working_dir_path=working_dir_path, cleanup_working_dir=cleanup_working_dir, parallelization_strategy=parallelization_strategy, launcher_kwargs=launcher_kwargs, grompp_launcher_kwargs=grompp_launcher_kwargs, n_attempts=n_attempts, on_unconverged=on_unconverged, on_local_error=on_local_error, ) if not precompute_gradient: energies = result else: energies, forces = result # Convert the force to a flattened tensor before storing it in ctx. # to compute the gradient during backpropagation. forces = forces_array_to_tensor(forces, positions_unit, energy_unit).to(batch_positions) ctx.save_for_backward(forces) # Convert to unitless Tensor. energies = energies_array_to_tensor(energies, energy_unit).to(batch_positions) return energies
[docs] @staticmethod def backward(ctx, grad_output): """Compute the gradient of the potential energy.""" # We still need to return a None gradient for each # input of forward() beside batch_positions. n_input_args = 19 grad_input = [None for _ in range(n_input_args)] # Compute gradient w.r.t. batch_positions. if ctx.needs_input_grad[0]: # Retrieve pre-computed forces. if len(ctx.saved_tensors) == 1: forces, = ctx.saved_tensors else: raise ValueError('Cannot compute gradients if precompute_gradient ' 'option is set to False.') # Accumulate gradient, which has opposite sign of the force. grad_input[0] = -forces * grad_output[:, None] return tuple(grad_input)
[docs] def mimic_potential_energy( batch_positions: torch.Tensor, batch_cell: torch.Tensor, cpmd_cmd: Cpmd, mdrun_cmd: GmxMdrun, grompp_cmd: GmxGrompp, gromacs_to_cpmd_atom_indices: Dict[int, int], launcher: Optional[Launcher] = None, grompp_launcher: Optional[Launcher] = None, positions_unit: Optional[pint.Unit] = None, energy_unit: Optional[pint.Unit] = None, precompute_gradient: bool = True, working_dir_path: Optional[Union[str, List[str]]]=None, cleanup_working_dir: bool = False, parallelization_strategy: Optional[ParallelizationStrategy] = None, launcher_kwargs: Optional[Dict[str, Any]] = None, grompp_launcher_kwargs: Optional[Dict[str, Any]] = None, n_attempts: int = 1, on_unconverged: str = 'raise', on_local_error: str = 'raise', ): """PyTorch-differentiable QM/MM potential energy using MiMIC. PyTorch ``Function``s do not accept keyword arguments. This function wraps :func:`.MiMiCPotentialEnergyFunc.apply` to enable standard functional notation. See the documentation on the original function for the input parameters. See Also -------- :class:`.MiMiCPotentialEnergyFunc` More details on input parameters and implementation details. """ # apply() does not accept keyword arguments. return MiMiCPotentialEnergyFunc.apply( batch_positions, batch_cell, cpmd_cmd, mdrun_cmd, grompp_cmd, gromacs_to_cpmd_atom_indices, launcher, grompp_launcher, positions_unit, energy_unit, precompute_gradient, working_dir_path, cleanup_working_dir, parallelization_strategy, launcher_kwargs, grompp_launcher_kwargs, n_attempts, on_unconverged, on_local_error, )
# ============================================================================= # MAIN FUNCTIONS WRAPPING MIMIC # ============================================================================= def _run_mimic( cpmd_cmd: Cpmd, mdrun_cmd: GmxMdrun, grompp_cmd: GmxGrompp, gromacs_to_cpmd_atom_indices: Dict[int, int], batch_positions: Optional[pint.Quantity] = None, batch_cell: Optional[pint.Quantity] = None, launcher: Optional[Launcher] = None, grompp_launcher: Optional[Launcher] = None, return_energy: bool = False, return_force: bool = False, unit_registry: pint.UnitRegistry = None, working_dir_path: Optional[Union[str, List[str]]]=None, cleanup_working_dir: bool = False, parallelization_strategy: Optional[ParallelizationStrategy] = None, launcher_kwargs: Optional[Dict[str, Any]] = None, grompp_launcher_kwargs: Optional[Dict[str, Any]] = None, n_attempts: int = 1, on_unconverged: str = 'raise', on_local_error: str = 'raise', ): """Run MiMiC. See also the docstring of ``MiMiCPotentialEnergyFunc``. Some notes to remember about the implementation. The input batch_positions are passed in GROMACS order (which is likely the most common user case), but the CPMD output files (e.g., FTRAJECTORY, GEOMETRY) use the CPMD order. Thus the function must have a map of the atom indices between GROMACS and CPMD. MiMiC centers the QM atoms in the box with a translation. This means that the positions passed do not correspond to those evaluated, but because only a translation is performed, the forces should be identical in both frames of reference. The &MIMIC.PATHS in the CPMD input script must point to the working directory at the time of the execution. If this is not the case, this function creates a copy of the script in the working directory (called 'cpmd.inp') with the correct &MIMIC.PATHS option to be used for executing the command. Parameters ---------- cpmd_cmd : tfep.potentials.mimic.Cpmd The CPMD command to be run for MiMiC encapsulating the path to the input script and options. The ``&MIMIC.PATHS`` option and atomic coordinates can be placeholders as they are automatically set by this function according to the ``working_dir_path`` and ``batch_positions`` arguments. All other options must be set correctly for the execution. mdrun_cmd : tfep.potentials.mimic.GmxMdrun The GMX mdrun command to be run for MiMiC encapsulating the path to the input script and running options. When ``batch_positions`` is ``None``. The positions are taken from the ``.tpr`` file encapsulated in this command. Otherwise, this command is run using a new ``.tpr`` file generated with ``grompp_cmd``. grompp_cmd : tfep.potentials.mimic.GmxGrompp, optional If ``batch_positions`` is passed, this command is used to generate the the ``.tpr`` file with the correct starting coordinates. To do so, the batch positions are first stored in a ``.trr`` file which is then passed to grompp. Thus, the ``GmxGrompp.tpr_output_file_path`` and ``GmxGrompp.trajectory_input_file_path`` options can be ``None``. gromacs_to_cpmd_atom_indices : Dict[int, int] A dictionary associating atom indices in GROMACS to atom indices in CPMD. batch_positions : pint.Quantity, optional An array of positions with units and shape: ``(batch_size, n_atoms, 3)`` or ``(n_atoms, 3)``. If ``None``, the coordinates in the input files are evaluated. Note that the order of the atoms is assumed to be that of the GROMACS input files, not the one used internally by CPMD. batch_cell : pint.Quantity, optional An array of box vectors with units and shape: ``(batch_size, 3)`` or ``(3,)`` defining the orthorhombic box side lengths (the only one currently supported in MiMiC). If ``None``, the box vectors in the input files are evaluated. launcher : tfep.utils.cli.Launcher or List[tfep.utils.cli.Launcher], optional The ``Launcher`` to use to run the ``cpmd_cmd`` and ``mdrun_cmd``. If a ``list``, it must have one launcher for each batch. If not passed, a new instance of :class:`tfep.utils.cli.Launcher` is used. grompp_launcher : tfep.utils.cli.Launcher, optional The ``Launcher`` to use to run the ``grompp_cmd`` command. If not passed, a new :class:`tfep.utils.cli.Launcher` is created. return_energy : bool, optional If ``True``, the potential energies are returned. return_force : bool, optional If ``True``, the forces are returned. unit_registry : pint.UnitRegistry, optional The unit registry to use for the energy units. If ``None`` and ``batch_positions`` has units attached, the unit registry of the positions is used. Otherwise, the returned values use a new ``UnitRegistry`` is created. working_dir_path : str or List[str], optional The working directory to be used to run MiMiC (and eventually grompp). This must exist. If ``batch_positions`` is passed, this can be a list specifying one directory for each batch configuration. cleanup_working_dir : bool, optional If ``True`` and ``working_dir_path`` is passed, all the files inside the working directory are removed after executing MiMiC. The directory(s) itself is not deleted. parallelization_strategy : tfep.utils.parallel.ParallelizationStrategy, optional The parallelization strategy used to distribute batches of energy and gradient calculations. By default, these are executed serially. launcher_kwargs : Dict, optional Other kwargs for ``launcher`` (with the exception of ``cwd`` which is automatically determined based on ``working_dir_path``). grompp_launcher_kwargs : Dict, optional Other kwargs for ``grompp_launcher``. n_attempts : int, optional Number of times MiMiC is restarted before raising a ``RuntimeError`` when MiMiC crashes without creating an error report in the ``LocalError-X-X-X.log`` file. on_unconverged : str, optional Specifies how to handle the case in which the self-consistent calculation did not converge. It can have the following values: - ``'raise'``: Raises a ``RuntimeError`` and halts the execution. - ``'success'``: Treat the calculation as converged and return the latest energy and force values. - ``'nan'``: Return ``float('nan')`` energy and zero forces. If this is set to anything other than ``'success'``, the ``stdout`` keyword argument must be included in ``launcher_kwargs`` and set to ``subprocess.PIPE`` so that Python can intercept and parse the output to detect the convergence warning message. on_local_error : str, optional Specifies how to handle the case in which the calculation ends with an error and CPMD creates an error report in the ``LocalError-X-X-X.log`` file. It can have the following values: - ``'raise'``: Raises a ``RuntimeError`` and halts the execution. - ``'nan'``: Return ``float('nan')`` energy and zero forces. Returns ------- energies : pint.Quantity, optional If ``batch_positions`` was not given or if it is of shape ``(n_atoms, 3)``, this is a single number. Otherwise, ``energies[i]`` is the potential of configuration ``batch_positions[i]``. This is returned only if ``return_energy`` is ``True``. forces : torch.Tensor or pint.Quantity, optional If ``batch_positions`` was not given or if it is of shape ``(n_atoms, 3)``, this is a numpy array of shape ``(n_atoms, 3)`` with the molecule's force. Otherwise, ``forces[i]`` is the force of configuration ``batch_positions[i]``. This is returned only if ``return_force`` is ``True``. As with the ``batch_positions``, the order of the atoms in each force follows that of the GROMACS input files, not the one used internally by CPMD. """ # Mutable default arguments. if parallelization_strategy is None: parallelization_strategy = SerialStrategy() # Obtain the unit registry. if batch_positions is not None: unit_registry = batch_positions._REGISTRY elif unit_registry is None: unit_registry = pint.UnitRegistry() # First dimension should be batch. Save the input shape before modifying. # We need this to know the result. is_batch = (batch_positions is not None) and (len(batch_positions.shape) >= 3) if not is_batch: batch_positions = [batch_positions] batch_cell = [batch_cell] elif batch_cell is None: # batch_cell still needs to be in batch format. batch_cell = [batch_cell] * batch_positions.shape[0] # Make sure working_dir_path and launcher are in batch format. n_configurations = len(batch_positions) if working_dir_path is None or isinstance(working_dir_path, str): working_dir_path = [working_dir_path] * n_configurations else: working_dir_path = [os.path.realpath(p) for p in working_dir_path] try: iter(launcher) except TypeError: launcher = [launcher] * n_configurations # Run the command. task = functools.partial( _run_mimic_task, cpmd_cmd, mdrun_cmd, grompp_cmd, gromacs_to_cpmd_atom_indices, grompp_launcher, return_energy, return_force, cleanup_working_dir, launcher_kwargs, grompp_launcher_kwargs, n_attempts, on_unconverged, on_local_error, ) distributed_args = zip(batch_positions, batch_cell, launcher, working_dir_path) returned_values = parallelization_strategy.run(task, distributed_args) # Convert from a list of shape (batch_size, 2) to (2, batch_size). returned_values = list(zip(*returned_values)) # Convert to a single array or to not-batch format. if is_batch: returned_values = [np.array(res) for res in returned_values] else: returned_values = [res[0] for res in returned_values] # Add units. default_energy_unit = MiMiCPotential.default_energy_unit(unit_registry) default_potential_unit = MiMiCPotential.default_positions_unit(unit_registry) if return_energy: returned_values[0] = returned_values[0] * default_energy_unit if return_force: returned_values[-1] = returned_values[-1] * default_energy_unit / default_potential_unit return returned_values def _run_mimic_task( cpmd_cmd, mdrun_cmd, grompp_cmd, gromacs_to_cpmd_atom_indices, grompp_launcher, return_energy, return_force, cleanup_working_dir, launcher_kwargs, grompp_launcher_kwargs, n_attempts, on_unconverged, on_local_error, positions, box_vectors, launcher, working_dir_path, ): """This is the task passed to the ``ParallelizationStrategy`` to run MiMiC. The arguments are essentially the same as _run_mimic but for a single data point of a batch. It returns energy and force (depending on ``return_energy`` and ``return_force`` respectively) as unitless arrays in units of hartree and hartree/bohr respectively. """ # Mutable default arguments. if launcher_kwargs is None: launcher_kwargs = {} if grompp_launcher_kwargs is None: grompp_launcher_kwargs = {} # If we need to check for unconverged self-consistent calculation, # we need to capture the output so that we can parse it. check_convergence = on_unconverged != 'success' if check_convergence and (launcher_kwargs.get('stdout', None) != subprocess.PIPE): raise ValueError(f"If on_unconverged={on_unconverged}, then 'launcher_kwargs'" " must include stdout=subprocess.PIPE") # If no working directory was specified, this is executed in the current one. if working_dir_path is None: working_dir_path = os.getcwd() # Make sure working_dir_path is an absolute path working_dir_path = os.path.realpath(working_dir_path) # Prepare the cpmd command. cpmd_cmd = _prepare_cpmd_command(cpmd_cmd, working_dir_path, positions, box_vectors) # Prepare the mdrun command. mdrun_cmd = _prepare_mdrun_command( mdrun_cmd, grompp_cmd, working_dir_path, positions, box_vectors, grompp_launcher, **grompp_launcher_kwargs ) # Run MiMiC. if launcher is None: launcher = Launcher() # Flag checking whether a LocalError-X-X-X.log file was produced. has_local_error = False # The communication mechanism is a bit fragile. If MiMIC crashes, it won't # save the ENERGIES file and cause a FileNotFoundError. We attempt several # times before giving up. for attempt_idx in range(n_attempts): # This is where we store energy and/or force. returned_values = [] try: result = launcher.run(cpmd_cmd, mdrun_cmd, cwd=working_dir_path, **launcher_kwargs) # With multiprog, only a single result is returned. try: result_cpmd = result[0] except TypeError: result_cpmd = result # Check if it is unconverged. if check_convergence: is_unconverged = re.search(b'DENSITY NOT CONVERGED', result_cpmd.stdout) is not None else: is_unconverged = False # Read the energy/force from the trajectory files. if not is_unconverged: if return_energy: energy = _read_first_energy(working_dir_path) returned_values.append(energy) if return_force: force = _read_first_force(working_dir_path, gromacs_to_cpmd_atom_indices) returned_values.append(force) # Stop the attempts if the calculation was successful or if is_unconverged is True. break except FileNotFoundError: # Check for LocalError file. local_error_file_paths = list(glob.glob(os.path.join(working_dir_path, 'LocalError-*.log'))) if len(local_error_file_paths) > 0: print('Local error detected: Found these files:', local_error_file_paths, flush=True) has_local_error = True break # The MiMiC calculation crashed before ENERGIES and/or FTRAJECTORY was written. print('Attempt {}/{} failed'.format(attempt_idx+1, n_attempts), flush=True) if attempt_idx == n_attempts-1: raise RuntimeError('Cannot run MiMiC.') # Handle errors. if is_unconverged or has_local_error: # Log the full stdout. if result_cpmd.stdout is not None: print(result_cpmd.stdout.decode('utf-8'), flush=True) # Return nan if requested. if ((is_unconverged and on_unconverged == 'nan') or (has_local_error and on_local_error == 'nan')): if return_energy: returned_values.append(np.nan) if return_force: returned_values.append(np.zeros_like(positions)) elif is_unconverged and on_unconverged == 'raise': raise RuntimeError('The self consistent calculation did not converge.') elif has_local_error and on_local_error == 'raise': raise RuntimeError('Detected LocalError-X-X-X.log file.') else: raise ValueError(("'on_unconverged' can be 'success', 'raise', or 'nan'" " while 'on_local_error' can be 'raise' or 'nan'.")) # Clean up directory. if cleanup_working_dir: for file_name in os.listdir(working_dir_path): file_path = os.path.join(working_dir_path, file_name) if os.path.isfile(file_path) or os.path.islink(file_path): os.unlink(file_path) elif os.path.isdir(file_path): shutil.rmtree(file_path) return returned_values def _prepare_cpmd_command(cpmd_cmd, working_dir_path, positions=None, box_vectors=None): """Prepare the CPMD script and command. This function: - Makes sure the &MIMIC.PATHS option in the CPMD input script point to the working directory used for the execution. - Sets the positions and box vectors in the CPMD input script based to those in ``positions`` (if the positions are passed). At the time of writing the keyword &SYSTEM.ANGSTROM is not supported in MiMiC so all positions are written in Bohr. Moreover, only orthorhombic boxes are supported in MiMiC, thus ``box_vectors`` simply contains the lengths of the box sides. When changes are needed, the modified version is created inside the working directory with the name 'cpmd.inp' to preserve the original file, and the returned ``Cpmd`` object is a copy of the ``cpmd_cmd`` argument pointing to the correct input file. The atom indices map in &MIMIC.OVERLAPS is used to resolve which positions in ``positions`` refer to which CPMD atom. Parameters ---------- cpmd_cmd : CLITool The original CPMD command. working_dir_path : str The path to the working directory used for executing the mimic command. positions : pint.Quantity, optional An array of positions with units and shape ``(n_atoms, 3)``. If ``None``, the coordinates in the input file are evaluated. box_vectors : pint.Quantity, optional An array of box vectors with units and shape ``(3,)`` defining the orthorhombic box side lengths (the only one currently supported in MiMiC). If ``None``, the box in the input file is evaluated. Returns ------- cpmd_cmd : tfep.potentials.mimic.Cpmd A CPMD commandthat points to the correct (original or copied) input file. """ OUTPUT_CPMD_FILE_NAME = 'cpmd.inp' # Read the original CPMD script file. If a relative path, this is relative # to the execution working directory. with temporary_cd(working_dir_path): cpmd_input_file_path = os.path.realpath(cpmd_cmd.args[0]) # Parse the file. (cpmd_file_lines, path_line_idx, box_vectors_line_idx, gromacs_to_cpmd_qm_atom_indices, cpmd_atom_to_line_idx) = _parse_cpmd_input(cpmd_input_file_path) # If the path is incorrect, update it. paths_value = cpmd_file_lines[path_line_idx].strip() update_path = working_dir_path != os.path.realpath(paths_value) if update_path: cpmd_file_lines[path_line_idx] = working_dir_path + '\n' # Update the box vectors and positions. if positions is not None: if box_vectors is not None: box_vectors_bohr = box_vectors.to(MiMiCPotential.DEFAULT_POSITIONS_UNIT).magnitude cpmd_file_lines[box_vectors_line_idx] = ' '.join([str(x) for x in box_vectors_bohr]) + '\n' # Cycle through all atoms and update their lines one by one. for gromacs_atom_idx, cpmd_atom_idx in gromacs_to_cpmd_qm_atom_indices.items(): line_idx = cpmd_atom_to_line_idx[cpmd_atom_idx] atom_position = positions[gromacs_atom_idx].to(MiMiCPotential.DEFAULT_POSITIONS_UNIT).magnitude cpmd_file_lines[line_idx] = ' '.join([str(x) for x in atom_position]) + '\n' # Create a modified copy of the file and update the command to point to it. if update_path: # Write the file. with open(os.path.join(working_dir_path, OUTPUT_CPMD_FILE_NAME), 'w') as f: for line in cpmd_file_lines: f.write(line) # Update the command without modifying the original. cpmd_cmd = copy.deepcopy(cpmd_cmd) cpmd_cmd.args = [OUTPUT_CPMD_FILE_NAME] + list(cpmd_cmd.args)[1:] return cpmd_cmd def _prepare_mdrun_command(mdrun_cmd, grompp_cmd, working_dir_path, positions=None, box_vectors=None, grompp_launcher=None, **kwargs): """Prepare the mdrun command for execution in the given working directory. If ``positions`` is not ``None``, the function uses the grompp command to create a new .tpr file with the correct positions and box vectors. Currently, it is impossible to update the tpr file with the new positions using the GROMACS tools without calling grompp again. In this case, the returned ``mdrun_cmd`` is a copy of the original that points to the new .tpr file. Only orthorhombic boxes are currently supported by MiMiC. thus ``box_vectors`` simply contains the lengths of the box sides. Parameters ---------- mdrun_cmd : GmxMdrun The mdrun command to prepare. This is modified. grompp_cmd : GmxGrompp The grompp command used to generate the new .tpr file. To do so, the batch positions are first stored in a ``.trr`` file which is then passed to grompp. Thus, the ``GmxGrompp.tpr_output_file_path`` and ``GmxGrompp.trajectory_input_file_path`` options can be ``None``. working_dir_path : str The path to the working directory used for executing the mimic (and eventually grompp) command. positions : pint.Quantity, optional An array of positions with units and shape ``(n_atoms, 3)``. If ``None``, the coordinates in the input file are evaluated. box_vectors : pint.Quantity, optional An array of box vectors with units and shape ``(3,)`` defining the orthorhombic box side lengths (the only one currently supported in MiMiC). If ``None``, the box in the input file is evaluated. grompp_launcher : Launcher, optional The launcher to use for grompp. If ``None`` a standard ``Launcher`` is used. **kwargs Other keyword arguments to pass to ``grompp_launcher``. Returns ------- mdrun_cmd : GmxMdrun The mdrun command pointing to the appropriate .tpr file. """ if positions is None: return mdrun_cmd tpr_file_name = 'gromacs.tpr' in_gro_file_name = 'positions.trr' # Mutable default arguments. if grompp_launcher is None: grompp_launcher = Launcher() # Create a new .gro file in the working directory to be used as input for # grompp using as template the structure file supplied with grompp_cmd. import MDAnalysis template_universe = MDAnalysis.Universe(grompp_cmd.structure_input_file_path) template_universe.atoms.positions = positions.to('angstrom').magnitude # MDAnalysis needs the angles of the box vectors as well. MiMiC supports # only orthorombic boxes. if box_vectors is not None: dimensions = np.concatenate([box_vectors.to('angstrom').magnitude, [90.0, 90, 90]]) template_universe.dimensions = dimensions in_gro_file_path = os.path.join(working_dir_path, in_gro_file_name) with MDAnalysis.Writer(in_gro_file_path, n_atoms=template_universe.trajectory.n_atoms) as w: w.write(template_universe) # Copy the commands to avoid modifying the original one. mdrun_cmd = copy.deepcopy(mdrun_cmd) grompp_cmd = copy.deepcopy(grompp_cmd) # Configure and run the command. grompp_cmd.trajectory_input_file_path = in_gro_file_name grompp_cmd.tpr_output_file_path = tpr_file_name grompp_launcher.run(grompp_cmd, cwd=working_dir_path, **kwargs) # Update the mdrun command. We can use the tpr_file_name since the execution # will happen using working_dir_path as working directory as well. mdrun_cmd.tpr_input_file_path = tpr_file_name return mdrun_cmd # ============================================================================= # CPMD SCRIPT INPUT/OUTPUT PARSING UTILITIES # ============================================================================= def _parse_cpmd_input(cpmd_input_file_path): """Parse the input file content and return some the relevant info. Returns ------- cpmd_file_lines : List[str] The content of the file as read by file.readlines(). paths_line_idx : int The line index where the path to the working directory should be. box_vectors_line_idx : int The line index where the box is specified in &MIMIC.BOX. gromacs_to_cpmd_qm_atom_indices : Dict[int, int] Associates a GROMACS atom index to a CPMD atom index for the QM atoms (as given by ``&MIMIC.OVERLAPS``). cpmd_atom_to_line_idx : Dict[int, int] Associates a CPMD atom index to the line index where its coordinates are stored. """ # Read the file. with open(cpmd_input_file_path, 'r') as f: cpmd_file_lines = f.readlines() # Parsing results organized by blocks. parsed = {} # Identify the line with the path to the working directory. line_idx = 0 while line_idx < len(cpmd_file_lines): line = cpmd_file_lines[line_idx].strip() # If the line matches a block name, the entire block is parsed. try: line_idx = _parse_cpmd_block_dispatch[line](cpmd_file_lines, line_idx+1, parsed) except KeyError: # Unknown block or empty line. line_idx += 1 return (cpmd_file_lines, parsed['paths_line_idx'], parsed['box_vectors_line_idx'], parsed['gromacs_to_cpmd_qm_atom_indices'], parsed['cpmd_atom_to_line_idx']) def _parse_cpmd_mimic_block(lines, line_idx, parsed): """Update parsed with info in the &MIMIC block. It adds to ``parsed`` the following keys. - paths_line_idx: the line index where the path to the working dir is. - box_vectors_line_idx: the line index where the box is defined in &MIMIC.BOX. - gromacs_to_cpmd_qm_atom_indices: a Dict[Int, Int] that associate a GROMACS atom index to a CPMD atom index for the QM atoms (as given by &MIMIC.OVERLAPS). Parameters ---------- lines : List[str] The lines of the entire file as read by file.readlines(). line_idx : int The index of the first line after '&MIMIC'. parsed : Dict A memo dictionary where the parsed info is saved. Returns ------- line_idx : int The line index right after the '&END' directive of the '&MIMIC' block. """ parsed['paths_line_idx'] = None parsed['box_vectors_line_idx'] = None parsed['gromacs_to_cpmd_qm_atom_indices'] = {} while line_idx < len(lines): line = lines[line_idx].strip() if line.startswith('PATHS'): # First line after PATHS option is the number of layers. The second # line after PATHS is the actual path to the working directory. parsed['paths_line_idx'] = line_idx + 2 line_idx += 3 elif line.startswith('BOX'): parsed['box_vectors_line_idx'] = line_idx + 1 line_idx += 2 elif line.startswith('OVERLAPS'): # First line is the number of atoms. n_atoms = int(lines[line_idx+1]) # Parse all OVERLAPS lines. line_idx += 2 for i in range(n_atoms): line = lines[line_idx+i].split() gromacs_idx, cpmd_idx = int(line[1])-1, int(line[3])-1 if line[0] == 1: gromacs_idx, cpmd_idx = cpmd_idx, gromacs_idx parsed['gromacs_to_cpmd_qm_atom_indices'][gromacs_idx] = cpmd_idx # Update first line. line_idx += n_atoms elif line.startswith('&END'): break else: line_idx += 1 return line_idx + 1 def _parse_cpmd_atoms_block(lines, line_idx, parsed): """Add to ``parsed`` ``cpmd_atom_to_line_idx``. ``cpmd_atom_to_line_idx`` is a Dict[int, int] that associates a CPMD atom index to the line number where its coordinates are stored. Parameters ---------- lines : List[str] The lines of the entire file as read by file.readlines(). line_idx : int The index of the first line after '&ATOMS'. parsed : Dict A memo dictionary where the parsed info is saved. Returns ------- line_idx : int The line index right after the '&END' directive of the '&MIMIC' block. """ parsed['cpmd_atom_to_line_idx'] = {} current_atom_idx = 0 while line_idx < len(lines): line = lines[line_idx].strip() if line.startswith('*'): # New atom type. First line is nonlocality, second is number of atoms. n_atoms = int(lines[line_idx+2]) # Add the atoms to the map. line_idx += 3 for element_atom_idx in range(n_atoms): parsed['cpmd_atom_to_line_idx'][current_atom_idx] = line_idx+element_atom_idx current_atom_idx += 1 line_idx += n_atoms elif line.startswith('&END'): break else: # Empty line. line_idx += 1 return line_idx + 1 # Parsing dispatch function. _parse_cpmd_block_dispatch = { '&MIMIC': _parse_cpmd_mimic_block, '&ATOMS': _parse_cpmd_atoms_block, } def _read_first_energy(cpmd_dir_path): """Read the first energy from the ENERGIES trajectory file. The energy is returned as a unitless float in the same units used by CPMD. """ energies_traj_file_path = os.path.join(cpmd_dir_path, 'ENERGIES') with open(energies_traj_file_path, 'r') as f: for line in f: line = line.split() step = int(line[0]) if step == 1: energy = float(line[3]) return energy def _read_first_force(cpmd_dir_path, gromacs_to_cpmd_atom_indices): """Read the first force from the FTRAJECTORY trajectory file. Parameters ---------- cpmd_dir_path : str The directory where the FTRAJECTORY file is stored. gromacs_to_cpmd_atom_indices : Dict[int, int], optional Associates a GROMACS atom index to a CPMD atom index. This must be passed with ``positions``. Returns ------- force : np.ndarray The force as unitless numpy array of floats in the same units used by CPMD. """ force = [] # Read the forces in the FTRAJECTORY file, which use the CPMD atom order. force_traj_file_path = os.path.join(cpmd_dir_path, 'FTRAJECTORY') with open(force_traj_file_path, 'r') as f: for line in f: line = line.split() if line[0] == '1': # Columns 2-7 are positions and velocities. force.append([float(x) for x in line[7:]]) # Convert from CPMD to GROMACS atom order. n_atoms = len(force) force = [force[gromacs_to_cpmd_atom_indices.get(i, i)] for i in range(n_atoms)] # Convert to array. return np.array(force)