"""
A collection of protocols for building coordinates for molecular systems.
"""
import logging
from collections import defaultdict
from enum import Enum
from os import path
import numpy as np
from simtk.openmm import app
from propertyestimator import unit
from propertyestimator.substances import Substance
from propertyestimator.utils import packmol, create_molecule_from_smiles
from propertyestimator.utils.exceptions import PropertyEstimatorException
from propertyestimator.workflow.decorators import protocol_input, protocol_output
from propertyestimator.workflow.plugins import register_calculation_protocol
from propertyestimator.workflow.protocols import BaseProtocol
[docs]@register_calculation_protocol()
class BuildCoordinatesPackmol(BaseProtocol):
"""Creates a set of 3D coordinates with a specified composition.
Notes
-----
The coordinates are created using packmol.
"""
@protocol_input(int)
def max_molecules(self):
"""The maximum number of molecules to be added to the system."""
pass
@protocol_input(unit.Quantity)
def mass_density(self):
"""The target density of the created system."""
pass
@protocol_input(list)
def box_aspect_ratio(self):
"""The aspect ratio of the simulation box. The default is [1.0, 1.0, 1.0],
i.e a cubic box."""
return self._box_aspect_ratio
@protocol_input(Substance)
def substance(self):
"""The composition of the system to build."""
pass
@protocol_input(bool)
def verbose_packmol(self):
"""If True, packmol will be allowed to log verbose information to the logger,
and any working packmol files will be retained."""
pass
@protocol_input(bool)
def retain_packmol_files(self):
"""If True, packmol will not delete all of the temporary files it creates
while building the coordinates."""
pass
@protocol_output(int)
def output_number_of_molecules(self):
"""The number of molecules in the created system. This may be less than
maximum requested due to rounding of mole fractions.
"""
pass
@protocol_output(Substance)
def output_substance(self):
"""The substance which was built by packmol. This may differ from the input
substance for system containing two or more components due to rounding of
mole fractions. The mole fractions provided by this output should always be
used when weighting values by a mole fraction.
"""
pass
@protocol_output(str)
def coordinate_file_path(self):
"""The file path to the created PDB coordinate file."""
pass
[docs] def __init__(self, protocol_id):
"""Constructs a new BuildCoordinatesPackmol object.
"""
super().__init__(protocol_id)
self._substance = None
self._coordinate_file_path = None
self._positions = None
self._max_molecules = 1000
self._mass_density = 0.95 * unit.grams / unit.milliliters
self._verbose_packmol = False
self._retain_packmol_files = False
self._box_aspect_ratio = [1.0, 1.0, 1.0]
self._output_number_of_molecules = None
self._output_substance = None
def _build_molecule_arrays(self, directory):
"""Converts the input substance into a list of openeye OEMol's and a list of
counts for how many of each there should be as determined by the `max_molecules`
input and the molecules respective mole fractions.
Parameters
----------
directory: The directory in which this protocols working files are being saved.
Returns
-------
list of openeye.oechem.OEMol
The list of openeye molecules.
list of int
The number of each molecule which should be added to the system.
PropertyEstimatorException, optional
None if no exceptions occurred, otherwise the exception.
"""
molecules = []
for component in self._substance.components:
molecule = create_molecule_from_smiles(component.smiles)
if molecule is None:
return None, None, PropertyEstimatorException(directory=directory,
message=f'{component} could not be converted '
f'to a Molecule')
molecules.append(molecule)
# Determine how many molecules of each type will be present in the system.
molecules_per_component = self._substance.get_molecules_per_component(self._max_molecules)
number_of_molecules = [0] * self._substance.number_of_components
for index, component in enumerate(self._substance.components):
number_of_molecules[index] = molecules_per_component[component.identifier]
if sum(number_of_molecules) > self._max_molecules:
return None, None, PropertyEstimatorException(directory=directory,
message=f'The number of molecules to create '
f'({sum(number_of_molecules)}) is greater '
f'than the maximum number requested '
f'({self._max_molecules}).')
return molecules, number_of_molecules, None
def _rebuild_substance(self, number_of_molecules):
"""Rebuilds the `Substance` object which the protocol will create coordinates.
This may not be the same as the input system due to the finite number of molecules
to be added causing rounding of mole fractions.
Parameters
----------
number_of_molecules: list of int
The number of each component which should be added to the system.
Returns
-------
Substance
The substance which contains the corrected component amounts.
"""
new_amounts = defaultdict(list)
total_number_of_molecules = sum(number_of_molecules)
# Handle any exact amounts.
for component in self._substance.components:
exact_amounts = [amount for amount in self._substance.get_amounts(component) if
isinstance(amount, Substance.ExactAmount)]
if len(exact_amounts) == 0:
continue
total_number_of_molecules -= exact_amounts[0].value
new_amounts[component].append(exact_amounts[0])
# Recompute the mole fractions.
total_mole_fraction = 0.0
number_of_new_mole_fractions = 0
for index, component in enumerate(self._substance.components):
mole_fractions = [amount for amount in self._substance.get_amounts(component) if
isinstance(amount, Substance.MoleFraction)]
if len(mole_fractions) == 0:
continue
molecule_count = number_of_molecules[index]
if component in new_amounts:
molecule_count -= new_amounts[component][0].value
new_mole_fraction = molecule_count / total_number_of_molecules
new_amounts[component].append(Substance.MoleFraction(new_mole_fraction))
total_mole_fraction += new_mole_fraction
number_of_new_mole_fractions += 1
if not np.isclose(total_mole_fraction, 1.0) and number_of_new_mole_fractions > 0:
raise ValueError('The new mole fraction does not equal 1.0')
output_substance = Substance()
for component, amounts in new_amounts.items():
for amount in amounts:
output_substance.add_component(component, amount)
return output_substance
def _save_results(self, directory, topology, positions):
"""Save the results of running PACKMOL in the working directory
Parameters
----------
directory: str
The directory to save the results in.
topology : simtk.openmm.Topology
The topology of the created system.
positions : propertyestimator.unit.Quantity
A `propertyestimator.unit.Quantity` wrapped `numpy.ndarray` (shape=[natoms,3])
which contains the created positions with units compatible with angstroms.
"""
from simtk import unit as simtk_unit
simtk_positions = positions.to(unit.angstrom).magnitude * simtk_unit.angstrom
self._coordinate_file_path = path.join(directory, 'output.pdb')
with open(self._coordinate_file_path, 'w+') as minimised_file:
# noinspection PyTypeChecker
app.PDBFile.writeFile(topology, simtk_positions, minimised_file)
logging.info('Coordinates generated: ' + self._substance.identifier)
[docs] def execute(self, directory, available_resources):
logging.info(f'Generating coordinates for {self._substance.identifier}: {self.id}')
if self._substance is None:
return PropertyEstimatorException(directory=directory,
message='The substance input is non-optional')
molecules, number_of_molecules, exception = self._build_molecule_arrays(directory)
if exception is not None:
return exception
self._output_number_of_molecules = sum(number_of_molecules)
self._output_substance = self._rebuild_substance(number_of_molecules)
packmol_directory = path.join(directory, 'packmol_files')
# Create packed box
topology, positions = packmol.pack_box(molecules=molecules,
number_of_copies=number_of_molecules,
mass_density=self._mass_density,
box_aspect_ratio=self._box_aspect_ratio,
verbose=self._verbose_packmol,
working_directory=packmol_directory,
retain_working_files=self._retain_packmol_files)
if topology is None or positions is None:
return PropertyEstimatorException(directory=directory,
message='Packmol failed to complete.')
self._save_results(directory, topology, positions)
return self._get_output_dictionary()
[docs]@register_calculation_protocol()
class SolvateExistingStructure(BuildCoordinatesPackmol):
"""Creates a set of 3D coordinates with a specified composition.
Notes
-----
The coordinates are created using packmol.
"""
@protocol_input(str)
def solute_coordinate_file(self):
"""A file path to the solute to solvate."""
pass
[docs] def __init__(self, protocol_id):
super().__init__(protocol_id)
self._solute_coordinate_file = None
[docs] def execute(self, directory, available_resources):
logging.info(f'Generating coordinates for {self._substance.identifier}: {self.id}')
if self._substance is None:
return PropertyEstimatorException(directory=directory,
message='The substance input is non-optional')
if self._solute_coordinate_file is None:
return PropertyEstimatorException(directory=directory,
message='The solute coordinate file input is non-optional')
molecules, number_of_molecules, exception = self._build_molecule_arrays(directory)
if exception is not None:
return exception
packmol_directory = path.join(directory, 'packmol_files')
# Create packed box
topology, positions = packmol.pack_box(molecules=molecules,
number_of_copies=number_of_molecules,
structure_to_solvate=self._solute_coordinate_file,
mass_density=self._mass_density,
verbose=self._verbose_packmol,
working_directory=packmol_directory,
retain_working_files=self._retain_packmol_files)
if topology is None or positions is None:
return PropertyEstimatorException(directory=directory,
message='Packmol failed to complete.')
self._save_results(directory, topology, positions)
return self._get_output_dictionary()
[docs]@register_calculation_protocol()
class BuildDockedCoordinates(BaseProtocol):
"""Creates a set of coordinates for a ligand bound to some receptor.
Notes
-----
This protocol currently only supports docking with the OpenEye OEDocking
framework.
"""
[docs] class ActivateSiteLocation(Enum):
"""An enum which describes the methods by which a receptors
activate site(s) is located."""
ReceptorCenterOfMass = 'ReceptorCenterOfMass'
@protocol_input(Substance)
def ligand_substance(self):
"""A substance containing only the ligand to dock."""
pass
@protocol_input(int)
def number_of_ligand_conformers(self):
"""The number of conformers to try and dock into the receptor structure."""
pass
@protocol_input(str)
def receptor_coordinate_file(self):
"""The file path to the coordinates of the receptor molecule."""
pass
@protocol_input(ActivateSiteLocation)
def activate_site_location(self):
"""Defines the method by which the activate site is identified. Currently the only available
option is `ActivateSiteLocation.ReceptorCenterOfMass`"""
pass
@protocol_output(str)
def docked_ligand_coordinate_path(self):
"""The file path to the coordinates of the ligand in it's docked
pose, aligned with the initial `receptor_coordinate_file`."""
pass
@protocol_output(str)
def docked_complex_coordinate_path(self):
"""The file path to the docked ligand-receptor complex."""
pass
@protocol_output(str)
def ligand_residue_name(self):
"""The residue name assigned to the docked ligand."""
return self._ligand_residue_name
@protocol_output(str)
def receptor_residue_name(self):
"""The residue name assigned to the receptor."""
return self._receptor_residue_name
[docs] def __init__(self, protocol_id):
super().__init__(protocol_id)
self._ligand_substance = None
self._number_of_ligand_conformers = 100
self._receptor_coordinate_file = None
self._activate_site_location = self.ActivateSiteLocation.ReceptorCenterOfMass
self._docked_ligand_coordinate_path = None
self._docked_complex_coordinate_path = None
self._ligand_residue_name = 'LIG'
self._receptor_residue_name = 'REC'
def _create_receptor(self):
"""Create an OpenEye receptor from a mol2 file.
Returns
-------
openeye.oedocking.OEReceptor
The OpenEye receptor object.
"""
from openeye import oechem, oedocking
input_stream = oechem.oemolistream(self._receptor_coordinate_file)
original_receptor_molecule = oechem.OEGraphMol()
oechem.OEReadMolecule(input_stream, original_receptor_molecule)
center_of_mass = oechem.OEFloatArray(3)
oechem.OEGetCenterOfMass(original_receptor_molecule, center_of_mass)
receptor = oechem.OEGraphMol()
oedocking.OEMakeReceptor(receptor, original_receptor_molecule,
center_of_mass[0], center_of_mass[1], center_of_mass[2])
return receptor
def _create_ligand(self):
"""Create an OpenEye receptor from a mol2 file.
Returns
-------
openeye.oechem.OEMol
The OpenEye ligand object with multiple conformers.
"""
from openforcefield.topology import Molecule
ligand = Molecule.from_smiles(self._ligand_substance.components[0].smiles)
ligand.generate_conformers(n_conformers=self._number_of_ligand_conformers)
# Assign AM1-BCC charges to the ligand just as an initial guess
# for docking. In future, we may want to get the charge model
# directly from the force field.
ligand.compute_partial_charges_am1bcc()
return ligand.to_openeye()
[docs] def execute(self, directory, available_resources):
if (len(self._ligand_substance.components) != 1 or
self._ligand_substance.components[0].role != Substance.ComponentRole.Ligand):
return PropertyEstimatorException(directory=directory,
message='The ligand substance must contain a single ligand component.')
import mdtraj
from openeye import oechem, oedocking
from simtk import unit as simtk_unit
logging.info('Initializing the receptor molecule.')
receptor_molecule = self._create_receptor()
logging.info('Initializing the ligand molecule.')
ligand_molecule = self._create_ligand()
logging.info('Initializing the docking object.')
# Dock the ligand to the receptor.
dock = oedocking.OEDock()
dock.Initialize(receptor_molecule)
docked_ligand = oechem.OEGraphMol()
logging.info('Performing the docking.')
status = dock.DockMultiConformerMolecule(docked_ligand, ligand_molecule)
if status != oedocking.OEDockingReturnCode_Success:
return PropertyEstimatorException(directory=directory,
message='The ligand could not be successfully docked')
docking_method = oedocking.OEDockMethodGetName(oedocking.OEDockMethod_Default)
oedocking.OESetSDScore(docked_ligand, dock, docking_method)
dock.AnnotatePose(docked_ligand)
self._docked_ligand_coordinate_path = path.join(directory, 'ligand.pdb')
output_stream = oechem.oemolostream(self._docked_ligand_coordinate_path)
oechem.OEWriteMolecule(output_stream, docked_ligand)
output_stream.close()
receptor_pdb_path = path.join(directory, 'receptor.pdb')
output_stream = oechem.oemolostream(receptor_pdb_path)
oechem.OEWriteMolecule(output_stream, receptor_molecule)
output_stream.close()
ligand_trajectory = mdtraj.load(self._docked_ligand_coordinate_path)
ligand_residue = ligand_trajectory.topology.residue(0)
ligand_residue.name = self._ligand_residue_name
# Save the ligand file with the correct residue name.
ligand_trajectory.save(self._docked_ligand_coordinate_path)
receptor_trajectory = mdtraj.load(receptor_pdb_path)
receptor_residue = receptor_trajectory.topology.residue(0)
receptor_residue.name = self._receptor_residue_name
# Create a merged ligand-receptor topology.
complex_topology = ligand_trajectory.topology.copy()
atom_mapping = {}
new_residue = complex_topology.add_residue(receptor_residue.name, complex_topology.chain(0))
for receptor_atom in receptor_residue.atoms:
new_atom = complex_topology.add_atom(receptor_atom.name, receptor_atom.element, new_residue,
serial=receptor_atom.serial)
atom_mapping[receptor_atom] = new_atom
for bond in receptor_trajectory.topology.bonds:
complex_topology.add_bond(atom_mapping[bond[0]], atom_mapping[bond[1]],
type=bond.type, order=bond.order)
complex_positions = []
complex_positions.extend(ligand_trajectory.openmm_positions(0).value_in_unit(simtk_unit.angstrom))
complex_positions.extend(receptor_trajectory.openmm_positions(0).value_in_unit(simtk_unit.angstrom))
complex_positions *= simtk_unit.angstrom
self._docked_complex_coordinate_path = path.join(directory, 'complex.pdb')
with open(self._docked_complex_coordinate_path, 'w+') as file:
app.PDBFile.writeFile(complex_topology.to_openmm(),
complex_positions, file)
return self._get_output_dictionary()