"""
A collection of protocols for building coordinates for molecular systems.
"""
import logging
from enum import Enum
from os import path
from simtk.openmm import app
from propertyestimator import unit
from propertyestimator.substances import Substance
from propertyestimator.utils import packmol, create_molecule_from_smiles
from propertyestimator.utils.exceptions import PropertyEstimatorException
from propertyestimator.workflow.decorators import protocol_input, protocol_output
from propertyestimator.workflow.plugins import register_calculation_protocol
from propertyestimator.workflow.protocols import BaseProtocol
[docs]@register_calculation_protocol()
class BuildCoordinatesPackmol(BaseProtocol):
"""Creates a set of 3D coordinates with a specified composition.
Notes
-----
The coordinates are created using packmol.
"""
@protocol_input(int)
def max_molecules(self):
"""The maximum number of molecules to be added to the system."""
pass
@protocol_input(unit.Quantity)
def mass_density(self):
"""The target density of the created system."""
pass
@protocol_input(list)
def box_aspect_ratio(self):
"""The aspect ratio of the simulation box. The default is [1.0, 1.0, 1.0],
i.e a cubic box."""
return self._box_aspect_ratio
@protocol_input(Substance)
def substance(self):
"""The composition of the system to build."""
pass
@protocol_input(bool)
def verbose_packmol(self):
"""If True, packmol will be allowed to log verbose information to the logger,
and any working packmol files will be retained."""
pass
@protocol_input(bool)
def retain_packmol_files(self):
"""If True, packmol will not delete all of the temporary files it creates
while building the coordinates."""
pass
@protocol_output(int)
def final_number_of_molecules(self):
"""The file path to the created PDB coordinate file.
"""
# TODO: This is a temporary addition until inputs are made available as outputs by default.
pass
@protocol_output(str)
def coordinate_file_path(self):
"""The file path to the created PDB coordinate file."""
pass
[docs] def __init__(self, protocol_id):
"""Constructs a new BuildCoordinatesPackmol object.
"""
super().__init__(protocol_id)
self._substance = None
self._coordinate_file_path = None
self._positions = None
self._max_molecules = 1000
self._mass_density = 0.95 * unit.grams / unit.milliliters
self._verbose_packmol = False
self._retain_packmol_files = False
self._box_aspect_ratio = [1.0, 1.0, 1.0]
self._final_number_of_molecules = None
def _build_molecule_arrays(self, directory):
"""Converts the input substance into a list of openeye OEMol's and a list of
counts for how many of each there should be as determined by the `max_molecules`
input and the molecules respective mole fractions.
Parameters
----------
directory: The directory in which this protocols working files are being saved.
Returns
-------
list of openeye.oechem.OEMol
The list of openeye molecules.
list of int
The number of each molecule which should be added to the system.
PropertyEstimatorException, optional
None if no exceptions occured, otherwise the exception.
"""
molecules = []
for component in self._substance.components:
molecule = create_molecule_from_smiles(component.smiles)
if molecule is None:
return None, None, PropertyEstimatorException(directory=directory,
message=f'{component} could not be converted '
f'to a Molecule')
molecules.append(molecule)
# Determine how many molecules of each type will be present in the system.
molecules_per_component = self._substance.get_molecules_per_component(self._max_molecules)
number_of_molecules = [0] * self._substance.number_of_components
for index, component in enumerate(self._substance.components):
number_of_molecules[index] = molecules_per_component[component.identifier]
return molecules, number_of_molecules, None
def _save_results(self, directory, topology, positions):
"""Save the results of running PACKMOL in the working directory
Parameters
----------
directory: str
The directory to save the results in.
topology : simtk.openmm.Topology
The topology of the created system.
positions : propertyestimator.unit.Quantity
A `propertyestimator.unit.Quantity` wrapped `numpy.ndarray` (shape=[natoms,3])
which contains the created positions with units compatible with angstroms.
"""
from simtk import unit as simtk_unit
simtk_positions = positions.to(unit.angstrom).magnitude * simtk_unit.angstrom
self._coordinate_file_path = path.join(directory, 'output.pdb')
with open(self._coordinate_file_path, 'w+') as minimised_file:
# noinspection PyTypeChecker
app.PDBFile.writeFile(topology, simtk_positions, minimised_file)
logging.info('Coordinates generated: ' + self._substance.identifier)
[docs] def execute(self, directory, available_resources):
logging.info(f'Generating coordinates for {self._substance.identifier}: {self.id}')
if self._substance is None:
return PropertyEstimatorException(directory=directory,
message='The substance input is non-optional')
self._final_number_of_molecules = self._max_molecules
molecules, number_of_molecules, exception = self._build_molecule_arrays(directory)
if exception is not None:
return exception
packmol_directory = path.join(directory, 'packmol_files')
# Create packed box
topology, positions = packmol.pack_box(molecules=molecules,
number_of_copies=number_of_molecules,
mass_density=self._mass_density,
box_aspect_ratio=self._box_aspect_ratio,
verbose=self._verbose_packmol,
working_directory=packmol_directory,
retain_working_files=self._retain_packmol_files)
if topology is None or positions is None:
return PropertyEstimatorException(directory=directory,
message='Packmol failed to complete.')
self._save_results(directory, topology, positions)
return self._get_output_dictionary()
[docs]@register_calculation_protocol()
class SolvateExistingStructure(BuildCoordinatesPackmol):
"""Creates a set of 3D coordinates with a specified composition.
Notes
-----
The coordinates are created using packmol.
"""
@protocol_input(str)
def solute_coordinate_file(self):
"""A file path to the solute to solvate."""
pass
[docs] def __init__(self, protocol_id):
super().__init__(protocol_id)
self._solute_coordinate_file = None
[docs] def execute(self, directory, available_resources):
logging.info(f'Generating coordinates for {self._substance.identifier}: {self.id}')
if self._substance is None:
return PropertyEstimatorException(directory=directory,
message='The substance input is non-optional')
if self._solute_coordinate_file is None:
return PropertyEstimatorException(directory=directory,
message='The solute coordinate file input is non-optional')
molecules, number_of_molecules, exception = self._build_molecule_arrays(directory)
if exception is not None:
return exception
packmol_directory = path.join(directory, 'packmol_files')
# Create packed box
topology, positions = packmol.pack_box(molecules=molecules,
number_of_copies=number_of_molecules,
structure_to_solvate=self._solute_coordinate_file,
mass_density=self._mass_density,
verbose=self._verbose_packmol,
working_directory=packmol_directory,
retain_working_files=self._retain_packmol_files)
if topology is None or positions is None:
return PropertyEstimatorException(directory=directory,
message='Packmol failed to complete.')
self._save_results(directory, topology, positions)
return self._get_output_dictionary()
[docs]@register_calculation_protocol()
class BuildDockedCoordinates(BaseProtocol):
"""Creates a set of coordinates for a ligand bound to some receptor.
Notes
-----
This protocol currently only supports docking with the OpenEye OEDocking
framework.
"""
[docs] class ActivateSiteLocation(Enum):
"""An enum which describes the methods by which a receptors
activate site(s) is located."""
ReceptorCenterOfMass = 'ReceptorCenterOfMass'
@protocol_input(Substance)
def ligand_substance(self):
"""A substance containing only the ligand to dock."""
pass
@protocol_input(int)
def number_of_ligand_conformers(self):
"""The number of conformers to try and dock into the receptor structure."""
pass
@protocol_input(str)
def receptor_coordinate_file(self):
"""The file path to the coordinates of the receptor molecule."""
pass
@protocol_input(ActivateSiteLocation)
def activate_site_location(self):
"""Defines the method by which the activate site is identified. Currently the only available
option is `ActivateSiteLocation.ReceptorCenterOfMass`"""
pass
@protocol_output(str)
def docked_ligand_coordinate_path(self):
"""The file path to the coordinates of the ligand in it's docked
pose, aligned with the initial `receptor_coordinate_file`."""
pass
@protocol_output(str)
def docked_complex_coordinate_path(self):
"""The file path to the docked ligand-receptor complex."""
pass
@protocol_output(str)
def ligand_residue_name(self):
"""The residue name assigned to the docked ligand."""
return self._ligand_residue_name
@protocol_output(str)
def receptor_residue_name(self):
"""The residue name assigned to the receptor."""
return self._receptor_residue_name
[docs] def __init__(self, protocol_id):
super().__init__(protocol_id)
self._ligand_substance = None
self._number_of_ligand_conformers = 100
self._receptor_coordinate_file = None
self._activate_site_location = self.ActivateSiteLocation.ReceptorCenterOfMass
self._docked_ligand_coordinate_path = None
self._docked_complex_coordinate_path = None
self._ligand_residue_name = 'LIG'
self._receptor_residue_name = 'REC'
def _create_receptor(self):
"""Create an OpenEye receptor from a mol2 file.
Returns
-------
openeye.oedocking.OEReceptor
The OpenEye receptor object.
"""
from openeye import oechem, oedocking
input_stream = oechem.oemolistream(self._receptor_coordinate_file)
original_receptor_molecule = oechem.OEGraphMol()
oechem.OEReadMolecule(input_stream, original_receptor_molecule)
center_of_mass = oechem.OEFloatArray(3)
oechem.OEGetCenterOfMass(original_receptor_molecule, center_of_mass)
receptor = oechem.OEGraphMol()
oedocking.OEMakeReceptor(receptor, original_receptor_molecule,
center_of_mass[0], center_of_mass[1], center_of_mass[2])
return receptor
def _create_ligand(self):
"""Create an OpenEye receptor from a mol2 file.
Returns
-------
openeye.oechem.OEMol
The OpenEye ligand object with multiple conformers.
"""
from openforcefield.topology import Molecule
ligand = Molecule.from_smiles(self._ligand_substance.components[0].smiles)
ligand.generate_conformers(n_conformers=self._number_of_ligand_conformers)
# Assign AM1-BCC charges to the ligand just as an initial guess
# for docking. In future, we may want to get the charge model
# directly from the force field.
ligand.compute_partial_charges_am1bcc()
return ligand.to_openeye()
[docs] def execute(self, directory, available_resources):
if (len(self._ligand_substance.components) != 1 or
self._ligand_substance.components[0].role != Substance.ComponentRole.Ligand):
return PropertyEstimatorException(directory=directory,
message='The ligand substance must contain a single ligand component.')
import mdtraj
from openeye import oechem, oedocking
from simtk import unit as simtk_unit
logging.info('Initializing the receptor molecule.')
receptor_molecule = self._create_receptor()
logging.info('Initializing the ligand molecule.')
ligand_molecule = self._create_ligand()
logging.info('Initializing the docking object.')
# Dock the ligand to the receptor.
dock = oedocking.OEDock()
dock.Initialize(receptor_molecule)
docked_ligand = oechem.OEGraphMol()
logging.info('Performing the docking.')
status = dock.DockMultiConformerMolecule(docked_ligand, ligand_molecule)
if status != oedocking.OEDockingReturnCode_Success:
return PropertyEstimatorException(directory=directory,
message='The ligand could not be successfully docked')
docking_method = oedocking.OEDockMethodGetName(oedocking.OEDockMethod_Default)
oedocking.OESetSDScore(docked_ligand, dock, docking_method)
dock.AnnotatePose(docked_ligand)
self._docked_ligand_coordinate_path = path.join(directory, 'ligand.pdb')
output_stream = oechem.oemolostream(self._docked_ligand_coordinate_path)
oechem.OEWriteMolecule(output_stream, docked_ligand)
output_stream.close()
receptor_pdb_path = path.join(directory, 'receptor.pdb')
output_stream = oechem.oemolostream(receptor_pdb_path)
oechem.OEWriteMolecule(output_stream, receptor_molecule)
output_stream.close()
ligand_trajectory = mdtraj.load(self._docked_ligand_coordinate_path)
ligand_residue = ligand_trajectory.topology.residue(0)
ligand_residue.name = self._ligand_residue_name
# Save the ligand file with the correct residue name.
ligand_trajectory.save(self._docked_ligand_coordinate_path)
receptor_trajectory = mdtraj.load(receptor_pdb_path)
receptor_residue = receptor_trajectory.topology.residue(0)
receptor_residue.name = self._receptor_residue_name
# Create a merged ligand-receptor topology.
complex_topology = ligand_trajectory.topology.copy()
atom_mapping = {}
new_residue = complex_topology.add_residue(receptor_residue.name, complex_topology.chain(0))
for receptor_atom in receptor_residue.atoms:
new_atom = complex_topology.add_atom(receptor_atom.name, receptor_atom.element, new_residue,
serial=receptor_atom.serial)
atom_mapping[receptor_atom] = new_atom
for bond in receptor_trajectory.topology.bonds:
complex_topology.add_bond(atom_mapping[bond[0]], atom_mapping[bond[1]],
type=bond.type, order=bond.order)
complex_positions = []
complex_positions.extend(ligand_trajectory.openmm_positions(0).value_in_unit(simtk_unit.angstrom))
complex_positions.extend(receptor_trajectory.openmm_positions(0).value_in_unit(simtk_unit.angstrom))
complex_positions *= simtk_unit.angstrom
self._docked_complex_coordinate_path = path.join(directory, 'complex.pdb')
with open(self._docked_complex_coordinate_path, 'w+') as file:
app.PDBFile.writeFile(complex_topology.to_openmm(),
complex_positions, file)
return self._get_output_dictionary()