Source code for propertyestimator.substances

"""
An API for defining and creating substances.
"""

import abc
import math
from enum import Enum

import numpy as np

from propertyestimator import unit
from propertyestimator.utils.serialization import TypedBaseModel


[docs]class Substance(TypedBaseModel): """Defines the components, their amounts, and their roles in a system. Examples -------- A neat liquid containing only a single component: >>> liquid = Substance() >>> liquid.add_component(Substance.Component(smiles='O'), Substance.MoleFraction(1.0)) A binary mixture containing two components, where the mole fractions are explicitly stated: >>> binary_mixture = Substance() >>> binary_mixture.add_component(Substance.Component(smiles='O'), Substance.MoleFraction(0.2)) >>> binary_mixture.add_component(Substance.Component(smiles='CO'), Substance.MoleFraction(0.8)) The infinite dilution of one molecule within a bulk solvent or mixture may also be specified by defining the exact number of copies of that molecule, rather than a mole fraction: >>> benzene = Substance.Component(smiles='C1=CC=CC=C1', role=Substance.ComponentRole.Solute) >>> water = Substance.Component(smiles='O', role=Substance.ComponentRole.Solvent) >>> >>> infinite_dilution = Substance() >>> infinite_dilution.add_component(component=benzene, amount=Substance.ExactAmount(1)) # Infinite dilution. >>> infinite_dilution.add_component(component=water, amount=Substance.MoleFraction(1.0)) In this example we explicitly flag benzene as being the solute and the water component the solvent. This enables workflow's to easily identify key molecules of interest, such as the molecule which should be 'grown' into solution during solvation free energy calculations. """
[docs] class ComponentRole(Enum): """An enum which describes the role of a component in the system, such as whether the component is a solvent, a solute, a receptor etc. These roles are mainly only used by specific protocols to identify the correct species in a system, such as when doing docking or performing solvation free energy calculations. """ Solvent = 'Solvent' Solute = 'Solute' Ligand = 'Ligand' Receptor = 'Receptor' Undefined = 'Undefined'
[docs] class Component(TypedBaseModel): """Defines a single component in a system, as well as properties such as it's relative proportion in the system. """ @property def identifier(self): """str: A unique identifier for this component, which is either a smiles descriptor or the supplied label.""" return self._smiles or self._label @property def label(self): """str: A string label which describes this compound, for example, CB8.""" return self._label @property def smiles(self): """str: The smiles pattern which describes this component, which may be None for complex (e.g protein) molecules.""" return self._smiles @property def role(self): """ComponentRole: The role of this component in the system, such as a ligand or a receptor.""" return self._role def __init__(self, smiles=None, label=None, role=None): """Constructs a new Component object with either a label or a smiles string, but not both. Notes ----- The `label` and `smiles` arguments are mutually exclusive, and only one can be passed while the other should be `None`. Parameters ---------- smiles: str A SMILES descriptor of the component label: str A string label which describes this compound, for example, CB8. role: ComponentRole, optional The role of this component in the system. If no role is specified, a default role of solvent is applied. """ if label == smiles: label = None assert ((label is None and smiles is not None) or (label is not None and smiles is None) or (label is None and smiles is None)) label = label if label is not None else smiles self._label = label self._smiles = smiles self._role = role or Substance.ComponentRole.Solvent def __getstate__(self): return { 'label': self.label, 'smiles': self.smiles, 'role': self.role } def __setstate__(self, state): self._label = state['label'] self._smiles = state['smiles'] self._role = state['role'] def __str__(self): return self.identifier def __hash__(self): return hash((self.identifier, self._role)) def __eq__(self, other): return hash(self) == hash(other) def __ne__(self, other): return not (self == other)
[docs] class Amount(abc.ABC): """An abstract representation of the amount of a given component in a substance. """ @property def value(self): """The value of this amount.""" return self._value @property def identifier(self): """A string identifier for this amount.""" raise NotImplementedError() def __init__(self, value=None): """Constructs a new Amount object.""" self._value = value
[docs] @abc.abstractmethod def to_number_of_molecules(self, total_substance_molecules, tolerance=None): """Converts this amount to an exact number of molecules Parameters ---------- total_substance_molecules: int The total number of molecules in the whole substance. This amount will contribute to a portion of this total number. tolerance: float, optional The tolerance with which this amount should be in. As an example, when converting a mole fraction into a number of molecules, the total number of molecules may not be sufficiently large enough to reproduce this amount. Returns ------- int The number of molecules which this amount represents, given the `total_substance_molecules`. """ raise NotImplementedError()
def __getstate__(self): return {'value': self._value} def __setstate__(self, state): self._value = state['value'] def __str__(self): return self.identifier def __eq__(self, other): return np.isclose(self._value, other.value) def __ne__(self, other): return not (self == other) def __hash__(self): return hash(self.identifier)
[docs] class MoleFraction(Amount): """Represents the amount of a component in a substance as a mole fraction.""" @property def value(self): """float: The value of this amount.""" return super(Substance.MoleFraction, self).value @property def identifier(self): return f'{{{self._value:.6f}}}' def __init__(self, value=1.0): """Constructs a new MoleFraction object. Parameters ---------- value: float A mole fraction in the range (0.0, 1.0] """ if value <= 0.0 or value > 1.0: raise ValueError('A mole fraction must be greater than zero, and less than or ' 'equal to one.') if math.floor(value * 1e6) < 1: raise ValueError('Mole fractions are only precise to the sixth ' 'decimal place within this class representation.') super().__init__(value)
[docs] def to_number_of_molecules(self, total_substance_molecules, tolerance=None): # Determine how many molecules of each type will be present in the system. number_of_molecules = self._value * total_substance_molecules fractional_number_of_molecules = number_of_molecules % 1 if np.isclose(fractional_number_of_molecules, 0.5): number_of_molecules = int(number_of_molecules) else: number_of_molecules = int(round(number_of_molecules)) if number_of_molecules == 0: raise ValueError('The total number of substance molecules was not large enough, ' 'such that this non-zero amount translates into zero molecules ' 'of this component in the substance.') if tolerance is not None: mole_fraction = number_of_molecules / total_substance_molecules if abs(mole_fraction - self._value) > tolerance: raise ValueError(f'The mole fraction ({mole_fraction}) given a total number of molecules ' f'({total_substance_molecules}) is outside of the tolerance {tolerance} ' f'of the target mole fraction {self._value}') return number_of_molecules
[docs] class ExactAmount(Amount): """Represents the amount of a component in a substance as an exact number of molecules. The expectation is that this amount should be used for components which are infinitely dilute (such as ligands in binding calculations), and hence do not contribute to the total mole fraction of a substance""" @property def value(self): """int: The value of this amount.""" return super(Substance.ExactAmount, self).value @property def identifier(self): return f'({int(round(self._value)):d})' def __init__(self, value=1): """Constructs a new ExactAmount object. Parameters ---------- value: int An exact number of molecules. """ if not np.isclose(int(round(value)), value): raise ValueError('The value must be an integer.') super().__init__(value)
[docs] def to_number_of_molecules(self, total_substance_molecules, tolerance=None): return self._value
@property def identifier(self): """str: A unique str representation of this substance, which encodes all components and their amounts in the substance.""" component_identifiers = [component.identifier for component in self._components] component_identifiers.sort() sorted_component_identifiers = [component.identifier for component in self._components] sorted_component_identifiers.sort() identifier_split = [] for component_identifier in sorted_component_identifiers: component_amounts = sorted(self._amounts[component_identifier], key=lambda x: type(x).__name__) amount_identifier = ''.join([component_amount.identifier for component_amount in component_amounts]) identifier = f'{component_identifier}{amount_identifier}' identifier_split.append(identifier) return '|'.join(identifier_split) @property def components(self): """list of Substance.Component: A list of all of the components in this substance.""" return self._components @property def number_of_components(self): """int: The number of different components in this substance.""" return len(self._components)
[docs] def __init__(self): """Constructs a new Substance object.""" self._amounts = {} self._components = []
[docs] @classmethod def from_components(cls, *components): """Creates a new `Substance` object from a list of components. This method assumes that all components should be present with equal mole fractions. Parameters ---------- components: Substance.Component or str The components to add to the substance. These may either be full `Substance.Component` objects or just the smiles representation of the component. Returns ------- Substance The substance containing the requested components in equal amounts. """ if len(components) == 0: raise ValueError('At least one component must be specified') mole_fraction = 1.0 / len(components) return_substance = cls() for component in components: if isinstance(component, str): component = Substance.Component(smiles=component) return_substance.add_component(component, Substance.MoleFraction(mole_fraction)) return return_substance
[docs] def add_component(self, component, amount): """Add a component to the Substance. If the component is already present in the substance, then the mole fraction will be added to the current mole fraction of that component. Parameters ---------- component : Substance.Component The component to add to the system. amount : Substance.Amount The amount of this component in the substance. """ assert isinstance(component, Substance.Component) assert isinstance(amount, Substance.Amount) if isinstance(amount, Substance.MoleFraction): total_mole_fraction = amount.value for component_identifier in self._amounts: total_mole_fraction += sum([amount.value for amount in self._amounts[component_identifier] if isinstance(amount, Substance.MoleFraction)]) if np.isclose(total_mole_fraction, 1.0): total_mole_fraction = 1.0 if total_mole_fraction > 1.0: raise ValueError(f'The total mole fraction of this substance {total_mole_fraction} exceeds 1.0') if component.identifier not in self._amounts: self._components.append(component) existing_amount_of_type = None all_amounts = [] if component.identifier not in self._amounts else self._amounts[component.identifier] remaining_amounts = [] # Check to see if an amount of the same type already exists in # the substance, such that this amount should be appended to it. for existing_amount in all_amounts: if not type(existing_amount) is type(amount): remaining_amounts.append(existing_amount) continue existing_amount_of_type = existing_amount break if existing_amount_of_type is not None: # Append any existing amounts to the new amount. amount = type(amount)(existing_amount_of_type.value + amount.value) remaining_amounts.append(amount) self._amounts[component.identifier] = frozenset(remaining_amounts)
[docs] def get_amounts(self, component): """Returns the amounts of the component in this substance. Parameters ---------- component: str or Substance.Component The component (or it's identifier) to retrieve the amount of. Returns ------- list of Substance.Amount The amounts of the component in this substance. """ assert isinstance(component, str) or isinstance(component, Substance.Component) identifier = component if isinstance(component, str) else component.identifier return self._amounts[identifier]
[docs] def get_molecules_per_component(self, maximum_molecules, tolerance=None): """Returns the number of molecules for each component in this substance, given a maximum total number of molecules. Parameters ---------- maximum_molecules: int The maximum number of molecules. tolerance: float, optional The tolerance within which this amount should be represented. As an example, when converting a mole fraction into a number of molecules, the total number of molecules may not be sufficiently large enough to reproduce this amount. Returns ------- dict of str and int A dictionary of molecule counts per component, where each key is a component identifier. """ number_of_molecules = {} remaining_molecule_slots = maximum_molecules for index, component in enumerate(self._components): amounts = self._amounts[component.identifier] for amount in amounts: if not isinstance(amount, Substance.ExactAmount): continue remaining_molecule_slots -= amount.value if remaining_molecule_slots < 0: raise ValueError(f'The required number of molecules {maximum_molecules - remaining_molecule_slots} ' f'exceeds the provided maximum number ({maximum_molecules}).') for component in self._components: number_of_molecules[component.identifier] = 0 for amount in self._amounts[component.identifier]: number_of_molecules[component.identifier] += amount.to_number_of_molecules(remaining_molecule_slots, tolerance) return number_of_molecules
[docs] @staticmethod def calculate_aqueous_ionic_mole_fraction(ionic_strength): """Determines what mole fraction of ions is needed to yield an aqueous system of a given ionic strength. Parameters ---------- ionic_strength: unit.Quantity The ionic string in units of molar. Returns ------- float The mole fraction of ions. """ # Taken from YANK: # https://github.com/choderalab/yank/blob/4dfcc8e127c51c20180fe6caeb49fcb1f21730c6/Yank/pipeline.py#L1869 water_molarity = (998.23 * unit.gram / unit.litre) / (18.01528 * unit.gram / unit.mole) ionic_mole_fraction = ionic_strength / (ionic_strength + water_molarity) return ionic_mole_fraction
def __getstate__(self): return { 'components': self._components, 'amounts': self._amounts } def __setstate__(self, state): self._components = state['components'] self._amounts = state['amounts'] def __str__(self): return self.identifier def __hash__(self): sorted_component_identifiers = [component.identifier for component in self._components] sorted_component_identifiers.sort() component_by_id = {component.identifier: component for component in self._components} string_hash_split = [] for identifier in sorted_component_identifiers: component_role = component_by_id[identifier].role component_amounts = sorted(self._amounts[identifier], key=lambda x: type(x).__name__) amount_identifier = ''.join([component_amount.identifier for component_amount in component_amounts]) string_hash_split.append(f'{identifier}_{component_role}_{amount_identifier}') string_hash = '|'.join(string_hash_split) return hash(string_hash) def __eq__(self, other): return isinstance(other, Substance) and hash(self) == hash(other) def __ne__(self, other): return not (self == other)