"""
An API for defining and creating substances.
"""
import abc
import math
from enum import Enum
import numpy as np
from propertyestimator import unit
from propertyestimator.utils.serialization import TypedBaseModel
[docs]class Substance(TypedBaseModel):
"""Defines the components, their amounts, and their roles in a system.
Examples
--------
A neat liquid containing only a single component:
>>> liquid = Substance()
>>> liquid.add_component(Substance.Component(smiles='O'), Substance.MoleFraction(1.0))
A binary mixture containing two components, where the mole fractions are explicitly stated:
>>> binary_mixture = Substance()
>>> binary_mixture.add_component(Substance.Component(smiles='O'), Substance.MoleFraction(0.2))
>>> binary_mixture.add_component(Substance.Component(smiles='CO'), Substance.MoleFraction(0.8))
The infinite dilution of one molecule within a bulk solvent or mixture may also be specified
by defining the exact number of copies of that molecule, rather than a mole fraction:
>>> benzene = Substance.Component(smiles='C1=CC=CC=C1', role=Substance.ComponentRole.Solute)
>>> water = Substance.Component(smiles='O', role=Substance.ComponentRole.Solvent)
>>>
>>> infinite_dilution = Substance()
>>> infinite_dilution.add_component(component=benzene, amount=Substance.ExactAmount(1)) # Infinite dilution.
>>> infinite_dilution.add_component(component=water, amount=Substance.MoleFraction(1.0))
In this example we explicitly flag benzene as being the solute and the water component the solvent.
This enables workflow's to easily identify key molecules of interest, such as the molecule which should
be 'grown' into solution during solvation free energy calculations.
"""
[docs] class ComponentRole(Enum):
"""An enum which describes the role of a component in the system,
such as whether the component is a solvent, a solute, a receptor etc.
These roles are mainly only used by specific protocols to identify
the correct species in a system, such as when doing docking or performing
solvation free energy calculations.
"""
Solvent = 'Solvent'
Solute = 'Solute'
Ligand = 'Ligand'
Receptor = 'Receptor'
Undefined = 'Undefined'
[docs] class Component(TypedBaseModel):
"""Defines a single component in a system, as well as properties
such as it's relative proportion in the system.
"""
@property
def identifier(self):
"""str: A unique identifier for this component, which is either a
smiles descriptor or the supplied label."""
return self._smiles or self._label
@property
def label(self):
"""str: A string label which describes this compound, for example, CB8."""
return self._label
@property
def smiles(self):
"""str: The smiles pattern which describes this component, which may be None
for complex (e.g protein) molecules."""
return self._smiles
@property
def role(self):
"""ComponentRole: The role of this component in the system, such as a
ligand or a receptor."""
return self._role
def __init__(self, smiles=None, label=None, role=None):
"""Constructs a new Component object with either a label or
a smiles string, but not both.
Notes
-----
The `label` and `smiles` arguments are mutually exclusive, and only
one can be passed while the other should be `None`.
Parameters
----------
smiles: str
A SMILES descriptor of the component
label: str
A string label which describes this compound, for example, CB8.
role: ComponentRole, optional
The role of this component in the system. If no role is specified,
a default role of solvent is applied.
"""
if label == smiles:
label = None
assert ((label is None and smiles is not None) or
(label is not None and smiles is None) or
(label is None and smiles is None))
label = label if label is not None else smiles
self._label = label
self._smiles = smiles
self._role = role or Substance.ComponentRole.Solvent
def __getstate__(self):
return {
'label': self.label,
'smiles': self.smiles,
'role': self.role
}
def __setstate__(self, state):
self._label = state['label']
self._smiles = state['smiles']
self._role = state['role']
def __str__(self):
return self.identifier
def __hash__(self):
return hash((self.identifier, self._role))
def __eq__(self, other):
return hash(self) == hash(other)
def __ne__(self, other):
return not (self == other)
[docs] class Amount(abc.ABC):
"""An abstract representation of the amount of a given component
in a substance.
"""
@property
def value(self):
"""The value of this amount."""
return self._value
@property
def identifier(self):
"""A string identifier for this amount."""
raise NotImplementedError()
def __init__(self, value=None):
"""Constructs a new Amount object."""
self._value = value
[docs] @abc.abstractmethod
def to_number_of_molecules(self, total_substance_molecules, tolerance=None):
"""Converts this amount to an exact number of molecules
Parameters
----------
total_substance_molecules: int
The total number of molecules in the whole substance. This amount
will contribute to a portion of this total number.
tolerance: float
The tolerance with which this amount should be in. As an example,
when converting a mole fraction into a number of molecules, the
total number of molecules may not be sufficently large enough to
reproduce this amount.
Returns
-------
int
The number of molecules which this amount represents,
given the `total_substance_molecules`.
"""
raise NotImplementedError()
def __getstate__(self):
return {'value': self._value}
def __setstate__(self, state):
self._value = state['value']
def __str__(self):
return self.identifier
def __eq__(self, other):
return np.isclose(self._value, other.value)
[docs] class MoleFraction(Amount):
"""Represents the amount of a component in a substance as a
mole fraction."""
@property
def value(self):
"""float: The value of this amount."""
return super(Substance.MoleFraction, self).value
@property
def identifier(self):
return f'{{{self._value:.6f}}}'
def __init__(self, value=1.0):
"""Constructs a new MoleFraction object.
Parameters
----------
value: float
A mole fraction in the range (0.0, 1.0]
"""
if value <= 0.0 or value > 1.0:
raise ValueError('A mole fraction must be greater than zero, and less than or '
'equal to one.')
if math.floor(value * 1e6) < 1:
raise ValueError('Mole fractions are only precise to the sixth '
'decimal place.')
super().__init__(value)
[docs] def to_number_of_molecules(self, total_substance_molecules, tolerance=None):
# Determine how many molecules of each type will be present in the system.
number_of_molecules = int(round(self._value * total_substance_molecules))
if number_of_molecules == 0:
raise ValueError('The total number of substance molecules was not large enough, '
'such that this non-zero amount translates into zero molecules '
'of this component in the substance.')
if tolerance is not None:
mole_fraction = number_of_molecules / total_substance_molecules
if abs(mole_fraction - self._value) > tolerance:
raise ValueError(f'The mole fraction ({mole_fraction}) given a total number of molecules '
f'({total_substance_molecules}) is outside of the tolerance {tolerance} '
f'of the target mole fraction {self._value}')
return number_of_molecules
[docs] class ExactAmount(Amount):
"""Represents the amount of a component in a substance as an
exact number of molecules.
The expectation is that this amount should be used for components which
are infinitely dilute (such as ligands in binding calculations), and hence
do not contribute to the total mole fraction of a substance"""
@property
def value(self):
"""int: The value of this amount."""
return super(Substance.ExactAmount, self).value
@property
def identifier(self):
return f'({int(round(self._value)):d})'
def __init__(self, value=1):
"""Constructs a new ExactAmount object.
Parameters
----------
value: int
An exact number of molecules.
"""
if not np.isclose(int(round(value)), value):
raise ValueError('The value must be an integer.')
super().__init__(value)
[docs] def to_number_of_molecules(self, total_substance_molecules, tolerance=None):
return self._value
@property
def identifier(self):
"""str: A unique str representation of this substance, which encodes all components
and their amounts in the substance."""
component_identifiers = [component.identifier for component in self._components]
component_identifiers.sort()
sorted_component_identifiers = [component.identifier for component in self._components]
sorted_component_identifiers.sort()
identifier_split = []
for component_identifier in sorted_component_identifiers:
component_amount = self._amounts[component_identifier]
identifier = f'{component_identifier}{component_amount.identifier}'
identifier_split.append(identifier)
return '|'.join(identifier_split)
@property
def components(self):
"""list of Substance.Component: A list of all of the components in this substance."""
return self._components
@property
def number_of_components(self):
"""int: The number of different components in this substance."""
return len(self._components)
[docs] def __init__(self):
"""Constructs a new Substance object."""
self._amounts = {}
self._components = []
[docs] def add_component(self, component, amount):
"""Add a component to the Substance. If the component is already present in
the substance, then the mole fraction will be added to the current mole
fraction of that component.
Parameters
----------
component : Substance.Component
The component to add to the system.
amount : Substance.Amount
The amount of this component in the substance.
"""
assert isinstance(component, Substance.Component)
assert isinstance(amount, Substance.Amount)
if isinstance(amount, Substance.MoleFraction):
total_mole_fraction = amount.value + sum([amount.value for amount in self._amounts if
isinstance(amount, Substance.MoleFraction)])
if total_mole_fraction > 1.0:
raise ValueError(f'The total mole fraction of this substance {total_mole_fraction} exceeds 1.0')
if component.identifier not in self._amounts:
self._amounts[component.identifier] = amount
self._components.append(component)
return
existing_amount = self._amounts[component.identifier]
if not type(existing_amount) is type(amount):
raise ValueError(f'This component already exists in the substance, but in a '
f'different amount type ({type(existing_amount)}) than that '
f'specified ({type(amount)})')
new_amount = type(amount)(existing_amount.value + amount.value)
self._amounts[component.identifier] = new_amount
[docs] def get_amount(self, component):
"""Returns the amount of the component in this substance.
Parameters
----------
component: str or Substance.Component
The component (or it's identifier) to retrieve the mole fraction of.
Returns
-------
Substance.Amount
The amount of the component in this substance.
"""
assert isinstance(component, str) or isinstance(component, Substance.Component)
identifier = component if isinstance(component, str) else component.identifier
return self._amounts[identifier]
[docs] def get_molecules_per_component(self, maximum_molecules):
"""Returns the number of molecules for each component in this substance,
given a maximum total number of molecules.
Parameters
----------
maximum_molecules: int
The maximum number of molecules.
Returns
-------
dict of str and int
A dictionary of molecule counts per component, where each key is
a component identifier.
"""
number_of_molecules = {}
remaining_molecule_slots = maximum_molecules
for index, component in enumerate(self._components):
amount = self._amounts[component.identifier]
if not isinstance(amount, Substance.ExactAmount):
continue
remaining_molecule_slots -= amount.value
if remaining_molecule_slots < 0:
raise ValueError(f'The required number of molecules {maximum_molecules - remaining_molecule_slots} '
f'exceeds the provided maximum number ({maximum_molecules}).')
for index, component in enumerate(self._components):
amount = self._amounts[component.identifier]
number_of_molecules[component.identifier] = amount.to_number_of_molecules(remaining_molecule_slots)
return number_of_molecules
[docs] @staticmethod
def calculate_aqueous_ionic_mole_fraction(ionic_strength):
"""Determines what mole fraction of ions is needed to yield
an aqueous system of a given ionic strength.
Parameters
----------
ionic_strength: unit.Quantity
The ionic string in units of molar.
Returns
-------
float
The mole fraction of ions.
"""
# Taken from YANK:
# https://github.com/choderalab/yank/blob/4dfcc8e127c51c20180fe6caeb49fcb1f21730c6/Yank/pipeline.py#L1869
water_molarity = (998.23 * unit.gram / unit.litre) / (18.01528 * unit.gram / unit.mole)
ionic_mole_fraction = ionic_strength / (ionic_strength + water_molarity)
return ionic_mole_fraction
def __getstate__(self):
return {
'components': self._components,
'amounts': self._amounts
}
def __setstate__(self, state):
self._components = state['components']
self._amounts = state['amounts']
def __str__(self):
return self.identifier
def __hash__(self):
sorted_component_identifiers = [component.identifier for component in self._components]
sorted_component_identifiers.sort()
component_by_id = {component.identifier: component for component in self._components}
string_hash_split = []
for identifier in sorted_component_identifiers:
component_role = component_by_id[identifier].role
component_amount = self._amounts[identifier].identifier
string_hash_split.append(f'{identifier}_{component_role}_{component_amount}')
string_hash = '|'.join(string_hash_split)
return hash(string_hash)
def __eq__(self, other):
return hash(self) == hash(other)
def __ne__(self, other):
return not (self == other)