"""
An API for defining, storing, and loading collections of physical property data.
"""
from simtk.openmm.app import element
from propertyestimator.utils import create_molecule_from_smiles
from propertyestimator.utils.serialization import TypedBaseModel
class PhysicalPropertyDataSet(TypedBaseModel):
    """
    An object for storing and curating data sets of both measured and
    estimated physical properties. This class defines a number of
    convenience functions for filtering out unwanted properties, and for
    generating general statistics (such as the number of properties per
    substance) about the set.

    Notes
    -----
    All of the ``filter_by_*`` methods modify the data set in place and
    return ``None``. Substances which are left without any properties
    after a filter has been applied are removed from the set entirely.
    """

    def __init__(self):
        """
        Constructs a new, empty PhysicalPropertyDataSet object.
        """
        # Maps a substance identifier onto the list of properties which
        # were measured / estimated for that substance.
        self._properties = {}
        # The sources from which the stored properties were gathered.
        self._sources = []

    @property
    def properties(self):
        """
        dict of str and list of PhysicalProperty: A list of all of the properties
        within this set, partitioned by substance identifier.

        See Also
        --------
        Substance.identifier()
        """
        # TODO: Add a link to Substance.identifier when have access to sphinx docs.
        # TODO: Investigate why PhysicalProperty is not cross-linking.
        return self._properties

    @property
    def sources(self):
        """list of Source: The list of sources from which the properties were gathered"""
        return self._sources

    @property
    def number_of_properties(self):
        """int: The number of properties in the data set."""
        return sum(len(properties) for properties in self._properties.values())

    def merge(self, data_set):
        """Merge another data set into the current one.

        The properties of `data_set` are appended to the per-substance
        lists of this set, and its sources are appended to this set's
        sources. No de-duplication is performed. Passing ``None`` is a
        no-op.

        Parameters
        ----------
        data_set : PhysicalPropertyDataSet
            The secondary data set to merge into this one.
        """
        if data_set is None:
            return

        # TODO: Do we need to check whether merging the same data set here?
        for substance_hash, properties in data_set.properties.items():
            self._properties.setdefault(substance_hash, []).extend(properties)

        self._sources.extend(data_set.sources)

    def filter_by_function(self, filter_function):
        """Filter the data set using a given filter function.

        Parameters
        ----------
        filter_function : lambda
            The filter function. It is called with each property in
            turn, and only those properties for which it returns a
            truthy value are retained.
        """
        filtered_properties = {}

        # This works for now - if we wish to be able to undo a filter then
        # a 'filtered' list needs to be maintained separately to the main list.
        for substance_id, all_properties in self._properties.items():

            substance_properties = list(filter(filter_function, all_properties))

            # Drop substances which no longer have any properties.
            if not substance_properties:
                continue

            filtered_properties[substance_id] = substance_properties

        # Rebind to a fresh dictionary rather than mutating the old one, so
        # any dictionary previously handed out by `properties` is left as is.
        self._properties = filtered_properties

    def filter_by_property_types(self, *property_type):
        """Filter the data set based on the type of property (e.g. Density).

        Properties are matched on the exact name of their class, so a
        property whose class merely derives from a requested type is
        not retained.

        Parameters
        ----------
        property_type : PropertyType or str
            The type of property which should be retained, given either
            as the class itself or as the name of the class.

        Examples
        --------
        Filter the dataset to only contain densities and static dielectric constants

        >>> # Load in the data set of properties which will be used for comparisons
        >>> from propertyestimator.datasets import ThermoMLDataSet
        >>> data_set = ThermoMLDataSet.from_doi('10.1016/j.jct.2016.10.001')
        >>>
        >>> # Filter the dataset to only include densities and dielectric constants.
        >>> from propertyestimator.properties import Density, DielectricConstant
        >>> data_set.filter_by_property_types(Density, DielectricConstant)

        or

        >>> data_set.filter_by_property_types('Density', 'DielectricConstant')
        """
        # Normalise the requested types to class names so that classes and
        # strings can be mixed freely in a single call.
        property_types = {
            type_to_retain if isinstance(type_to_retain, str) else type_to_retain.__name__
            for type_to_retain in property_type
        }

        def filter_function(x):
            return type(x).__name__ in property_types

        self.filter_by_function(filter_function)

    def filter_by_phases(self, phases):
        """Filter the data set based on the phase of the property (e.g. liquid).

        A property is retained when ``property.phase & phases`` is truthy.

        Parameters
        ----------
        phases : PropertyPhase
            The phase of property which should be retained.

        Examples
        --------
        Filter the dataset to only include liquid properties.

        >>> # Load in the data set of properties which will be used for comparisons
        >>> from propertyestimator.datasets import ThermoMLDataSet
        >>> data_set = ThermoMLDataSet.from_doi('10.1016/j.jct.2016.10.001')
        >>>
        >>> from propertyestimator.properties import PropertyPhase
        >>> data_set.filter_by_phases(PropertyPhase.Liquid)
        """
        def filter_function(x):
            # NOTE(review): presumably PropertyPhase is a flag-style enum, in
            # which case this keeps properties sharing at least one phase with
            # `phases` - confirm against the PropertyPhase definition.
            return x.phase & phases

        self.filter_by_function(filter_function)

    def filter_by_temperature(self, min_temperature, max_temperature):
        """Filter the data set based on a minimum and maximum temperature.

        Both bounds are inclusive.

        Parameters
        ----------
        min_temperature : unit.Quantity
            The minimum temperature.
        max_temperature : unit.Quantity
            The maximum temperature.

        Examples
        --------
        Filter the dataset to only include properties measured between 130-260 K.

        >>> # Load in the data set of properties which will be used for comparisons
        >>> from propertyestimator.datasets import ThermoMLDataSet
        >>> data_set = ThermoMLDataSet.from_doi('10.1016/j.jct.2016.10.001')
        >>>
        >>> from propertyestimator import unit
        >>> data_set.filter_by_temperature(min_temperature=130*unit.kelvin, max_temperature=260*unit.kelvin)
        """
        def filter_function(x):
            return min_temperature <= x.thermodynamic_state.temperature <= max_temperature

        self.filter_by_function(filter_function)

    def filter_by_pressure(self, min_pressure, max_pressure):
        """Filter the data set based on a minimum and maximum pressure.

        Both bounds are inclusive. Properties whose thermodynamic state
        has no pressure defined (i.e. it is ``None``) are always retained.

        Parameters
        ----------
        min_pressure : unit.Quantity
            The minimum pressure.
        max_pressure : unit.Quantity
            The maximum pressure.

        Examples
        --------
        Filter the dataset to only include properties measured between 70-150 kPa.

        >>> # Load in the data set of properties which will be used for comparisons
        >>> from propertyestimator.datasets import ThermoMLDataSet
        >>> data_set = ThermoMLDataSet.from_doi('10.1016/j.jct.2016.10.001')
        >>>
        >>> from propertyestimator import unit
        >>> data_set.filter_by_pressure(min_pressure=70*unit.kilopascal, max_pressure=150*unit.kilopascal)
        """
        def filter_function(x):

            pressure = x.thermodynamic_state.pressure

            # A missing pressure cannot be compared against the bounds, so
            # such properties are kept rather than discarded.
            if pressure is None:
                return True

            return min_pressure <= pressure <= max_pressure

        self.filter_by_function(filter_function)

    def filter_by_components(self, number_of_components):
        """Filter the data set based on the number of components present
        in the substance for which each property was measured.

        Parameters
        ----------
        number_of_components : int
            The allowed number of components in the mixture.

        Examples
        --------
        Filter the dataset to only include pure substance properties.

        >>> # Load in the data set of properties which will be used for comparisons
        >>> from propertyestimator.datasets import ThermoMLDataSet
        >>> data_set = ThermoMLDataSet.from_doi('10.1016/j.jct.2016.10.001')
        >>>
        >>> data_set.filter_by_components(number_of_components=1)
        """
        def filter_function(x):
            return x.substance.number_of_components == number_of_components

        self.filter_by_function(filter_function)

    def filter_by_elements(self, *allowed_elements):
        """Filters out those properties which were estimated for
        compounds which contain elements outside of those defined
        in `allowed_elements`.

        Parameters
        ----------
        allowed_elements: str
            The symbols (e.g. C, H, Cl) of the elements to
            retain.
        """
        def contains_only_allowed_elements(component):
            # NOTE(review): the second argument (0) is forwarded unchanged from
            # the original code; its meaning is defined by
            # `create_molecule_from_smiles` - confirm before altering it.
            oe_molecule = create_molecule_from_smiles(component.smiles, 0)

            return all(
                element.Element.getByAtomicNumber(atom.GetAtomicNum()).symbol in allowed_elements
                for atom in oe_molecule.GetAtoms()
            )

        def filter_function(physical_property):
            # Every atom of every component must be an allowed element.
            return all(
                contains_only_allowed_elements(component)
                for component in physical_property.substance.components
            )

        self.filter_by_function(filter_function)

    def filter_by_smiles(self, *allowed_smiles):
        """Filters out those properties which were estimated for
        compounds which do not appear in the allowed `smiles` list.

        The smiles patterns are compared as plain strings, and every
        component of a substance must appear in `allowed_smiles` for
        its properties to be retained.

        Parameters
        ----------
        allowed_smiles: str
            The smiles identifiers of the compounds to keep
            after filtering.
        """
        def filter_function(physical_property):
            return all(
                component.smiles in allowed_smiles
                for component in physical_property.substance.components
            )

        self.filter_by_function(filter_function)

    def __getstate__(self):
        """Return the serializable state of this set: its properties
        (under ``'properties'``) and its sources (under ``'sources'``)."""
        return {
            'properties': self._properties,
            'sources': self._sources
        }

    def __setstate__(self, state):
        """Restore this set from a state dictionary of the form produced
        by `__getstate__`."""
        self._properties = state['properties']
        self._sources = state['sources']