Source code for propertyestimator.protocols.analysis

"""
A collection of protocols for running analysing the results of molecular simulations.
"""

import logging
from os import path

import numpy as np

from propertyestimator import unit
from propertyestimator.utils import statistics, timeseries
from propertyestimator.utils.exceptions import PropertyEstimatorException
from propertyestimator.utils.quantities import EstimatedQuantity
from propertyestimator.utils.statistics import StatisticsArray, bootstrap
from propertyestimator.workflow.decorators import protocol_input, protocol_output, MergeBehaviour
from propertyestimator.workflow.plugins import register_calculation_protocol
from propertyestimator.workflow.protocols import BaseProtocol


[docs]@register_calculation_protocol() class AveragePropertyProtocol(BaseProtocol): """An abstract base class for protocols which will calculate the average of a property and its uncertainty via bootstrapping. """ @protocol_input(int, merge_behavior=MergeBehaviour.GreatestValue) def bootstrap_iterations(self): """The number of bootstrap iterations to perform.""" pass @protocol_input(float, merge_behavior=MergeBehaviour.GreatestValue) def bootstrap_sample_size(self): """The relative sample size to use for bootstrapping.""" pass @protocol_output(EstimatedQuantity) def value(self): """The averaged value.""" pass @protocol_output(int) def equilibration_index(self): """The index in the data set after which the data is stationary.""" pass @protocol_output(float) def statistical_inefficiency(self): """The statistical inefficiency in the data set.""" pass @protocol_output(unit.Quantity) def uncorrelated_values(self): """The uncorrelated values which the average was calculated from.""" pass
[docs] def __init__(self, protocol_id): super().__init__(protocol_id) self._bootstrap_iterations = 250 self._bootstrap_sample_size = 1.0 self._value = None self._equilibration_index = None self._statistical_inefficiency = None self._uncorrelated_values = None
def _bootstrap_function(self, **sample_kwargs): """The function to perform on the data set being sampled by bootstrapping. Parameters ---------- sample_kwargs: dict of str and np.ndarray A key words dictionary of the bootstrap sample data, where the sample data is a numpy array of shape=(num_frames, num_dimensions) with dtype=float. Returns ------- float The result of evaluating the data. """ assert len(sample_kwargs) == 1 sample_data = next(iter(sample_kwargs.values())) return sample_data.mean()
[docs] def execute(self, directory, available_resources): return self._get_output_dictionary()
[docs]@register_calculation_protocol() class AverageTrajectoryProperty(AveragePropertyProtocol): """An abstract base class for protocols which will calculate the average of a property from a simulation trajectory. """ @protocol_input(str) def input_coordinate_file(self): """The file path to the starting coordinates of a trajectory.""" pass @protocol_input(str) def trajectory_path(self): """The file path to the trajectory to average over.""" pass
[docs] def __init__(self, protocol_id): super().__init__(protocol_id) self._input_coordinate_file = None self._trajectory_path = None self.trajectory = None
[docs] def execute(self, directory, available_resources): import mdtraj if self._trajectory_path is None: return PropertyEstimatorException(directory=directory, message='The AverageTrajectoryProperty protocol ' 'requires a previously calculated trajectory') self.trajectory = mdtraj.load_dcd(filename=self._trajectory_path, top=self._input_coordinate_file) return self._get_output_dictionary()
[docs]@register_calculation_protocol() class ExtractAverageStatistic(AveragePropertyProtocol): """Extracts the average value from a statistics file which was generated during a simulation. """ @protocol_input(str) def statistics_path(self): """The file path to the trajectory to average over.""" pass @protocol_input(statistics.ObservableType) def statistics_type(self): """The file path to the trajectory to average over.""" pass @protocol_input(int) def divisor(self): """A divisor to divide the statistic by. This is useful if a statistic (such as enthalpy) needs to be normalised by the number of molecules.""" pass
[docs] def __init__(self, protocol_id): super().__init__(protocol_id) self._statistics_path = None self._statistics_type = statistics.ObservableType.PotentialEnergy self._divisor = 1 self._statistics = None
[docs] def execute(self, directory, available_resources): logging.info('Extracting {}: {}'.format(self._statistics_type, self.id)) if self._statistics_path is None: return PropertyEstimatorException(directory=directory, message='The ExtractAverageStatistic protocol ' 'requires a previously calculated statistics file') self._statistics = statistics.StatisticsArray.from_pandas_csv(self.statistics_path) if self._statistics_type not in self._statistics: return PropertyEstimatorException(directory=directory, message=f'The {self._statistics_path} statistics file contains no ' f'data of type {self._statistics_type}.') values = self._statistics[self._statistics_type] statistics_unit = values[0].units unitless_values = values.to(statistics_unit).magnitude unitless_values = np.array(unitless_values) / float(self._divisor) unitless_values, self._equilibration_index, self._statistical_inefficiency = \ timeseries.decorrelate_time_series(unitless_values) final_value, final_uncertainty = bootstrap(self._bootstrap_function, self._bootstrap_iterations, self._bootstrap_sample_size, values=unitless_values) self._uncorrelated_values = unitless_values * statistics_unit self._value = EstimatedQuantity(final_value * statistics_unit, final_uncertainty * statistics_unit, self.id) logging.info('Extracted {}: {}'.format(self._statistics_type, self.id)) return self._get_output_dictionary()
[docs]@register_calculation_protocol() class ExtractUncorrelatedData(BaseProtocol): """An abstract base class for protocols which will subsample a data set, yielding only equilibrated, uncorrelated data. """ @protocol_input(int) def equilibration_index(self): """The index in the data set after which the data is stationary.""" pass @protocol_input(float) def statistical_inefficiency(self): """The statistical inefficiency in the data set.""" pass @protocol_output(int) def number_of_uncorrelated_samples(self): """The number of uncorrelated samples.""" pass
[docs] def __init__(self, protocol_id): super().__init__(protocol_id) self._equilibration_index = None self._statistical_inefficiency = None self._number_of_uncorrelated_samples = None
[docs] def execute(self, directory, available_resources): raise NotImplementedError
[docs]@register_calculation_protocol() class ExtractUncorrelatedTrajectoryData(ExtractUncorrelatedData): """A protocol which will subsample frames from a trajectory, yielding only uncorrelated frames as determined from a provided statistical inefficiency and equilibration time. """ @protocol_input(str) def input_coordinate_file(self): """The file path to the starting coordinates of a trajectory.""" pass @protocol_input(str) def input_trajectory_path(self): """The file path to the trajectory to subsample.""" pass @protocol_output(str) def output_trajectory_path(self): """The file path to the subsampled trajectory.""" pass
[docs] def __init__(self, protocol_id): super().__init__(protocol_id) self._input_coordinate_file = None self._input_trajectory_path = None self._output_trajectory_path = None
[docs] def execute(self, directory, available_resources): import mdtraj logging.info('Subsampling trajectory: {}'.format(self.id)) if self._input_trajectory_path is None: return PropertyEstimatorException(directory=directory, message='The ExtractUncorrelatedTrajectoryData protocol ' 'requires a previously calculated trajectory') trajectory = mdtraj.load_dcd(filename=self._input_trajectory_path, top=self._input_coordinate_file) trajectory = trajectory[self._equilibration_index:] uncorrelated_indices = timeseries.get_uncorrelated_indices(trajectory.n_frames, self._statistical_inefficiency) uncorrelated_trajectory = trajectory[uncorrelated_indices] self._output_trajectory_path = path.join(directory, 'uncorrelated_trajectory.dcd') uncorrelated_trajectory.save_dcd(self._output_trajectory_path) self._number_of_uncorrelated_samples = len(trajectory) logging.info('Trajectory subsampled: {}'.format(self.id)) return self._get_output_dictionary()
[docs]@register_calculation_protocol() class ExtractUncorrelatedStatisticsData(ExtractUncorrelatedData): """A protocol which will subsample entries from a statistics array, yielding only uncorrelated entries as determined from a provided statistical inefficiency and equilibration time. """ @protocol_input(str) def input_statistics_path(self): """The file path to the statistics to subsample.""" pass @protocol_output(str) def output_statistics_path(self): """The file path to the subsampled statistics.""" pass
[docs] def __init__(self, protocol_id): super().__init__(protocol_id) self._input_statistics_path = None self._output_statistics_path = None
[docs] def execute(self, directory, available_resources): logging.info('Subsampling statistics: {}'.format(self.id)) if self._input_statistics_path is None: return PropertyEstimatorException(directory=directory, message='The ExtractUncorrelatedStatisticsData protocol ' 'requires a previously calculated statisitics file') statistics_array = StatisticsArray.from_pandas_csv(self._input_statistics_path) uncorrelated_indices = timeseries.get_uncorrelated_indices(len(statistics_array) - self._equilibration_index, self._statistical_inefficiency) uncorrelated_indices = [index + self._equilibration_index for index in uncorrelated_indices] uncorrelated_statistics = StatisticsArray.from_existing(statistics_array, uncorrelated_indices) self._output_statistics_path = path.join(directory, 'uncorrelated_statistics.csv') uncorrelated_statistics.to_pandas_csv(self._output_statistics_path) logging.info('Statistics subsampled: {}'.format(self.id)) self._number_of_uncorrelated_samples = len(uncorrelated_statistics) return self._get_output_dictionary()