Source code for tesliper.glassware.conformers
"""A ``tesliper``'s main data storage."""
import logging as lgg
from collections import Counter, OrderedDict
from collections.abc import ItemsView, KeysView, ValuesView
from contextlib import contextmanager
from inspect import Parameter
from itertools import chain
from reprlib import recursive_repr
from typing import Callable, Iterable, Optional, Sequence, Union
import numpy as np
from tesliper.exceptions import InconsistentDataError, TesliperError
from .. import datawork as dw
from . import arrays as ar
from .array_base import _ARRAY_CONSTRUCTORS
# LOGGER
logger = lgg.getLogger(__name__)
logger.setLevel(lgg.DEBUG)
# TYPE HINTS
AnyArray = Union[
ar.DataArray,
ar.Energies,
ar.FloatArray,
ar.FilenamesArray,
ar.InfoArray,
ar.BooleanArray,
ar.IntegerArray,
ar.Bands,
ar.VibrationalData,
ar.ScatteringData,
ar.ElectronicData,
ar.VibrationalActivities,
ar.ScatteringActivities,
ar.ElectronicActivities,
ar.Transitions,
ar.Geometry,
]
# CLASSES
class _KeptItemsView(ItemsView):
def __init__(self, mapping, indices=False):
super().__init__(mapping)
self.indices = indices
def __contains__(self, item):
key, value = item
try:
kept = self._mapping.kept[self._mapping.index_of(key)]
except KeyError:
return False
else:
if not kept:
return False
else:
v = self._mapping[key]
return v is value or v == value
def __iter__(self):
indices = self.indices
for idx, (key, kept) in enumerate(zip(self._mapping, self._mapping.kept)):
if kept:
value = self._mapping[key]
yield key, value if not indices else (idx, key, value)
def __reversed__(self):
yield from iter(reversed(list(self)))
class _KeptValuesView(ValuesView):
def __init__(self, mapping, indices=False):
super().__init__(mapping)
self.indices = indices
def __contains__(self, value):
for key, kept in zip(self._mapping, self._mapping.kept):
v = self._mapping[key]
if (v is value or v == value) and kept:
return True
return False
def __iter__(self):
indices = self.indices
for idx, (key, kept) in enumerate(zip(self._mapping, self._mapping.kept)):
if kept:
value = self._mapping[key]
yield value if not indices else (idx, value)
def __reversed__(self):
yield from iter(reversed(list(self)))
class _KeptKeysView(KeysView):
def __init__(self, mapping, indices=False):
super().__init__(mapping)
self.indices = indices
def __contains__(self, key):
try:
return self._mapping.kept[self._mapping.index_of(key)]
except KeyError:
return False
def __iter__(self):
indices = self.indices
for idx, (key, kept) in enumerate(zip(self._mapping, self._mapping.kept)):
if kept:
yield key if not indices else (idx, key)
def __reversed__(self):
yield from iter(reversed(list(self)))
[docs]class Conformers(OrderedDict):
"""Container for data extracted from quantum chemical software output files.
Data for each file is stored in the underlying OrderedDict, under the key of
said file's name. Its values are dictionaries with genres name (as key)
and appropriate data pairs. Beside this, its essential functionality is
transformation of stored data to corresponding DataArray objects with
use of :meth:`.arrayed` method. It provides some control over this transformation,
especially in terms of including/excluding particular conformers' data
on creation of new DataArray instance. This type of control is here called
trimming. Trimming can be achieved by use of various *trim* methods defined
in this class or by direct changes to :attr:`.kept` attribute. See its
documentation for more information.
Attributes
----------
primary_genres
Class attribute. Data genres considered most important, used as default when
checking for conformers completeness (see :meth:`.trim_incomplete` method).
Notes
-----
Inherits from collections.OrderedDict.
"""
primary_genres = tuple(
"dip rot vosc vrot losc lrot raman1 roa1 scf zpe ent ten gib".split()
)
def __init__(
self,
*args,
allow_data_inconsistency: bool = False,
temperature_of_the_system: float = 298.15,
**kwargs,
):
"""
Parameters
----------
*args
list of arguments for creation of underlying dictionary
allow_data_inconsistency : bool, optional
specifies if data inconsistency should be allowed in created DataArray
object instances, defaults to False
temperature_of_the_system : float, optional
Temperature of the system in Kelvin units, must be zero or higher.
Defaults to room temperature = 298.15 K.
**kwargs
list of arbitrary keyword arguments for creation of underlying
dictionary
"""
self.allow_data_inconsistency = allow_data_inconsistency
self.temperature = temperature_of_the_system
self.kept = []
self.filenames = []
self._indices = {}
super().__init__(*args, **kwargs)
@property
def temperature(self) -> float:
"""Temperature of the system expressed in Kelvin units.
Value of this parameter is passed to :term:`data array`\\s created with the
:meth:`.arrayed` method, provided that the target data array class supports a
parameter named *t* in it's constructor.
.. versionadded:: 0.9.1
Raises
------
ValueError
if set to a value lower than zero.
"""
return vars(self)["temperature"]
@temperature.setter
def temperature(self, value):
if value <= 0:
raise ValueError(
"Temperature of the system must be higher than absolute zero."
)
vars(self)["temperature"] = value
[docs] def clear(self):
"""Remove all items from the Conformers instance."""
self._kept = []
self.filenames = []
self._indices = {}
self.temperature = 298.15
super().clear()
@recursive_repr()
def __repr__(self) -> str:
if not self:
return (
f"{self.__class__.__name__}("
f"allow_data_inconsistency={self.allow_data_inconsistency})"
)
return (
f"{self.__class__.__name__}({list(self.items())}, "
f"allow_data_inconsistency={self.allow_data_inconsistency})"
)
def __setitem__(self, key, value):
try:
value = dict(value)
except TypeError as error:
raise TypeError("Can't convert given value to dictionary.") from error
except ValueError as error:
raise ValueError("Can't convert given value to dictionary.") from error
if key in self:
index = self._indices[key]
else:
index = len(self.filenames)
self.filenames.append(key)
self.kept.append(True)
super().__setitem__(key, value)
self._indices[key] = index
def __delitem__(self, key):
index = self._indices[key]
super().__delitem__(key)
del self.filenames[index]
del self.kept[index]
del self._indices[key]
for index, key in enumerate(self.keys()):
self._indices[key] = index
[docs] def popitem(self, last=True):
"""Remove and return a (key, value) pair from the dictionary.
Pairs are returned in LIFO order if last is true or FIFO order if false.
"""
idx = -1 if last else 0
try:
key = self.filenames[idx]
return key, self.pop(key)
except IndexError:
raise KeyError("Conforemres is empty.")
[docs] def move_to_end(self, key, last=True):
"""Move an existing element to the end (or beginning if last==False).
Raises KeyError if the element does not exist.
"""
idx = self.index_of(key)
new_idx = 0 if not last else len(self.filenames) - 1
super().move_to_end(key, last)
self.filenames.insert(new_idx, self.filenames.pop(idx))
self._kept.insert(new_idx, self._kept.pop(idx))
self._indices = {k: i for i, k in enumerate(self.filenames)}
[docs] def copy(self):
"conformers.copy() -> a shallow copy of conformers"
cp = self.__class__(
allow_data_inconsistency=self.allow_data_inconsistency,
temperature_of_the_system=self.temperature,
**self,
)
cp.kept = self.kept
return cp
@property
def kept(self):
"""List of booleans, one for each conformer stored, defining if
particular conformers data should be included in corresponding DataArray
instance, created by :meth:`.arrayed` method. It may be changed by use of trim
methods, by setting its value directly, or by modification of the
underlying list. For the first option refer to those methods
documentation, for rest see the Examples section.
Returns
-------
list of bool
List of booleans, one for each conformer stored, defining if
particular conformers data should be included in corresponding
DataArray instance.
Raises
------
TypeError
If assigned values is not a sequence.
If elements of given sequence are not one of types: bool, int, str.
ValuesError
If number of given boolean values doesn't match number of contained
conformers.
KeyError
If any of given string values is not in underlying dictionary keys.
IndexError
If any of given integer values is not in range
0 <= i < number of conformers.
Examples
--------
New list of values can be set in a few ways. Firstly, it is the
most straightforward to just assign a new list of boolean values to
the :attr:`.kept` attribute. This list should have the same number of elements
as the number of conformers contained. A ValueError is raised if it
doesn't.
>>> c = Conformers(one={}, two={}, tree={})
>>> c.kept
[True, True, True]
>>> c.kept = [False, True, False]
>>> c.kept
[False, True, False]
>>> c.kept = [False, True, False, True]
Traceback (most recent call last):
...
ValueError: Must provide boolean value for each known conformer.
4 values provided, 3 excepted.
Secondly, list of filenames of conformers intended to be kept may be
given. Only these conformers will be kept. If given filename is not in
the underlying Conformers' dictionary, KeyError is raised.
>>> c.kept = ['one']
>>> c.kept
[True, False, False]
>>> c.kept = ['two', 'other']
Traceback (most recent call last):
...
KeyError: Unknown conformers: other.
Thirdly, list of integers representing conformers indices may be given.
Only conformers with specified indices will be kept. If one of given integers
can't be translated to conformer's index, IndexError is raised. Indexing with
negative values is not supported currently.
>>> c.kept = [1, 2]
>>> c.kept
[False, True, True]
>>> c.kept = [2, 3]
Traceback (most recent call last):
...
IndexError: Indexes out of bounds: 3.
Fourthly, assigning ``True`` or ``False`` to this attribute will mark all
conformers as kept or not kept respectively.
>>> c.kept = False
>>> c.kept
[False, False, False]
>>> c.kept = True
>>> c.kept
[True, True, True]
Lastly, list of kept values may be modified by setting its elements
to True or False. It is advised against, however, as mistake such as
``c.kept[:2] = [True, False, False]`` will break some functionality by
forcibly changing size of :attr:`.kept` list.
Notes
-----
Type of the first element of given sequence is used for dynamic
dispatch.
"""
# TODO: Consider making return value immutable.
return self._kept
@kept.setter
def kept(self, blade: Union[Sequence[bool], Sequence[str], Sequence[int], bool]):
if blade is True or blade is False:
self._kept = [blade for _ in self.keys()]
return
try:
first = blade[0]
except (TypeError, KeyError):
raise TypeError(f"Excepted sequence or boolean, got: {type(blade)}.")
except IndexError:
self._kept = [False for _ in self.keys()]
return # empty sequence is understood as "keep nothing"
if isinstance(first, (str, np.str_)):
blade = set(blade)
if not blade.issubset(self.keys()):
raise KeyError(f"Unknown conformers: {', '.join(blade-self.keys())}")
else:
self._kept = [fnm in blade for fnm in self.keys()]
elif isinstance(first, (bool, np.bool_)):
if not len(blade) == len(self):
raise ValueError(
f"Must provide boolean value for each known conformer. "
f"{len(blade)} values provided, {len(self)} excepted."
)
else:
self._kept = [bool(b) for b in blade] # convert from np.bool_
elif isinstance(first, (int, np.integer)):
length = len(self)
out_of_bounds = [b for b in blade if not 0 <= b < length]
if out_of_bounds:
raise IndexError(
f"Indexes out of bounds: "
f"{', '.join(str(n) for n in out_of_bounds)}."
)
else:
blade = set(blade)
self._kept = [num in blade for num in range(len(self))]
else:
raise TypeError(
f"Expected sequence of strings, integers or booleans, got: "
f"{type(first)} as first sequence's element."
)
[docs] def update(self, other=None, **kwargs):
"""Works like ``dict.update``, but if key is already present, it updates
dictionary associated with given key rather than assigning new value.
Keys of dictionary passed as positional parameter (or additional keyword
arguments given) should be conformers' identifiers and its values should be
dictionaries of {"genre": values} for those conformers.
Please note, that values of status genres like 'optimization_completed'
and 'normal_termination' will be updated as well for such key,
if are present in given new values.
"""
if other is not None:
other = dict(other)
else:
other = dict()
items = chain(other.items(), kwargs.items())
for key, value in items:
if key in self:
self[key].update(value)
else:
self[key] = value
[docs] def arrayed(
self, genre: str, full: bool = False, strict: bool = True, **kwargs
) -> AnyArray:
"""Lists requested data and returns as appropriate :class:`.DataArray` instance.
.. versionadded:: 0.9.1
The *strict* parameter.
Parameters
----------
genre
String representing data genre. Must be one of known genres.
full
Boolean indicating if full set of data should be taken, ignoring
any trimming conducted earlier. Defaults to ``False``.
strict
Boolean indicating if additional kwargs that doesn't match signature of data
array's constructor should cause an exception as normally (``strict =
True``) or be silently ignored (``strict = False``). Defaults to ``True``.
kwargs
Additional keyword parameters passed to data array constructor.
Any explicitly given parameters will take precedence over automatically
retrieved and default values.
Returns
-------
DataArray
Arrayed data of desired genre as appropriate :class:`.DataArray` object.
Notes
-----
For now, the special "filenames" genre always ignores *kwargs*.
"""
try:
cls = _ARRAY_CONSTRUCTORS[genre] # ArrayBase subclasses
except KeyError:
raise ValueError(f"Unknown genre '{genre}'.")
if genre == "filenames":
# return early if filenames requested
return cls(
genre=genre,
filenames=list(self.kept_keys() if not full else self.keys()),
allow_data_inconsistency=self.allow_data_inconsistency,
)
view = self.kept_items() if not full else self.items()
array = ((fname, conf, conf[genre]) for fname, conf in view if genre in conf)
try:
filenames, confs, values = zip(*array)
except ValueError: # if no elements in `array`
logger.debug(
f"Array of gerne {genre} requested, but no such data available "
f"or conformers providing this data were trimmed off. "
f"Returning an empty array."
)
filenames, confs, values = [], [], []
default_params = cls.get_init_params()
default_params["genre"] = genre
default_params["filenames"] = filenames
default_params["values"] = values
default_params["allow_data_inconsistency"] = self.allow_data_inconsistency
init_params = {}
for key, value in default_params.items():
if key in kwargs:
# explicitly given keyword parameters take precedence
init_params[key] = kwargs.pop(key)
continue
if key == "t":
# if not given explicitly, temperature is taken form self
init_params[key] = self.temperature
continue
if not isinstance(default_params[key], Parameter):
# if value for parameter is already established, just take it
init_params[key] = value
continue
param_genre = ( # maybe ``key`` is not a param's genre name
value.genre_getter(genre) if hasattr(value, "genre_getter") else key
)
try:
init_params[key] = [conf[param_genre] for conf in confs]
except KeyError:
# can't retrieve ``param_genre`` data from each included conformer
# set param to its default value or raise an error if it don't have one
if value.default is not value.empty:
init_params[key] = value.default
else:
raise TesliperError(
f"One or more conformers does not provide value for "
f"'{param_genre}' genre, needed to instantiate {cls.__name__} "
"object. You may provide missing values as a keyword parameters"
" to the `Conformers.arrayed()` method call."
)
if not init_params[key] and value.default is not value.empty:
# genre produces an empty array, but parameter has default value
init_params[key] = value.default
if kwargs and strict:
# any kwargs not popped till now are not expected by the ``cls.__init__()``
# if ``strict`` handling requested, add them anyway to cause an exception
init_params.update(**kwargs)
return cls(**init_params)
[docs] def by_index(self, index: int) -> dict:
"""Returns data for conformer on desired index."""
return self[self.filenames[index]]
[docs] def key_of(self, index: int) -> str:
"""Returns name of conformer associated with given index."""
return self.filenames[index]
[docs] def index_of(self, key: str) -> int:
"""Return index of given key."""
try:
return self._indices[key]
except KeyError as error:
raise KeyError(f"No such conformer: {key}.") from error
[docs] def has_genre(self, genre: str, ignore_trimming: bool = False) -> bool:
"""Checks if any of stored conformers contains data of given genre.
Parameters
----------
genre : str
Name of genre to test.
ignore_trimming : bool
If all known conformers should be considered (``ignore_trimming = True``)
or only kept ones (``ignore_trimming = False``, default).
Returns
-------
bool
Boolean value indicating if any of stored conformers contains data
of genre in question."""
conformers = self.values() if ignore_trimming else self.kept_values()
for conformer in conformers:
if genre in conformer:
return True
return False
[docs] def has_any_genre(
self, genres: Iterable[str], ignore_trimming: bool = False
) -> bool:
"""Checks if any of stored conformers contains data of any of given
genres.
Parameters
----------
genres : iterable of str
List of names of genres to test.
ignore_trimming : bool
If all known conformers should be considered (``ignore_trimming = True``)
or only kept ones (``ignore_trimming = False``, default).
Returns
-------
bool
Boolean value indicating if any of stored conformers contains data
of any of genres in question."""
conformers = self.values() if ignore_trimming else self.kept_values()
for conformer in conformers:
for genre in genres:
if genre in conformer:
return True
return False
[docs] def all_have_genres(
self, genres: Iterable[str], ignore_trimming: bool = False
) -> bool:
"""Checks if all stored conformers contains data of given genres.
Parameters
----------
genres : iterable of str
List of names of genres to test.
ignore_trimming : bool
If all known conformers should be considered (``ignore_trimming = True``)
or only kept ones (``ignore_trimming = False``, default).
Returns
-------
bool
Boolean value indicating if each stored conformers contains data
of all genres in question."""
genres = set(genres)
conformers = self.values() if ignore_trimming else self.kept_values()
for conformer in conformers:
if genres - conformer.keys():
return False
return True
[docs] def trim_incomplete(
self, wanted: Optional[Iterable[str]] = None, strict: bool = False
) -> None:
"""Mark incomplete conformers as "not kept".
Conformers that does not contain one or more data genres specified as *wanted*
will be marked as "not kept". If *wanted* parameter is not given, it evaluates
to :attr:`.primary_genres`. If no conformer contains all *wanted* genres,
conformers that match the specification most closely are kept. The "closeness"
is defined by number of conformer's genres matching *wanted* genres in the first
place (the more, the better) and the position of particular genre in *wanted*
list in the second place (the closer to the beginning, the better). This
"match closest" behaviour may be turned off by setting parameter
*strict* to ``True``. In such case, only conformers containing all *wanted*
genres will be kept.
Parameters
----------
wanted
List of data genres used as completeness reference.
If not given, evaluates to :attr:`.primary_genres`.
strict
Indicates if all *wanted* genres must be present in the kept conformers
(``strict=True``) or if "match closest" mechanism should be used
as a fallback (``strict=False``, this is the default).
Notes
-----
Conformers previously marked as "not kept" will not be affected.
"""
wanted = wanted if wanted is not None else self.primary_genres
if not strict:
count = [tuple(g in conf for g in wanted) for conf in self.values()]
if not count:
return
best_match = max(count)
complete = (match == best_match for match in count)
else:
complete = (all(g in conf for g in wanted) for conf in self.values())
blade = [kept and cmpl for kept, cmpl in zip(self.kept, complete)]
self._kept = blade
[docs] def trim_imaginary_frequencies(self) -> None:
"""Mark all conformers with imaginary frequencies as "not kept".
Notes
-----
Conformers previously marked as "not kept" will not be affected.
Conformers that doesn't contain "freq" genre will be treated as not having
imaginary frequencies.
"""
dummy = [1]
for index, conf in enumerate(self.values()):
freq = np.array(conf.get("freq", dummy))
if (freq < 0).any():
self._kept[index] = False
[docs] def trim_non_matching_stoichiometry(self, wanted: Optional[str] = None) -> None:
"""Mark all conformers with stoichiometry other than *wanted* as "not kept".
If not given, *wanted* evaluates to the most common stoichiometry.
Parameters
----------
wanted
Only conformers with same stoichiometry will be kept. Evaluates to the most
common stoichiometry if not given.
Notes
-----
Conformers previously marked as "not kept" will not be affected.
Conformers that doesn't contain stoichiometry data are always treated
as non-matching.
"""
if not wanted:
counter = Counter(
conf["stoichiometry"]
for conf in self.values()
if "stoichiometry" in conf
)
counts = counter.most_common()
wanted = counts[0][0] if counts else "" # no conformer has "stoichiometry"
for index, conf in enumerate(self.values()):
if "stoichiometry" not in conf or not conf["stoichiometry"] == wanted:
self._kept[index] = False
[docs] def trim_not_optimized(self) -> None:
"""Mark all conformers that failed structure optimization as "not kept".
Notes
-----
Conformers previously marked as "not kept" will not be affected.
Conformers that doesn't contain optimization data are always treated as
optimized.
"""
for index, conf in enumerate(self.values()):
if not conf.get("optimization_completed", True):
self._kept[index] = False
[docs] def trim_non_normal_termination(self) -> None:
"""Mark all conformers, which calculation job did not terminate normally,
as "not kept".
Notes
-----
Conformers previously marked as "not kept" will not be affected.
Conformers that doesn't contain data regarding their calculation job's
termination are always treated as terminated abnormally.
"""
for index, conf in enumerate(self.values()):
if not conf.get("normal_termination", False):
self._kept[index] = False
[docs] def trim_inconsistent_sizes(self) -> None:
"""Mark as "not kept" all conformers that contain any iterable data genre,
that is of different length, than in case of majority of conformers.
Examples
--------
>>> c = Conformers(
... one={'a': [1, 2, 3]},
... two={'a': [1, 2, 3]},
... three={'a': [1, 2, 3, 4]}
... )
>>> c.kept
[True, True, True]
>>> c.trim_inconsistent_sizes()
>>> c.kept
[True, True, False]
Notes
-----
Conformers previously marked as "not kept" will not be affected.
"""
sizes = {}
for fname, conf in self.items():
for genre, value in conf.items():
if isinstance(value, (np.ndarray, list, tuple)):
sizes.setdefault(genre, {})[fname] = len(value)
maxes = {
genre: Counter(v for v in values.values()).most_common()[0][0]
for genre, values in sizes.items()
}
for index, fname in enumerate(self.keys()):
for genre, most_common in maxes.items():
confs = sizes[genre]
if fname in confs and not confs[fname] == most_common:
self._kept[index] = False
[docs] def trim_to_range(
self,
genre: str,
minimum: Union[int, float] = float("-inf"),
maximum: Union[int, float] = float("inf"),
attribute: str = "values",
) -> None:
"""Marks as "not kept" all conformers, which numeric value of data
of specified genre is outside of the range specified by *minimum*
and *maximum* values.
Parameters
----------
genre
Name of genre that should be compared to specified
minimum and maximum values.
minimum
Minimal accepted value - every conformer, which genre value evaluates
to less than *minimum* will be marked as "not kept".
Defaults to ``float(-inf)``.
maximum
Maximal accepted value - every conformer, which genre value evaluates
to more than *maximum* will be marked as "not kept".
Defaults to ``float(inf)``.
attribute
Attribute of DataArray of specified *genre* that contains one-dimensional
array of numeric values. defaults to `"values"`.
Raises
------
AttributeError
If DataArray associated with *genre* genre has no attribute *attribute*.
ValueError
If data retrieved from specified genre's attribute is not in the form of
one-dimensional array.
TypeError
If comparision cannot be made between elements of specified genre's
attribute and *minimum* or *maximum* values.
Notes
-----
Conformers previously marked as "not kept" will not be affected.
"""
try:
arr = self.arrayed(genre)
atr = getattr(arr, attribute)
except AttributeError as error:
raise AttributeError(
f"Invalid genre/attribute combination: {genre}/{attribute}. "
f"Resulting DataArray object has no attribute {attribute}."
) from error
values = np.asarray(atr)
if values.ndim != 1:
raise ValueError(
f"Invalid genre/attribute combination: {genre}/{attribute}. "
f"DataArray's attribute must contain one-dimensional array of values."
)
try:
in_range = (minimum <= values) & (values <= maximum)
except TypeError as error:
raise TypeError(
f"Cannot compare {type(minimum)} with {type(values[0])}."
) from error
self.kept = arr.filenames[in_range]
[docs] def trim_rmsd(
self,
threshold: Union[int, float],
window_size: Optional[Union[int, float]],
geometry_genre: str = "last_read_geom",
energy_genre: str = "scf",
ignore_hydrogen: bool = True,
moving_window_strategy: Callable = dw.stretching_windows,
) -> None:
"""Marks as "not kept" all conformers that are identical with some other
conformer, judging by a provided RMSD threshold.
To minimize computation cost, conformers are compared inside windows, that is a
subsets of the original list of conformers. Those windows are generated by the
*moving_window_strategy* function. The recommended strategy, and a default
value, is :func:`.streaching_windows`, but other are also available:
:func:`.fixed_windows` and :func:`.pyramid_windows`. This function will be
called with list of energies for conformers compared and (if it is not ``None``)
*window_size* parameter.
With default *moving_window_strategy* conformers, which energy difference (dE)
is higher than given *window_size* are always treated as different, while those
with dE smaller than *window_size* and RMSD value smaller than given *threshold*
are considered identical. From two identical conformers, the one with lower
energy is "kept", and the other is discarded (marked as "not kept").
Notes
-----
RMSD threshold and size of the energy window should be chosen depending on the
parameters of conformers' set: number of conformers, size of the conformer,
its lability, etc. However, *threshold* of 0.5 angstrom and *window_size*
of 5 to 10 kcal/mol is a good place to start if in doubt.
Parameters
----------
threshold : int or float
Maximum RMSD value to consider conformers identical.
window_size : int or float
Size of the energy window, in kcal/mol, inside which RMSD matrix is
calculated. Essentially, a difference in conformers' energy, after which
conformers are always considered different.
geometry_genre : str
Genre of geometry used to calculate RMSD matrix. "last_read_geom" is
default.
energy_genre : str
Genre of energy used to sort and group conformers into windows of given
energy size. "scf" is used by default.
ignore_hydrogen : bool
If hydrogen atom should be discarded before RMSD calculation.
Defaults to ``True``.
moving_window_strategy : callable
Function that generates windows, inside which RMSD comparisions is
performed.
Raises
------
InconsistentDataError
If requested genres does not provide the same set of conformers.
ValueError
When called with ``ignore_hydrogen=True`` but requested
:attr:`.Geometry.atoms` cannot be collapsed to 1-D array.
"""
energy = self.arrayed(energy_genre)
geometry = self.arrayed(geometry_genre)
if not energy.filenames.size == geometry.filenames.size:
raise InconsistentDataError(
"Unequal number of conformers in requested geometry and energy genres. "
"Trim incomplete entries before trimming with :meth:`.trim_rmds`."
)
elif not np.array_equal(energy.filenames, geometry.filenames):
raise InconsistentDataError(
"Different conformers in requested geometry and energy genres. "
"Trim incomplete entries before trimming with :meth:`.trim_rmds`."
)
if not geometry:
return # next steps assume there are some conformers
if ignore_hydrogen and geometry.atoms.shape[0] > 1:
# TODO: remove when dw.geometry.select_atoms supplemented
raise ValueError(
"Cannot ignore hydrogen atoms if requested conformers do not have "
"the same order of atoms. This functionality is not supported yet."
)
geom = (
dw.drop_atoms(geometry.values, geometry.atoms[0], dw.atoms.Atom.H)
if ignore_hydrogen
else geometry.values
)
if window_size is None:
windows = moving_window_strategy(energy.as_kcal_per_mol)
else:
windows = moving_window_strategy(energy.as_kcal_per_mol, window_size)
wanted = dw.rmsd_sieve(geom, windows, threshold)
self.kept = geometry.filenames[wanted]
[docs] def select_all(self) -> None:
"""Marks all conformers as 'kept'. Equivalent to ``conformers.kept = True``."""
self._kept = [True for _ in self._kept]
[docs] def reject_all(self) -> None:
"""Marks all conformers as 'not kept'. Equivalent to
``conformers.kept = False``.
"""
self._kept = [False for _ in self._kept]
[docs] def kept_keys(self, indices: bool = False) -> _KeptKeysView:
"""Equivalent of ``dict.keys()`` but gives view only on conformers marked
as "kept". Returned view may also provide information on conformers index
in its Conformers instance if requested with ``indices=True``.
>>> c = Conformers(c1={"g": 0.1}, c2={"g": 0.2}, c3={"g": 0.3}}
>>> c.kept = [True, False, True]
>>> list(c.kept_keys())
["c1", "c3"]
>>> list(c.kept_keys(indices=True))
[(0, "c1"}), (2, "c3")]
Parameters
----------
indices : bool
If resulting Conformers view should also provide index of each conformer.
Defaults to False.
Returns
-------
_KeptKeysView
View of kept conformers.
"""
return _KeptKeysView(self, indices=indices)
[docs] def kept_values(self, indices: bool = False) -> _KeptValuesView:
"""Equivalent of ``dict.values()`` but gives view only on conformers marked
as "kept". Returned view may also provide information on conformers index
in its Conformers instance if requested with ``indices=True``.
>>> c = Conformers(c1={"g": 0.1}, c2={"g": 0.2}, c3={"g": 0.3}}
>>> c.kept = [True, False, True]
>>> list(c.kept_values())
[{"g": 0.1}, {"g": 0.3}]
>>> list(c.kept_values(indices=True))
[(0, {"g": 0.1}), (2, {"g": 0.3})]
Parameters
----------
indices : bool
If resulting Conformers view should also provide index of each conformer.
Defaults to False.
Returns
-------
_KeptValuesView
View of kept conformers.
"""
return _KeptValuesView(self, indices=indices)
[docs] def kept_items(self, indices: bool = False) -> _KeptItemsView:
"""Equivalent of ``dict.items()`` but gives view only on conformers marked
as "kept". Returned view may also provide information on conformers index
in its Conformers instance if requested with ``indices=True``.
>>> c = Conformers(c1={"g": 0.1}, c2={"g": 0.2}, c3={"g": 0.3}}
>>> c.kept = [True, False, True]
>>> list(c.kept_items())
[("c1", {"g": 0.1}), ("c3", {"g": 0.3})]
>>> list(c.kept_items(indices=True))
[(0, "c1", {"g": 0.1}), (2, "c3", {"g": 0.3})]
Parameters
----------
indices : bool
If resulting Conformers view should also provide index of each conformer.
Defaults to False.
Returns
-------
_KeptItemsView
View of kept conformers.
"""
return _KeptItemsView(self, indices=indices)
@property
@contextmanager
def untrimmed(self) -> "Conformers":
"""Temporally remove trimming. Implemented as context manager to use with
python's 'with' keyword.
Examples
--------
>>> c = Conformers(one={}, two={}, tree={})
>>> c.kept = [False, True, False]
>>> with c.untrimmed:
>>> c.kept
[True, True, True]
>>> c.kept
[False, True, False]
"""
blade = self._kept
self.kept = True
yield self
self._kept = blade
[docs] @contextmanager
def trimmed_to(
self, blade: Union[Sequence[bool], Sequence[str], Sequence[int], bool]
) -> "Conformers":
"""Temporally set trimming blade to given one. Implemented as context manager
to use with python's 'with' keyword.
Parameters
----------
blade : bool or sequence of bool, str, or int
Temporary trimming blade. To better understand how blade setting works,
see Conformers.kept documentation.
Examples
--------
>>> c = Conformers(one={}, two={}, tree={})
>>> c.kept = [True, True, False]
>>> with c.trimmed_to([1, 2]):
>>> c.kept
[False, True, True]
>>> c.kept
[True, True, False]
"""
old_blade = self._kept
self.kept = blade
yield self
self._kept = old_blade
@property
@contextmanager
def inconsistency_allowed(self) -> "Conformers":
"""Temporally sets Conformers' 'allow_data_inconsistency' attribute
to true. Implemented as context manager to use with python's 'with' keyword.
Examples
--------
>>> c = Conformers(...)
>>> with c.inconsistency_allowed:
>>> # do stuff here while c.allow_data_inconsistency is True
>>> c.allow_data_inconsistency
True
>>> c.allow_data_inconsistency
False
"""
inconsistency = self.allow_data_inconsistency
self.allow_data_inconsistency = True
yield self
self.allow_data_inconsistency = inconsistency