Source code for tesliper.glassware.conformers

"""A ``tesliper``'s main data storage."""
import logging as lgg
from collections import Counter, OrderedDict
from collections.abc import ItemsView, KeysView, ValuesView
from contextlib import contextmanager
from inspect import Parameter
from itertools import chain
from reprlib import recursive_repr
from typing import Callable, Iterable, Optional, Sequence, Union

import numpy as np

from tesliper.exceptions import InconsistentDataError, TesliperError

from .. import datawork as dw
from . import arrays as ar
from .array_base import _ARRAY_CONSTRUCTORS

# LOGGER
logger = lgg.getLogger(__name__)
logger.setLevel(lgg.DEBUG)


# TYPE HINTS
AnyArray = Union[
    ar.DataArray,
    ar.Energies,
    ar.FloatArray,
    ar.FilenamesArray,
    ar.InfoArray,
    ar.BooleanArray,
    ar.IntegerArray,
    ar.Bands,
    ar.VibrationalData,
    ar.ScatteringData,
    ar.ElectronicData,
    ar.VibrationalActivities,
    ar.ScatteringActivities,
    ar.ElectronicActivities,
    ar.Transitions,
    ar.Geometry,
]


# CLASSES
class _KeptItemsView(ItemsView):
    def __init__(self, mapping, indices=False):
        super().__init__(mapping)
        self.indices = indices

    def __contains__(self, item):
        key, value = item
        try:
            kept = self._mapping.kept[self._mapping.index_of(key)]
        except KeyError:
            return False
        else:
            if not kept:
                return False
            else:
                v = self._mapping[key]
                return v is value or v == value

    def __iter__(self):
        indices = self.indices
        for idx, (key, kept) in enumerate(zip(self._mapping, self._mapping.kept)):
            if kept:
                value = self._mapping[key]
                yield key, value if not indices else (idx, key, value)

    def __reversed__(self):
        yield from iter(reversed(list(self)))


class _KeptValuesView(ValuesView):
    def __init__(self, mapping, indices=False):
        super().__init__(mapping)
        self.indices = indices

    def __contains__(self, value):
        for key, kept in zip(self._mapping, self._mapping.kept):
            v = self._mapping[key]
            if (v is value or v == value) and kept:
                return True
        return False

    def __iter__(self):
        indices = self.indices
        for idx, (key, kept) in enumerate(zip(self._mapping, self._mapping.kept)):
            if kept:
                value = self._mapping[key]
                yield value if not indices else (idx, value)

    def __reversed__(self):
        yield from iter(reversed(list(self)))


class _KeptKeysView(KeysView):
    def __init__(self, mapping, indices=False):
        super().__init__(mapping)
        self.indices = indices

    def __contains__(self, key):
        try:
            return self._mapping.kept[self._mapping.index_of(key)]
        except KeyError:
            return False

    def __iter__(self):
        indices = self.indices
        for idx, (key, kept) in enumerate(zip(self._mapping, self._mapping.kept)):
            if kept:
                yield key if not indices else (idx, key)

    def __reversed__(self):
        yield from iter(reversed(list(self)))


[docs]class Conformers(OrderedDict): """Container for data extracted from quantum chemical software output files. Data for each file is stored in the underlying OrderedDict, under the key of said file's name. Its values are dictionaries with genres name (as key) and appropriate data pairs. Beside this, its essential functionality is transformation of stored data to corresponding DataArray objects with use of :meth:`.arrayed` method. It provides some control over this transformation, especially in terms of including/excluding particular conformers' data on creation of new DataArray instance. This type of control is here called trimming. Trimming can be achieved by use of various *trim* methods defined in this class or by direct changes to :attr:`.kept` attribute. See its documentation for more information. Attributes ---------- primary_genres Class attribute. Data genres considered most important, used as default when checking for conformers completeness (see :meth:`.trim_incomplete` method). Notes ----- Inherits from collections.OrderedDict. """ primary_genres = tuple( "dip rot vosc vrot losc lrot raman1 roa1 scf zpe ent ten gib".split() ) def __init__( self, *args, allow_data_inconsistency: bool = False, temperature_of_the_system: float = 298.15, **kwargs, ): """ Parameters ---------- *args list of arguments for creation of underlying dictionary allow_data_inconsistency : bool, optional specifies if data inconsistency should be allowed in created DataArray object instances, defaults to False temperature_of_the_system : float, optional Temperature of the system in Kelvin units, must be zero or higher. Defaults to room temperature = 298.15 K. **kwargs list of arbitrary keyword arguments for creation of underlying dictionary """ self.allow_data_inconsistency = allow_data_inconsistency self.temperature = temperature_of_the_system self.kept = [] self.filenames = [] self._indices = {} super().__init__(*args, **kwargs) @property def temperature(self) -> float: """Temperature of the system expressed in Kelvin units. Value of this parameter is passed to :term:`data array`\\s created with the :meth:`.arrayed` method, provided that the target data array class supports a parameter named *t* in it's constructor. .. versionadded:: 0.9.1 Raises ------ ValueError if set to a value lower than zero. """ return vars(self)["temperature"] @temperature.setter def temperature(self, value): if value <= 0: raise ValueError( "Temperature of the system must be higher than absolute zero." ) vars(self)["temperature"] = value
[docs] def clear(self): """Remove all items from the Conformers instance.""" self._kept = [] self.filenames = [] self._indices = {} self.temperature = 298.15 super().clear()
@recursive_repr() def __repr__(self) -> str: if not self: return ( f"{self.__class__.__name__}(" f"allow_data_inconsistency={self.allow_data_inconsistency})" ) return ( f"{self.__class__.__name__}({list(self.items())}, " f"allow_data_inconsistency={self.allow_data_inconsistency})" ) def __setitem__(self, key, value): try: value = dict(value) except TypeError as error: raise TypeError("Can't convert given value to dictionary.") from error except ValueError as error: raise ValueError("Can't convert given value to dictionary.") from error if key in self: index = self._indices[key] else: index = len(self.filenames) self.filenames.append(key) self.kept.append(True) super().__setitem__(key, value) self._indices[key] = index def __delitem__(self, key): index = self._indices[key] super().__delitem__(key) del self.filenames[index] del self.kept[index] del self._indices[key] for index, key in enumerate(self.keys()): self._indices[key] = index
[docs] def popitem(self, last=True): """Remove and return a (key, value) pair from the dictionary. Pairs are returned in LIFO order if last is true or FIFO order if false. """ idx = -1 if last else 0 try: key = self.filenames[idx] return key, self.pop(key) except IndexError: raise KeyError("Conforemres is empty.")
[docs] def move_to_end(self, key, last=True): """Move an existing element to the end (or beginning if last==False). Raises KeyError if the element does not exist. """ idx = self.index_of(key) new_idx = 0 if not last else len(self.filenames) - 1 super().move_to_end(key, last) self.filenames.insert(new_idx, self.filenames.pop(idx)) self._kept.insert(new_idx, self._kept.pop(idx)) self._indices = {k: i for i, k in enumerate(self.filenames)}
[docs] def copy(self): "conformers.copy() -> a shallow copy of conformers" cp = self.__class__( allow_data_inconsistency=self.allow_data_inconsistency, temperature_of_the_system=self.temperature, **self, ) cp.kept = self.kept return cp
@property def kept(self): """List of booleans, one for each conformer stored, defining if particular conformers data should be included in corresponding DataArray instance, created by :meth:`.arrayed` method. It may be changed by use of trim methods, by setting its value directly, or by modification of the underlying list. For the first option refer to those methods documentation, for rest see the Examples section. Returns ------- list of bool List of booleans, one for each conformer stored, defining if particular conformers data should be included in corresponding DataArray instance. Raises ------ TypeError If assigned values is not a sequence. If elements of given sequence are not one of types: bool, int, str. ValuesError If number of given boolean values doesn't match number of contained conformers. KeyError If any of given string values is not in underlying dictionary keys. IndexError If any of given integer values is not in range 0 <= i < number of conformers. Examples -------- New list of values can be set in a few ways. Firstly, it is the most straightforward to just assign a new list of boolean values to the :attr:`.kept` attribute. This list should have the same number of elements as the number of conformers contained. A ValueError is raised if it doesn't. >>> c = Conformers(one={}, two={}, tree={}) >>> c.kept [True, True, True] >>> c.kept = [False, True, False] >>> c.kept [False, True, False] >>> c.kept = [False, True, False, True] Traceback (most recent call last): ... ValueError: Must provide boolean value for each known conformer. 4 values provided, 3 excepted. Secondly, list of filenames of conformers intended to be kept may be given. Only these conformers will be kept. If given filename is not in the underlying Conformers' dictionary, KeyError is raised. >>> c.kept = ['one'] >>> c.kept [True, False, False] >>> c.kept = ['two', 'other'] Traceback (most recent call last): ... KeyError: Unknown conformers: other. Thirdly, list of integers representing conformers indices may be given. Only conformers with specified indices will be kept. If one of given integers can't be translated to conformer's index, IndexError is raised. Indexing with negative values is not supported currently. >>> c.kept = [1, 2] >>> c.kept [False, True, True] >>> c.kept = [2, 3] Traceback (most recent call last): ... IndexError: Indexes out of bounds: 3. Fourthly, assigning ``True`` or ``False`` to this attribute will mark all conformers as kept or not kept respectively. >>> c.kept = False >>> c.kept [False, False, False] >>> c.kept = True >>> c.kept [True, True, True] Lastly, list of kept values may be modified by setting its elements to True or False. It is advised against, however, as mistake such as ``c.kept[:2] = [True, False, False]`` will break some functionality by forcibly changing size of :attr:`.kept` list. Notes ----- Type of the first element of given sequence is used for dynamic dispatch. """ # TODO: Consider making return value immutable. return self._kept @kept.setter def kept(self, blade: Union[Sequence[bool], Sequence[str], Sequence[int], bool]): if blade is True or blade is False: self._kept = [blade for _ in self.keys()] return try: first = blade[0] except (TypeError, KeyError): raise TypeError(f"Excepted sequence or boolean, got: {type(blade)}.") except IndexError: self._kept = [False for _ in self.keys()] return # empty sequence is understood as "keep nothing" if isinstance(first, (str, np.str_)): blade = set(blade) if not blade.issubset(self.keys()): raise KeyError(f"Unknown conformers: {', '.join(blade-self.keys())}") else: self._kept = [fnm in blade for fnm in self.keys()] elif isinstance(first, (bool, np.bool_)): if not len(blade) == len(self): raise ValueError( f"Must provide boolean value for each known conformer. " f"{len(blade)} values provided, {len(self)} excepted." ) else: self._kept = [bool(b) for b in blade] # convert from np.bool_ elif isinstance(first, (int, np.integer)): length = len(self) out_of_bounds = [b for b in blade if not 0 <= b < length] if out_of_bounds: raise IndexError( f"Indexes out of bounds: " f"{', '.join(str(n) for n in out_of_bounds)}." ) else: blade = set(blade) self._kept = [num in blade for num in range(len(self))] else: raise TypeError( f"Expected sequence of strings, integers or booleans, got: " f"{type(first)} as first sequence's element." )
[docs] def update(self, other=None, **kwargs): """Works like ``dict.update``, but if key is already present, it updates dictionary associated with given key rather than assigning new value. Keys of dictionary passed as positional parameter (or additional keyword arguments given) should be conformers' identifiers and its values should be dictionaries of {"genre": values} for those conformers. Please note, that values of status genres like 'optimization_completed' and 'normal_termination' will be updated as well for such key, if are present in given new values. """ if other is not None: other = dict(other) else: other = dict() items = chain(other.items(), kwargs.items()) for key, value in items: if key in self: self[key].update(value) else: self[key] = value
[docs] def arrayed( self, genre: str, full: bool = False, strict: bool = True, **kwargs ) -> AnyArray: """Lists requested data and returns as appropriate :class:`.DataArray` instance. .. versionadded:: 0.9.1 The *strict* parameter. Parameters ---------- genre String representing data genre. Must be one of known genres. full Boolean indicating if full set of data should be taken, ignoring any trimming conducted earlier. Defaults to ``False``. strict Boolean indicating if additional kwargs that doesn't match signature of data array's constructor should cause an exception as normally (``strict = True``) or be silently ignored (``strict = False``). Defaults to ``True``. kwargs Additional keyword parameters passed to data array constructor. Any explicitly given parameters will take precedence over automatically retrieved and default values. Returns ------- DataArray Arrayed data of desired genre as appropriate :class:`.DataArray` object. Notes ----- For now, the special "filenames" genre always ignores *kwargs*. """ try: cls = _ARRAY_CONSTRUCTORS[genre] # ArrayBase subclasses except KeyError: raise ValueError(f"Unknown genre '{genre}'.") if genre == "filenames": # return early if filenames requested return cls( genre=genre, filenames=list(self.kept_keys() if not full else self.keys()), allow_data_inconsistency=self.allow_data_inconsistency, ) view = self.kept_items() if not full else self.items() array = ((fname, conf, conf[genre]) for fname, conf in view if genre in conf) try: filenames, confs, values = zip(*array) except ValueError: # if no elements in `array` logger.debug( f"Array of gerne {genre} requested, but no such data available " f"or conformers providing this data were trimmed off. " f"Returning an empty array." ) filenames, confs, values = [], [], [] default_params = cls.get_init_params() default_params["genre"] = genre default_params["filenames"] = filenames default_params["values"] = values default_params["allow_data_inconsistency"] = self.allow_data_inconsistency init_params = {} for key, value in default_params.items(): if key in kwargs: # explicitly given keyword parameters take precedence init_params[key] = kwargs.pop(key) continue if key == "t": # if not given explicitly, temperature is taken form self init_params[key] = self.temperature continue if not isinstance(default_params[key], Parameter): # if value for parameter is already established, just take it init_params[key] = value continue param_genre = ( # maybe ``key`` is not a param's genre name value.genre_getter(genre) if hasattr(value, "genre_getter") else key ) try: init_params[key] = [conf[param_genre] for conf in confs] except KeyError: # can't retrieve ``param_genre`` data from each included conformer # set param to its default value or raise an error if it don't have one if value.default is not value.empty: init_params[key] = value.default else: raise TesliperError( f"One or more conformers does not provide value for " f"'{param_genre}' genre, needed to instantiate {cls.__name__} " "object. You may provide missing values as a keyword parameters" " to the `Conformers.arrayed()` method call." ) if not init_params[key] and value.default is not value.empty: # genre produces an empty array, but parameter has default value init_params[key] = value.default if kwargs and strict: # any kwargs not popped till now are not expected by the ``cls.__init__()`` # if ``strict`` handling requested, add them anyway to cause an exception init_params.update(**kwargs) return cls(**init_params)
[docs] def by_index(self, index: int) -> dict: """Returns data for conformer on desired index.""" return self[self.filenames[index]]
[docs] def key_of(self, index: int) -> str: """Returns name of conformer associated with given index.""" return self.filenames[index]
[docs] def index_of(self, key: str) -> int: """Return index of given key.""" try: return self._indices[key] except KeyError as error: raise KeyError(f"No such conformer: {key}.") from error
[docs] def has_genre(self, genre: str, ignore_trimming: bool = False) -> bool: """Checks if any of stored conformers contains data of given genre. Parameters ---------- genre : str Name of genre to test. ignore_trimming : bool If all known conformers should be considered (``ignore_trimming = True``) or only kept ones (``ignore_trimming = False``, default). Returns ------- bool Boolean value indicating if any of stored conformers contains data of genre in question.""" conformers = self.values() if ignore_trimming else self.kept_values() for conformer in conformers: if genre in conformer: return True return False
[docs] def has_any_genre( self, genres: Iterable[str], ignore_trimming: bool = False ) -> bool: """Checks if any of stored conformers contains data of any of given genres. Parameters ---------- genres : iterable of str List of names of genres to test. ignore_trimming : bool If all known conformers should be considered (``ignore_trimming = True``) or only kept ones (``ignore_trimming = False``, default). Returns ------- bool Boolean value indicating if any of stored conformers contains data of any of genres in question.""" conformers = self.values() if ignore_trimming else self.kept_values() for conformer in conformers: for genre in genres: if genre in conformer: return True return False
[docs] def all_have_genres( self, genres: Iterable[str], ignore_trimming: bool = False ) -> bool: """Checks if all stored conformers contains data of given genres. Parameters ---------- genres : iterable of str List of names of genres to test. ignore_trimming : bool If all known conformers should be considered (``ignore_trimming = True``) or only kept ones (``ignore_trimming = False``, default). Returns ------- bool Boolean value indicating if each stored conformers contains data of all genres in question.""" genres = set(genres) conformers = self.values() if ignore_trimming else self.kept_values() for conformer in conformers: if genres - conformer.keys(): return False return True
[docs] def trim_incomplete( self, wanted: Optional[Iterable[str]] = None, strict: bool = False ) -> None: """Mark incomplete conformers as "not kept". Conformers that does not contain one or more data genres specified as *wanted* will be marked as "not kept". If *wanted* parameter is not given, it evaluates to :attr:`.primary_genres`. If no conformer contains all *wanted* genres, conformers that match the specification most closely are kept. The "closeness" is defined by number of conformer's genres matching *wanted* genres in the first place (the more, the better) and the position of particular genre in *wanted* list in the second place (the closer to the beginning, the better). This "match closest" behaviour may be turned off by setting parameter *strict* to ``True``. In such case, only conformers containing all *wanted* genres will be kept. Parameters ---------- wanted List of data genres used as completeness reference. If not given, evaluates to :attr:`.primary_genres`. strict Indicates if all *wanted* genres must be present in the kept conformers (``strict=True``) or if "match closest" mechanism should be used as a fallback (``strict=False``, this is the default). Notes ----- Conformers previously marked as "not kept" will not be affected. """ wanted = wanted if wanted is not None else self.primary_genres if not strict: count = [tuple(g in conf for g in wanted) for conf in self.values()] if not count: return best_match = max(count) complete = (match == best_match for match in count) else: complete = (all(g in conf for g in wanted) for conf in self.values()) blade = [kept and cmpl for kept, cmpl in zip(self.kept, complete)] self._kept = blade
[docs] def trim_imaginary_frequencies(self) -> None: """Mark all conformers with imaginary frequencies as "not kept". Notes ----- Conformers previously marked as "not kept" will not be affected. Conformers that doesn't contain "freq" genre will be treated as not having imaginary frequencies. """ dummy = [1] for index, conf in enumerate(self.values()): freq = np.array(conf.get("freq", dummy)) if (freq < 0).any(): self._kept[index] = False
[docs] def trim_non_matching_stoichiometry(self, wanted: Optional[str] = None) -> None: """Mark all conformers with stoichiometry other than *wanted* as "not kept". If not given, *wanted* evaluates to the most common stoichiometry. Parameters ---------- wanted Only conformers with same stoichiometry will be kept. Evaluates to the most common stoichiometry if not given. Notes ----- Conformers previously marked as "not kept" will not be affected. Conformers that doesn't contain stoichiometry data are always treated as non-matching. """ if not wanted: counter = Counter( conf["stoichiometry"] for conf in self.values() if "stoichiometry" in conf ) counts = counter.most_common() wanted = counts[0][0] if counts else "" # no conformer has "stoichiometry" for index, conf in enumerate(self.values()): if "stoichiometry" not in conf or not conf["stoichiometry"] == wanted: self._kept[index] = False
[docs] def trim_not_optimized(self) -> None: """Mark all conformers that failed structure optimization as "not kept". Notes ----- Conformers previously marked as "not kept" will not be affected. Conformers that doesn't contain optimization data are always treated as optimized. """ for index, conf in enumerate(self.values()): if not conf.get("optimization_completed", True): self._kept[index] = False
[docs] def trim_non_normal_termination(self) -> None: """Mark all conformers, which calculation job did not terminate normally, as "not kept". Notes ----- Conformers previously marked as "not kept" will not be affected. Conformers that doesn't contain data regarding their calculation job's termination are always treated as terminated abnormally. """ for index, conf in enumerate(self.values()): if not conf.get("normal_termination", False): self._kept[index] = False
[docs] def trim_inconsistent_sizes(self) -> None: """Mark as "not kept" all conformers that contain any iterable data genre, that is of different length, than in case of majority of conformers. Examples -------- >>> c = Conformers( ... one={'a': [1, 2, 3]}, ... two={'a': [1, 2, 3]}, ... three={'a': [1, 2, 3, 4]} ... ) >>> c.kept [True, True, True] >>> c.trim_inconsistent_sizes() >>> c.kept [True, True, False] Notes ----- Conformers previously marked as "not kept" will not be affected. """ sizes = {} for fname, conf in self.items(): for genre, value in conf.items(): if isinstance(value, (np.ndarray, list, tuple)): sizes.setdefault(genre, {})[fname] = len(value) maxes = { genre: Counter(v for v in values.values()).most_common()[0][0] for genre, values in sizes.items() } for index, fname in enumerate(self.keys()): for genre, most_common in maxes.items(): confs = sizes[genre] if fname in confs and not confs[fname] == most_common: self._kept[index] = False
[docs] def trim_to_range( self, genre: str, minimum: Union[int, float] = float("-inf"), maximum: Union[int, float] = float("inf"), attribute: str = "values", ) -> None: """Marks as "not kept" all conformers, which numeric value of data of specified genre is outside of the range specified by *minimum* and *maximum* values. Parameters ---------- genre Name of genre that should be compared to specified minimum and maximum values. minimum Minimal accepted value - every conformer, which genre value evaluates to less than *minimum* will be marked as "not kept". Defaults to ``float(-inf)``. maximum Maximal accepted value - every conformer, which genre value evaluates to more than *maximum* will be marked as "not kept". Defaults to ``float(inf)``. attribute Attribute of DataArray of specified *genre* that contains one-dimensional array of numeric values. defaults to `"values"`. Raises ------ AttributeError If DataArray associated with *genre* genre has no attribute *attribute*. ValueError If data retrieved from specified genre's attribute is not in the form of one-dimensional array. TypeError If comparision cannot be made between elements of specified genre's attribute and *minimum* or *maximum* values. Notes ----- Conformers previously marked as "not kept" will not be affected. """ try: arr = self.arrayed(genre) atr = getattr(arr, attribute) except AttributeError as error: raise AttributeError( f"Invalid genre/attribute combination: {genre}/{attribute}. " f"Resulting DataArray object has no attribute {attribute}." ) from error values = np.asarray(atr) if values.ndim != 1: raise ValueError( f"Invalid genre/attribute combination: {genre}/{attribute}. " f"DataArray's attribute must contain one-dimensional array of values." ) try: in_range = (minimum <= values) & (values <= maximum) except TypeError as error: raise TypeError( f"Cannot compare {type(minimum)} with {type(values[0])}." ) from error self.kept = arr.filenames[in_range]
[docs] def trim_rmsd( self, threshold: Union[int, float], window_size: Optional[Union[int, float]], geometry_genre: str = "last_read_geom", energy_genre: str = "scf", ignore_hydrogen: bool = True, moving_window_strategy: Callable = dw.stretching_windows, ) -> None: """Marks as "not kept" all conformers that are identical with some other conformer, judging by a provided RMSD threshold. To minimize computation cost, conformers are compared inside windows, that is a subsets of the original list of conformers. Those windows are generated by the *moving_window_strategy* function. The recommended strategy, and a default value, is :func:`.streaching_windows`, but other are also available: :func:`.fixed_windows` and :func:`.pyramid_windows`. This function will be called with list of energies for conformers compared and (if it is not ``None``) *window_size* parameter. With default *moving_window_strategy* conformers, which energy difference (dE) is higher than given *window_size* are always treated as different, while those with dE smaller than *window_size* and RMSD value smaller than given *threshold* are considered identical. From two identical conformers, the one with lower energy is "kept", and the other is discarded (marked as "not kept"). Notes ----- RMSD threshold and size of the energy window should be chosen depending on the parameters of conformers' set: number of conformers, size of the conformer, its lability, etc. However, *threshold* of 0.5 angstrom and *window_size* of 5 to 10 kcal/mol is a good place to start if in doubt. Parameters ---------- threshold : int or float Maximum RMSD value to consider conformers identical. window_size : int or float Size of the energy window, in kcal/mol, inside which RMSD matrix is calculated. Essentially, a difference in conformers' energy, after which conformers are always considered different. geometry_genre : str Genre of geometry used to calculate RMSD matrix. "last_read_geom" is default. energy_genre : str Genre of energy used to sort and group conformers into windows of given energy size. "scf" is used by default. ignore_hydrogen : bool If hydrogen atom should be discarded before RMSD calculation. Defaults to ``True``. moving_window_strategy : callable Function that generates windows, inside which RMSD comparisions is performed. Raises ------ InconsistentDataError If requested genres does not provide the same set of conformers. ValueError When called with ``ignore_hydrogen=True`` but requested :attr:`.Geometry.atoms` cannot be collapsed to 1-D array. """ energy = self.arrayed(energy_genre) geometry = self.arrayed(geometry_genre) if not energy.filenames.size == geometry.filenames.size: raise InconsistentDataError( "Unequal number of conformers in requested geometry and energy genres. " "Trim incomplete entries before trimming with :meth:`.trim_rmds`." ) elif not np.array_equal(energy.filenames, geometry.filenames): raise InconsistentDataError( "Different conformers in requested geometry and energy genres. " "Trim incomplete entries before trimming with :meth:`.trim_rmds`." ) if not geometry: return # next steps assume there are some conformers if ignore_hydrogen and geometry.atoms.shape[0] > 1: # TODO: remove when dw.geometry.select_atoms supplemented raise ValueError( "Cannot ignore hydrogen atoms if requested conformers do not have " "the same order of atoms. This functionality is not supported yet." ) geom = ( dw.drop_atoms(geometry.values, geometry.atoms[0], dw.atoms.Atom.H) if ignore_hydrogen else geometry.values ) if window_size is None: windows = moving_window_strategy(energy.as_kcal_per_mol) else: windows = moving_window_strategy(energy.as_kcal_per_mol, window_size) wanted = dw.rmsd_sieve(geom, windows, threshold) self.kept = geometry.filenames[wanted]
[docs] def select_all(self) -> None: """Marks all conformers as 'kept'. Equivalent to ``conformers.kept = True``.""" self._kept = [True for _ in self._kept]
[docs] def reject_all(self) -> None: """Marks all conformers as 'not kept'. Equivalent to ``conformers.kept = False``. """ self._kept = [False for _ in self._kept]
[docs] def kept_keys(self, indices: bool = False) -> _KeptKeysView: """Equivalent of ``dict.keys()`` but gives view only on conformers marked as "kept". Returned view may also provide information on conformers index in its Conformers instance if requested with ``indices=True``. >>> c = Conformers(c1={"g": 0.1}, c2={"g": 0.2}, c3={"g": 0.3}} >>> c.kept = [True, False, True] >>> list(c.kept_keys()) ["c1", "c3"] >>> list(c.kept_keys(indices=True)) [(0, "c1"}), (2, "c3")] Parameters ---------- indices : bool If resulting Conformers view should also provide index of each conformer. Defaults to False. Returns ------- _KeptKeysView View of kept conformers. """ return _KeptKeysView(self, indices=indices)
[docs] def kept_values(self, indices: bool = False) -> _KeptValuesView: """Equivalent of ``dict.values()`` but gives view only on conformers marked as "kept". Returned view may also provide information on conformers index in its Conformers instance if requested with ``indices=True``. >>> c = Conformers(c1={"g": 0.1}, c2={"g": 0.2}, c3={"g": 0.3}} >>> c.kept = [True, False, True] >>> list(c.kept_values()) [{"g": 0.1}, {"g": 0.3}] >>> list(c.kept_values(indices=True)) [(0, {"g": 0.1}), (2, {"g": 0.3})] Parameters ---------- indices : bool If resulting Conformers view should also provide index of each conformer. Defaults to False. Returns ------- _KeptValuesView View of kept conformers. """ return _KeptValuesView(self, indices=indices)
[docs] def kept_items(self, indices: bool = False) -> _KeptItemsView: """Equivalent of ``dict.items()`` but gives view only on conformers marked as "kept". Returned view may also provide information on conformers index in its Conformers instance if requested with ``indices=True``. >>> c = Conformers(c1={"g": 0.1}, c2={"g": 0.2}, c3={"g": 0.3}} >>> c.kept = [True, False, True] >>> list(c.kept_items()) [("c1", {"g": 0.1}), ("c3", {"g": 0.3})] >>> list(c.kept_items(indices=True)) [(0, "c1", {"g": 0.1}), (2, "c3", {"g": 0.3})] Parameters ---------- indices : bool If resulting Conformers view should also provide index of each conformer. Defaults to False. Returns ------- _KeptItemsView View of kept conformers. """ return _KeptItemsView(self, indices=indices)
@property @contextmanager def untrimmed(self) -> "Conformers": """Temporally remove trimming. Implemented as context manager to use with python's 'with' keyword. Examples -------- >>> c = Conformers(one={}, two={}, tree={}) >>> c.kept = [False, True, False] >>> with c.untrimmed: >>> c.kept [True, True, True] >>> c.kept [False, True, False] """ blade = self._kept self.kept = True yield self self._kept = blade
[docs] @contextmanager def trimmed_to( self, blade: Union[Sequence[bool], Sequence[str], Sequence[int], bool] ) -> "Conformers": """Temporally set trimming blade to given one. Implemented as context manager to use with python's 'with' keyword. Parameters ---------- blade : bool or sequence of bool, str, or int Temporary trimming blade. To better understand how blade setting works, see Conformers.kept documentation. Examples -------- >>> c = Conformers(one={}, two={}, tree={}) >>> c.kept = [True, True, False] >>> with c.trimmed_to([1, 2]): >>> c.kept [False, True, True] >>> c.kept [True, True, False] """ old_blade = self._kept self.kept = blade yield self self._kept = old_blade
@property @contextmanager def inconsistency_allowed(self) -> "Conformers": """Temporally sets Conformers' 'allow_data_inconsistency' attribute to true. Implemented as context manager to use with python's 'with' keyword. Examples -------- >>> c = Conformers(...) >>> with c.inconsistency_allowed: >>> # do stuff here while c.allow_data_inconsistency is True >>> c.allow_data_inconsistency True >>> c.allow_data_inconsistency False """ inconsistency = self.allow_data_inconsistency self.allow_data_inconsistency = True yield self self.allow_data_inconsistency = inconsistency