Source code for tesliper.datawork.spectra

"""Functions that deal with spectra and spectral data."""
import logging as lgg
import math
from typing import Callable, Sequence, Tuple, Union

import numpy as np

# LOGGER
logger = lgg.getLogger(__name__)
logger.setLevel(lgg.DEBUG)


# TYPES
Number = Union[int, float]
Numbers = Sequence[Union[int, float]]
FittingFunctionType = Callable[[np.ndarray, np.ndarray, np.ndarray, float], np.ndarray]


# MODULE FUNCTIONS
[docs]def count_imaginary(frequencies: np.ndarray): """Finds number of imaginary frequencies of each conformer. Parameters ---------- frequencies List of conformers' frequencies. Array with one dimension is interpreted as list of frequencies for single conformer. Returns ------- numpy.ndarray Number of imaginary frequencies of each conformer. Raises ------ ValueError If input array has more than 2 dimensions.""" if frequencies.size == 0: return np.array([]) elif frequencies.ndim < 2: return np.asarray((frequencies < 0).sum()) elif frequencies.ndim == 2: return (frequencies < 0).sum(1) else: raise ValueError( f"Array with {frequencies.ndim} dimensions can't be interpreted " f"as a list of conformers' frequencies." )
[docs]def find_imaginary(frequencies: np.ndarray): """Finds all conformers with imaginary frequency values. Parameters ---------- frequencies List of conformers' frequencies. Returns ------- numpy.ndarray List of the indices of conformers with imaginary frequency values. Raises ------ ValueError If input array has more than 2 dimensions.""" try: imag = count_imaginary(frequencies) except ValueError as err: raise ValueError(err) from err return np.nonzero(imag)[0]
[docs]def gaussian( intensities: np.ndarray, frequencies: np.ndarray, abscissa: np.ndarray, width: Number, ) -> np.ndarray: """Gaussian fitting function for spectra calculation. Parameters ---------- intensities Appropriate values extracted from gaussian output files. frequencies Frequencies extracted from gaussian output files. abscissa List of wavelength/wave number points on spectrum x axis. width Number representing half width of peak at 1/e its maximum height. Returns ------- numpy.ndarray List of calculated intensity values. Raises ------ ValueError If given width is not greater than zero. If *intensities* and *frequencies* are not of the sane shape.""" if width <= 0: raise ValueError("Peak width must be a positive value!") if intensities.shape != frequencies.shape: raise ValueError("*intensities* and *frequencies* must be of same shape!") if abscissa.size == 0: return np.array([]) sigm = width / 1.4142135623730951 # math.sqrt(2) denominator = sigm * 2.5066282746310002 # (2 * math.pi) ** 0.5 it = np.nditer( [abscissa, None], flags=["buffered"], op_flags=[["readonly"], ["writeonly", "allocate", "no_broadcast"]], op_dtypes=[np.float64, np.float64], ) for x, peaks in it: e = intensities * np.exp(-0.5 * ((x - frequencies) / sigm) ** 2) peaks[...] = (e / denominator).sum() return it.operands[1]
[docs]def lorentzian( intensities: np.ndarray, frequencies: np.ndarray, abscissa: np.ndarray, width: Number, ) -> np.ndarray: """Lorentzian fitting function for spectra calculation. Parameters ---------- intensities Appropriate values extracted from gaussian output files. frequencies Frequencies extracted from gaussian output files. abscissa List of wavelength/wave number points on spectrum x axis. width Number representing half width of peak at half its maximum height. Returns ------- numpy.ndarray List of calculated intensity values. Raises ------ ValueError If given width is not greater than zero. If *intensities* and *frequencies* are not of the same shape.""" if width <= 0: raise ValueError("Peak width must be a positive value!") if intensities.shape != frequencies.shape: raise ValueError("*intensities* and *frequencies* must be of same shape!") if abscissa.size == 0: return np.array([]) hwhmsqrd = width ** 2 hwhmoverpi = width / math.pi it = np.nditer( [abscissa, None], flags=["buffered"], op_flags=[["readonly"], ["writeonly", "allocate", "no_broadcast"]], op_dtypes=[np.float64, np.float64], ) for x, val in it: s = intensities / ((frequencies - x) ** 2 + hwhmsqrd) s2 = (hwhmoverpi * s).sum() val[...] = s2 return it.operands[1]
[docs]def calculate_spectra( frequencies: np.ndarray, intensities: np.ndarray, abscissa: np.ndarray, width: Number, fitting: FittingFunctionType, ): """Calculates spectrum for each individual conformer. Parameters ---------- frequencies List of conformers' frequencies in cm^(-1). Should be of shape (number _of_conformers, number_of_frequencies). intensities List of calculated signal intensities for each conformer. Should be of same shape as frequencies. abscissa List of points on x axis in output spectrum in cm^(-1). width : int or float Number representing peak width in cm^(-1), used by fitting function. fitting : function Function, which takes intensities, frequencies, abscissa, hwhm as parameters and returns numpy.array of calculated spectrum points. Returns ------- numpy.ndarray Array of intensity values for each conformer. Raises ------ ValueError If given width is not greater than zero. If *intensities* and *frequencies* are not of the same shape.""" if intensities.shape != frequencies.shape: raise ValueError("*intensities* and *frequencies* must be of same shape!") if not intensities.size: return np.zeros(0) # return early to avoid (0, N) shape of output array spectra = np.zeros([len(frequencies), abscissa.shape[0]]) # template for inten, freq, spr in zip(intensities, frequencies, spectra): spr[...] = fitting(inten, freq, abscissa, width) return spectra
[docs]def calculate_average( values: Union[Numbers, np.ndarray], populations: Union[Numbers, np.ndarray] ) -> np.ndarray: """Calculates weighted average of *values*, where *populations* are used as weights. Parameters ---------- values List of values for each conformer, should be of shape (N, M), where N is number of conformers and M is number of values. populations List of conformers' populations, should be of shape (N,) where N is number of conformers. Should add up to 1. Returns ------- numpy.ndarray weighted arithmetic mean of values given. Raises ------ ValueError If parameters of non-matching shape were given.""" values = np.asanyarray(values) populations = np.asanyarray(populations) if not populations.size == values.shape[0]: raise ValueError( "Exactly one population value for each conformer must be provided." ) if not values.size: return np.zeros([0]) # just return an empty array if *values* is empty popsum = populations.sum() if not np.isclose(popsum, 1): # normalize population data, if needed populations = populations / popsum # populations must be of same shape as values array # so we expand populations with appropriate number of dimensions shape = (-1,) + (1,) * (values.ndim - 1) popul = populations.reshape(*shape) return (values * popul).sum(0)
[docs]def idx_offset(a: Numbers, b: Numbers) -> int: """Calculate offset by which *b* should be shifted to best overlap with *a*. Both *a* and *b* should be sets of points, interpreted as spectral data. Returned offset is a number of data points, by which *b* should be moved relative to *a*, to get the best overlap of given spectra. Parameters ---------- a *y* values of the first spectrum. b *y* values of the second spectrum. Returns ------- int Offset, in number of data points, by which spectrum *b* should be shifted to best match spectrum *a*. Positive value means it should be shifted to the right and negative value means it should be shifted to the left of *a*. Notes ----- The best overlap is found by means of cross-correlation of given spectra. """ a, b = np.asanyarray(a), np.asanyarray(b) # normalize values to be zero centered to prevent influence of padding with zeros a = (a - a.mean()) / a.std() b = (b - b.mean()) / b.std() # calculate cross correlation array and find best overlap cross = np.correlate(a, b, mode="full") best = cross.argmax() return best - b.size + 1
[docs]def unify_abscissa( ax: Numbers, ay: Numbers, bx: Numbers, by: Numbers, upscale: bool = True ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Interpolate one of the given spectra to have the same points density as the other given spectrum. Which spectra should be interpolated is determined based on the density of points of both spectra, by default more loosely spaced spectrum is interpolated to match spacing of the other spectrum. This may be changed by passing ``upscale=False`` to the function call. Parameters ---------- ax Abscissa of the first spectrum. ay Values of the first spectrum. bx Abscissa of the second spectrum. by Values of the second spectrum. upscale If interpolation should be done on more loosely spaced spectrum (default). When set to False, spectrum with lower resolution will be treated as reference. Returns ------- tuple of np.arrays of numbers Spectra, one unchanged and one interpolated, as a tuple of numpy arrays of x and y values. I.e. ``tuple(ax, ay, new_bx, new_by)`` or ``tuple(new_ax, new_ay, bx, by)``, depending on values of *upscale* parameter. """ ax, ay, bx, by = ( np.asanyarray(ax), np.asanyarray(ay), np.asanyarray(bx), np.asanyarray(by), ) ad, bd = ax[1] - ax[0], bx[1] - bx[0] # we assume both have steady step if ad == bd: return ax, ay, bx, by # no need to do anything elif (np.abs(ad) < np.abs(bd)) ^ upscale: # xor on booleans # *ad* is smaller than *bd*, but we don't want to upscale or vice-versa nbx, nby, nax, nay = unify_abscissa(bx, by, ax, ay, upscale) # swap spectra # but return in the same order as given in parameters else: step = np.abs(ad) * np.sign(bd) # ad is new step, but sign from bd nbx = np.arange(bx[0], bx[-1], step) # new abscissa nby = np.interp(nbx, bx, by) # interpolate values on new abscissa nax, nay = ax, ay # the other spectrum stays unchanged return nax, nay, nbx, nby
[docs]def find_offset( ax: Numbers, ay: Numbers, bx: Numbers, by: Numbers, upscale: bool = True ) -> float: """Finds value, by which the spectrum should be shifted along x-axis to best overlap with the first spectrum. If resolution of spectra is not identical, one of them will be interpolated to match resolution of the other one. By default interpolation is done on the lower-resolution spectra. This can be changed by passing ``upscale = False`` to function call. Parameters ---------- ax Abscissa of the first spectrum. ay Values of the first spectrum. bx Abscissa of the second spectrum. by Values of the second spectrum. upscale If interpolation should be done on more loosely spaced spectrum (default). When set to False, spectrum with lower resolution will be treated as reference for density of data points. Returns ------- float Value, by which second spectrum should be shifted, in appropriate units. """ ax, ay, bx, by = unify_abscissa(ax, ay, bx, by, upscale=upscale) shift = idx_offset(ay, by) if shift < 0: offset = ax[0] - bx[abs(shift)] else: offset = ax[shift] - bx[0] return offset
[docs]def find_scaling(a: Numbers, b: Numbers) -> float: """Find factor by which values *b* should be scaled to best match values *a*. Parameters ---------- a *y* values of the first spectrum. b *y* values of the second spectrum. Returns ------- float Scaling factor for *b* values. Notes ----- If scaling factor cannot be reasonably given, i.e. when *b* is an empty list or list of zeros or NaNs, ``1.0`` is returned. Values lower than 1% of maximum are ignored. """ a, b = np.abs(a), np.abs(b) try: amax, bmax = max(a), max(b) except ValueError: return 1.0 scaling = np.mean(a[a >= 0.01 * amax]) / np.mean(b[b >= 0.01 * bmax]) scaling = 1.0 if np.isnan(scaling) else scaling return scaling
# genre: {target: converter} _converters = { "freq": {"wavelen": lambda v: 1e7 / v, "ex_en": lambda v: v / 8065.544}, "wavelen": {"freq": lambda v: 1e7 / v, "ex_en": lambda v: 1239.8 / v}, "ex_en": {"freq": lambda v: v * 8065.544, "wavelen": lambda v: 1239.8 / v}, }
[docs]def convert_band( value: Union[float, np.ndarray], from_genre: str, to_genre: str ) -> Union[float, np.ndarray]: """Convert one representation of band to another. Parameters ---------- value Value(s) to convert. from_genre Genre specifying a representation of band of input data. Should be one of: 'freq', 'wavelen', 'ex_en'. to_genre Genre specifying a representation of band, to which you want to convert. Should be one of: 'freq', 'wavelen', 'ex_en'. Returns ------- float or np.ndarray Requested representation of bands. If *from_genre* is same as *to_genre*, then simply *value* is returned. """ if from_genre == to_genre: return value try: converter = _converters[from_genre][to_genre] except KeyError as error: raise ValueError( f"Unsupported conversion: from '{from_genre}' to '{to_genre}'. " "Genres available for conversion are: 'freq', 'wavelen', 'ex_en'." ) from error return converter(value)