Source code for tesliper.datawork.spectra

"""Functions that deal with spectra and spectral data."""
import logging as lgg
import math
from typing import Callable, Sequence, Tuple, Union

import numpy as np

# LOGGER
logger = lgg.getLogger(__name__)
logger.setLevel(lgg.DEBUG)


# TYPES
Number = Union[int, float]
Numbers = Sequence[Union[int, float]]
FittingFunctionType = Callable[[np.ndarray, np.ndarray, np.ndarray, float], np.ndarray]


# MODULE FUNCTIONS
[docs]def count_imaginary(frequencies: np.ndarray):
    """Finds number of imaginary frequencies of each conformer.

    Parameters
    ----------
    frequencies
        List of conformers' frequencies. Array with one dimension is interpreted
        as list of frequencies for single conformer.

    Returns
    -------
    numpy.ndarray
        Number of imaginary frequencies of each conformer.

    Raises
    ------
    ValueError
        If input array has more than 2 dimensions."""
    if frequencies.size == 0:
        return np.array([])
    elif frequencies.ndim < 2:
        return np.asarray((frequencies < 0).sum())
    elif frequencies.ndim == 2:
        return (frequencies < 0).sum(1)
    else:
        raise ValueError(
            f"Array with {frequencies.ndim} dimensions can't be interpreted "
            f"as a list of conformers' frequencies."
        )


[docs]def find_imaginary(frequencies: np.ndarray):
    """Finds all conformers with imaginary frequency values.

    Parameters
    ----------
    frequencies
        List of conformers' frequencies.

    Returns
    -------
    numpy.ndarray
        List of the indices of conformers with imaginary frequency values.

    Raises
    ------
    ValueError
        If input array has more than 2 dimensions."""
    try:
        imag = count_imaginary(frequencies)
    except ValueError as err:
        raise ValueError(err) from err
    return np.nonzero(imag)[0]


[docs]def gaussian(
    intensities: np.ndarray,
    frequencies: np.ndarray,
    abscissa: np.ndarray,
    width: Number,
) -> np.ndarray:
    """Gaussian fitting function for spectra calculation.

    Parameters
    ----------
    intensities
        Appropriate values extracted from gaussian output files.
    frequencies
        Frequencies extracted from gaussian output files.
    abscissa
        List of wavelength/wave number points on spectrum x axis.
    width
        Number representing half width of peak at 1/e its maximum height.

    Returns
    -------
    numpy.ndarray
        List of calculated intensity values.

    Raises
    ------
    ValueError
        If given width is not greater than zero.
        If *intensities* and *frequencies* are not of the sane shape."""
    if width <= 0:
        raise ValueError("Peak width must be a positive value!")
    if intensities.shape != frequencies.shape:
        raise ValueError("*intensities* and *frequencies* must be of same shape!")
    if abscissa.size == 0:
        return np.array([])
    sigm = width / 1.4142135623730951  # math.sqrt(2)
    denominator = sigm * 2.5066282746310002  # (2 * math.pi) ** 0.5
    it = np.nditer(
        [abscissa, None],
        flags=["buffered"],
        op_flags=[["readonly"], ["writeonly", "allocate", "no_broadcast"]],
        op_dtypes=[np.float64, np.float64],
    )
    for x, peaks in it:
        e = intensities * np.exp(-0.5 * ((x - frequencies) / sigm) ** 2)
        peaks[...] = (e / denominator).sum()
    return it.operands[1]


[docs]def lorentzian(
    intensities: np.ndarray,
    frequencies: np.ndarray,
    abscissa: np.ndarray,
    width: Number,
) -> np.ndarray:
    """Lorentzian fitting function for spectra calculation.

    Parameters
    ----------
    intensities
        Appropriate values extracted from gaussian output files.
    frequencies
        Frequencies extracted from gaussian output files.
    abscissa
        List of wavelength/wave number points on spectrum x axis.
    width
        Number representing half width of peak at half its maximum height.

    Returns
    -------
    numpy.ndarray
        List of calculated intensity values.

    Raises
    ------
    ValueError
        If given width is not greater than zero.
        If *intensities* and *frequencies* are not of the same shape."""
    if width <= 0:
        raise ValueError("Peak width must be a positive value!")
    if intensities.shape != frequencies.shape:
        raise ValueError("*intensities* and *frequencies* must be of same shape!")
    if abscissa.size == 0:
        return np.array([])
    hwhmsqrd = width ** 2
    hwhmoverpi = width / math.pi
    it = np.nditer(
        [abscissa, None],
        flags=["buffered"],
        op_flags=[["readonly"], ["writeonly", "allocate", "no_broadcast"]],
        op_dtypes=[np.float64, np.float64],
    )
    for x, val in it:
        s = intensities / ((frequencies - x) ** 2 + hwhmsqrd)
        s2 = (hwhmoverpi * s).sum()
        val[...] = s2
    return it.operands[1]


[docs]def calculate_spectra(
    frequencies: np.ndarray,
    intensities: np.ndarray,
    abscissa: np.ndarray,
    width: Number,
    fitting: FittingFunctionType,
):
    """Calculates spectrum for each individual conformer.

    Parameters
    ----------
    frequencies
        List of conformers' frequencies in cm^(-1). Should be of shape
        (number _of_conformers, number_of_frequencies).
    intensities
        List of calculated signal intensities for each conformer. Should be
        of same shape as frequencies.
    abscissa
        List of points on x axis in output spectrum in cm^(-1).
    width : int or float
        Number representing peak width in cm^(-1), used by fitting function.
    fitting : function
        Function, which takes intensities, frequencies, abscissa, hwhm as
        parameters and returns numpy.array of calculated spectrum points.

    Returns
    -------
    numpy.ndarray
        Array of intensity values for each conformer.

    Raises
    ------
    ValueError
        If given width is not greater than zero.
        If *intensities* and *frequencies* are not of the same shape."""
    if intensities.shape != frequencies.shape:
        raise ValueError("*intensities* and *frequencies* must be of same shape!")
    if not intensities.size:
        return np.zeros(0)  # return early to avoid (0, N) shape of output array
    spectra = np.zeros([len(frequencies), abscissa.shape[0]])  # template
    for inten, freq, spr in zip(intensities, frequencies, spectra):
        spr[...] = fitting(inten, freq, abscissa, width)
    return spectra


[docs]def calculate_average(
    values: Union[Numbers, np.ndarray], populations: Union[Numbers, np.ndarray]
) -> np.ndarray:
    """Calculates weighted average of *values*, where *populations* are used as
    weights.

    Parameters
    ----------
    values
        List of values for each conformer, should be of shape (N, M), where N is
        number of conformers and M is number of values.
    populations
        List of conformers' populations, should be of shape (N,) where N is
        number of conformers. Should add up to 1.

    Returns
    -------
    numpy.ndarray
        weighted arithmetic mean of values given.

    Raises
    ------
    ValueError
        If parameters of non-matching shape were given."""
    values = np.asanyarray(values)
    populations = np.asanyarray(populations)
    if not populations.size == values.shape[0]:
        raise ValueError(
            "Exactly one population value for each conformer must be provided."
        )
    if not values.size:
        return np.zeros([0])  # just return an empty array if *values* is empty
    popsum = populations.sum()
    if not np.isclose(popsum, 1):
        # normalize population data, if needed
        populations = populations / popsum
    # populations must be of same shape as values array
    # so we expand populations with appropriate number of dimensions
    shape = (-1,) + (1,) * (values.ndim - 1)
    popul = populations.reshape(*shape)
    return (values * popul).sum(0)


[docs]def idx_offset(a: Numbers, b: Numbers) -> int:
    """Calculate offset by which *b* should be shifted to best overlap with *a*.
    Both *a* and *b* should be sets of points, interpreted as spectral data. Returned
    offset is a number of data points, by which *b* should be moved relative to *a*,
    to get the best overlap of given spectra.

    Parameters
    ----------
    a
        *y* values of the first spectrum.
    b
        *y* values of the second spectrum.

    Returns
    -------
    int
        Offset, in number of data points, by which spectrum *b* should be shifted
        to best match spectrum *a*. Positive value means it should be shifted to the
        right and negative value means it should be shifted to the left of *a*.

    Notes
    -----
    The best overlap is found by means of cross-correlation of given spectra.
    """
    a, b = np.asanyarray(a), np.asanyarray(b)
    # normalize values to be zero centered to prevent influence of padding with zeros
    a = (a - a.mean()) / a.std()
    b = (b - b.mean()) / b.std()
    # calculate cross correlation array and find best overlap
    cross = np.correlate(a, b, mode="full")
    best = cross.argmax()
    return best - b.size + 1


[docs]def unify_abscissa(
    ax: Numbers, ay: Numbers, bx: Numbers, by: Numbers, upscale: bool = True
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Interpolate one of the given spectra to have the same points density as the
    other given spectrum.

    Which spectra should be interpolated is determined based on the density of points
    of both spectra, by default more loosely spaced spectrum is interpolated to match
    spacing of the other spectrum. This may be changed by passing ``upscale=False``
    to the function call.

    Parameters
    ----------
    ax
        Abscissa of the first spectrum.
    ay
        Values of the first spectrum.
    bx
        Abscissa of the second spectrum.
    by
        Values of the second spectrum.
    upscale
        If interpolation should be done on more loosely spaced spectrum (default).
        When set to False, spectrum with lower resolution will be treated as reference.

    Returns
    -------
    tuple of np.arrays of numbers
        Spectra, one unchanged and one interpolated, as a tuple of numpy arrays
        of x and y values. I.e. ``tuple(ax, ay, new_bx, new_by)`` or
        ``tuple(new_ax, new_ay, bx, by)``, depending on values of *upscale* parameter.
    """
    ax, ay, bx, by = (
        np.asanyarray(ax),
        np.asanyarray(ay),
        np.asanyarray(bx),
        np.asanyarray(by),
    )
    ad, bd = ax[1] - ax[0], bx[1] - bx[0]  # we assume both have steady step
    if ad == bd:
        return ax, ay, bx, by  # no need to do anything
    elif (np.abs(ad) < np.abs(bd)) ^ upscale:  # xor on booleans
        # *ad* is smaller than *bd*, but we don't want to upscale or vice-versa
        nbx, nby, nax, nay = unify_abscissa(bx, by, ax, ay, upscale)  # swap spectra
        # but return in the same order as given in parameters
    else:
        step = np.abs(ad) * np.sign(bd)  # ad is new step, but sign from bd
        nbx = np.arange(bx[0], bx[-1], step)  # new abscissa
        nby = np.interp(nbx, bx, by)  # interpolate values on new abscissa
        nax, nay = ax, ay  # the other spectrum stays unchanged
    return nax, nay, nbx, nby


[docs]def find_offset(
    ax: Numbers, ay: Numbers, bx: Numbers, by: Numbers, upscale: bool = True
) -> float:
    """Finds value, by which the spectrum should be shifted along x-axis to best
    overlap with the first spectrum. If resolution of spectra is not identical,
    one of them will be interpolated to match resolution of the other one. By default
    interpolation is done on the lower-resolution spectra. This can be changed
    by passing ``upscale = False`` to function call.

    Parameters
    ----------
    ax
        Abscissa of the first spectrum.
    ay
        Values of the first spectrum.
    bx
        Abscissa of the second spectrum.
    by
        Values of the second spectrum.
    upscale
        If interpolation should be done on more loosely spaced spectrum (default).
        When set to False, spectrum with lower resolution will be treated as reference
        for density of data points.

    Returns
    -------
    float
        Value, by which second spectrum should be shifted, in appropriate units.
    """
    ax, ay, bx, by = unify_abscissa(ax, ay, bx, by, upscale=upscale)
    shift = idx_offset(ay, by)
    if shift < 0:
        offset = ax[0] - bx[abs(shift)]
    else:
        offset = ax[shift] - bx[0]
    return offset


[docs]def find_scaling(a: Numbers, b: Numbers) -> float:
    """Find factor by which values *b* should be scaled to best match values *a*.

    Parameters
    ----------
    a
        *y* values of the first spectrum.
    b
        *y* values of the second spectrum.

    Returns
    -------
    float
        Scaling factor for *b* values.

    Notes
    -----
    If scaling factor cannot be reasonably given, i.e. when *b* is an empty list or list
    of zeros or NaNs, ``1.0`` is returned. Values lower than 1% of maximum are ignored.
    """
    a, b = np.abs(a), np.abs(b)
    try:
        amax, bmax = max(a), max(b)
    except ValueError:
        return 1.0
    scaling = np.mean(a[a >= 0.01 * amax]) / np.mean(b[b >= 0.01 * bmax])
    scaling = 1.0 if np.isnan(scaling) else scaling
    return scaling


# genre: {target: converter}
_converters = {
    "freq": {"wavelen": lambda v: 1e7 / v, "ex_en": lambda v: v / 8065.544},
    "wavelen": {"freq": lambda v: 1e7 / v, "ex_en": lambda v: 1239.8 / v},
    "ex_en": {"freq": lambda v: v * 8065.544, "wavelen": lambda v: 1239.8 / v},
}


[docs]def convert_band(
    value: Union[float, np.ndarray], from_genre: str, to_genre: str
) -> Union[float, np.ndarray]:
    """Convert one representation of band to another.

    Parameters
    ----------
    value
        Value(s) to convert.
    from_genre
        Genre specifying a representation of band of input data.
        Should be one of: 'freq', 'wavelen', 'ex_en'.
    to_genre
        Genre specifying a representation of band, to which you want to convert.
        Should be one of: 'freq', 'wavelen', 'ex_en'.

    Returns
    -------
    float or np.ndarray
        Requested representation of bands. If *from_genre* is same as *to_genre*,
        then simply *value* is returned.
    """
    if from_genre == to_genre:
        return value
    try:
        converter = _converters[from_genre][to_genre]
    except KeyError as error:
        raise ValueError(
            f"Unsupported conversion: from '{from_genre}' to '{to_genre}'. "
            "Genres available for conversion are: 'freq', 'wavelen', 'ex_en'."
        ) from error
    return converter(value)