Source code for tesliper.extraction.spectra_parser

"""Parser for spectra files."""
import csv
import logging as lgg
from pathlib import Path
from typing import Optional, Union

import numpy as np

from .parser_base import ParserBase

logger = lgg.getLogger(__name__)


class SpectraParser(ParserBase):
    """Parser for files containing spectral data.

    It can parse .txt and .xy files (in "x y" format) and .csv files, returning
    a numpy.ndarray with the loaded spectrum. The parsing process may be customized
    by specifying what delimiter of values should be expected and in which columns
    x- and y-values are, if there are more than 2 columns of data.
    Rows at the beginning of a file that cannot be read as a pair of numbers
    (e.g. a header) are ignored.
    """

    purpose = "spectra"
    extensions = (".txt", ".xy", ".csv")

    def __init__(self):
        super().__init__()
        # Settings of the most recent `parse` call; read by the `parse_*` states.
        self.delimiter = None
        self.xcolumn = 0
        self.ycolumn = 1

    def parse(
        self,
        filename: Union[str, Path],
        delimiter: Optional[str] = None,
        xcolumn: int = 0,
        ycolumn: int = 1,
    ) -> np.ndarray:
        """Loads spectral data from file to numpy.array. Currently supports
        only .txt, .xy, and .csv files.

        Parameters
        ----------
        filename: str or Path
            path to file containing spectral data
        delimiter: str, optional
            character used to delimit columns in file, defaults to whitespace
        xcolumn: int, optional
            column, that should be used as points on x axis,
            defaults to 0 (first column)
        ycolumn: int, optional
            column, that should be used as values on y axis,
            defaults to 1 (second column)

        Returns
        -------
        numpy.array
            two-dimensional numpy array ([[x-values], [y-values]])
            of data type ``float``

        Raises
        ------
        ValueError
            if no parsing method is known for the given file
            or no spectral data was found in it"""
        # TODO: add type checking of passed file, consider those:
        #     https://github.com/audreyr/binaryornot
        #     https://eli.thegreenplace.net/2011/10/19/\
        #       perls-guess-if-file-is-text-or-binary-implemented-in-python/
        # TODO: add binary files support
        self.delimiter = delimiter
        self.xcolumn = xcolumn
        self.ycolumn = ycolumn
        filename = str(filename)
        # Always start from the initial state: without this reset, a reused
        # parser would still point at the method picked for the previous file
        # and would run it on this one, whatever its extension.
        self.workhorse = self.initial
        self.workhorse(filename)  # figure out which method to use
        return self.workhorse(filename)  # actual parsing

    def initial(self, filename: str):
        """Selects the parsing method appropriate for the given file.

        Delegates to ``ParserBase.initial`` and then verifies that the parser
        left its initial state.
        NOTE(review): this assumes ``ParserBase.initial`` switches ``workhorse``
        to the state whose trigger matches `filename` - confirm in parser_base.

        Parameters
        ----------
        filename: str
            path to file containing spectral data

        Raises
        ------
        ValueError
            if the parser is still in its initial state after the delegation,
            i.e. no parsing method was selected for `filename`"""
        super().initial(filename)
        # Bound methods are created anew on each attribute access, so they must
        # be compared with `==` (same instance and function), not with `is`.
        if self.workhorse == self.initial:
            raise ValueError(f"Don't know how to parse file {filename}")

    def _read_points(self, rows):
        """Converts rows of text fields into a list of (x, y) pairs of floats.

        Rows preceding the first valid data row (e.g. a header) are skipped.
        Once data has started, blank rows are still skipped, but any other row
        that cannot be converted re-raises the original error.

        Parameters
        ----------
        rows: iterable of list of str
            fields of consecutive lines of the parsed file

        Returns
        -------
        list of tuple of float
            (x, y) pairs taken from columns `self.xcolumn` and `self.ycolumn`

        Raises
        ------
        ValueError
            if no row could be converted, or a value inside the data
            is not a number
        IndexError
            if a row inside the data has too few columns"""
        xcolumn = self.xcolumn
        ycolumn = self.ycolumn
        points = []
        for values in rows:
            try:
                point = float(values[xcolumn]), float(values[ycolumn])
            except (ValueError, TypeError, IndexError) as error:
                is_blank = not any(v.strip() for v in values)
                if points and not is_blank:
                    raise  # malformed row in the middle of the data
                logger.debug(f"Line omitted due to {error}")
                continue
            points.append(point)
        if not points:
            raise ValueError(
                f"No spectral data found in file's columns {xcolumn} "
                f"and {ycolumn}."
            )
        return points

    @ParserBase.state(trigger=r".+\.(?:txt|xy)$")
    def parse_txt(self, file: Union[str, Path]):
        """Loads spectral data from .txt or .xy file to numpy.array.

        The delimiter and columns to use are taken from the instance attributes
        `delimiter`, `xcolumn`, and `ycolumn`, as set by `parse`. A delimiter
        of ``None`` means splitting on any whitespace.

        Parameters
        ----------
        file: str or Path
            path to file containing spectral data

        Returns
        -------
        numpy.array
            two-dimensional numpy array ([[x-values], [y-values]])
            of data type 'float'

        Raises
        ------
        ValueError
            if file passed was read to end, but no spectral data was found;
            this includes columns' numbers out of range and usage of
            inappropriate delimiter"""
        delimiter = self.delimiter
        with open(file, "r") as txtfile:
            # Empty fields are dropped, so a run of repeated delimiters
            # (e.g. several spaces used for alignment) counts as one.
            rows = (
                [v.strip() for v in line.split(delimiter) if v] for line in txtfile
            )
            points = self._read_points(rows)  # consume while the file is open
        return np.array(list(zip(*points)))

    @ParserBase.state(trigger=r".+\.csv$")
    def parse_csv(self, file: Union[str, Path]):
        """Loads spectral data from csv file to numpy.array.

        The delimiter and columns to use are taken from the instance attributes
        `delimiter`, `xcolumn`, and `ycolumn`, as set by `parse`. The dialect
        of the file is guessed from its first 1024 characters; if `delimiter`
        is not ``None``, the guess is restricted to the characters it contains.

        Parameters
        ----------
        file: str or Path
            path to file containing spectral data

        Returns
        -------
        numpy.array
            two-dimensional numpy array ([[x-values], [y-values]])
            of data type 'float'

        Raises
        ------
        csv.Error
            if the dialect of the file could not be determined
        ValueError
            if file passed was read to end, but no spectral data was found"""
        with open(file, newline="") as csvfile:
            dialect = csv.Sniffer().sniff(
                csvfile.read(1024), delimiters=self.delimiter
            )
            csvfile.seek(0)  # the sample was consumed, start over for parsing
            points = self._read_points(csv.reader(csvfile, dialect))
        return np.array(list(zip(*points)))

    def parse_spc(self, file):
        """Loads spectral data from spc file to numpy.array.

        Notes
        -----
        This method is not implemented yet, it will raise an error when called.

        Parameters
        ----------
        file: str
            path to file containing spectral data

        Returns
        -------
        numpy.array
            two-dimensional numpy array ([[x-values], [y-values]])
            of data type 'float'

        Raises
        ------
        NotImplementedError
            Whenever called, as this functionality is not implemented yet."""
        # TODO: add support for .spc files
        raise NotImplementedError("Parsing spc files is not implemented yet.")