Source code for tesliper.writing.csv_writer

"""Data export to CSV format."""
import csv
import logging as lgg
from contextlib import contextmanager
from itertools import repeat, zip_longest
from pathlib import Path
from string import Template
from typing import IO, Any, AnyStr, Dict, Iterable, Iterator, List, Optional, Union

import numpy as np

from ..glassware.arrays import (
    Bands,
    Energies,
    FloatArray,
    SpectralActivities,
    SpectralData,
    Transitions,
)
from ..glassware.spectra import SingleSpectrum, Spectra
from .writer_base import WriterBase, _GenericArray

# LOGGER
logger = lgg.getLogger(__name__)
logger.setLevel(lgg.DEBUG)


class _CsvMixin:
    """Mixin class for csv writers.

    This class takes care of setting up format of produced csv files.
    It should be used as a first base class to ensure proper cooperation with other
    base classes. It will pass all given *args and **kwargs to the next base class
    in MRO.
    """

    _known_fmt_params = {
        "delimiter",
        "doublequote",
        "escapechar",
        "lineterminator",
        "quotechar",
        "quoting",
        "skipinitialspace",
        "strict",
    }

    def __init__(
        self,
        *args,
        dialect: Union[str, csv.Dialect] = "excel",
        fmtparams: Optional[Dict] = None,
        include_header: bool = True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        dialect: str or csv.Dialect
            Name of a dialect or :class:`csv.Dialect` object, which will be used by
            :class:`.csv.writer`.
        fmtparams: dict, optional
            Additional formatting parameters for :class:`.csv.writer` to use.
            For list of valid parameters consult :class:`csv.Dialect` documentation.
        include_header: bool, optional
            Determines if file should contain a header with column names, ``True`` by
            default.
        """
        self.dialect = dialect
        self.fmtparams = fmtparams or {}
        self.include_header = include_header
        super().__init__(*args, **kwargs)

    @property
    def dialect(self):
        """Name of a dialect (as string) or csv.Dialect object,
        which will be used by csv.writer.
        """
        return self._dialect

    @dialect.setter
    def dialect(self, dialect: Union[str, csv.Dialect]):
        self._dialect = (
            dialect if isinstance(dialect, csv.Dialect) else csv.get_dialect(dialect)
        )

    @property
    def fmtparams(self):
        """Dict of additional formatting parameters for csv.writer to use.
        For list of valid parameters consult csv.Dialect documentation.

        Raises
        ------
        TypeError
            if invalid parameter is given
        """
        return self._fmtparams

    @fmtparams.setter
    def fmtparams(self, params: Dict):
        for param in params.keys():
            if param not in self._known_fmt_params:
                raise TypeError(f"'{param}' is an invalid csv formatting parameter")
        self._fmtparams = params


# CLASSES
[docs]class CsvWriter(_CsvMixin, WriterBase):
    """Writes extracted or calculated data to .csv format files."""

    extension = "csv"

    def __init__(
        self,
        destination: Union[str, Path],
        mode: str = "x",
        include_header: bool = True,
        dialect: Union[str, csv.Dialect] = "excel",
        **fmtparams,
    ):
        """
        Parameters
        ----------
        destination: str or pathlib.Path
            Directory, to which generated files should be written.
        mode: str
            Specifies how writing to file should be handled. Should be one of
            characters: 'a' (append to existing file), 'x' (only write if file doesn't
            exist yet), or 'w' (overwrite file if it already exists).
        include_header: bool, optional
            Determines if file should contain a header with column names, ``True`` by
            default.
        dialect: str or csv.Dialect
            Name of a dialect or :class:`csv.Dialect` object, which will be used by
            underlying :class:`csv.writer`.
        fmtparams: dict, optional
            Additional formatting parameters for underlying csv.writer to use.
            For list of valid parameters consult :class:`csv.Dialect` documentation.
        """
        super().__init__(
            destination=destination,
            mode=mode,
            dialect=dialect,
            fmtparams=fmtparams,
            include_header=include_header,
        )

[docs]    @contextmanager
    def _get_handle(
        self,
        template: Union[str, Template],
        template_params: dict,
        open_params: Optional[dict] = None,
    ) -> Iterator[IO[AnyStr]]:
        open_params = open_params or {"newline": ""}
        with super()._get_handle(template, template_params, open_params) as handle:
            yield handle

[docs]    def _iter_handles(
        self,
        filenames: Iterable[str],
        template: Union[str, Template],
        template_params: dict,
        open_params: Optional[dict] = None,
    ) -> Iterator[IO[AnyStr]]:
        open_params = open_params or {"newline": ""}
        yield from super()._iter_handles(
            filenames, template, template_params, open_params
        )

[docs]    def generic(
        self,
        data: List[_GenericArray],
        name_template: Union[str, Template] = "${cat}.${det}.${ext}",
    ):
        """Writes generic data from multiple :class:`.DataArray`-like objects to a
        single file. Said objects should provide a single value for each conformer.

        Parameters
        ----------
        data
            :class:`.DataArray` objects that are to be exported.
        name_template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        genres = [arr.genre for arr in data]
        header = ["Gaussian output file"] + [self._header[genre] for genre in genres]
        values = [arr.values for arr in data]
        rows = zip(data[0].filenames, *values)
        types = [type(arr).__name__.lower().replace("array", "") for arr in data]
        detail = "various" if len(set(types)) > 1 else types[0]
        genre = "misc" if len(genres) > 1 else genres[0]
        template_params = {
            "cat": "generic",
            "conf": "multiple",
            "det": detail,
            "genre": genre,
        }
        with self._get_handle(name_template, template_params) as handle:
            csvwriter = csv.writer(handle, dialect=self.dialect, **self.fmtparams)
            if self.include_header:
                csvwriter.writerow(header)
            for row in rows:
                csvwriter.writerow(row)

[docs]    def energies(
        self,
        energies: Energies,
        corrections: Optional[FloatArray] = None,
        name_template: Union[str, Template] = "distribution-${genre}.${ext}",
    ):
        """Writes Energies object to csv file. The output also contains derived values:
        populations, min_factors, deltas. Corrections are added only when explicitly
        given.

        Parameters
        ----------
        energies: glassware.Energies
            Energies objects that is to be serialized
        corrections: glassware.DataArray, optional
            DataArray objects containing energies corrections
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        header = ["Gaussian output file"]
        header += "population min_factor delta energy".split(" ")
        if corrections is not None:
            header += ["corrections"]
            corr = corrections.values
        else:
            corr = []
        rows = zip_longest(
            energies.filenames,
            energies.populations,
            energies.min_factors,
            energies.deltas,
            energies.values,
            corr,
        )
        template_params = {
            "conf": "multiple",
            "genre": energies.genre,
            "cat": "populations",
        }
        with self._get_handle(name_template, template_params) as handle:
            csvwriter = csv.writer(handle, dialect=self.dialect, **self.fmtparams)
            if self.include_header:
                csvwriter.writerow(header)
            for row in rows:
                csvwriter.writerow(v for v in row if v is not None)
        logger.info("Energies export to csv files done.")

    def _energies_handler(self, data: List[Energies], extras: Dict[str, Any]) -> None:
        # TODO: return to WriterBase's implementation when `.overview()` added
        for en in data:
            self.energies(
                en, corrections=extras.get("corrections", dict()).get(en.genre)
            )

[docs]    def single_spectrum(
        self,
        spectrum: SingleSpectrum,
        name_template: Union[str, Template] = "${cat}.${genre}-${det}.${ext}",
    ):
        """Writes SingleSpectrum object to csv file.

        Parameters
        ----------
        spectrum: glassware.SingleSpectrum
            spectrum, that is to be serialized
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        template_params = {
            "genre": spectrum.genre,
            "cat": "spectrum",
            "det": spectrum.averaged_by,
        }
        with self._get_handle(name_template, template_params) as handle:
            csvwriter = csv.writer(handle, dialect=self.dialect, **self.fmtparams)
            if self.include_header:
                csvwriter.writerow([spectrum.units["y"], spectrum.units["x"]])
            for row in zip(spectrum.x, spectrum.y):
                csvwriter.writerow(row)
        logger.info("Spectrum export to csv files done.")

[docs]    def spectral_activities(
        self,
        band: SpectralActivities,
        data: List[SpectralActivities],
        name_template: Union[str, Template] = "${conf}.${cat}-${det}.${ext}",
    ):
        """Writes SpectralActivities objects to csv files (one file for each conformer).

        Parameters
        ----------
        band: glassware.SpectralActivities
            Object containing information about band at which transitions occur;
            it should be frequencies for vibrational data and wavelengths or
            excitation energies for electronic data.
        data: list of glassware.SpectralActivities
            SpectralActivities objects that are to be serialized; all should contain
            information for the same set of conformers and correspond to given band.
            Assumes that all *data*'s elements have the same *spectra_type*, which is
            passed to the *name_template* as "det".
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.

        Raises
        ------
        ValueError
            if *data* is an empty sequence
        """
        self._spectral(
            band=band,
            data=data,
            name_template=name_template,
            category="activities",
        )

[docs]    def spectral_data(
        self,
        band: SpectralData,
        data: List[SpectralData],
        name_template: Union[str, Template] = "${conf}.${cat}-${det}.${ext}",
    ):
        """Writes SpectralData objects to csv files (one file for each conformer).

        Parameters
        ----------
        band: glassware.SpectralData
            Object containing information about band at which transitions occur;
            it should be frequencies for vibrational data and wavelengths or
            excitation energies for electronic data.
        data: list of glassware.SpectralData
            SpectralData objects that are to be serialized; all should contain
            information for the same set of conformers and correspond to given band.
            Assumes that all *data*'s elements have the same *spectra_type*, which is
            passed to the *name_template* as "det".
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.

        Raises
        ------
        ValueError
            if *data* is an empty sequence
        """
        self._spectral(
            band=band, data=data, name_template=name_template, category="data"
        )

    def _spectral(
        self,
        band: SpectralActivities,
        data: Union[List[SpectralData], List[SpectralActivities]],
        name_template: Union[str, Template],
        category: str,
    ):
        """Writes SpectralData objects to csv files (one file for each conformer).

        Parameters
        ----------
        band: glassware.SpectralData
            Object containing information about band at which transitions occur;
            it should be frequencies for vibrational data and wavelengths or
            excitation energies for electronic data.
        data: list of glassware.SpectralData
            SpectralData objects that are to be serialized; all should contain
            information for the same set of conformers and correspond to given band.
            Assumes that all *data*'s elements have the same *spectra_type*, which is
            passed to the *name_template* as "det".
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        category : str
            category of exported data genres

        Raises
        ------
        ValueError
            if *data* is an empty sequence
        """
        try:
            spectra_type = data[0].spectra_type
        except IndexError:
            raise ValueError("No data to export.")
        data = [band] + data
        headers = [self._header[bar.genre] for bar in data]
        values = zip(*[bar.values for bar in data])
        template_params = {"genre": band.genre, "cat": category, "det": spectra_type}
        for values_, handle in zip(
            values,
            self._iter_handles(band.filenames, name_template, template_params),
        ):
            csvwriter = csv.writer(handle, dialect=self.dialect, **self.fmtparams)
            if self.include_header:
                csvwriter.writerow(headers)
            for row in zip(*values_):
                csvwriter.writerow(row)
        logger.info(f"{category.title()} export to csv files done.")

[docs]    def spectra(
        self,
        spectra: Spectra,
        name_template: Union[str, Template] = "${conf}.${genre}.${ext}",
    ):
        """Writes Spectra object to .csv files (one file for each conformer).

        Parameters
        ----------
        spectra: glassware.Spectra
            Spectra object, that is to be serialized.
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        abscissa = spectra.x
        header = [spectra.units["y"], spectra.units["x"]]
        template_params = {"genre": spectra.genre, "cat": "spectra"}
        for values, handle in zip(
            spectra.y,
            self._iter_handles(spectra.filenames, name_template, template_params),
        ):
            csvwriter = csv.writer(handle, dialect=self.dialect, **self.fmtparams)
            if self.include_header:
                csvwriter.writerow(header)
            for row in zip(abscissa, values):
                csvwriter.writerow(row)
        logger.info("Spectra export to csv files done.")

[docs]    def transitions(
        self,
        transitions: Transitions,
        wavelengths: Bands,
        only_highest=True,
        name_template: Union[str, Template] = "${conf}.${cat}-${det}.${ext}",
    ):
        """Writes electronic transitions data to CSV files (one for each conformer).

        Parameters
        ----------
        transitions : glassware.Transitions
            Electronic transitions data that should be serialized.
        wavelengths : glassware.ElectronicActivities
            Object containing information about wavelength at which transitions occur.
        only_highest : bool
            Specifies if only transition of highest contribution to given band should
            be reported. If ``False`` all transition are saved to file.
            Defaults to ``True``.
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        transtions_data = (
            transitions.highest_contribution
            if only_highest
            else (
                transitions.ground,
                transitions.excited,
                transitions.values,
                transitions.contribution,
            )
        )
        header = ["wavelength/nm", "ground", "excited", "coefficient", "contribution"]
        template_params = {
            "genre": transitions.genre,
            "cat": "transitions",
            "det": "highest" if only_highest else "all",
        }
        for grounds, exciteds, values, contribs, bands, handle in zip(
            *transtions_data,
            wavelengths.wavelen,
            self._iter_handles(transitions.filenames, name_template, template_params),
        ):
            csvwriter = csv.writer(handle, dialect=self.dialect, **self.fmtparams)
            if self.include_header:
                csvwriter.writerow(header)
            for g, e, v, c, b in zip(grounds, exciteds, values, contribs, bands):
                try:
                    listed = [
                        d
                        for d in zip(repeat(b), g, e, v, c)
                        # omit entry if any value is masked
                        if all(x is not np.ma.masked for x in d)
                    ]
                except TypeError:
                    # transition_data is transitions.highest_contribution
                    listed = [(b, g, e, v, c)]
                for data in listed:
                    csvwriter.writerow(data)