# Source code for tesliper.writing.csv_writer

"""Data export to CSV format."""
import csv
import logging as lgg
from contextlib import contextmanager
from itertools import repeat, zip_longest
from pathlib import Path
from string import Template
from typing import IO, Any, AnyStr, Dict, Iterable, Iterator, List, Optional, Union

import numpy as np

from ..glassware.arrays import (
    Bands,
    Energies,
    FloatArray,
    SpectralActivities,
    SpectralData,
    Transitions,
)
from ..glassware.spectra import SingleSpectrum, Spectra
from .writer_base import WriterBase, _GenericArray

# LOGGER
# Module-level logger, named after this module's import path; its own level is
# explicitly set to DEBUG here.
logger = lgg.getLogger(__name__)
logger.setLevel(lgg.DEBUG)


class _CsvMixin:
    """Mixin class for csv writers.

    This class takes care of setting up format of produced csv files.
    It should be used as a first base class to ensure proper cooperation with other
    base classes. It will pass all given *args and **kwargs to the next base class
    in MRO.
    """

    _known_fmt_params = {
        "delimiter",
        "doublequote",
        "escapechar",
        "lineterminator",
        "quotechar",
        "quoting",
        "skipinitialspace",
        "strict",
    }

    def __init__(
        self,
        *args,
        dialect: Union[str, csv.Dialect] = "excel",
        fmtparams: Optional[Dict] = None,
        include_header: bool = True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        dialect: str or csv.Dialect
            Name of a dialect or :class:`csv.Dialect` object, which will be used by
            :class:`.csv.writer`.
        fmtparams: dict, optional
            Additional formatting parameters for :class:`.csv.writer` to use.
            For list of valid parameters consult :class:`csv.Dialect` documentation.
        include_header: bool, optional
            Determines if file should contain a header with column names, ``True`` by
            default.
        """
        self.dialect = dialect
        self.fmtparams = fmtparams or {}
        self.include_header = include_header
        super().__init__(*args, **kwargs)

    @property
    def dialect(self):
        """Name of a dialect (as string) or csv.Dialect object,
        which will be used by csv.writer.
        """
        return self._dialect

    @dialect.setter
    def dialect(self, dialect: Union[str, csv.Dialect]):
        self._dialect = (
            dialect if isinstance(dialect, csv.Dialect) else csv.get_dialect(dialect)
        )

    @property
    def fmtparams(self):
        """Dict of additional formatting parameters for csv.writer to use.
        For list of valid parameters consult csv.Dialect documentation.

        Raises
        ------
        TypeError
            if invalid parameter is given
        """
        return self._fmtparams

    @fmtparams.setter
    def fmtparams(self, params: Dict):
        for param in params.keys():
            if param not in self._known_fmt_params:
                raise TypeError(f"'{param}' is an invalid csv formatting parameter")
        self._fmtparams = params


# CLASSES
class CsvWriter(_CsvMixin, WriterBase):
    """Writes extracted or calculated data to .csv format files."""

    extension = "csv"

    def __init__(
        self,
        destination: Union[str, Path],
        mode: str = "x",
        include_header: bool = True,
        dialect: Union[str, csv.Dialect] = "excel",
        **fmtparams,
    ):
        """
        Parameters
        ----------
        destination: str or pathlib.Path
            Directory, to which generated files should be written.
        mode: str
            Specifies how writing to file should be handled. Should be one of
            characters: 'a' (append to existing file), 'x' (only write if file
            doesn't exist yet), or 'w' (overwrite file if it already exists).
        include_header: bool, optional
            Determines if file should contain a header with column names, ``True`` by
            default.
        dialect: str or csv.Dialect
            Name of a dialect or :class:`csv.Dialect` object, which will be used by
            underlying :class:`csv.writer`.
        fmtparams: dict, optional
            Additional formatting parameters for underlying csv.writer to use.
            For list of valid parameters consult :class:`csv.Dialect` documentation.
        """
        super().__init__(
            destination=destination,
            mode=mode,
            dialect=dialect,
            fmtparams=fmtparams,
            include_header=include_header,
        )

    @staticmethod
    def _csv_open_params(open_params: Optional[dict]) -> dict:
        """Return a copy of *open_params* with ``newline=""`` as a default.

        The :mod:`csv` documentation requires file objects given to
        :func:`csv.writer` to be opened with ``newline=""``. An explicit
        ``"newline"`` entry in *open_params* is kept as given.
        """
        return {"newline": "", **(open_params or {})}

    def _write_csv_table(
        self,
        handle: IO[str],
        header: Iterable[Any],
        rows: Iterable[Iterable[Any]],
    ) -> None:
        """Write *rows* to *handle* with a csv.writer configured from this object.

        *header* is written as the first row only if ``include_header`` is set.
        """
        csvwriter = csv.writer(handle, dialect=self.dialect, **self.fmtparams)
        if self.include_header:
            csvwriter.writerow(header)
        csvwriter.writerows(rows)

    @contextmanager
    def _get_handle(
        self,
        template: Union[str, Template],
        template_params: dict,
        open_params: Optional[dict] = None,
    ) -> Iterator[IO[AnyStr]]:
        """Delegate to the parent's ``_get_handle``, adding ``newline=""`` to
        *open_params* unless the caller specified "newline" explicitly.
        """
        open_params = self._csv_open_params(open_params)
        with super()._get_handle(template, template_params, open_params) as handle:
            yield handle

    def _iter_handles(
        self,
        filenames: Iterable[str],
        template: Union[str, Template],
        template_params: dict,
        open_params: Optional[dict] = None,
    ) -> Iterator[IO[AnyStr]]:
        """Delegate to the parent's ``_iter_handles``, adding ``newline=""`` to
        *open_params* unless the caller specified "newline" explicitly.
        """
        open_params = self._csv_open_params(open_params)
        yield from super()._iter_handles(
            filenames, template, template_params, open_params
        )

    def generic(
        self,
        data: List[_GenericArray],
        name_template: Union[str, Template] = "${cat}.${det}.${ext}",
    ):
        """Writes generic data from multiple :class:`.DataArray`-like objects to
        a single file. Said objects should provide a single value for each conformer.

        Parameters
        ----------
        data
            :class:`.DataArray` objects that are to be exported.
        name_template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        genres = [arr.genre for arr in data]
        header = ["Gaussian output file"] + [self._header[genre] for genre in genres]
        values = [arr.values for arr in data]
        # filenames of the first array label rows for all of the arrays
        rows = zip(data[0].filenames, *values)
        types = [type(arr).__name__.lower().replace("array", "") for arr in data]
        detail = "various" if len(set(types)) > 1 else types[0]
        genre = "misc" if len(genres) > 1 else genres[0]
        template_params = {
            "cat": "generic",
            "conf": "multiple",
            "det": detail,
            "genre": genre,
        }
        with self._get_handle(name_template, template_params) as handle:
            self._write_csv_table(handle, header, rows)

    def energies(
        self,
        energies: Energies,
        corrections: Optional[FloatArray] = None,
        name_template: Union[str, Template] = "distribution-${genre}.${ext}",
    ):
        """Writes Energies object to csv file. The output also contains derived
        values: populations, min_factors, deltas. Corrections are added only when
        explicitly given.

        Parameters
        ----------
        energies: glassware.Energies
            Energies objects that is to be serialized
        corrections: glassware.DataArray, optional
            DataArray objects containing energies corrections
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        header = ["Gaussian output file"]
        header += "population min_factor delta energy".split(" ")
        if corrections is not None:
            header += ["corrections"]
            corr = corrections.values
        else:
            corr = []
        padded_rows = zip_longest(
            energies.filenames,
            energies.populations,
            energies.min_factors,
            energies.deltas,
            energies.values,
            corr,
        )
        # zip_longest pads missing values (e.g. absent corrections) with None;
        # those are dropped from each row rather than written as empty cells
        rows = ((v for v in row if v is not None) for row in padded_rows)
        template_params = {
            "conf": "multiple",
            "genre": energies.genre,
            "cat": "populations",
        }
        with self._get_handle(name_template, template_params) as handle:
            self._write_csv_table(handle, header, rows)
        logger.info("Energies export to csv files done.")

    def _energies_handler(self, data: List[Energies], extras: Dict[str, Any]) -> None:
        """Export each Energies object in *data* with :meth:`energies`, passing
        corrections found in ``extras["corrections"]`` under the same genre, if any.
        """
        # TODO: return to WriterBase's implementation when `.overview()` added
        for en in data:
            self.energies(
                en, corrections=extras.get("corrections", dict()).get(en.genre)
            )

    def single_spectrum(
        self,
        spectrum: SingleSpectrum,
        name_template: Union[str, Template] = "${cat}.${genre}-${det}.${ext}",
    ):
        """Writes SingleSpectrum object to csv file.

        Parameters
        ----------
        spectrum: glassware.SingleSpectrum
            spectrum, that is to be serialized
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        template_params = {
            "genre": spectrum.genre,
            "cat": "spectrum",
            "det": spectrum.averaged_by,
        }
        with self._get_handle(name_template, template_params) as handle:
            # Units are looked up only when a header is requested. Header order
            # must follow the order of columns, which is (x, y).
            header = (
                [spectrum.units["x"], spectrum.units["y"]]
                if self.include_header
                else []
            )
            self._write_csv_table(handle, header, zip(spectrum.x, spectrum.y))
        logger.info("Spectrum export to csv files done.")

    def spectral_activities(
        self,
        band: SpectralActivities,
        data: List[SpectralActivities],
        name_template: Union[str, Template] = "${conf}.${cat}-${det}.${ext}",
    ):
        """Writes SpectralActivities objects to csv files (one file for each
        conformer).

        Parameters
        ----------
        band: glassware.SpectralActivities
            Object containing information about band at which transitions occur;
            it should be frequencies for vibrational data and wavelengths or
            excitation energies for electronic data.
        data: list of glassware.SpectralActivities
            SpectralActivities objects that are to be serialized; all should contain
            information for the same set of conformers and correspond to given band.
            Assumes that all *data*'s elements have the same *spectra_type*, which is
            passed to the *name_template* as "det".
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.

        Raises
        ------
        ValueError
            if *data* is an empty sequence
        """
        self._spectral(
            band=band,
            data=data,
            name_template=name_template,
            category="activities",
        )

    def spectral_data(
        self,
        band: SpectralData,
        data: List[SpectralData],
        name_template: Union[str, Template] = "${conf}.${cat}-${det}.${ext}",
    ):
        """Writes SpectralData objects to csv files (one file for each conformer).

        Parameters
        ----------
        band: glassware.SpectralData
            Object containing information about band at which transitions occur;
            it should be frequencies for vibrational data and wavelengths or
            excitation energies for electronic data.
        data: list of glassware.SpectralData
            SpectralData objects that are to be serialized; all should contain
            information for the same set of conformers and correspond to given band.
            Assumes that all *data*'s elements have the same *spectra_type*, which is
            passed to the *name_template* as "det".
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.

        Raises
        ------
        ValueError
            if *data* is an empty sequence
        """
        self._spectral(
            band=band, data=data, name_template=name_template, category="data"
        )

    def _spectral(
        self,
        band: Union[SpectralData, SpectralActivities],
        data: Union[List[SpectralData], List[SpectralActivities]],
        name_template: Union[str, Template],
        category: str,
    ):
        """Writes spectral data objects to csv files (one file for each conformer).
        Shared implementation of :meth:`spectral_data` and
        :meth:`spectral_activities`.

        Parameters
        ----------
        band: glassware.SpectralData or glassware.SpectralActivities
            Object containing information about band at which transitions occur;
            it should be frequencies for vibrational data and wavelengths or
            excitation energies for electronic data. It is written as the first
            column of each file.
        data: list of glassware.SpectralData or glassware.SpectralActivities
            Objects that are to be serialized; all should contain information for
            the same set of conformers and correspond to given band.
            Assumes that all *data*'s elements have the same *spectra_type*, which is
            passed to the *name_template* as "det".
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        category : str
            category of exported data genres

        Raises
        ------
        ValueError
            if *data* is an empty sequence
        """
        try:
            spectra_type = data[0].spectra_type
        except IndexError:
            raise ValueError("No data to export.") from None
        columns = [band, *data]
        headers = [self._header[bar.genre] for bar in columns]
        # regroup values: from one item per genre to one item per conformer
        per_conformer = zip(*[bar.values for bar in columns])
        template_params = {"genre": band.genre, "cat": category, "det": spectra_type}
        handles = self._iter_handles(band.filenames, name_template, template_params)
        for conformer_values, handle in zip(per_conformer, handles):
            # transpose conformer's values, so each row holds one band's entries
            self._write_csv_table(handle, headers, zip(*conformer_values))
        logger.info(f"{category.title()} export to csv files done.")

    def spectra(
        self,
        spectra: Spectra,
        name_template: Union[str, Template] = "${conf}.${genre}.${ext}",
    ):
        """Writes Spectra object to .csv files (one file for each conformer).

        Parameters
        ----------
        spectra: glassware.Spectra
            Spectra object, that is to be serialized.
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        abscissa = spectra.x
        # header order must follow the order of columns, which is (x, y)
        header = [spectra.units["x"], spectra.units["y"]]
        template_params = {"genre": spectra.genre, "cat": "spectra"}
        handles = self._iter_handles(spectra.filenames, name_template, template_params)
        for values, handle in zip(spectra.y, handles):
            self._write_csv_table(handle, header, zip(abscissa, values))
        logger.info("Spectra export to csv files done.")

    @staticmethod
    def _iter_transition_rows(grounds, exciteds, values, contribs, bands):
        """Yield ``(band, ground, excited, coefficient, contribution)`` rows for
        one conformer, skipping entries that contain a masked value.
        """
        for g, e, v, c, b in zip(grounds, exciteds, values, contribs, bands):
            try:
                listed = [
                    d
                    for d in zip(repeat(b), g, e, v, c)
                    # omit entry if any value is masked
                    if all(x is not np.ma.masked for x in d)
                ]
            except TypeError:
                # values are not iterable: data comes from
                # transitions.highest_contribution, one entry for each band
                listed = [(b, g, e, v, c)]
            yield from listed

    def transitions(
        self,
        transitions: Transitions,
        wavelengths: Bands,
        only_highest=True,
        name_template: Union[str, Template] = "${conf}.${cat}-${det}.${ext}",
    ):
        """Writes electronic transitions data to CSV files (one for each conformer).

        Parameters
        ----------
        transitions : glassware.Transitions
            Electronic transitions data that should be serialized.
        wavelengths : glassware.Bands
            Object containing information about wavelength at which transitions
            occur.
        only_highest : bool
            Specifies if only transition of highest contribution to given band should
            be reported. If ``False`` all transition are saved to file.
            Defaults to ``True``.
        name_template : str or string.Template
            Template that will be used to generate filenames. Refer to
            :meth:`.make_name` documentation for details on supported placeholders.
        """
        transitions_data = (
            transitions.highest_contribution
            if only_highest
            else (
                transitions.ground,
                transitions.excited,
                transitions.values,
                transitions.contribution,
            )
        )
        header = ["wavelength/nm", "ground", "excited", "coefficient", "contribution"]
        template_params = {
            "genre": transitions.genre,
            "cat": "transitions",
            "det": "highest" if only_highest else "all",
        }
        handles = self._iter_handles(
            transitions.filenames, name_template, template_params
        )
        for grounds, exciteds, values, contribs, bands, handle in zip(
            *transitions_data, wavelengths.wavelen, handles
        ):
            rows = self._iter_transition_rows(
                grounds, exciteds, values, contribs, bands
            )
            self._write_csv_table(handle, header, rows)