"""Data export to excel files."""
import logging as lgg
from itertools import chain, repeat, zip_longest
from pathlib import Path
from string import Template
from typing import Iterable, List, Optional, Sequence, Union
import numpy as np
import openpyxl as oxl
from ..glassware.arrays import (
Bands,
DataArray,
Energies,
FloatArray,
InfoArray,
SpectralActivities,
SpectralData,
Transitions,
)
from ..glassware.spectra import SingleSpectrum, Spectra
from .writer_base import WriterBase, _GenericArray
# LOGGER
logger = lgg.getLogger(__name__)
logger.setLevel(lgg.DEBUG)
# CLASSES
[docs]class XlsxWriter(WriterBase):
"""Writes extracted data to .xlsx file."""
extension = "xlsx"
def __init__(
self,
destination: Union[str, Path],
mode: str = "x",
filename: str = "tesliper-output.${ext}",
):
"""
Parameters
----------
destination : str or pathlib.Path
Directory, to which generated files should be written.
mode : str
Specifies how writing to file should be handled. Should be one of
characters: 'a' (append to existing file), 'x' (only write if file doesn't
exist yet), or 'w' (overwrite file if it already exists).
filename : str or string.Template
Filename of created .xlsx file or a template for generation of the name
using :meth:`.make_name` method.
"""
super().__init__(destination=destination, mode=mode)
file = self.destination / Template(filename).substitute(ext=self.extension)
self.file = self.check_file(file)
self.manual_save = False
if self.mode == "a":
self.workbook = oxl.load_workbook(self.file)
else:
self.workbook = oxl.Workbook()
self.workbook.remove(self.workbook.active)
[docs] def write(self, data: List) -> None:
self.manual_save, orig_manual = True, self.manual_save
try:
super().write(data)
if not orig_manual:
self.workbook.save(self.file)
finally:
self.manual_save = orig_manual
[docs] def generic(
self,
data: List[_GenericArray],
name_template: Union[str, Template] = "${cat}.${det}",
):
"""Writes generic data from multiple :class:`.DataArray`-like objects to a
single sheet. Said objects should provide a single value for each conformer.
Parameters
----------
data
:class:`.DataArray` objects that are to be exported.
name_template
Template that will be used to generate filenames. Refer to
:meth:`.make_name` documentation for details on supported placeholders.
"""
wb = self.workbook
genres = [arr.genre for arr in data]
headers = ["Gaussian output file"] + [self._header[genre] for genre in genres]
values = [arr.values for arr in data]
lines = list(zip(data[0].filenames, *values))
types = [type(arr).__name__.lower().replace("array", "") for arr in data]
detail = "various" if len(set(types)) > 1 else types[0]
genre = "misc" if len(genres) > 1 else genres[0]
template_params = {
"cat": "generic",
"conf": "multiple",
"det": detail,
"genre": genre,
}
ws = wb.create_sheet(title=self.make_name(name_template, **template_params))
ws.append(headers)
for line in lines:
ws.append(line)
if not self.manual_save:
wb.save(self.file)
logger.info("Generic export to xlsx files done.")
[docs] def overview(
self,
energies: Sequence[Energies],
frequencies: Optional[DataArray] = None,
stoichiometry: Optional[InfoArray] = None,
name_template: Union[str, Template] = "${cat}",
):
"""Writes summarized information from multiple :class:`.Energies` objects to
xlsx file. Creates a worksheet with energy values and calculated populations for
each energy object given, as well as number of imaginary frequencies and
stoichiometry of conformers if *frequencies* and *stoichiometry* are provided,
respectively.
Parameters
----------
energies: list of glassware.Energies
Energies objects that are to be exported
frequencies: glassware.DataArray, optional
DataArray object containing frequencies
stoichiometry: glassware.InfoArray, optional
InfoArray object containing stoichiometry information
name_template : str or string.Template
Template that will be used to generate filenames, defaults to "${cat}".
Refer to :meth:`.make_name` documentation for details on supported
placeholders.
"""
wb = self.workbook
template_params = {"cat": "overview", "conf": "multiple"}
ws = wb.create_sheet(title=self.make_name(name_template, **template_params))
ens_no = len(energies)
headers = ["Gaussian output file", "Populations / %", "Energies / hartree"]
headers += ["Imag"] if frequencies is not None else []
headers += ["Stoichiometry"] if stoichiometry is not None else []
cells = [
"A1",
"B1",
f"{chr(66+ens_no)}1",
f"{chr(66+2*ens_no)}1",
f"{chr(67+2*ens_no)}1",
]
for header, cell in zip(headers, cells):
ws[cell] = header
names = [self._header[en.genre] for en in energies]
ws.append([""] + names + names)
ws.merge_cells("A1:A2")
ws.merge_cells(f"B1:{chr(65+ens_no)}1")
ws.merge_cells(f"{chr(66+ens_no)}1:{chr(65+2*ens_no)}1")
if frequencies is not None or stoichiometry is not None:
ws.merge_cells(f"{chr(66+2*ens_no)}1:{chr(66+2*ens_no)}2")
if frequencies is not None and stoichiometry is not None:
ws.merge_cells(f"{chr(67+2*ens_no)}1:{chr(67+2*ens_no)}2")
ws.freeze_panes = "A3"
filenames = energies[0].filenames
fmts = (
["0"]
+ ["0.00%"] * len(energies)
+ ["0." + "0" * (8 if en.genre == "scf" else 6) for en in energies]
+ ["0", "0"]
)
values = [en.values for en in energies]
populs = [en.populations for en in energies]
imag = frequencies.imaginary if frequencies is not None else []
stoich = stoichiometry.values if stoichiometry is not None else []
rows = zip_longest(filenames, *populs, *values, imag, stoich)
for row_num, values in enumerate(rows):
filtered_values = ((f, v) for f, v in zip(fmts, values) if v is not None)
for col_num, (fmt, value) in enumerate(filtered_values):
cell = ws.cell(row=row_num + 3, column=col_num + 1)
cell.value = value
cell.number_format = fmt
# set cells width
widths = [0] + [10] * ens_no + [16] * ens_no
widths += [6] if frequencies is not None else []
widths += [0] if stoichiometry is not None else []
for column, width in zip(ws.columns, widths):
if not width:
width = max(len(str(cell.value)) for cell in column) + 2
column_letter = oxl.utils.get_column_letter(column[0].column)
ws.column_dimensions[column_letter].width = width
if not self.manual_save:
wb.save(self.file)
logger.info("Overview export to xlsx files done.")
[docs] def energies(
self,
energies: Energies,
corrections: Optional[FloatArray] = None,
name_template: Union[str, Template] = "distribution-${genre}",
):
"""Writes detailed information from multiple :class:`.Energies` objects to xlsx
file. Creates one worksheet for each :class:`.Energies` object provided. The
sheet contains energy values, energy difference to lowest-energy conformer,
Boltzmann factor, population of each conformer and corrections, if those are
provided.
Parameters
----------
energies: list of glassware.Energies
Energies objects that are to be exported
corrections: list of glassware.DataArray
DataArray objects containing energies corrections
name_template : str or string.Template
Template that will be used to generate filenames, defaults to
"distribution-${genre}". Refer to :meth:`.make_name` documentation for
details on supported placeholders.
"""
wb = self.workbook
fmts = (
["0", "0.00%"]
+ ["0.0000"] * 2
+ ["0.00000000" if energies.genre == "scf" else "0.000000"] * 2
)
template_params = {
"conf": "multiple",
"genre": energies.genre,
"cat": "populations",
}
ws = wb.create_sheet(title=self.make_name(name_template, **template_params))
ws.freeze_panes = "A2"
header = [
"Gaussian output file",
"Population / %",
"Min. B. Factor",
"DE / (kcal/mol)",
"Energy / Hartree",
]
header += ["Correction / Hartree"] if corrections is not None else []
ws.append(header)
corr_values = corrections.values if corrections is not None else []
rows = zip_longest(
energies.filenames,
energies.populations,
energies.min_factors,
energies.deltas,
energies.values,
corr_values,
)
for row_num, values in enumerate(rows):
filtered_values = ((f, v) for f, v in zip(fmts, values) if v is not None)
for col_num, (fmt, value) in enumerate(filtered_values):
cell = ws.cell(row=row_num + 2, column=col_num + 1)
cell.value = value
cell.number_format = fmt
# set cells width
widths = [0, 15, 14, 15, 16, 19]
for column, width in zip(ws.columns, widths):
if not width:
width = max(len(str(cell.value)) for cell in column) + 2
column_letter = oxl.utils.get_column_letter(column[0].column)
ws.column_dimensions[column_letter].width = width
if not self.manual_save:
wb.save(self.file)
logger.info("Energies export to xlsx files done.")
[docs] def spectral_data(
self,
band: SpectralActivities,
data: Iterable[SpectralData],
name_template: Union[str, Template] = "${conf}.${cat}-${det}",
):
"""Writes :class:`.SpectralData` objects to xlsx file (one sheet for each
conformer).
Parameters
----------
band: glassware.SpectralActivities
object containing information about band at which transitions occur;
it should be frequencies for vibrational data and wavelengths or
excitation energies for electronic data
data: iterable of glassware.SpectralData
SpectralData objects that are to be serialized; all should contain
information for the same conformers. Assumes that all *data*'s elements have
the same *spectra_type*, which is passed to the *name_template* as "det".
name_template : str or string.Template
Template that will be used to generate filenames, defaults to
"${conf}.${cat}-${det}". Refer to :meth:`.make_name` documentation for
details on supported placeholders.
Raises
------
ValueError
if *data* is an empty sequence
"""
self._spectral(
band=band, data=data, name_template=name_template, category="data"
)
[docs] def spectral_activities(
self,
band: SpectralActivities,
data: Iterable[SpectralActivities],
name_template: Union[str, Template] = "${conf}.${cat}-${det}",
):
"""Writes :class:`.SpectralActivities` objects to xlsx file (one sheet for each
conformer).
Parameters
----------
band: glassware.SpectralActivities
object containing information about band at which transitions occur;
it should be frequencies for vibrational data and wavelengths or
excitation energies for electronic data
data: iterable of glassware.SpectralActivities
SpectralActivities objects that are to be serialized; all should contain
information for the same conformers. Assumes that all *data*'s elements have
the same *spectra_type*, which is passed to the *name_template* as "det".
name_template : str or string.Template
Template that will be used to generate filenames, defaults to
"${conf}.${cat}-${det}". Refer to :meth:`.make_name` documentation for
details on supported placeholders.
Raises
------
ValueError
if *data* is an empty sequence
"""
self._spectral(
band=band, data=data, name_template=name_template, category="activities"
)
def _spectral(
self,
band: SpectralActivities,
data: Union[Iterable[SpectralData], Iterable[SpectralActivities]],
name_template: Union[str, Template],
category: str,
):
"""Writes spectral data to .xlsx file (one sheet for each conformer).
Parameters
----------
band: glassware.SpectralActivities
object containing information about band at which transitions occur;
it should be frequencies for vibrational data and wavelengths or
excitation energies for electronic data
data: iterable of glassware.SpectralActivities or iterable of SpectralData
SpectralActivities or SpectralData objects that are to be serialized; all
should contain information for the same conformers. Assumes that all
*data*'s elements have the same *spectra_type*, which is passed to the
*name_template* as "det".
name_template : str or string.Template
Template that will be used to generate filenames. Refer to
:meth:`.make_name` documentation for details on supported placeholders.
category : str
Category of exported data genres.
Raises
------
ValueError
if *data* is an empty sequence
"""
wb = self.workbook
data = [band] + list(data)
try:
spectra_type = data[1].spectra_type
except IndexError:
raise ValueError("No data to export.")
genres = [bar.genre for bar in data]
headers = [self._header[genre] for genre in genres]
widths = [max(len(h), 10) for h in headers]
fmts = [self._excel_formats[genre] for genre in genres]
values = list(zip(*[bar.values for bar in data]))
template_params = {"genre": band.genre, "cat": category, "det": spectra_type}
for num, (fname, values_) in enumerate(zip(data[0].filenames, values)):
template_params.update({"conf": fname, "num": num})
ws = wb.create_sheet(title=self.make_name(name_template, **template_params))
ws.append(headers)
ws.freeze_panes = "B2"
for column, width in zip(ws.columns, widths):
column_letter = oxl.utils.get_column_letter(column[0].column)
ws.column_dimensions[column_letter].width = width
for col_num, (vals, fmt) in enumerate(zip(values_, fmts)):
for row_num, v in enumerate(vals):
cell = ws.cell(row=row_num + 2, column=col_num + 1)
cell.value = v
cell.number_format = fmt
if not self.manual_save:
wb.save(self.file)
logger.info("SpectralActivities export to xlsx files done.")
[docs] def spectra(
self,
spectra: Spectra,
name_template: Union[str, Template] = "${genre}",
):
"""Writes given spectra collectively to one sheet of xlsx workbook.
Parameters
----------
spectra: glassware.Spectra
Spectra object, that is to be serialized
name_template : str or string.Template
Template that will be used to generate filenames, defaults to "${genre}".
Refer to :meth:`.make_name` documentation for details on supported
placeholders.
"""
wb = self.workbook
template_params = {"genre": spectra.genre, "cat": "spectra"}
ws = wb.create_sheet(title=self.make_name(name_template, **template_params))
ws.freeze_panes = "B2"
A0 = spectra.units["x"]
ws.append([A0] + list(spectra.filenames))
title = (
f"{spectra.genre} calculated with peak width = "
f'{spectra.width} {spectra.units["width"]} and '
f"{spectra.fitting} fitting, shown as "
f'{spectra.units["x"]} vs. {spectra.units["y"]}'
)
ws["A1"].comment = oxl.comments.Comment(title, "Tesliper")
for line in zip(spectra.x, *spectra.y):
ws.append(line)
if not self.manual_save:
wb.save(self.file)
logger.info("Spectra export to xlsx file done.")
[docs] def single_spectrum(
self,
spectrum: SingleSpectrum,
name_template: Union[str, Template] = "${cat}.${genre}-${det}",
):
"""Writes SingleSpectrum object to new sheet of xlsx workbook.
Parameters
----------
spectrum: glassware.SingleSpectrum
spectrum, that is to be serialized
name_template : str or string.Template
Template that will be used to generate sheet names, defaults to
"${cat}.${genre}-${det}". Refer to :meth:`.make_name` documentation for
details on supported placeholders.
"""
# TODO: add comment as in txt export
wb = self.workbook
template_params = {
"genre": spectrum.genre,
"cat": "spectrum",
"det": spectrum.averaged_by,
}
ws = wb.create_sheet(title=self.make_name(name_template, **template_params))
ws.append([spectrum.units["x"], spectrum.units["y"]])
for row in zip(spectrum.x, spectrum.y):
ws.append(row)
if not self.manual_save:
wb.save(self.file)
logger.info("Spectrum export to xlsx files done.")
[docs] def transitions(
self,
transitions: Transitions,
wavelengths: Bands,
only_highest=True,
name_template: Union[str, Template] = "${conf}.${cat}-${det}",
):
"""Writes electronic transitions data to xlsx file
(one sheet for each conformer).
Parameters
----------
transitions : glassware.Transitions
Electronic transitions data that should be serialized.
wavelengths : glassware.ElectronicActivities
Object containing information about wavelength at which transitions occur.
only_highest : bool
Specifies if only transition of highest contribution to given band should
be reported. If ``False`` all transition are saved to file.
Defaults to ``True``.
name_template : str or string.Template
Template that will be used to generate filenames, defaults to
"${conf}.${cat}-${det}". Refer to :meth:`.make_name` documentation for
details on supported placeholders.
"""
transtions_data = (
transitions.highest_contribution
if only_highest
else (
transitions.ground,
transitions.excited,
transitions.values,
transitions.contribution,
)
)
wb = self.workbook
headers = [
self._header[wavelengths.genre],
"Ground",
"Excited",
"Coefficient",
"Contribution",
]
widths = [len(h) for h in headers]
fmts = [self._excel_formats[wavelengths.genre], "0", "0", "0.0000", "0%"]
template_params = {
"genre": transitions.genre,
"cat": "transitions",
"det": "highest" if only_highest else "all",
}
for num, (fname, grounds, exciteds, values, contribs, bands) in enumerate(
zip(
transitions.filenames,
*transtions_data,
wavelengths.wavelen,
)
):
template_params.update({"conf": fname, "num": num})
ws = wb.create_sheet(title=self.make_name(name_template, **template_params))
ws.append(headers)
ws.freeze_panes = "B2"
for column, width in zip(ws.columns, widths):
column_letter = oxl.utils.get_column_letter(column[0].column)
ws.column_dimensions[column_letter].width = width
row_num = 1
for g, e, v, c, b in zip(grounds, exciteds, values, contribs, bands):
try:
values_ = [
# print wavelength value only once
d
for d in zip(chain([b], repeat(None)), g, e, v, c)
# omit entry if any value is masked
if all(x is not np.ma.masked for x in d)
]
except TypeError:
# transition_data is transitions.highest_contribution
values_ = [(b, g, e, v, c)]
for vals in values_:
row_num += 1
for col_num, (v_, fmt) in enumerate(zip(vals, fmts), start=1):
cell = ws.cell(row=row_num, column=col_num)
cell.value = v_
cell.number_format = fmt
if not self.manual_save:
wb.save(self.file)
logger.info("Transitions export to xlsx files done.")