# -*- coding: utf-8 -*-
"""An ad-hoc treatment of orca output files
This should implement a grammar, but currently consists of a number of utility
structures and functions to parse data from the orca output format
Example:
See the tests for more
$ poetry run
Some more details.
Todo:
* Make grammar
* Make classes
* Test the experiments more
* Pass a list of experiments to ignore
* Test the ordering of variables
* Make classes for the order
* Handle split jobs
* Propagate exceptions instead of passing the buck with warnings
* Setup proper logging
* Scrape NIST Web book for spectra, properties https://webbook.nist.gov/cgi/cbook.cgi?ID=C71432&Mask=800#Electronic-Spec
* Document more things (e.g. SP -> Single point calculation a.k.a. energy
calculation)
* Return interesting things
.. _Google Python Style Guide:
http://google.github.io/styleguide/pyguide.html
"""
import wailord.io as waio
import wailord.utils as wau
import numpy as np
import pandas as pd
import itertools as itertt
import re
import os
import textwrap
import pint
import pint_pandas
import warnings
import vg
from pathlib import Path
from functools import reduce
from collections import namedtuple, OrderedDict
from operator import itemgetter
from pandas.api.types import CategoricalDtype
from konfik import Konfik
# Pint setup
PA_ = pint_pandas.PintArray
ureg = pint.UnitRegistry()
pint_pandas.PintType.ureg = ureg
Q_ = ureg.Quantity
ureg.define("kcal_mol = kcal / 6.02214076e+23 = kcm")
inpcart = namedtuple("inpcart", "atype x y z")
orcaout = namedtuple("orcaout", "final_energy fGeom basis filename system spin theory")
ORDERED_THEORY = ["HF", "UHF", "QCISD", "CCSD", "QCISD(T)", "CCSD(T)"]
ORDERED_BASIS = [
"STO-3G",
"3-21G",
"6-31G",
"6-311G",
"6-311G*",
"6-311G**",
"6-311++G**",
"6-311++G(2d,2p)",
"6-311++G(2df,2pd)",
"6-311++G(3df,3pd)",
]
OUT_REGEX = {
"cartesian_coord": re.compile(r"CARTESIAN\s*COORDINATES\s*\(ANGSTROEM\)\s*"),
"final_single_point_e": re.compile(r"(?<=FINAL SINGLE POINT ENERGY)\s*-\d*.?\d*"),
"basis_set": re.compile(r"Orbital\s*basis\s*set\s*information"),
"MDCI": re.compile(r"The\s*Calculated\s*Surface\s*using\s*the\s*MDCI\s*energy\n"),
"MDCI w/o Triples": re.compile(
r"The Calculated Surface using the MDCI energy minus triple correction\s*"
),
"Actual Energy": re.compile(r"The Calculated Surface using the 'Actual Energy'"),
"SCF Energy": re.compile(r"The Calculated Surface using the SCF energy"),
"energy_evals": re.compile(r"There will be\s*\d* energy evaluations"),
"Mulliken": re.compile(r"MULLIKEN ATOMIC CHARGES"),
"Loewdin": re.compile(r"LOEWDIN ATOMIC CHARGES"),
"irSpectrum": re.compile(r"IR SPECTRUM"),
"vpt2trans": re.compile(r"Fundamental transition"),
"Vibrational Frequency": re.compile(r"VIBRATIONAL FREQUENCIES"),
}
# ------------ Refactor
[docs]def parseOut(filename, plotter=False):
"""Handles orca outputs with regex for energy and coordinates"""
intcreg, fsp_ereg, get_basis = itemgetter(
"cartesian_coord", "final_single_point_e", "basis_set"
)(OUT_REGEX)
get_spec = re.compile(r"Number\s*of\s*atoms\s*.*\s*\d*")
with open(filename) as f:
fInp = f.read()
fin_energ = float(fsp_ereg.findall(fInp)[-1].split()[-1]) * ureg.hartree
if plotter == True:
energ = [float(x.split()[-1]) for x in fsp_ereg.findall(fInp)]
num_species = int(get_spec.search(fInp).group(0).split()[-1])
with open(filename) as f:
flines = f.readlines()
allAtoms = []
for linum, line in enumerate(flines):
if get_basis.search(line):
basis = flines[linum + 1].split()[-1]
if intcreg.search(line):
offset = linum + 2
for i in range(num_species):
p = flines[offset + i].split()
myAtom = inpcart(
atype=p[0],
x=float(p[1]) * ureg.angstrom,
y=float(p[2]) * ureg.angstrom,
z=float(p[3]) * ureg.angstrom,
)
allAtoms.append(myAtom)
runinfo = getRunInfo(Path(filename).parent)
if runinfo["spin"] == "spin_01":
spin = "singlet"
elif runinfo["spin"] == "spin_03":
spin = "triplet"
else:
raise (NotImplementedError(f"Not yet implemented {runinfo['spin']}"))
finGeom = []
for i in reversed(range(1, num_species + 1)):
finGeom.append(allAtoms[-i])
# Creates a dictionary of the system H num O num
systr = pd.DataFrame(finGeom).atype.value_counts().to_dict()
# Flattens the dictionary to a list
listdict = list(itertt.chain.from_iterable(systr.items()))
# Flattens the list to a single string
liststr = "".join(map(str, listdict))
oout = orcaout(
final_energy=fin_energ,
fGeom=finGeom,
basis=basis,
filename=filename,
system=liststr,
spin=spin,
theory=runinfo["theory"],
)
if plotter == True:
return oout, energ
else:
return oout
[docs]def get_e(orcaoutdat, basis, system):
"""
This takes in an orcaout data frame and spits out the energy
"""
return orcaoutdat[
(orcaoutdat.basis.isin([basis]) & (orcaoutdat.system.isin([system])))
]["final_energy"].to_list()[0]
[docs]def getBL(dat, x, y, z, indi=[0, 1]):
"""Takes in a data frame of xyz coordinates and uses it to calculate the bond length"""
v1 = np.array(
[dat.x[indi[0]].magnitude, dat.y[indi[0]].magnitude, dat.z[indi[0]].magnitude]
)
v2 = np.array(
[dat.x[indi[1]].magnitude, dat.y[indi[1]].magnitude, dat.z[indi[1]].magnitude]
)
return Q_(vg.euclidean_distance(v1, v2), x[indi[0]].units)
[docs]def getBA(dat, x, y, z, indi=[0, 1, 2]):
"""Takes in a data frame of xyz coordinates and uses it to generate the
plane angle, indices are used such that the first is the relative center"""
v1 = np.array(
[dat.x[indi[0]].magnitude, dat.y[indi[0]].magnitude, dat.z[indi[0]].magnitude]
)
v2 = np.array(
[dat.x[indi[1]].magnitude, dat.y[indi[1]].magnitude, dat.z[indi[1]].magnitude]
)
v3 = np.array(
[dat.x[indi[2]].magnitude, dat.y[indi[2]].magnitude, dat.z[indi[2]].magnitude]
)
v12 = v2 - v1
v13 = v3 - v1
return Q_(vg.angle(v12, v13, units="deg"), "degrees")
[docs]def genEBASet(
rootdir,
deci=3,
latex=False,
full=False,
order_basis=ORDERED_BASIS,
order_theory=ORDERED_THEORY,
):
"""Takes in a Path object, and typically returns bond angles and energies.
Optionally returns a TeX table or a full dataset with the filenames and
geometries. Depreciate this eventually."""
outs = []
for root, dirs, files in os.walk(rootdir.resolve()):
for filename in files:
if "out" in filename and "slurm" not in filename:
outs.append(parseOut(f"{root}/{filename}"))
outdat = pd.DataFrame(data=outs)
basis_type = CategoricalDtype(categories=order_basis, ordered=True)
theory_type = CategoricalDtype(categories=order_theory, ordered=True)
outdat["basis"] = outdat["basis"].astype(basis_type)
outdat["theory"] = outdat["theory"].astype(theory_type)
# print(outdat.basis[0])
# print(pd.DataFrame(outdat.fGeom[0]))
outdat["angle"] = outdat.fGeom.apply(
lambda geom: getBA(
pd.DataFrame(geom),
pd.DataFrame(geom).x,
pd.DataFrame(geom).y,
pd.DataFrame(geom).z,
[0, 1, 2],
)
)
outdat.sort_values(by=["theory", "basis"], ignore_index=True, inplace=True)
outdat.final_energy = outdat.final_energy.apply(
lambda x: np.around(x, decimals=deci)
)
outdat.angle = outdat.angle.apply(lambda x: np.around(x, decimals=deci))
if latex == True:
return outdat.drop(["filename", "fGeom"], axis=1).to_latex(
caption="Calculated systems at all basis sets", index=True
)
elif full == True:
return outdat
else:
outdat.drop(["filename", "fGeom"], axis=1, inplace=True)
return outdat
# ------------- Keep ------------------
[docs]def getRunInfo(runf):
"""Determines the runtime parameters from the output path
The implementation uses an ordered dictionary to ensure that the path
fragments are matched to the correct keys.
Note:
This will only work with wailord experiments at the moment
Args:
run (:obj:`Path`): Runtime output path
Returns:
runinf (:obj:`dict`): A simple unordered dictionary of paramters
"""
runinf = OrderedDict(
{"basis": None, "calc": None, "spin": None, "theory": None, "slug": None}
)
rfparts = runf.parts
for num, od in enumerate(runinf, start=1):
runinf[od] = rfparts[-num]
runinf["basis"] = runinf["basis"].replace("PP", "++").replace("8", "*")
runinf["theory"] = runinf["theory"].replace("_", " ")
return dict(runinf)
[docs]def calc_htst(product, reactant, transition_state, temperature):
"""Calculates the HTST rate constant.
This is calculated as:
.. math::
k^{HTST} = \frac{∏_{i=1 … N}𝜈ᵢ}{∏_{j=1 … N-1}𝜈ⱼ^‡}e^{\frac{-𝛥E}{k_BT}}
Where
* :math:`𝜈ᵢ` are the final/initial state vibrational frequencies and
:math:`𝜈ⱼ^‡` are the vibrational frequencies at the saddle point
* :math:`𝛥E` is the difference between the energy of the state and the
saddle point :math:`𝛥E=V_{saddle}-V_{other}`
It is important to note that the :math:`k_f` and :math:`k_b` are relative
measures and the rate constant is the ratio of the two. In some cases it
might be necessary to weigh the results by the magnitude of 'c'
Args:
product(:obj:`orcaVis`): The product visitor class
reactant(:obj:`orcaVis`): The reactant visitor class
transition_state(:obj:`orcaVis`): The transition state visitor class
temperature(`float`): The temperature
Returns:
kout(:obj:`rateconst`) : Returns a named tuple of forward and backward rates
"""
prod = product.vib_freq()
react = reactant.vib_freq()
ts = transition_state.vib_freq()
ppnum = prod.freq.loc[~(prod.freq <= 0)].to_numpy()
reactnum = react.freq.loc[~(react.freq <= 0)].to_numpy()
tsdenom = ts.freq.loc[~(ts.freq <= 0)].to_numpy()
temp = Q_(temperature, "K")
# FIXME: Should this be the ureg.speed_of_light instead?
conv = Q_(1, "m/s")
preProd = ppnum.prod() / tsdenom.prod()
preReact = reactnum.prod() / tsdenom.prod()
delProd = transition_state.fin_sp_e - product.fin_sp_e
delReact = transition_state.fin_sp_e - reactant.fin_sp_e
prodExp = -1 * (delProd / (ureg.boltzmann_constant * temp))
reactExp = -1 * (delReact / (ureg.boltzmann_constant * temp))
kf = (preProd * conv).to_base_units() * np.exp(prodExp.to_base_units())
kb = (preReact * conv).to_base_units() * np.exp(reactExp.to_base_units())
return (kf, kb)
[docs]class orcaExp:
"""The class meant to handle experiments generated with wailord.
The general concept is that this is meant to work with the setup wailord
generates. Remember to use `df.round()` for pretty printing!
"""
def __init__(
self, expfolder, deci=3, order_basis=ORDERED_BASIS, order_theory=ORDERED_THEORY
):
"""Initializes base parameters
Args:
expfolder (:obj:`Path`): Output path to the generated wailord experiment
order_basis (:obj:`list`, optional): An ordered list for the basis
sets. Defaults to `ORDERED_BASIS`
order_theory (:obj:`list`, optional): An ordered list for the basis
sets. Defaults to `ORDERED_THEORY`. Unlike `order_basis` is can
vary significantly across experiments.
"""
self.inpconf = None #: Populated by `handle_exp`
self.orclist = None #: Populated by `handle_exp`
self.order_basis = order_basis
self.order_theory = order_theory
self.handle_exp(expfolder)
def __repr__(self):
string = f"""
Experiment: {self.inpconf}
Outputs: {self.orclist}
Ordered Theory: {self.order_theory}
Ordered Basis: {self.order_basis}
"""
return textwrap.dedent(string)
[docs] def handle_exp(self, efol):
"""Populates the internal file variables from the path
Alert:
This is *not* meant to be called by the user!!!!
Args:
efol (:obj:`Path`): Output path
"""
fnames = []
self.inpconf = Konfik(config_path=efol / "orca.yml").config
for root, dirs, files in os.walk(efol.resolve()):
for filename in files:
if "out" in filename and "slurm" not in filename:
fnames.append(Path(f"{root}/{filename}"))
self.orclist = fnames
return
[docs] def get_final_sp_energy(self):
"""Returns a datframe of only the final single point energies
Proxies calls to the base orcaVis class over a series of generated files
Args:
None
Returns:
pd.DataFrame: Returns a data frame of final energies
"""
edatl = []
for runf in self.orclist:
runorc = orcaVis(runf).final_sp_e()
edatl.append(runorc)
fe = pd.DataFrame(edatl)
basis_type = CategoricalDtype(categories=self.order_basis, ordered=True)
theory_type = CategoricalDtype(categories=self.order_theory, ordered=True)
fe["basis"] = fe["basis"].astype(basis_type)
fe["theory"] = fe["theory"].astype(theory_type)
fe.sort_values(
by=["theory", "basis", "final_sp_energy"], ignore_index=True, inplace=True
)
return fe
[docs] def get_ir_spec(self):
"""Returns a datframe of the "ir spectrum"
Proxies calls to the base orcaVis class over a series of generated files
Args:
None
Returns:
pd.DataFrame: Returns a data frame of frequencies
"""
vdatl = []
for runf in self.orclist:
runorc = orcaVis(runf).ir_spec()
vdatl.append(runorc)
ve = pd.concat(vdatl, axis=0)
ve = ve.drop_duplicates()
basis_type = CategoricalDtype(categories=self.order_basis, ordered=True)
theory_type = CategoricalDtype(categories=self.order_theory, ordered=True)
ve["basis"] = ve["basis"].astype(basis_type)
ve["theory"] = ve["theory"].astype(theory_type)
ve.sort_values(by=["theory", "basis"], ignore_index=True, inplace=True)
return ve
[docs] def get_vib_freq(self):
"""Returns a datframe of the vibrational modes
Proxies calls to the base orcaVis class over a series of generated files
Args:
None
Returns:
pd.DataFrame: Returns a data frame of frequencies
"""
vdatl = []
for runf in self.orclist:
runorc = orcaVis(runf).vib_freq()
vdatl.append(runorc)
ve = pd.concat(vdatl, axis=0)
ve = ve.drop_duplicates()
basis_type = CategoricalDtype(categories=self.order_basis, ordered=True)
theory_type = CategoricalDtype(categories=self.order_theory, ordered=True)
ve["basis"] = ve["basis"].astype(basis_type)
ve["theory"] = ve["theory"].astype(theory_type)
ve.sort_values(by=["theory", "basis"], ignore_index=True, inplace=True)
return ve
[docs] def get_vpt2_transitions(self):
"""Returns a datframe of the fundamental transitions table
Proxies calls to the base orcaVis class over a series of generated files
Args:
None
Returns:
pd.DataFrame: Returns a data frame of frequencies
"""
vdatl = []
for runf in self.orclist:
runorc = orcaVis(runf).vpt2_transitions()
vdatl.append(runorc)
ve = pd.concat(vdatl, axis=0)
ve = ve.drop_duplicates()
basis_type = CategoricalDtype(categories=self.order_basis, ordered=True)
theory_type = CategoricalDtype(categories=self.order_theory, ordered=True)
ve["basis"] = ve["basis"].astype(basis_type)
ve["theory"] = ve["theory"].astype(theory_type)
ve.sort_values(by=["theory", "basis"], ignore_index=True, inplace=True)
return ve
[docs] def get_energy_surface(self, etype=["Actual Energy", "SCF Energy"]):
"""Populates an energy surface dataframe
This essentially walks over the generated set of files, and fills out
calls to the base orcaVis class.
Args:
etype (:obj:`list` of :obj:`str`, optional): This is passed to the base
`OrcaVis` class call
Returns:
pd.DataFrame: Returns a data frame of etype energies
"""
if type(etype) == str:
etype = [etype]
edatl = []
for runf in self.orclist:
runsurf = orcaVis(runf).mult_energy_surface(etype=etype)
edatl.append(runsurf)
edat = pd.concat(edatl, axis=0)
edat = edat.drop_duplicates()
basis_type = CategoricalDtype(categories=self.order_basis, ordered=True)
theory_type = CategoricalDtype(categories=self.order_theory, ordered=True)
edat["basis"] = edat["basis"].astype(basis_type)
edat["theory"] = edat["theory"].astype(theory_type)
edat.sort_values(
by=["theory", "basis", "bond_length"], ignore_index=True, inplace=True
)
return edat
[docs] def get_population(self, poptype=["Mulliken", "Loewdin"], /):
"""Populates a population dataframe
This essentially walks over the generated set of files, and fills out
calls to the base orcaVis class.
Args:
poptype (:obj:`list` of :obj:`str`, optional): This is passed to the base
`OrcaVis` class call
Returns:
pd.DataFrame: Returns a data frame of etype energies
"""
if type(poptype) == str:
poptype = [poptype]
popdatl = []
for runf in self.orclist:
runsurf = orcaVis(runf).mult_population_analysis(poptype)
popdatl.append(runsurf)
popdat = pd.concat(popdatl, axis=0)
popdat = popdat.drop_duplicates()
basis_type = CategoricalDtype(categories=self.order_basis, ordered=True)
theory_type = CategoricalDtype(categories=self.order_theory, ordered=True)
popdat["basis"] = popdat["basis"].astype(basis_type)
popdat["theory"] = popdat["theory"].astype(theory_type)
popdat.sort_values(by=["theory", "basis"], ignore_index=True, inplace=True)
return popdat
[docs] def visit_coord_block(self, node, visited_children):
""" Makes a dict of the section (as key) and the key/value pairs. """
cb = node.text.split("\n")
for i, aline in enumerate(cb):
each = aline.split()
cb[i] = " ".join(each)
self.coord_block = "\n".join(cb)
# Could have also just returned and assigned node.text
return node.text
[docs]class orcaVis:
"""The class meant to handle ORCA output files.
Todo:
* Add a grammar and recursive descent later
"""
def __init__(self, ofile):
"""Output file initialization.
This is meant to return base objects to the experiment level class.
Note:
Do not include the `self` parameter in the ``Args`` section.
Args:
ofile (str): The output file generated by ORCA.
eeval (int): The number of energy evaluations
"""
self.eeval = None
self.ofile = ofile
self.runinfo = getRunInfo(self.ofile.parent)
self.fin_sp_e = None
self.get_evals(self.ofile)
self.get_final_e()
def __repr__(self):
return f"{self.ofile}"
[docs] def get_evals(self, ofile):
with open(self.ofile) as of:
flines = of.readlines()
for line in flines:
if OUT_REGEX["energy_evals"].search(line):
self.eeval = int(line.split()[3])
return
[docs] def get_final_e(self, dat=False):
with open(self.ofile) as of:
fInp = of.read()
try:
self.fin_sp_e = (
float(
OUT_REGEX["final_single_point_e"].findall(fInp)[-1].split()[-1]
)
* ureg.hartree
)
except:
raise (
ValueError(f"Final single point energy not found for {self.ofile}")
)
pass
[docs] def final_sp_e(self):
erow = self.runinfo
erow["final_sp_energy"] = self.fin_sp_e.m
erow["unit"] = self.fin_sp_e.u
return erow
[docs] def mult_energy_surface(
self,
etype=["Actual Energy", "MDCI", "MDCI w/o Triples", "SCF Energy"],
npoints=None,
):
"""Multiple Energy surface dataframe generator
This is a helper function to obtain a dataframe which contains multiple
energy surfaces. The implementation leverages the `reduce` function from
`functools` to merge a list of dataframes generated from the
`single_energy_surface` calls.
Args:
etype (str,optional): The type of calculated energy surface to
return. Defaults to `["Actual Energy", "MDCI", "MDCI w/o Triples",
"SCF Energy"]` but can be any valid subset of the same.
npoints (int,optional): The number of points over which a scan has
taken place. Defaults to the number of evaluations calculated in
the output file.
Returns:
pd.DataFrame: Returns a data frame of bond_length and energies
.. _MDCI:
https://www.its.hku.hk/services/research/hpc/software/orca
"""
if isinstance(etype, str) or len(etype) == 1:
if isinstance(etype, list):
etype = etype[0]
single = self.single_energy_surface(
etype=etype
) #: Short circuit if single type is requested
for key in self.runinfo.keys():
single[key] = self.runinfo[key]
return single
elist = []
for et in etype:
runsurf = self.single_energy_surface(etype=et)
elist.append(runsurf)
eDat_all = reduce(lambda df1, df2: pd.merge(df1, df2, on="bond_length"), elist)
for key in self.runinfo.keys():
eDat_all[key] = self.runinfo[key]
return eDat_all
[docs] def single_energy_surface(self, etype="Actual Energy", npoints=None):
"""Single energy surface dataframe generator
For say, QCISD(T), this is essentially the same as a QCISD calculation.
Note:
`MDCI`_ types are meant to work with single reference correlation
methods
Args:
etype (str,optional): The type of calculated energy surface to
return. Defaults to 'Actual Energy' and can be any of `["Actual Energy", "MDCI", "MDCI w/o Triples", "SCF Energy"]`
npoints (int,optional): The number of points over which a scan has
taken place. Defaults to the number of evaluations calculated in
the output file.
Returns:
pd.DataFrame: Returns a data frame of energy surfaces
.. _MDCI:
https://www.its.hku.hk/services/research/hpc/software/orca
"""
if etype not in OUT_REGEX:
raise (NotImplementedError(f"{etype} has not been implemented yet"))
if npoints == None:
npoints = self.eeval
xaxis = []
yaxis = []
sregexp = OUT_REGEX[etype]
with open(self.ofile) as of:
flines = of.readlines()
for lnum, line in enumerate(flines):
if sregexp.search(line):
offset = lnum + 1
for i in range(npoints):
x, y = flines[offset + i].split()
xaxis.append(x)
yaxis.append(y)
edat = pd.DataFrame(
data=zip(xaxis, yaxis), columns=["bond_length", etype], dtype="float64"
)
if edat.empty:
raise (
ValueError(f"{etype} surface not found for {self.runinfo['theory']}")
)
return edat
[docs] def vib_freq(self):
"""Get the vibrational frequencies, and fails if there are more than one
imaginary frequency"""
sregexp = OUT_REGEX["Vibrational Frequency"]
vline = namedtuple("vline", "Mode freq imaginary")
accumulate = []
with open(self.ofile) as of:
flines = of.readlines()
for lnum, line in enumerate(flines):
if sregexp.search(line):
offset = lnum + 5
i = 0
while flines[offset + i] != "\n":
raw = flines[offset + i].split()
if len(raw) == 3:
v = vline(
Mode=int(raw[0].replace(":", "")),
freq=float(raw[1]),
imaginary=False,
)
elif len(raw) == 5:
v = vline(
Mode=int(raw[0].replace(":", "")),
freq=float(raw[1]),
imaginary=True,
)
accumulate.append(v)
i = i + 1
vdat = pd.DataFrame(accumulate)
vdat["freq"] = vdat["freq"].astype("pint[cm_1]")
# TODO: Add experiment layer
# TODO: Error if more than one imaginary
# Check if greater 100 then it is not numerical
# if vdat.empty:
# raise (
# ValueError(
# f"Spectra not found for {self.runinfo['theory']}, did you run FREQ?"
# )
# )
# elif vdat.imaginary.value_counts()[1] > 1:
# warnings.warn(
# f"More than one imaginary mode, you did not find a saddle point",
# UserWarning,
# )
# elif vdat.imaginary.value_counts()[1] == 0:
# raise (
# ValueError(
# "No imaginary frequencies found, check the geometry of reactant and product configurations"
# )
# )
return vdat
[docs] def vpt2_transitions(self):
"""Grabs the fundamental transition analysis from a VPT2 calculation"""
sregexp = OUT_REGEX["vpt2trans"]
vline = namedtuple("vline", "Mode harmonic_freq vpt2_freq freq_diff")
accumulate = []
with open(self.ofile) as of:
flines = of.readlines()
for lnum, line in enumerate(flines):
if sregexp.search(line):
offset = lnum + 4
i = 0
while "---" not in flines[offset + i]:
raw = flines[offset + i].split()
v = vline(
Mode=int(raw[0]),
harmonic_freq=float(raw[1]),
vpt2_freq=float(raw[2]),
freq_diff=float(raw[3]),
)
accumulate.append(v)
i = i + 1
vdat = pd.DataFrame(accumulate)
vdat["harmonic_freq"] = vdat["harmonic_freq"].astype("pint[cm_1]")
vdat["vpt2_freq"] = vdat["vpt2_freq"].astype("pint[cm_1]")
vdat["freq_diff"] = vdat["freq_diff"].astype("pint[cm_1]")
if vdat.empty:
raise (
ValueError(
f"Data not found for {self.runinfo['theory']}, did you run VPT2?"
)
)
else:
for key in self.runinfo.keys():
vdat[key] = self.runinfo[key]
return vdat
[docs] def ir_spec(self):
"""Grabs the non-ZPE corrected IR Spectra and the dipole derivatives for
intensities"""
sregexp = OUT_REGEX["irSpectrum"]
vline = namedtuple("vline", "Mode freq T2 TX TY TZ")
accumulate = []
with open(self.ofile) as of:
flines = of.readlines()
for lnum, line in enumerate(flines):
if sregexp.search(line):
offset = lnum + 5
i = 0
while flines[offset + i] != "\n":
raw = flines[offset + i].split()
raw = [i for i in raw if i != ":" if i != "(" if i != ")"]
v = vline(
Mode=int(raw[0].replace(":", "")),
freq=float(raw[1]),
T2=float(raw[2]),
TX=float(raw[3].replace("(", "")),
TY=float(raw[4]),
TZ=float(raw[5].replace(")", "")),
)
accumulate.append(v)
i = i + 1
vdat = pd.DataFrame(accumulate)
vdat["T2"] = vdat["T2"].astype("pint[km/mol]")
vdat["freq"] = vdat["freq"].astype("pint[cm_1]")
if vdat.empty:
raise (
ValueError(
f"Spectra not found for {self.runinfo['theory']}, did you run FREQ?"
)
)
else:
for key in self.runinfo.keys():
vdat[key] = self.runinfo[key]
return vdat
[docs] def single_population_analysis(self, poptype="Mulliken", /):
"""Single population analysis dataframe generator
Args:
poptype (str,optional): The type of population analysis to
return. Defaults to 'Mulliken'.
Returns:
pd.DataFrame: Returns a data frame of the population analysis
"""
if poptype not in OUT_REGEX:
raise (NotImplementedError(f"{poptype} has not been implemented yet"))
sregexp = OUT_REGEX[poptype]
chargeline = namedtuple("chargeline", "anum atype pcharge")
fulline = namedtuple("fulline", "anum atype pcharge pspin")
accumulate = []
with open(self.ofile) as of:
flines = of.readlines()
for lnum, line in enumerate(flines):
if sregexp.search(line):
offset = lnum + 2
i = 0
while (
"Sum" not in flines[offset + i]
and "--" not in flines[offset + i + 1]
):
raw = flines[offset + i].split()
if "SPIN" in line:
c = fulline(
anum=raw[0],
atype=raw[1],
pcharge=float(raw[-2]),
pspin=float(raw[-1]),
)
else:
c = chargeline(
anum=raw[0],
atype=raw[1],
pcharge=float(raw[-1]),
)
accumulate.append(c)
i = i + 1
popdat = pd.DataFrame(accumulate)
step = popdat.anum.count() / popdat.anum.nunique()
popdat["step"] = np.asarray(
np.repeat(np.arange(1, step + 1), popdat.anum.nunique()), dtype=int
)
popdat["population"] = poptype
if popdat.empty:
raise (ValueError(f"{poptype} not found for {self.runinfo['theory']}"))
return popdat
[docs] def mult_population_analysis(self, poptype=["Mulliken", "Loewdin"], /):
"""Multiple population analysis dataframe generator
This is a helper function to obtain a dataframe which contains multiple
population analysis outputs. The implementation is similar to the energy surface helper.
Args:
poptype (str,optional): The type of calculated energy surface to
return.
Returns:
pd.DataFrame: Returns a data frame of population analysis types
"""
if isinstance(poptype, str) or len(poptype) == 1:
if isinstance(poptype, list):
poptype = poptype[0]
single = self.single_population_surface(poptype)
for key in self.runinfo.keys():
single[key] = self.runinfo[key]
return single
poplist = []
for pt in poptype:
runsurf = self.single_population_analysis(pt)
poplist.append(runsurf)
popdat = reduce(lambda df1, df2: pd.concat([df1, df2]), poplist)
for key in self.runinfo.keys():
popdat[key] = self.runinfo[key]
return popdat