gitlab | materialsmodeling.org
Source code for strongcoca.response.nwchem

import logging
import os
from collections import OrderedDict
from typing import Any, Dict, IO, List, Union

import numpy as np
from ase import Atom, Atoms
from ase.calculators.singlepoint import (SinglePointDFTCalculator,
                                         SinglePointKPoint)

from ..units import eV_to_au, au_to_eV, au_to_eVA
from ..types import Path_str, Vector
from .casida import CasidaResponse
from .utilities import Broadening, NoArtificialBroadening


logger = logging.getLogger(__name__)



[docs]
def read_nwchem_casida(filename: Path_str,
                       broadening: Broadening = NoArtificialBroadening(),
                       name: str = 'NWChem Casida Response') -> CasidaResponse:
    """Returns a freshly initialized :class:`~strongcoca.response.CasidaResponse` object
    using an NWChem output file to set up the components of the Casida representation.

    Parameters
    ----------
    filename
        Name of NWChem output file; can be compressed.
    broadening
        Artificial broadening used for the continuous response;
        defaults to no broadening.
    name
        Name of subsystem.
    """
    logger.debug(f'Entering read_nwchem_casida: filename= {filename}')
    try:
        atoms = _read_nwchem_logfile(filename)
    except Exception as e:
        raise Exception(f'Failed to parse NWChem output file {filename}\n{str(e)}')
    if 'excited_states' not in atoms.info or len(atoms.info['excited_states']) is False:
        raise ValueError(f'NWChem output file {filename} does'
                         ' not contain excited states information')

    # include only singlet excitations
    excited_states = atoms.info['excited_states']
    excited_states = [e for e in excited_states if e['type'] == 'singlet']
    n = len(excited_states)

    D_n = np.zeros(n)
    K_nn = np.zeros((n, n))
    mu_nv = np.zeros((n, 3))

    for i, state in enumerate(excited_states):
        D_n[i] = state['energy'] * eV_to_au
        mu_nv[i, :] = state['dipole_moment']

    response = CasidaResponse(D_n, K_nn, mu_nv, broadening=broadening,
                              name=name, units='au')
    response._atoms = atoms
    response._pbc = atoms.pbc
    return response



def _read_nwchem_logfile(filename: Path_str) -> Atoms:
    """This function parses the log file from a NWChem calculation and stores the
    data as an :class`ASE Atoms <~ase.Atoms>` object. Each element of the list
    corresponds to one ionic step. Additional information pertaining e.g., to
    eigenvalues or excitations are stored in the 'info' dictionary of the ASE
    atoms class.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    Atomic configuration with associated information.
    """
    import gzip
    # GzipFile doesn't appear as IO type. If it does at some point,
    # it should suffice to do just
    # fd: IO
    fd: Union[IO, gzip.GzipFile]

    filename = os.path.expanduser(filename)
    if filename.endswith('.gz'):
        fd = gzip.open(filename)
    elif filename.endswith('.bz2'):
        import bz2
        fd = bz2.open(filename, 'rb')
    else:
        fd = open(filename, 'rb')
    data = [row.decode() if isinstance(row, bytes) else row for row in fd]

    atoms = Atoms(pbc=False)
    energy: float = np.nan
    forces: List[Vector] = []
    eKS_energies: List[float] = []
    eKS_occupations: List[float] = []
    parameters: Dict[str, Any] = OrderedDict()

    for n, line in enumerate(data):

        if 'echo of input deck' in line:
            input_deck = []
            k = 1
            while '=====================' not in data[n+k]:
                line = data[n+k].strip()
                k += 1
                if len(line) == 0:
                    continue
                input_deck.append(line)
            parameters['input_deck'] = input_deck

        # Computational parameters
        elif 'XC Information' in line:
            parameters['xc'] = data[n+2].strip()

        elif 'Charge           :' in line:
            flds = line.split()
            assert len(flds) == 3, \
                f'NWChem parser failed to read charge from line {n+1}\n --> {line}'
            parameters['charge'] = float(flds[2])

        elif 'Spin multiplicity:' in line:
            flds = line.split()
            assert len(flds) == 3, \
                f'NWChem parser failed to read spin multiplicity from line {n+1}\n --> {line}'
            parameters['spin_multiplicity'] = int(flds[2])

        # Atomic configuration
        elif 'Output coordinates in angstroms' in line:
            atoms = Atoms(pbc=False)
            k = 4
            while len(data[n+k].strip()) > 0:
                flds = data[n+k].split()
                assert len(flds) == 6, \
                    'NWChem parser failed to read atomic number and' \
                    f' coordinates from line {n+k+1}\n --> {data[n+k]}'
                atom_type = flds[1]
                position = np.array([float(flds[3]), float(flds[4]), float(flds[5])])
                atoms.append(Atom(atom_type, position))
                k += 1

        # Total energy and initialization of Atoms object
        elif 'Total DFT energy' in line:
            flds = line.split()
            assert len(flds) == 5, \
                f'NWChem parser failed to read energy from line {n+1}\n --> {line}'
            energy = float(flds[4]) * au_to_eV

        # Forces
        elif 'DFT ENERGY GRADIENTS' in line:
            k = 4
            forces = []
            while len(data[n+k].strip()) > 0:
                flds = data[n+k].split()
                assert len(flds) == 8, \
                    f'NWChem parser failed to read force vector from line {n+k}\n --> {data[n+k]}'
                force = np.array([float(flds[5]), float(flds[6]), float(flds[7])])
                force *= au_to_eVA
                forces.append(force)
                k += 1

        # Kohn-Sham eigen energies
        elif 'DFT Final Molecular Orbital Analysis' in line:
            eKS_energies = []
            eKS_occupations = []

        elif 'Vector' in line and 'Occ' in line:
            s = line
            s = s.replace('Vector', ' ')
            s = s.replace('Occ=', ' ')
            s = s.replace('E=', ' ')
            s = s.replace('D+', 'E+').replace('D-', 'E-')
            flds = s.split()
            assert len(flds) >= 3, \
                f'NWChem parser failed to read KS eigenstate from line {n+1}\n --> {line}'
            eKS_occupations.append(float(flds[1]))
            eKS_energies.append(float(flds[2]) * au_to_eV)

        # Excitations
        elif 'NWChem TDDFT Module' in line:
            maintag = 'excited_states'
            atoms.info[maintag] = []

        elif 'Root' in line:
            flds = line.replace('=', ' ').split()
            assert len(flds) > 5, \
                f'NWChem parser failed to read Casida root from line {n+1}\n --> {line}'

            maintag = 'excited_states'
            if maintag not in atoms.info:
                continue

            transition: Dict[str, Any] = {}
            atoms.info[maintag].append(transition)

            # basic information (energy, type, symmetry, ...)
            transition['type'] = flds[2]
            transition['symmetry'] = flds[3]
            transition['energy'] = float(flds[4]) * au_to_eV

            transition['dipole_moment'] = np.zeros(3)
            transition['quadrupole_moment'] = np.zeros((3, 3))

            if 'Spin forbidden' in data[n+2]:
                continue

            # transition moment, dipole
            lineno = n + 2
            flds = data[lineno].split()
            assert len(flds) == 8, \
                'NWChem parser failed to read transition dipole moment' \
                f' from line {lineno+1}\n --> {data[lineno]}'
            transition['dipole_moment'][0] = float(flds[3])
            transition['dipole_moment'][1] = float(flds[5])
            transition['dipole_moment'][2] = float(flds[7])

            # transition moment, quadrupole
            lineno = n + 3
            flds = data[lineno].split()
            assert len(flds) == 8, \
                'NWChem parser failed to read transition quadrupole moment' \
                f' from line {lineno+1}\n --> {data[lineno]} (1)'
            transition['quadrupole_moment'][0, 0] = float(flds[3])
            transition['quadrupole_moment'][0, 1] = float(flds[5])
            transition['quadrupole_moment'][0, 2] = float(flds[7])
            lineno = n + 4
            flds = data[lineno].split()
            assert len(flds) == 8, \
                'NWChem parser failed to read transition quadrupole moment' \
                f' from line {lineno+1}\n --> {data[lineno]} (2)'
            transition['quadrupole_moment'][1, 1] = float(flds[3])
            transition['quadrupole_moment'][1, 2] = float(flds[5])
            transition['quadrupole_moment'][2, 2] = float(flds[7])
            transition['quadrupole_moment'][1, 0] = transition['quadrupole_moment'][0, 1]
            transition['quadrupole_moment'][2, 0] = transition['quadrupole_moment'][0, 2]
            transition['quadrupole_moment'][2, 1] = transition['quadrupole_moment'][1, 2]

        # Parsing Hessian from 'frequencies' run
        elif 'P.Frequency' in line:
            maintag = 'normal_modes'
            atoms.info[maintag] = {}
            offset = 0
            if len(atoms.info[maintag].keys()) > 0:
                offset = sorted(atoms.info[maintag].keys())[-1]

            flds = line.split()
            for m, fld in enumerate(flds[1:]):
                mode = offset + m + 1
                atoms.info[maintag][mode] = {}
                atoms.info[maintag][mode]['frequency'] = float(fld)
                atoms.info[maintag][mode]['normal_mode'] = np.zeros((len(atoms), 3))

            for k in range(3 * len(atoms)):
                lineno = n + k + 2
                line = data[lineno].strip()
                flds = line.split()
                assert len(flds) > 2, \
                    f'NWChem parser failed to read normal mode from line {lineno+1}\n' \
                    f' --> {line} (1)'
                iat = int(k / 3)
                assert iat <= len(atoms), \
                    f'NWChem parser failed to read normal mode from line {lineno+1}\n' \
                    f' --> {line} (2)'
                alph = k % 3
                for m, fld in enumerate(flds[1:]):
                    mode = offset + m + 1
                    assert mode <= 3 * len(atoms), \
                        f'NWChem parser failed to read normal mode from line {lineno+1}\n' \
                        f' --> {line} (3)'
                    atoms.info[maintag][mode]['normal_mode'][iat, alph] = float(fld)

    # Compile Atoms object
    if energy is np.nan or len(atoms) == 0:
        raise ValueError(f'Failed to extract energy and configuration from input file {filename}')
    atoms.calc = SinglePointDFTCalculator(atoms, energy=energy, forces=forces)
    atoms.calc.name = 'nwchem'
    atoms.calc.parameters = parameters
    if len(eKS_energies) > 0 and len(eKS_occupations) > 0:
        atoms.calc.kpts = [SinglePointKPoint(1, 0, 0, eKS_energies, eKS_occupations)]

    return atoms