Source code for scine_autocas.interfaces.molcas

"""Provide interface to molcas.

Currently, no Python-Bindings exist to the Molcas electronic structure
programe, hence this whole folder exists. Only here implemented
functionalities are supported by this interface and "pymolcas" is called
via subprocess.
"""
# -*- coding: utf-8 -*-
__copyright__ = """This code is licensed under the 3-clause BSD license.
Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group.
See LICENSE.txt for details.
"""

import os
import re
import subprocess
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np

from scine_autocas.autocas_utils.molecule import Molecule
from scine_autocas.interfaces import Interface
from scine_autocas.interfaces.molcas.dumper import Dumper
from scine_autocas.interfaces.molcas.environment import Environment
from scine_autocas.interfaces.molcas.input_handler import InputHandler
from scine_autocas.interfaces.molcas.molcas_hdf5_utils import MolcasHdf5Utils
from scine_autocas.interfaces.qcmaquis import Qcmaquis


[docs]class Molcas(Interface): """Interface to the electronic structure program Molcas. Since there exists no Python interface right now, 'pymolcas' is called via subprocess, input files have to be written and hdf5 files have to be parsed and modified. Attributes ---------- settings : Molcas.Settings provides all settings for molcas environment : Environment handles molcas environment variables qc_maquis : Qcmaquis handles qcmaquis related files and build entropies hdf5_utils : MolcasHdf5Utils handles molcas hdf5 files orbital_file : str the path of the orbital file project_name : str the molcas project name dump : bool flag to enable dumping dumper : Dumper handles dumping of molcas calculations input_handler : InputHandler handles molcas input files s1_list : List[np.ndarray] stores all s1_entropies from large cas calculations s2_list : List[np.ndarra] stores all s2_entropies from large cas calculations mutual_information_list : List[np.ndarray] stores all mutual information from large cas calculations caspt2_energies : List[float] stores all caspt2 energies Notes ----- The settings object is already in the __slots__ of the base interface. See Also -------- settings : Interface.Settings """
[docs] class Settings(Interface.Settings): """Controll the input parameters and writes input file for Molcas. Attributes ---------- point_group : str, default = "C1" point group of the molecule in molcas notation cholesky : bool, default = True enable cholesky decomposition uhf : bool, default = False enable unrestricted hf active_electrons : int number of active electrons active_orbitals : int number of active orbitals ci_root_string : str root string for excited states alpha_or_beta_string : int, default = 1 UNUSED fiedler : bool, default = False enable fiedler ordering for dmrg ipea : float, default = 0.00 IPEA shift for CASPT2 initial_orbitals : bool, default = True flag to turn off hf calculations for dmrg and final calculations only_hf : bool, default = False stop molcas from doing cas calculations orbital_localisation : bool, default = False flag for orbital localisation localisation_space : str, default = "OCCUpied" the orbital localisation space localisation_method : str, default = "PIPEk-Mezet" the localisation method n_excited_states : int, default = 0 number of excited states ci_size : List[int] size of the davidson weights : List[float] weight for each state roots : List[int] molcas nroots Notes ----- For most attributes defined here, please see also the MOLCAS manual. """ __slots__ = ( "point_group", "cholesky", "uhf", "active_electrons", "active_orbitals", "ci_root_string", "alpha_or_beta_string", "fiedler", "ipea", "initial_orbitals", "only_hf", "orbital_localisation", "localisation_space", "localisation_method", "ci_size", "weights", "roots", "skip_scf" )
[docs] def __init__(self, molecules: List[Molecule], settings_dict: Optional[Dict] = None): """Construct class.""" super().__init__(molecules=molecules, settings_dict=settings_dict) self.point_group: str = "C1" """ point group of the molecule in Molcas notation, e.g. "": C1 "XY": C2 "X": Cs "XYZ": Ci "XY Y": C2v "XY XYZ": C2h "XY XZ": D2 "XY XZ XYZ": C2h """ self.cholesky: bool = True """toggle Cholesky decomposition""" self.uhf: bool = False """enable unrestricted calculation, for an uneven number of electrons it is automatically enabled""" self.active_electrons: int = 0 """number of active electrons""" self.active_orbitals: int = 0 """number of active orbitals, only used if no orbital file with typeindices is provided""" self.ci_root_string: str = "" """string of the reference determinant""" self.alpha_or_beta_string: int = 1 """string of determinants, NOT used""" self.fiedler: bool = False """enable fiedler ordering""" self.ipea: float = 0.0 """IPEA shift for CASPT2, only used if CASPT2 is in method""" self.initial_orbitals: bool = True """flag to turn of subsequent HF calculations""" self.ci_size: Optional[int] = None """size of the davidson""" self.roots: Optional[List[int]] = None """molcas nroots""" self.weights: Optional[List[int]] = None """weight for each state""" self.only_hf: bool = False """stop molcas from doing cas calculations""" self.orbital_localisation: bool = False """enable orbital localisation""" self.localisation_space: str = "OCCUpied" """localisation space, e.g. OCCUpied, VIRTual """ self.localisation_method: str = "PIPEk-Mezey" """localisation method, e.g. PIPEk-Mezey, BOYS, EDMIston-Ruedenberg, CHOLesky, PAO """ self.skip_scf = False """If true, only one SCF iteration is performed.""" if settings_dict: self.apply_settings(settings_dict)
# Molcas __slots__ = ( "environment", "qc_maquis", "hdf5_utils", "orbital_file", "project_name", "dump", "dumper", "input_handler", "s1_list", "s2_list", "mutual_information_list" "caspt2_energies", )
[docs] def __init__(self, molecules: List[Molecule], settings_dict: Optional[Dict[str, Any]] = None): """Construct a molcas interface. Parameters ---------- molecule : Molecule contains molecular information settings_dict : Dict[str, Any], optional holds all settings provided by a yaml input See Also -------- settings_dict : InputHandler """ self.settings: Molcas.Settings """provides all settings for molcas""" # initialize base interface super().__init__(molecules=molecules, settings_dict=settings_dict) self.environment: Environment = Environment() """coltrols the enviroment for Molcas""" self.qc_maquis: Qcmaquis = Qcmaquis() """builds s1, s2 and Ipq from QCMaquis HDF5 files""" self.hdf5_utils: MolcasHdf5Utils = MolcasHdf5Utils() """handles Molcas HDF5 files""" self.orbital_file: str = "" """name of the orbital file, which has to be modified to enable Molcas "Typeindex" """ self.project_name: str = "autocas_project" """name of the project""" self.dump: bool = True """allows to dump all calculations in a separate directory""" self.dumper: Dumper = Dumper() """controls the dumping of calculations by creating a directory structure""" self.input_handler: InputHandler = InputHandler() """handles molcas input files""" self.s1_list: List[np.ndarray] """stores all s1_entropies from large cas calculations""" self.s2_list: List[np.ndarray] """stores all s2_entropies from large cas calculations""" self.mutual_information_list: List[np.ndarray] """stores all mutual information from large cas calculations""" self.caspt2_energies: Optional[List[float]] = None """stores all caspt2 energies""" # make ci string if self.settings.n_excited_states > 0: self.make_ci_root( self.settings.n_excited_states, self.settings.ci_size, self.settings.roots, self.settings.weights, ) # apply settings to subclasses if settings_dict is not None: for key in settings_dict: if key == "environment": self.environment = Environment(settings_dict[key]) elif key == "qc_maquis": pass # self.qc_maquis = Qcmaquis(settings_dict[key]) elif key == "hdf5_utils": pass # self.hdf5_utils = MolcasHdf5Utils(settings_dict[key]) elif key == "dumper": pass # self.dumper = Dumper(settings_dict[key]) elif key == "input_handler": pass # self.input_handler = InputHandler(settings_dict[key]) elif key == "settings": pass elif hasattr(self, key): setattr(self, key, settings_dict[key])
[docs] def make_ci_root( self, n_roots: Optional[int] = None, ci_size: Optional[int] = None, roots: Optional[List[int]] = None, weights: Optional[List[int]] = None, ): """Make the ci root string for excited state calculations. Parameters ---------- n_roots : int number of roots ci_size : List[int] size of ci matrix in davidson roots : List[int] roots to use for calculation weights : List[float] weight of each root Returns ------- root_string : str The full ci_root string, required for excited state calculations """ if n_roots is None: n_roots = self.settings.n_excited_states root_string = str(n_roots) + " " if roots is not None and ci_size is not None: assert max(roots) >= ci_size root_string += str(ci_size) + "; " for root in roots: root_string += str(root) + " " root_string += "; " if weights is not None: assert len(weights) == len(roots) for weight in weights: root_string += str(weight) + " " else: for _ in range(len(roots)): root_string += str(1) + " " elif ci_size is not None: root_string += str(ci_size) + " " + str(1) else: root_string += str(n_roots) + " " + str(1) self.settings.ci_root_string = root_string return root_string
[docs] def check_calculate_input(self, cas_occupation: Optional[List[int]] = None, cas_indices: Optional[List[int]] = None) -> Tuple[List[int], List[int]]: """Check the input for the calculate function. Parameters ---------- cas_occupation: : List[int], optional a list which contains the occupation of each orbital in the active space cas_indices: : List[int], optional a list which contains the indice of each orbital in the active space Returns ------- cas_occupation: : List[int] a list which contains the occupation of each orbital in the active space cas_indices: : List[int] a list which contains the indice of each orbital in the active space Raises ------ ValueError if different number of occupations and indices """ if cas_occupation is None or cas_indices is None: print("Calculating only mean field.") self.settings.only_hf = True return [], [] if len(cas_occupation) is not len(cas_indices): raise ValueError( "Not the same number of orbital indices and orbital occupations" ) return cas_occupation, cas_indices
[docs] def set_cas_variables(self, cas_occupation: List[int]): """Set spin_multiplicity, active_electrons, active_orbitals. Parameters ---------- cas_occupation : List[int] a list which contains the occupation of each orbital in the active space """ # check multiplicity spin_mult = 1 for i in cas_occupation: if i == 1: spin_mult += 1 self.settings.active_orbitals = len(cas_occupation) self.settings.active_electrons = int(sum(cas_occupation)) self.settings.spin_multiplicity = spin_mult
[docs] def run_molcas(self): """Run molcas, from current state. The function calls the molcas binary through a subprocess """ # write input input_file = self.settings.work_dir + "/" + self.project_name + ".input" self.input_handler.write_input(self.settings, input_file, self.orbital_file) # setup environment self.environment.project_name = self.project_name environment = self.environment.make_environment(self.settings.work_dir) # save current location current_dir = os.getcwd() os.chdir(self.settings.work_dir) pymolcas_string = self.environment.molcas_binary + " " + self.environment.molcas_flags print("Starting MOLCAS:") print(self.environment.molcas_binary, self.environment.molcas_flags) print("MOLCAS environment:") print(self.environment) calculation_process = subprocess.Popen( [ f"{pymolcas_string} {input_file}" ], env=environment, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) calculation_process.communicate() # go back to current location os.chdir(current_dir)
[docs] def reorder_indices_from_symmetry(self, cas_indices: List[int]) -> List[int]: """Reorder cas indices based on mo energies. Molcas changes the index order for calculations exploiting point group symmetry. Hence, these indices have to be reordered to fit entropies and other variables. Parameters ---------- cas_indices : List[int], optional a list which contains the indice of each orbital in the active space Returns ------- cas_indices_sym : List[int], optional a list which contains the indice of each orbital in the active space Raises ------ ValueError if no MO energies can be read by the MolcasHdf5Utils instance """ # check for mo energies if self.hdf5_utils.mo_energies: pass elif self.orbital_file != "": self.hdf5_utils.read_hdf5(self.orbital_file) else: raise ValueError("Please provide a scf.h5 file") # sort orbitals for correct indices tmp_mo_energies = np.array(self.hdf5_utils.mo_energies) sort_key = tmp_mo_energies.argsort() sort_key = sort_key.astype(int, copy=False) mo_energies_sorted = np.array(self.hdf5_utils.mo_energies)[sort_key].tolist() # get correct orbital indices cas_indices_sym = [] for i in cas_indices: cas_indices_sym.append( self.hdf5_utils.mo_energies.index(mo_energies_sorted[i]) ) # for i in cas_indices: # cas_mos.append(mo_energies_sorted.index(self.hdf5_utils.mo_energies[i])) # cas_mos = np.array(cas_mos) return cas_indices_sym
[docs] def run_initial_orbitals(self): """Generate initial orbitals.""" # check for dumping if self.dump: self.dumper.setup_sub_dir() self.settings.work_dir = self.dumper.current_dir # start calculation print("Starting initial orbital calculation.") self.run_molcas() print("Initial orbitals finished.") # set class variables self.orbital_file = (self.settings.work_dir + "/" + self.project_name + ".scf.h5") self.dumper.orbital_file = self.orbital_file # get typeindice (and other variables) self.hdf5_utils.read_hdf5(self.orbital_file) self.settings.initial_orbitals = False
[docs] def reorder_measurements_for_symmetry(self, cas_indices: List[int]): """Reorder dmrg measurements with respect to cas indices. If point group symmetry is used, the measurements usually are in an different order than the indices. Hence, this function is used to reorder them. Parameters ---------- cas_indices : List[int], optional a list which contains the indice of each orbital in the active space """ sort_key = (np.array(cas_indices).argsort()).argsort() self.qc_maquis.s1_entropy = self.qc_maquis.s1_entropy[sort_key] self.qc_maquis.s2_entropy = self.qc_maquis.s2_entropy[:, sort_key][sort_key, :] self.qc_maquis.mutual_information = self.qc_maquis.mutual_information[sort_key][:, sort_key]
[docs] def calculate(self, cas_occupation: Optional[List[int]] = None, cas_indices: Optional[List[int]] = None ) -> Tuple[Union[float, np.ndarray], Union[np.ndarray, List[np.ndarray]], Union[np.ndarray, List[np.ndarray]], Union[np.ndarray, List[np.ndarray]]]: """Calculate a system with the correpsonding setting. If symmetry is enabled the cas_occupation and cas_indices are still expected to be ordered symmetry independent. Also the s1, s2 and mutual information matrices are reordered to be symmetry indipenedent. Parameters ---------- cas_occupation : List[int], optional a list which contains the occupation of each orbital in the active space cas_indices : List[int], optional a list which contains the indice of each orbital in the active space Returns ------- energy : Union[float, np.ndarray] energy, or enery for each state s1 : Union[np.ndarray, List[np.ndarray]] single orbital entropy, or s1 for each state s2 : Union[np.ndarray, List[np.ndarray]] two orbital entropy, or s2 for each state mutual_information : Union[np.ndarray, List[np.ndarray]] mutual information, or mutual information for each state """ # check input cas_occupation, cas_indices = self.check_calculate_input(cas_occupation, cas_indices) # setup dumping if self.dump: self.dumper.project_dir = self.settings.work_dir self.dumper.project_name = self.project_name self.dumper.create_project_dir() # set internal variables if not self.settings.only_hf: self.set_cas_variables(cas_occupation) # check for initial orbitals if self.settings.initial_orbitals: self.run_initial_orbitals() # make everything safe for symmetry if self.settings.point_group != "C1": cas_indices = self.reorder_indices_from_symmetry(cas_indices) # stop after hartree fock if self.settings.only_hf: self.settings.work_dir = self.dumper.project_dir self.settings.only_hf = False return (0.0, np.array([]), np.array([]), np.array([])) # prepare dmrg calculations if self.dump: # ensure project dir exists and set corrects path self.dumper.create_project_dir() self.dumper.setup_sub_dir() self.settings.work_dir = self.dumper.current_dir # modify orbital file in dmrg or final self.orbital_file = ( self.settings.work_dir + "/" + self.project_name + ".scf.h5_sel" ) self.hdf5_utils.modify_hdf5(self.orbital_file, cas_indices) # start calculation print("Starting calculation.") self.run_molcas() print("Calculation finished.") # make measurements try: self.make_dmrg_measurements() # sort entropies if self.settings.point_group != "C1": # self.reorder_indices_from_symmetry(cas_indices) self.reorder_measurements_for_symmetry(cas_indices) self.settings.work_dir = self.dumper.project_dir energy = self.hdf5_utils.energy if self.caspt2_energies is not None: energy = np.array(self.caspt2_energies) if self.settings.n_excited_states > 0: return (energy, self.s1_list, self.s2_list, self.mutual_information_list) return ( energy, self.qc_maquis.s1_entropy, self.qc_maquis.s2_entropy, self.qc_maquis.mutual_information ) except FileNotFoundError as exc: raise FileNotFoundError("Calculation failed") from exc
[docs] def make_dmrg_measurements(self): """Build all entropies from QCMaquis HDF5 file.""" if self.settings.n_excited_states > 0: s1_list = [] s2_list = [] mutual_information_list = [] if os.path.isfile(self.settings.work_dir + "/" + self.project_name + ".results_state.0.h5"): for i in range(self.settings.n_excited_states): self.qc_maquis.read_hdf5( self.settings.work_dir + "/" + self.project_name + ".results_state." + str(i) + ".h5" ) self.qc_maquis.make_diagnostics() s1_list.append(self.qc_maquis.s1_entropy) s2_list.append(self.qc_maquis.s2_entropy) mutual_information_list.append(self.qc_maquis.mutual_information) self.s1_list = s1_list self.s2_list = s2_list self.mutual_information_list = mutual_information_list else: # no dmrg pass else: if os.path.isfile(self.settings.work_dir + "/" + self.project_name + ".results_state.0.h5"): self.qc_maquis.read_hdf5(self.settings.work_dir + "/" + self.project_name + ".results_state.0.h5") self.qc_maquis.make_diagnostics() else: # no dmrg pass if os.path.isfile(self.settings.work_dir + "/" + self.project_name + ".dmrgscf.h5"): self.hdf5_utils.get_energy(self.settings.work_dir + "/" + self.project_name + ".dmrgscf.h5") elif os.path.isfile(self.settings.work_dir + "/" + self.project_name + ".rasscf.h5"): self.hdf5_utils.get_energy(self.settings.work_dir + "/" + self.project_name + ".rasscf.h5") else: pass self.get_caspt2_from_log(self.settings.work_dir + "/" + self.project_name + ".log")
[docs] def _convert_type_indices(self, type_indices): indices = [] for i, index in enumerate(type_indices): if ( index.decode("UTF-8") == "2" or index.decode("UTF-8") == "1" ): indices.append(i) return indices
[docs] def get_caspt2_from_log(self, file_name: str): """Get energy from molcas log. Currently used for CASPT2. """ energies = [] with open(file_name, "r", encoding="UTF-8") as molcas_log: caspt2_flag = False for line in molcas_log: if caspt2_flag is True: if re.search("Total energy:", line): energies.append(float(line.split()[-1])) if re.search("Total CASPT2 energies:", line): caspt2_flag = True if caspt2_flag is True: self.caspt2_energies = energies
[docs] def analyze( self, qcmaquis_result_file: Optional[str] = None, molcas_hdf5_file: Optional[str] = None ) -> Union[Tuple[np.ndarray, np.ndarray, np.ndarray, List[int], List[int]], Tuple[List[int], List[int]], Tuple[np.ndarray, np.ndarray, np.ndarray]]: """Analyze output files.""" orbital_indices: Optional[List[int]] = None if molcas_hdf5_file: self.hdf5_utils.read_hdf5(molcas_hdf5_file) orbital_indices = self._convert_type_indices(self.hdf5_utils.type_indices) if qcmaquis_result_file: self.qc_maquis.read_hdf5(qcmaquis_result_file) self.qc_maquis.make_diagnostics() if molcas_hdf5_file is not None and qcmaquis_result_file is not None and orbital_indices is not None: return ( self.qc_maquis.s1_entropy, self.qc_maquis.s2_entropy, self.qc_maquis.mutual_information, orbital_indices, self.hdf5_utils.occupations, ) if molcas_hdf5_file is not None and qcmaquis_result_file is None and orbital_indices is not None: return orbital_indices, self.hdf5_utils.occupations if molcas_hdf5_file is None and qcmaquis_result_file: return ( self.qc_maquis.s1_entropy, self.qc_maquis.s2_entropy, self.qc_maquis.mutual_information, ) raise ValueError("analyze function requires at least one argument!")
[docs] def resource_estimate(self): """Estimate if DMRG is feasible.""" raise NotImplementedError("Resource estimation is not implemented yet")