Source code for scine_puffin.jobs.orca_geometry_optimization

# -*- coding: utf-8 -*-
from __future__ import annotations
__copyright__ = """ This code is licensed under the 3-clause BSD license.
Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group.
See LICENSE.txt for details.
"""

import os
import re
import subprocess
import glob
from typing import Any, List, Tuple, TYPE_CHECKING
from scine_puffin.config import Configuration
from .templates.job import Job, calculation_context, job_configuration_wrapper
from scine_puffin.utilities.imports import module_exists, MissingDependency

if module_exists("scine_database") or TYPE_CHECKING:
    import scine_database as db
else:
    db = MissingDependency("scine_database")


[docs]class OrcaGeometryOptimization(Job): """ A job optimizing the geometry of a given structure with the Orca program, in search of a local minimum on the potential energy surface. Optimizing a given structure's geometry, generating a new minimum energy structure, if successful. **Order Name** ``orca_geometry_optimization`` **Optional Settings** Optional settings are read from the ``settings`` field, which is part of any ``Calculation`` stored in a SCINE Database. Possible settings for this job are: convergence_max_iterations : int The maximum number of geometry optimization cycles. cartesian_constraints : List[int] A list of atom indices of the atoms which positions will be constrained during the optimization. max_scf_iterations : int The number of allowed SCF cycles until convergence. self_consistence_criterion : float The self consistence criterion corresponding to the maximum energy change between two SCF cycles resulting in convergence. scf_damping : bool Switches damping on or off during the SCF by employing the Orca keyword SlowConv. The default is False. calculate_hirshfeld_charges : bool Calculates atomic partial charges based on the Hirshfeld population analysis for the optimized structure and stores these into the database. The default is False. transform_coordinates : bool Switch to transform the input coordinates from Cartesian to redundant internal coordinates. Setting this value to False and hence performing the calculation in Cartesian coordinates is helpful in rare occasions where the calculation with redundant internal coordinates fails. The optimization will take more time but is more likely to end successfully. The default is True. **Required Packages** - SCINE: Database (present by default) - SCINE: Utils (present by default) - The Orca program has to be available **Generated Data** If successful the following data will be generated and added to the database: Structures A new minimum energy structure. Properties The ``electronic_energy`` associated with the new structure. """ def __init__(self) -> None: super().__init__() self.input_file = "orca_calc.inp" self.output_file = "orca_calc.out" self.input_structure = "system.xyz" self.output_structure = "orca_calc.xyz"
[docs] def write_input_file(self, structure, model, job, id: str, settings: dict): import scine_utilities.settings_names as sn with open(self.input_file, "w") as input: # Method input.write("! {} {} Opt\n".format(model.method, model.basis_set)) # Add damping for SCF if requested if sn.scf_damping in settings: if settings[sn.scf_damping]: input.write("! SlowConv\n") # Perform optimization in Cartesian coordinates if "geoopt_coordinate_system" in settings: if settings["geoopt_coordinate_system"].lower() == "cartesian": input.write("! COPT\n") # Title with puffin id input.write("# Orca calculation created by {}\n\n".format(id)) # Memory for one core input.write("%maxcore {}\n".format(int(job.memory * 1024 / job.cores))) # Number of cores if job.cores > 1: input.write("%pal\nnprocs {}\nend\n".format(job.cores)) # Hirshfeld charges if "calculate_hirshfeld_charges" in settings: if settings["calculate_hirshfeld_charges"]: input.write("%output\nPrint[P_Hirshfeld] 1\nend\n") # Settings for geom max_iterations_set = "convergence_max_iterations" in settings constraints_set = "cartesian_constraints" in settings if max_iterations_set or constraints_set: input.write("%geom\n") if max_iterations_set: input.write("MaxIter {}\n".format(int(settings["convergence_max_iterations"]))) if constraints_set: # The Cartesian constraints settings is given as a string containing the # indices of the atoms to be constrained separated by whitespaces constraint_atoms = settings["cartesian_constraints"] if len(constraint_atoms) > 0: input.write("Constraints\n") for a in constraint_atoms: input.write("{{C {} C}}\n".format(a)) input.write("end\n") input.write("end\n\n") # Settings for SCF self_consistence_criterion_set = sn.self_consistence_criterion in settings max_scf_iterations_set = sn.max_scf_iterations in settings if self_consistence_criterion_set or max_scf_iterations_set: input.write("%SCF\n") if self_consistence_criterion_set: input.write("TolE {}\n".format(settings[sn.self_consistence_criterion])) if max_scf_iterations_set: input.write("MaxIter {}\n".format(int(settings[sn.max_scf_iterations]))) input.write("end\n\n") # Structure file, charge and multiplicity input.write( "*xyzfile {} {} {}\n".format( structure.get_charge(), structure.get_multiplicity(), self.input_structure, ) )
[docs] def parse_output_file(self) -> Tuple[Any, float, list]: import scine_utilities as utils successful = False atomic_charges: List[float] = [] optimized_energy = None with open(self.output_file, "r") as out: lines = out.readlines() for line_index, line in enumerate(lines): # Check for convergence if ("OPTIMIZATION" in line) and ("CONVERGED" in line): successful = True # Parse final energy elif "FINAL SINGLE POINT ENERGY" in line: optimized_energy = re.findall(r"[-+]?\d*\.?\d+|[-+]?\d+", line)[0] # Parse Hirshfeld charges elif "HIRSHFELD ANALYSIS" in line: # Clear atomic charges list atomic_charges = [] current_line_index = line_index # Go forward until charges start while "CHARGE" not in lines[current_line_index]: current_line_index += 1 current_line_index += 1 # Add the charges to the list until empty line while lines[current_line_index].strip(): charge = lines[current_line_index].split()[2] atomic_charges.append(float(charge)) current_line_index += 1 if successful: if optimized_energy is None: raise RuntimeError("Optimization converged but no energy found in output file.") optimized_structure, _ = utils.io.read(self.output_structure) return optimized_structure, float(optimized_energy), atomic_charges else: raise RuntimeError
[docs] def execute_program(self): env = os.environ.copy() exe = env["ORCA_BINARY_PATH"] with open(self.output_file, 'w') as f: subprocess.run( # ["{}".format(exe), "{}".format(self.input_file)], "{}".format(exe) + " " + "{}".format(self.input_file), env=env, stdout=f, shell=True )
[docs] @job_configuration_wrapper def run(self, manager: db.Manager, calculation: db.Calculation, config: Configuration) -> bool: import scine_utilities as utils # Gather all required collections structures = manager.get_collection("structures") calculations = manager.get_collection("calculations") properties = manager.get_collection("properties") # Link calculation if needed if not calculation.has_link(): calculation.link(calculations) # Get structure and number of atoms structure = db.Structure(calculation.get_structures()[0]) structure.link(structures) natoms = len(structure.get_atoms()) # Get model model = calculation.get_model() # Get job job = calculation.get_job() # New label # TODO: These labels are not necessarily correct; during the optimization, a # complex coul be created label = structure.get_label() if label == db.Label.MINIMUM_GUESS or label == db.Label.MINIMUM_OPTIMIZED: new_label = db.Label.MINIMUM_OPTIMIZED elif label == db.Label.USER_GUESS or label == db.Label.USER_OPTIMIZED: new_label = db.Label.USER_OPTIMIZED elif label == db.Label.SURFACE_GUESS or label == db.Label.SURFACE_OPTIMIZED: new_label = db.Label.SURFACE_OPTIMIZED else: with open(os.path.join(self.work_dir, "errors"), "a") as f: error = "Unknown label of input structure: '" + str(label) + "'\n" f.write(error) calculation.set_comment(error) calculation.set_status(db.Status.FAILED) return False # Do calculation with calculation_context(self): # Write xyz file utils.io.write(self.input_structure, structure.get_atoms()) # Write input file self.write_input_file( structure, model, job, config["daemon"]["uuid"], dict(calculation.get_settings()), ) # Execute ORCA self.execute_program() # Parse output file ( optimized_structure, parsed_energy, atomic_charges, ) = self.parse_output_file() # After the calculation, verify that the connection to the database still exists self.verify_connection() # Capture raw output stdout_path = os.path.join(self.work_dir, "output") stderr_path = os.path.join(self.work_dir, "errors") with open(stdout_path, "r") as stdout, open(stderr_path, "r") as stderr: error = stderr.read() calculation.set_raw_output(stdout.read() + "\n" + error) # Get the error message to be eventually added to the calculation comment if len(error.strip()) > 0: error_msg = error.replace("\n", " ") else: error_msg = ( "Geometry Optimization Job Error: Orca Geometry Optimization job failed with an unspecified error." ) if not os.path.exists(os.path.join(self.work_dir, "success")): calculation.set_comment(error_msg) calculation.set_status(db.Status.FAILED) # Clean up .tmp files tmp_files = glob.glob(os.path.join(self.work_dir, "*.tmp")) for f in tmp_files: # type: ignore os.remove(str(f)) return False # A sanity check for the optimized structure if len(optimized_structure) != natoms: calculation.set_comment( "Geometry Optimization Job Error: Optimized structure has incorrect number of atoms" ) calculation.set_status(db.Status.FAILED) return False # A sanity check for the atomic charges if "calculate_hirshfeld_charges" in calculation.get_settings(): if calculation.get_settings()["calculate_hirshfeld_charges"]: if len(atomic_charges) != natoms: calculation.set_comment("Geometry Optimization Job Error: Incorrect number of atomic charges.") calculation.set_status(db.Status.FAILED) return False # Update model model.program = "orca" model.version = config.programs()[model.program]["version"] calculation.set_model(model) # Generate database results db_results = calculation.get_results() db_results.clear() # New structure new_structure = db.Structure() new_structure.link(structures) new_structure.create( optimized_structure, structure.get_charge(), structure.get_multiplicity(), model, new_label, ) db_results.add_structure(new_structure.id()) calculation.set_results(db_results) # Store energy self.store_property( properties, "electronic_energy", "NumberProperty", parsed_energy, model, calculation, new_structure, ) # Store atomic charges if available if len(atomic_charges) > 0: self.store_property( properties, "hirshfeld_charges", "VectorProperty", atomic_charges, model, calculation, new_structure, ) calculation.set_status(db.Status.COMPLETE) return True
[docs] @staticmethod def required_programs() -> List[str]: return ["database", "utils", "orca"]