Source code for scine_puffin.jobs.rms_kinetic_modeling

# -*- coding: utf-8 -*-
from __future__ import annotations
__copyright__ = """ This code is licensed under the 3-clause BSD license.
Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group.
See LICENSE.txt for details.
"""

from multiprocessing import Pool
from typing import Optional, List, TYPE_CHECKING, Any, Dict

import numpy as np

from .templates.job import breakable, calculation_context, job_configuration_wrapper
from .templates.kinetic_modeling_jobs import KineticModelingJob
from scine_puffin.config import Configuration
from ..utilities.rms_kinetic_model import RMSKineticModel
from ..utilities.kinetic_modeling_sensitivity_analysis import RMSKineticModelingSensitivityAnalysis
from scine_puffin.utilities.imports import module_exists, MissingDependency

if module_exists("scine_database") or TYPE_CHECKING:
    import scine_database as db
else:
    db = MissingDependency("scine_database")


[docs]class RmsKineticModeling(KineticModelingJob): """ Micro-kinetic modeling with the puffin-interface to the reaction mechanism simulator (RMS). Note: Running jobs with RMS as a backend requires an installation of RMS (including its Python bindings). This is not supported through the Puffin bootstrapping. See programs/rms.py for more information. **Order Name** ``rms_kinetic_modeling`` **Required Input** model : db.Model The electronic structure model to flag the new properties with. **Required Settings** aggregate_ids : List[str] The aggregate IDs (as strings). reaction_ids : List[str] The reaction IDs (as strings). aggregate_types : List[int] The aggregate types. 0 for compounds, 1 for flasks. ea : List[float] The activation energies for each reaction as the free energy difference to the reaction LHS (in J/mol). enthalpies : List[float] The enthalpy of each aggregate (in J/mol). entropies : List[float] The entropy of each aggregate (in J/mol). arrhenius_prefactors : List[float] The exponential prefactors. arrhenius_temperature_exponents : List[float] The temperature exponents in the Arrhenius equation. start_concentrations : List[float The start concentrations of each aggregate. **Optional Settings** Optional settings are read from the ``settings`` field, which is part of any ``Calculation`` stored in a SCINE Database. The following options are available: solver : str ODE solver. Currently only "CVODE_BDF" is supported. phase_type : str The reactor phase. Options are ideal_gas (assumes P=const, T=const), ideal_dilute_solution (assumes V=const, T=const). Default is "ideal_gas". max_time : float Maximum integration time in seconds. Default 3600.0. energy_model_program : str The program with which the electronic structure model should be flagged. Default any. viscosity : float The solvent viscosity (in Pa s). Needs phase=ideal_dilute_solution and diffusion_limited=true. If "none", the viscosity is taken from tabulated values. reactor_solvent : str The reactor solvent. If "none", the solvent in the electronic structure model is used if any. site_density : float The density of surface sites. Default is "none". Requires phase=ideal_surface. Not fully supported yet. diffusion_limited : bool If true, diffusion limits are enforced. Requires phase=ideal_dilute_solution. May lead to numerical instability of the ODE solver. Default False. reactor_temperature : float The reactor temperature (in K). If "none", the temperature in the model is used. Default "none". reactor_pressure : float The reactor pressure (in Pa). If none, the pressure in the model is used. Default "none". absolute_tolerance : float The absolute tolerance of the ODE solver. High values lead to a faster but less reliable integration. Default 1e-20. relative_tolerance : float The relative tolerance of the ODE solver. High values lead to a faster but less reliable integration. Default 1e-6. solvent_aggregate_str_id : str The aggregate ID of the solvent as a string. If "none", the solvent is assumed to be unreactive. solvent_concentration : float The solvent concentration. Default is 55.3 (mol/L). enforce_mass_balance : bool If true, an error is raised for any non-balanced reaction. screen_sensitivities : bool If true, only parameters associated to aggregates and reactions with significant concentration flux are considered in the sensitivity analysis (flux > oaat_vertex_flux_threshold | flux > oaat_vertex_flux_threshold). **Required Packages** - SCINE: Database (present by default) - rms **Generated Data** If successful (technically and chemically) the following data will be generated and added to the database: Properties The maximum and final concentration, and the vertex flux of each aggregate is added to its centroid. The edge flux for each reaction is added to the centroid of the first aggregate on the reaction's LHS. Note, that the properties are NOT listed in the results to avoid large DB documents. """ def __init__(self) -> None: super().__init__() self.name: str = "RMS kinetic modeling job" self.settings: Dict[str, Any] = { "solver": "Recommended", "phase_type": "ideal_gas", "max_time": 3600.0, "energy_model_program": "any", "viscosity": "none", # in Pa s "reactor_solvent": "none", "site_density": "none", "diffusion_limited": False, "reactor_temperature": "none", # in K "reactor_pressure": "none", # in Pa, "absolute_tolerance": 1e-20, "relative_tolerance": 1e-6, "solvent_aggregate_str_id": "none", "solvent_concentration": 55.3, "sensitivity_analysis": "none", "adjoined_sensitivity_threshold": 1e-2, "absolute_tolerance_sensitivity": 1e-6, "relative_tolerance_sensitivity": 1e-3, "oaat_vertex_flux_threshold": 1e-2, "oaat_edge_flux_threshold": 1e-2, "sample_size": 10, "local_sensitivities": False, "adjoined_sensitivities": False, "save_oaat_var": False, "distribution_shape": "truncnorm", "enforce_mass_balance": True, "screen_global_sens_size": 1e+3 } self.model: db.Model = db.Model("PM6", "PM6", "") self._rms_file_name: str = "chem.rms" self._solvent_a_index: Optional[int] = None self._viscosity: Optional[float] = None self._solvent: Optional[str] = None self._site_density: Optional[float] = None self._pressure: Optional[float] = None self._temperature: Optional[float] = None self._phase_type: Optional[str] = None self._phase_options = ["ideal_dilute_solution", "ideal_gas"] self._sensitivity_options = ["none", "adjoined_sensitivities", "one_at_a_time_sensitivities", "morris", "sobol"] self._solvent_aggregate_str_id: Optional[str] = None self._solvent_species_added: bool = False self.reaction_ids: List[db.ID] = [] self.aggregate_id_list: List[db.ID] = [] self.aggregate_types: List[db.CompoundOrFlask] = [] self.max_time: float = float(self.settings["max_time"]) self.abs_tol: float = float(self.settings["absolute_tolerance"]) self.rel_tol: float = float(self.settings["relative_tolerance"]) self._sample_size: int = 50 self._n_cores: int = 1 self.force_parallel = False self.rms_kinetic_model: Optional[RMSKineticModel] = None
[docs] def use_n_cores(self, n_cores: int) -> int: assert isinstance(self.rms_kinetic_model, RMSKineticModel) if self.force_parallel: return n_cores if self.rms_kinetic_model.get_n_parameters() > 100: return n_cores if self.settings["sensitivity_analysis"] in ["morris", "sobol"]: return n_cores return 1
[docs] @staticmethod def required_programs() -> List[str]: return ["database", "rms"]
[docs] @job_configuration_wrapper def run(self, manager: db.Manager, calculation: db.Calculation, config: Configuration) -> bool: with breakable(calculation_context(self)): self._calculation = calculation self.settings.update(calculation.get_settings()) self._resolve_default_settings() rms_path = config["programs"]["rms"]["root"] self.rms_kinetic_model = RMSKineticModel(self.settings, manager, self.model, rms_path, self._rms_file_name) # Importing Julia has a significant overhead. If we parallelize we have to import Julia potentially more # than once (i.e., once in each sub process). Therefore, we should only do this if the kinetic model is # large enough that the parallelization actually accelerates the calculation. self._n_cores = self.use_n_cores(int(config["resources"]["cores"])) if self._n_cores > 1: # In the case we parallelize the kinetic modeling, we must ensure that Julia is only imported in the # worker processes and NEVER in the main process. with Pool(1) as pool: res = pool.map(self.rms_kinetic_model.calculate_fluxes_and_concentrations, [self._rms_file_name]) else: res = [self.rms_kinetic_model.calculate_fluxes_and_concentrations(self._rms_file_name)] c_max, c_final, c_flux, r_flux, adjoined_sens, _ = res[0] print("Maximum concentrations") print(c_max) print("Final concentrations") print(c_final) print("Absolute edge flux") print(r_flux) print("Absolute vertex flux") print(c_flux) results = calculation.get_results() if self.settings["sensitivity_analysis"] == "one_at_a_time_sensitivities"\ or self.settings['local_sensitivities']: vertex_t = float(self.settings["oaat_vertex_flux_threshold"]) edge_t = float(self.settings["oaat_edge_flux_threshold"]) flux_replace = 10.0 * vertex_t sens = RMSKineticModelingSensitivityAnalysis(self.rms_kinetic_model, self._n_cores, self._sample_size) max_sens, sens_c_final, flux_sens, var_max, var_final, var_flux = sens.one_at_a_time_differences( c_flux, r_flux, vertex_t, edge_t, flux_replace, c_max, c_final) self._write_sensitivities_to_database(flux_sens, "oaat_flux") self._write_sensitivities_to_database(max_sens, "oaat_max") self._write_sensitivities_to_database(sens_c_final, "oaat_final") if self.settings["save_oaat_var"]: self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, self.reaction_ids, [var_final, var_max, var_flux], [], ["var_final_c", "var_max_c", "var_flux_c"], [], results) if adjoined_sens is not None: print("Adjoined Sensitivities") print(adjoined_sens) self._write_sensitivities_to_database(adjoined_sens, "adjoined") salib = RMSKineticModelingSensitivityAnalysis(self.rms_kinetic_model, self._n_cores, self._sample_size, self.settings["distribution_shape"]) if self.settings["sensitivity_analysis"] == "morris": if self.settings["screen_global_sens_size"] < salib.get_n_parameters(): vertex_t = float(self.settings["oaat_vertex_flux_threshold"]) edge_t = float(self.settings["oaat_edge_flux_threshold"]) salib.set_prescreening_condition(c_flux, r_flux, vertex_t, edge_t) mu, mu_star, sigma, _ = salib.morris_sensitivities() m_v = salib.analyse_runs() self._write_sensitivities_to_database(mu['c_max'], "morris_mu_c_max") self._write_sensitivities_to_database(mu['c_final'], "morris_mu_c_final") self._write_sensitivities_to_database(mu_star['c_max'], "morris_mu_star_c_max") self._write_sensitivities_to_database(mu_star['c_final'], "morris_mu_star_c_final") self._write_sensitivities_to_database(sigma['c_max'], "morris_sigma_c_max") self._write_sensitivities_to_database(sigma['c_final'], "morris_sigma_c_final") self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, self.reaction_ids, [m_v[0][0], m_v[1][0], m_v[2][0], m_v[0][1], m_v[1][1], m_v[2][1]], [], ["morris_mean_c_max", "morris_mean_c_final", "morris_mean_c_flux", "morris_var_c_max", "morris_var_c_final", "morris_var_c_flux"], [], results) elif self.settings["sensitivity_analysis"] == "sobol": st, s1, _ = salib.sobol_sensitivities() m_v = salib.analyse_runs() self._write_sensitivities_to_database(st['c_max'], "sobol_st_c_max") self._write_sensitivities_to_database(st['c_final'], "sobol_st_c_final") self._write_sensitivities_to_database(s1['c_max'], "sobol_s1_c_max") self._write_sensitivities_to_database(s1['c_final'], "sobol_s1_c_final") self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, self.reaction_ids, [m_v[0][0], m_v[1][0], m_v[2][0], m_v[0][1], m_v[1][1], m_v[2][1]], [], ["sobol_mean_c_max", "sobol_mean_c_final", "sobol_mean_c_flux", "sobol_var_c_max", "sobol_var_c_final", "sobol_var_c_flux"], [], results) self._write_concentrations_to_centroids(self.aggregate_id_list, self.aggregate_types, self.reaction_ids, [c_max, c_final, c_flux], [r_flux], [self.c_max_label, self.c_final_label, self.c_flux_label], [self.r_flux_label], results) self._disable_all_aggregates() self.complete_job() return self.postprocess_calculation_context()
def _write_sensitivities_to_database(self, absolute_sensitivities: np.ndarray, prop_label: str): counter = 0 results = self._calculation.get_results() for a_id, a_type in zip(self.aggregate_id_list, self.aggregate_types): centroid = self._get_aggregate_centroid(a_id, a_type) label = "max_free_energy_sensitivity_" + prop_label self._write_concentration_property(centroid, label, absolute_sensitivities[counter], results) counter += 1 # The last species free energy sensitivity may be for a potentially added solvent. if absolute_sensitivities.shape[0] > len(self.reaction_ids) + len(self.aggregate_id_list): counter += 1 for r_id in self.reaction_ids: centroid = self._get_reaction_centroid(r_id) label = r_id.string() + "_reaction_barrier_sensitivity_" + prop_label self._write_concentration_property(centroid, label, absolute_sensitivities[counter], results) counter += 1 def _resolve_default_settings(self): self.model = self._calculation.get_model() self.model.program = self.settings["energy_model_program"] self._sample_size = self.settings["sample_size"] self.reaction_ids = [db.ID(r_id_str) for r_id_str in self.settings["reaction_ids"]] self.aggregate_id_list = [db.ID(c_id_str) for c_id_str in self.settings["aggregate_ids"]] self.aggregate_types = [db.CompoundOrFlask(a_type) for a_type in self.settings["aggregate_types"]]