Source code for scine_chemoton.reaction_rules.distance_rules

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__copyright__ = """ This code is licensed under the 3-clause BSD license.
Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group.
See LICENSE.txt for details.
"""

# Standard library imports
from abc import abstractmethod
from typing import List, Optional, Tuple, Dict

# Third party imports
import scine_molassembler as masm

# Local application imports
from . import valid_element, RuleSet, BaseRule


[docs]class DistanceBaseRule(BaseRule): """ A rule that decides for each atom index if it is reactive or not according to the defined rule. The rules are all based on graph distances. """
[docs] @abstractmethod def filter_by_rule(self, molecules: List[masm.Molecule], idx_map: List[Tuple[int, int]], elements: List[str], atom_index: int) -> bool: """ Method to be implemented for each rule, giving back whether atom_index is reactive or not. Parameters ---------- molecules : List[masm.Molecule] A list of molassembler molecules representing the total system idx_map : List[Tuple[int, int]] An index map generated by molassembler that allows to map the atom_index to the atom in the molecules elements : List[str] A list of all elements atom_index : int The index we rule out or not Returns ------- reactive : bool """
def __and__(self, o): if not isinstance(o, DistanceBaseRule): raise TypeError(f"{self.__class__.__name__} expects DistanceBaseRule " f"(or derived class) to chain with.") return DistanceRuleAndArray([self, o]) def __or__(self, o): if not isinstance(o, DistanceBaseRule): raise TypeError(f"{self.__class__.__name__} expects DistanceBaseRule " f"(or derived class) to chain with.") return DistanceRuleOrArray([self, o])
[docs]class DistanceRuleSet(RuleSet): """ A dictionary holding elements as keys and a respective DistanceBasedRule for it. All keys are checked for valid element types. Lists as values are automatically transformed to a DistanceRuleAndArray. Booleans as values are automatically transformed to `AlwaysTrue` or `AlwaysFalse` """ def __init__(self, kwargs: Dict[str, DistanceBaseRule]) -> None: super().__init__(kwargs) for k, v in kwargs.items(): if not isinstance(k, str): raise TypeError(f"{self.__class__.__name__} expects strings as keys") if not valid_element(k): raise TypeError(f"{k} is not a valid element symbol") if not isinstance(v, DistanceBaseRule): if isinstance(v, bool): rule = AlwaysTrue() if v else AlwaysFalse() self.data[k] = rule elif isinstance(v, list): self.data[k] = DistanceRuleAndArray(v) else: raise TypeError(f"{self.__class__.__name__} expects distance based rules as values")
[docs]class DistanceRuleAndArray(DistanceBaseRule): """ An array of logically 'and' connected rules. """ def __init__(self, rules: Optional[List[DistanceBaseRule]] = None) -> None: """ Parameters ---------- rules : Optional[List[DistanceBaseRule]] A list of bond distance based rules that have all to be fulfilled. """ super().__init__() if rules is None: rules = [] self._rules = rules self._join_names(self._rules)
[docs] def filter_by_rule(self, *args, **kwargs) -> bool: return all(rule.filter_by_rule(*args, **kwargs) for rule in self._rules)
def __repr__(self) -> str: return f"{self.__class__.__name__}({repr(self._rules)})"
[docs]class DistanceRuleOrArray(DistanceBaseRule): """ An array of logically 'or' connected rules. """ def __init__(self, rules: Optional[List[DistanceBaseRule]] = None) -> None: """ Parameters ---------- rules : Optional[List[DistanceBaseRule]] A list of bond distance based rules of which at least one has to be fulfilled. """ super().__init__() if rules is None: rules = [] self._rules = rules self._join_names(self._rules)
[docs] def filter_by_rule(self, *args, **kwargs) -> bool: return any(rule.filter_by_rule(*args, **kwargs) for rule in self._rules)
def __repr__(self) -> str: return f"{self.__class__.__name__}({repr(self._rules)})"
[docs]class AlwaysTrue(DistanceBaseRule): """ Makes each given index reactive. """
[docs] def filter_by_rule(self, *args, **kwargs) -> bool: return True
[docs]class AlwaysFalse(DistanceBaseRule): """ Makes each given index unreactive. """
[docs] def filter_by_rule(self, *args, **kwargs) -> bool: return False
[docs]class SimpleDistanceRule(DistanceBaseRule): """ Allows to define a rule simply based on a distance to another specified element. SimpleDistanceRule('O', 1) defines each atom that is directly bonded to an oxygen as reactive. """ def __init__(self, element: str, distance: int) -> None: """ Parameters ---------- element : str Another element defining if the given index is reactive or not distance : int The distance to the other element that defines the rule """ super().__init__() if not valid_element(element): raise ValueError(f"{element} is not a valid element symbol") self._element_key = element self._distance = distance def __repr__(self) -> str: return f"{self.__class__.__name__}('{self._element_key}', {self._distance})"
[docs] def filter_by_rule(self, molecules: List[masm.Molecule], idx_map: List[Tuple[int, int]], elements: List[str], atom_index: int): mol_idx, i = idx_map[atom_index] distances = masm.distance(i, molecules[mol_idx].graph) for j, e in enumerate(elements): other_mol_idx, shifted_index = idx_map[j] if mol_idx != other_mol_idx: continue if e == self._element_key and distances[shifted_index] == self._distance: return True return False
[docs]class FunctionalGroupRule(DistanceBaseRule): """ The functional group is encoded in terms of a central atom type, a dictionary of bonding atoms to the central atom and how often they are bound to the central atom, and the minimum and maximum number of bonds to the central atom.\n An atom is then termed reactive, if the specified distance in this rule, is the distance between the atom and the central atom. E.g. a hydroxyl group can be specified with an O as a central atom with exactly 2 bonds, one to H and one to C. Based on the distance then different atoms in relation to any found hydroxyl group are termed reactive.\n 0 makes the O of all hydroxyl groups reactive, 1 makes the C and H of all hydroxyl groups reactive and 2 makes the bonding partners of the C and H of all hydroxyl groups reactive etc. hydroxyl_d0 = FunctionalGroupRule(0, 'O', (2, 2), {'H': 1, 'C': 1}, True) carbonyl_group_d2 = FunctionalGroupRule(2, 'C', (3, 3), {'O': 1}, True) imine_group_d0 = FunctionalGroupRule(0, 'C', (3, 3), {'N': 1}, True) acetal_group_d1 = FunctionalGroupRule(0, 'C', (4, 4), {'O': 2, 'H': 1, 'C': 1}, True) acetal_like_group_d1 = FunctionalGroupRule(0, 'C', (4, 4), {'O': 1, 'N': 1, 'C': 1, 'H': 1}, True) four_or_five_bonded_fe = FunctionalGroupRule(0, 'Fe', (4, 5)) acetonitrile_d1 = FunctionalGroupRule(1, 'C', (2, 2), {'N': 1, 'C': 1}, True) chaining definitions with `or` is also possible for general bonding pattern with different bonding atoms:: general_acetal_like_d1 = DistanceRuleOrArray([ FunctionalGroupRule(1, 'C', (4, 4), {'O': 2, 'H': 1, 'C': 1}, True), FunctionalGroupRule(1, 'C', (4, 4), {'O': 1, 'N': 1, 'H': 1, 'C': 1}, True), FunctionalGroupRule(1, 'C', (4, 4), {'N': 2, 'H': 1, 'C': 1}, True), FunctionalGroupRule(1, 'C', (4, 4), {'S': 1, 'N': 1, 'H': 1, 'C': 1}, True), FunctionalGroupRule(1, 'C', (4, 4), {'S': 2, 'H': 1, 'C': 1}, True), ]) """ def __init__(self, distance: int, central_atom: str, n_bonds: Tuple[int, int], specified_bond_partners: Optional[Dict[str, int]] = None, strict_counts: bool = False) -> None: """ Parameters ---------- distance : int The bond distance to the functional group that must be matched. central_atom : str The central atom element symbol. n_bonds : Tuple[int, int] The minimum and maximum number of bonds to the central atom. specified_bond_partners : Dict[str, int] An optional dictionary specifying the elements that further specify the reactive group by being directly bonded to the central atom together with how often they are bonded to the central atom. E.g. a carbonyl can be specified with the central atom O that has exactly 1 bond partner being C, while an ether group can be specified with the central atom O that has exactly 2 bond partner being C. In general, the choice of the central atom is arbitrary, i.e. both groups could also be specified with C being the central atom, see class description. strict_counts : bool A flag specifying whether the specified bond partner counts must hold strictly (True) or constitute a minimum number (False). Default: False. """ super().__init__() if not valid_element(central_atom): raise ValueError(f"{central_atom} is not a valid element symbol") if specified_bond_partners is None: specified_bond_partners = {} if any(not valid_element(e) for e in specified_bond_partners.keys()): raise ValueError(f"{specified_bond_partners} contains invalid element symbols") self._distance = distance self._central_atom = central_atom self._n_bonds_min = n_bonds[0] self._n_bonds_max = n_bonds[1] self._specified_bond_partners = specified_bond_partners self._strict_counts = strict_counts def __repr__(self) -> str: return f"{self.__class__.__name__}({self._distance}, '{self._central_atom}', " \ f"({self._n_bonds_min}, {self._n_bonds_max}), {self._specified_bond_partners}, {self._strict_counts})"
[docs] def filter_by_rule(self, molecules: List[masm.Molecule], idx_map: List[Tuple[int, int]], elements: List[str], atom_index: int) -> bool: mol_idx, i = idx_map[atom_index] # index i is the atom on which we apply the rule distances_i = masm.distance(i, molecules[mol_idx].graph) for atom_j, e_j in enumerate(elements): # loop over possible central atoms mol_jdx, j = idx_map[atom_j] if mol_jdx != mol_idx: # atom j is part of another molecule, there cannot be a valid graph distance continue if e_j == self._central_atom and distances_i[j] == self._distance: # distances_j are all distances from any atoms in the molecule to atom j, # which is the central atom of the functional group: distances_j = masm.distance(j, molecules[mol_jdx].graph) n_bonds_found = 0 bond_partners_found = {elem: 0 for elem in self._specified_bond_partners} for atom_k, e_k in enumerate(elements): # loop over possible atoms bound to the central atom mol_kdx, k = idx_map[atom_k] if mol_kdx != mol_jdx: # atom k is part of another molecule, there cannot be a valid graph distance continue if distances_j[k] == 1: n_bonds_found += 1 if e_k in self._specified_bond_partners: bond_partners_found[e_k] += 1 if n_bonds_found > self._n_bonds_max: break bond_partners_fulfilled = all(defined_count == bond_partners_found[e] if self._strict_counts else defined_count <= bond_partners_found[e] for e, defined_count in self._specified_bond_partners.items()) n_bonds_fulfilled = (self._n_bonds_min <= n_bonds_found) and (n_bonds_found <= self._n_bonds_max) if bond_partners_fulfilled and n_bonds_fulfilled: return True return False