Molassembler  3.0.0
Molecule graph and conformer library
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
SmilesMoleculeBuilder.h
Go to the documentation of this file.
1 
8 #ifndef INCLUDE_MOLASSEMBLER_IO_SMILES_MOLECULE_BUILDER_H
9 #define INCLUDE_MOLASSEMBLER_IO_SMILES_MOLECULE_BUILDER_H
10 
14 #include "boost/variant.hpp"
15 #include "boost/bimap.hpp"
16 #include "boost/graph/subgraph.hpp"
17 #include <stack>
18 
19 namespace Scine {
20 
21 namespace Utils {
22 enum class ElementType : unsigned;
23 } // namespace Utils
24 
25 
26 namespace Molassembler {
27 
28 class Molecule;
29 
30 namespace IO {
31 
42 struct PiSubgraph {
// Underlying BGL graph type: an undirected adjacency_list using vector
// storage (vecS) for both the out-edge lists and the vertex list, carrying no
// vertex properties and an integer edge_index property on every edge.
// NOTE(review): the edge_index property is presumably present because
// boost::subgraph requires it of the wrapped graph type - confirm.
45  using BaseGraph = boost::adjacency_list<
46  boost::vecS,
47  boost::vecS,
48  boost::undirectedS,
49  boost::no_property,
50  boost::property<boost::edge_index_t, int>
51  >;
// Subgraph wrapper around BaseGraph; this is the type of the `graph` member.
52  using Graph = boost::subgraph<BaseGraph>;
// Vertex and edge descriptor types of the underlying BaseGraph.
53  using Vertex = typename BaseGraph::vertex_descriptor;
54  using Edge = typename BaseGraph::edge_descriptor;
// Bidirectional vertex-to-vertex map. As used by findOrAdd(), the left side
// holds precursor vertex indices and the right side subgraph vertex indices.
55  using IndexMap = boost::bimap<Vertex, Vertex>;
// Unordered set of vertex descriptors (type of the `omissible` member).
56  using VertexSet = std::unordered_set<Vertex>;
58 
60  static bool hasUnpairedElectrons(Vertex i, int charge, const PrivateGraph& g);
61 
67  static bool permittedElementType(Utils::ElementType e) {
68  switch(e) {
69  case Utils::ElementType::C:
70  case Utils::ElementType::N:
71  case Utils::ElementType::O:
72  case Utils::ElementType::S:
73  case Utils::ElementType::P:
74  case Utils::ElementType::As:
75  case Utils::ElementType::Sb:
76  case Utils::ElementType::Se:
77  case Utils::ElementType::Te: {
78  return true;
79  }
80  default:
81  return false;
82  }
83  }
84 
88  bool eligible;
90  bool omissible;
91  };
92 
// Classify a vertex with respect to the pi subgraph.
//
// The returned EligibleOmissible carries two flags:
// - eligible: the vertex should be added to the pi subgraph
// - omissible: the vertex can be omitted from the pi subgraph to find a
//   perfect matching
//
// Only the declaration is visible here; the definition lives elsewhere.
// NOTE(review): `i` is presumably a vertex of `component`, and `atomData` the
// parsed atom data belonging to that vertex - confirm against the definition.
108  static EligibleOmissible eligibleOmissible(
109  Vertex i,
110  const PrivateGraph& component,
111  const AtomData& atomData
112  );
113 
114  static boost::optional<EligibleOmissible> multipleOrderAdjacent(
115  Vertex i,
116  const PrivateGraph& component,
117  const AtomData& atomData,
118  const boost::optional<unsigned>& neighborCount
119  );
120 
121  static boost::optional<EligibleOmissible> threeNeighborChargedCarbon(
122  Vertex i,
123  const PrivateGraph& component,
124  const AtomData& atomData,
125  const boost::optional<unsigned>& neighborCount
126  );
127 
128  static boost::optional<EligibleOmissible> neutralTrivalents(
129  Vertex i,
130  const PrivateGraph& component,
131  const AtomData& atomData,
132  const boost::optional<unsigned>& neighborCount
133  );
134 
135  static boost::optional<EligibleOmissible> neutralDivalents(
136  Vertex i,
137  const PrivateGraph& component,
138  const AtomData& atomData,
139  const boost::optional<unsigned>& neighborCount
140  );
142 
146  Vertex findOrAdd(Vertex i) {
147  const auto iter = index.left.find(i);
148  if(iter == index.left.end()) {
149  const Vertex a = boost::add_vertex(graph);
150  index.insert(IndexMap::relation(i, a));
151  return a;
152  }
153  return iter->second;
154  }
155 
156  /* Find a perfect matching of the subgraph, considering optionally omissible
157  * vertices.
158  *
159  * @note This is non-const because creating subgraphs mutates the parent
160  * subgraph instances
161  */
162  boost::optional<VertexSet> match();
164 
// Subgraph data structure
168  Graph graph;
// Index mapping from precursor to subgraph vertex index
170  IndexMap index;
// Set of omissible subgraph vertices
172  VertexSet omissible;
174 };
175 
183 public:
186  /* @brief Parsing trigger on encountering an atom
187  *
188  * Adds an atom with the last set bond information.
189  */
190  void addAtom(AtomData atom);
191 
193  void addRingClosure(const BondData& bond);
194 
196  inline void branchOpen() {
197  vertexStack.push(vertexStack.top());
198  }
199 
// Parsing trigger on branch close.
//
// Removes the top entry of the vertex stack. The stack must not be empty when
// this is called.
201  inline void branchClose() {
// Precondition check: there has to be an entry to pop
202  assert(!vertexStack.empty());
203  vertexStack.pop();
204  }
205 
207  inline void setNextAtomUnbonded() {
208  lastBondData = boost::none;
209  }
210 
// Parsing trigger on encountering non-default bond information.
//
// Stores a copy of `bond` in lastBondData, replacing whatever was stored
// before.
212  inline void setNextAtomBondInformation(const BondData& bond) {
213  lastBondData = bond;
214  }
216 
218  std::vector<Molecule> interpret(const std::string& smiles);
219 
220 private:
223 
230  static BondType mutualBondType(
231  const boost::optional<BondType>& a,
232  const boost::optional<BondType>& b
233  );
234 
240  static std::vector<Shapes::Vertex> shapeMap(const ChiralData& chiralData);
242 
246  std::unordered_set<PrivateGraph::Vertex> matchAromatics(
247  std::vector<PrivateGraph>& precursors,
248  const std::vector<unsigned>& componentMap,
249  const std::vector<PrivateGraph::Vertex>& indexInComponentMap
250  ) const;
251 
253  void setShapes(
254  std::vector<Molecule>& molecules,
255  const std::vector<unsigned>& componentMap,
256  const std::vector<PrivateGraph::Vertex>& indexInComponentMap
257  );
258 
260  void setAtomStereo(
261  std::vector<Molecule>& molecules,
262  const std::vector<unsigned>& componentMap,
263  const std::vector<PrivateGraph::Vertex>& indexInComponentMap,
264  const std::string& smiles
265  );
266 
268  void setBondStereo(
269  std::vector<Molecule>& molecules,
270  const std::vector<unsigned>& componentMap,
271  const std::vector<PrivateGraph::Vertex>& indexInComponentMap,
272  const std::string& smiles
273  );
274 
276  void addAromaticBondStereo(
277  std::vector<Molecule>& molecules,
278  const std::vector<unsigned>& componentMap,
279  const std::vector<PrivateGraph::Vertex>& indexInComponentMap
280  );
282 
// State for last stored bond data. Set by setNextAtomBondInformation() and
// emptied (boost::none) by setNextAtomUnbonded().
286  boost::optional<BondData> lastBondData;
287 
290 
// State to track the vertex a new vertex is bound to. branchOpen() pushes a
// copy of the top entry, branchClose() pops the top entry.
292  std::stack<PrivateGraph::Vertex> vertexStack;
293 
// Storage for bonds marked with stereo indicators ("/" and "\"): each entry
// holds two graph vertices and a SmilesBondType.
// NOTE(review): presumably the two vertices are the bond's end points in
// parse order - confirm against the code filling stereoMarkedBonds.
295  using StereoMarkedBondTuple = std::tuple<PrivateGraph::Vertex, PrivateGraph::Vertex, SmilesBondType>;
296  std::vector<StereoMarkedBondTuple> stereoMarkedBonds;
297 
// Storage for pi-subgraph edges
299  std::vector<PrivateGraph::Edge> piSubgraphEdges;
300 
302  std::unordered_map<
303  unsigned,
304  std::pair<PrivateGraph::Vertex, boost::optional<BondType>>
306 
// AtomData for each created vertex
308  std::vector<AtomData> vertexData;
310 };
311 
312 } // namespace IO
313 } // namespace Molassembler
314 } // namespace Scine
315 
316 #endif
bool omissible
Can be omitted from the pi subgraph to find a perfect matching.
Definition: SmilesMoleculeBuilder.h:90
MoleculesResult molecules(const Utils::ElementTypeCollection &elements, const AngstromPositions &angstromWrapper, const Utils::BondOrderCollection &bondOrders, BondDiscretizationOption discretization=BondDiscretizationOption::Binary, const boost::optional< double > &stereopermutatorThreshold=1.4)
The function that actually does all the work with the library-internal wrapper.
Semantic interpreter of the smiles grammar.
Definition: SmilesMoleculeBuilder.h:182
void branchClose()
Parsing trigger on branch close.
Definition: SmilesMoleculeBuilder.h:201
void setNextAtomBondInformation(const BondData &bond)
Parsing trigger on encountering non-default bond information.
Definition: SmilesMoleculeBuilder.h:212
boost::optional< BondData > lastBondData
State for last stored bond data.
Definition: SmilesMoleculeBuilder.h:286
Data struct for eligibleOmissible()
Definition: SmilesMoleculeBuilder.h:86
Definition: SmilesParseData.h:37
Class to help validate aromatic subgraphs in parsed smiles strings.
Definition: SmilesMoleculeBuilder.h:42
Centralizes basic shape data in runtime types.
static bool permittedElementType(Utils::ElementType e)
Decide whether an element type is allowed to be in the pi subgraph.
Definition: SmilesMoleculeBuilder.h:67
std::stack< PrivateGraph::Vertex > vertexStack
State to track the vertex a new vertex is bound to.
Definition: SmilesMoleculeBuilder.h:292
std::tuple< PrivateGraph::Vertex, PrivateGraph::Vertex, SmilesBondType > StereoMarkedBondTuple
Storage for bonds marked with stereo indicators ("/" and "\")
Definition: SmilesMoleculeBuilder.h:295
Graph graph
Subgraph data structure.
Definition: SmilesMoleculeBuilder.h:168
IndexMap index
Index mapping from precursor to subgraph vertex index.
Definition: SmilesMoleculeBuilder.h:170
VertexSet omissible
Set of omissible subgraph vertices.
Definition: SmilesMoleculeBuilder.h:172
Library internal graph class wrapping BGL types.
Definition: PrivateGraph.h:26
PrivateGraph graph
Possibly disconnected tracking graph.
Definition: SmilesMoleculeBuilder.h:289
bool eligible
Should be added to the pi subgraph.
Definition: SmilesMoleculeBuilder.h:88
Definition: SmilesParseData.h:42
BondType
Discrete bond type enumeration.
Definition: Types.h:26
Vertex findOrAdd(Vertex i)
Find a precursor vertex's subgraph index, inserting it if not present.
Definition: SmilesMoleculeBuilder.h:146
void setNextAtomUnbonded()
Parsing trigger on finding a dot (molecule separator) in place of a bond.
Definition: SmilesMoleculeBuilder.h:207
std::vector< PrivateGraph::Edge > piSubgraphEdges
Storage for pi-subgraph edges.
Definition: SmilesMoleculeBuilder.h:299
Definition: SmilesParseData.h:73
std::unordered_map< unsigned, std::pair< PrivateGraph::Vertex, boost::optional< BondType > > > ringClosures
Storage for ring closure bond indicators.
Definition: SmilesMoleculeBuilder.h:305
Define data accrued in smiles parser for consumption in the accompanying molecule builder...
void branchOpen()
Parsing trigger on branch open.
Definition: SmilesMoleculeBuilder.h:196
std::vector< StereoMarkedBondTuple > stereoMarkedBonds
Storage for bonds marked with stereo indicators ("/" and "\")
Definition: SmilesMoleculeBuilder.h:296
std::vector< AtomData > vertexData
AtomData for each created vertex.
Definition: SmilesMoleculeBuilder.h:308
Boost Graph Library wrapper to help in concealing underlying type.