Scine::Readuct  4.0.0
This is the SCINE module Readuct.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
BSplineInterpolationTask.h
Go to the documentation of this file.
1 
7 #ifndef READUCT_BSPLINEINTERPOLATIONTASK_H_
8 #define READUCT_BSPLINEINTERPOLATIONTASK_H_
9 
10 #include "../../Readuct/ElementaryStepOptimization/ElementaryStepOptimizer.h"
11 #include "../../Readuct/ElementaryStepOptimization/ReactionProfile.h"
12 #include "Tasks/Task.h"
24 #include <boost/filesystem.hpp>
25 #include <fstream>
26 
27 namespace Scine {
28 namespace Readuct {
29 
31  public:
38  BSplineInterpolationTask(std::vector<std::string> input, std::vector<std::string> output,
39  std::shared_ptr<Core::Log> logger = nullptr)
40  : Task(std::move(input), std::move(output), std::move(logger)) {
41  }
42 
43  std::string name() const override {
44  return "BSpline Interpolation";
45  }
46 
47  bool run(std::map<std::string, std::shared_ptr<Core::Calculator>>& systems,
48  Utils::UniversalSettings::ValueCollection taskSettings, bool testRunOnly = false) const final {
49  if (_input.size() != 2) {
50  throw std::logic_error("The B-Spline task needs two input systems.");
51  }
53 
54  // Read and set user-specified settings
55  alignStructuresBeforeInterpolation_ = taskSettings.extract("align_structures", alignStructuresBeforeInterpolation_);
56  numberStructuresForMolecularTrajectory_ = taskSettings.extract("num_structures", numberStructuresForMolecularTrajectory_);
57  optimize_ = taskSettings.extract("optimize", optimize_);
58  numberControlPointsForInterpolation_ = taskSettings.extract("num_control_points", numberControlPointsForInterpolation_);
59  optimizer_ = taskSettings.extract("optimizer", optimizer_);
60  std::transform(optimizer_.begin(), optimizer_.end(), optimizer_.begin(), ::tolower);
61  extractTsGuess_ = taskSettings.extract("extract_ts_guess", extractTsGuess_);
62  extractTsGuessNeighbours_ = taskSettings.extract("extract_ts_guess_neighbours", extractTsGuessNeighbours_);
63  tangentFileName_ = taskSettings.extract("tangent_file", tangentFileName_);
64  coordinateThresholdForMaximumExtraction_ =
65  taskSettings.extract("extract_threshold", coordinateThresholdForMaximumExtraction_);
66  bool silentCalculator = taskSettings.extract("silent_stdout_calculator", true);
67 
68  // If no errors encountered until here, the basic settings should be alright
69  if (testRunOnly) {
70  return true; // leave out rest in case of task chaining
71  }
72 
73  // Note: _input is guaranteed not to be empty by Task constructor
74  auto calc = copyCalculator(systems, _input.front(), name());
75  auto secondCalculator = systems.at(_input.back());
76  Utils::CalculationRoutines::setLog(*calc, true, true, !silentCalculator);
77  Utils::CalculationRoutines::setLog(*secondCalculator, true, true, !silentCalculator);
78  if (calc->settings() != secondCalculator->settings()) {
79  _logger->warning
80  << " Warning: The given systems have different settings. Only taking first and ignoring second.\n";
81  }
82 
83  // Get inputs
84  auto start = calc->getStructure();
85  auto end = secondCalculator->getStructure();
86  if (start->getElements() != end->getElements()) {
87  throw std::logic_error("The provided structures have different elements. Impossible to find path between them.");
88  }
89 
90  // Check system size
91  if (start->size() == 1) {
92  throw std::runtime_error("Cannot perform " + name() + " for monoatomic systems.");
93  }
94 
95  Utils::MolecularTrajectory trajectoryGuess = Utils::MolecularTrajectory();
96  trajectoryGuess.setElementTypes(start->getElements());
97  if (taskSettings.valueExists("trajectory_guess")) {
98  std::vector<std::string> paths = taskSettings.getStringList("trajectory_guess");
99  for (unsigned long i = 0; i < paths.size(); ++i) {
100  Utils::MolecularTrajectory traj = readTrajectory(paths[i]);
101  if (traj.getElementTypes() != trajectoryGuess.getElementTypes()) {
102  throw std::logic_error("The provided trajectory guess no. " + std::to_string(i) +
103  " has different elements than the provided input structures.");
104  }
105  for (auto& step : traj) {
106  trajectoryGuess.push_back(step);
107  }
108  }
109  taskSettings.dropValue("trajectory_guess");
110  }
111 
112  // Interpolate between start and end structure
113  auto cout = _logger->output;
114  cout << " Interpolating Reaction Path\n";
115  ElementaryStepOptimization::ReactionProfile interpolatedProfile = interpolateElementaryStep(*start, *end, trajectoryGuess);
116  Utils::MolecularTrajectory interpolatedTrajectory = convertProfileToTrajectory(interpolatedProfile);
117  // Print/Store results
118  using Writer = Utils::ChemicalFileHandler;
119  const std::string& outputSystem = ((!_output.empty()) ? _output[0] : _input[0]);
120  boost::filesystem::path dir(outputSystem);
121  boost::filesystem::create_directory(dir);
122  systems[outputSystem] = calc;
123  boost::filesystem::path xyzfile(outputSystem + "_interpolated.xyz");
124  writeTrajectory(interpolatedTrajectory, (dir / xyzfile).string());
125  cout << " Interpolating Complete\n\n";
126 
127  // Optimize the interpolated path
128  Utils::MolecularTrajectory optimizedTrajectory;
129  ElementaryStepOptimization::ReactionProfile optimizedProfile;
130  bool converged = true;
131  if (optimize_) {
132  cout << " Optimizing Reaction Path\n";
133  auto result = optimizeElementaryStep(std::move(interpolatedProfile), calc, taskSettings);
134  optimizedProfile = result.first;
135  converged = result.second;
136  optimizedTrajectory = convertProfileToTrajectory(optimizedProfile);
137  // Output
138  boost::filesystem::path xyzfile2(outputSystem + "_optimized.xyz");
139  writeTrajectory(optimizedTrajectory, (dir / xyzfile2).string());
140  std::string info = (converged) ? "Complete" : "Stopped";
141  cout << " Optimization " + info + "\n\n";
142  }
143 
144  std::vector<Utils::AtomCollection> tsGuess;
145  if (optimize_ && extractTsGuess_) {
146  cout << " Extracting Transition State Guess\n";
147  tsGuess = extractTransitionStateGuessStructure(optimizedProfile, calc, dir);
148  // Output
149  boost::filesystem::path xyzfile3 = dir / (outputSystem + "_tsguess.xyz");
150  Writer::write(xyzfile3.string(), tsGuess.at(0));
151 
152  if (extractTsGuessNeighbours_) {
153  boost::filesystem::path xyzfile3_1 = dir / (outputSystem + "_tsguess-1.xyz");
154  Writer::write(xyzfile3_1.string(), tsGuess.at(1));
155 
156  boost::filesystem::path xyzfile3_2 = dir / (outputSystem + "_tsguess+1.xyz");
157  Writer::write(xyzfile3_2.string(), tsGuess.at(2));
158  }
159 
160  cout << " Extraction Complete\n\n";
161  }
162  cout << Core::Log::nl << Core::Log::endl;
163  return converged;
164  }
165 
166  private:
167  ElementaryStepOptimization::ReactionProfile
168  interpolateElementaryStep(const Utils::AtomCollection& start, Utils::AtomCollection end,
169  Utils::MolecularTrajectory trajectoryGuess = Utils::MolecularTrajectory()) const {
170  auto cout = _logger->output;
171  if (alignStructuresBeforeInterpolation_) {
172  auto positionsToAlign = end.getPositions();
173  Utils::Geometry::Manipulations::alignPositions(start.getPositions(), positionsToAlign);
174  end.setPositions(std::move(positionsToAlign));
175  }
176 
177  Eigen::VectorXd startVector = Eigen::Map<const Eigen::VectorXd>(
178  start.getPositions().data(), start.getPositions().cols() * start.getPositions().rows());
179  Eigen::VectorXd endVector =
180  Eigen::Map<const Eigen::VectorXd>(end.getPositions().data(), end.getPositions().cols() * end.getPositions().rows());
181  Utils::BSplines::BSpline spline;
182  if (trajectoryGuess.empty()) {
183  spline = Utils::BSplines::LinearInterpolator::generate(startVector, endVector, numberControlPointsForInterpolation_);
184  }
185  else {
186  if (start.size() != trajectoryGuess.molecularSize()) {
187  throw std::logic_error("The given trajectory guess has a different size than the given start system.");
188  }
189  /* Remove frames from trajectory at end and beginning which are too similar to given endpoints */
190  bool viableTrajectory = true;
191  double rmsdThreshold = 1.0;
192  bool stop = false;
193  while (!stop) {
194  Utils::QuaternionFit fit(start.getPositions(), trajectoryGuess.front());
195  if (fit.getRMSD() <= rmsdThreshold) {
196  trajectoryGuess.erase(trajectoryGuess.begin());
197  if (trajectoryGuess.empty()) {
198  _logger->warning
199  << "Warning: no viable trajectory guess provided, all frames are too similar to the endpoints, now "
200  "performing linear interpolation."
201  << Core::Log::endl;
202  viableTrajectory = false;
203  stop = true;
204  }
205  }
206  else {
207  stop = true;
208  }
209  }
210  if (viableTrajectory) {
211  stop = false;
212  }
213  while (!stop) {
214  Utils::QuaternionFit fit(end.getPositions(), trajectoryGuess.back());
215  if (fit.getRMSD() <= rmsdThreshold) {
216  trajectoryGuess.erase(trajectoryGuess.end());
217  if (trajectoryGuess.empty()) {
218  _logger->warning
219  << "WARNING, no viable trajectory guess provided, all frames are too similar to the endpoints, now "
220  "performing linear interpolation."
221  << Core::Log::endl;
222  viableTrajectory = false;
223  stop = true;
224  }
225  }
226  else {
227  stop = true;
228  }
229  }
230  /* Combine input for spline generator or perform linear interpolation if no viable trajectory given */
231  if (viableTrajectory) {
232  Eigen::MatrixXd interpolationPoints(trajectoryGuess.size() + 2, startVector.size());
233  interpolationPoints.row(0) = startVector;
234  interpolationPoints.row(trajectoryGuess.size() + 1) = endVector;
235  for (int i = 1; i < trajectoryGuess.size() + 1; ++i) {
236  interpolationPoints.row(i) = Eigen::Map<const Eigen::VectorXd>(
237  trajectoryGuess.at(i - 1).data(), trajectoryGuess.at(i - 1).cols() * trajectoryGuess.at(i - 1).rows());
238  }
239  Utils::BSplines::FixedEndsPenalizedLeastSquaresGenerator generator(interpolationPoints,
240  numberControlPointsForInterpolation_);
241  spline = generator.generateBSpline();
242  }
243  else {
244  spline = Utils::BSplines::LinearInterpolator::generate(startVector, endVector, numberControlPointsForInterpolation_);
245  }
246  }
247  auto molecularSpline = Utils::BSplines::MolecularSpline{start.getElements(), std::move(spline)};
248  auto profile = ElementaryStepOptimization::ReactionProfile{std::move(molecularSpline)};
249 
250  return profile;
251  }
252 
253  std::pair<ElementaryStepOptimization::ReactionProfile, bool>
254  optimizeElementaryStep(ElementaryStepOptimization::ReactionProfile interpolatedProfile,
255  std::shared_ptr<Core::Calculator> calculator,
256  Utils::UniversalSettings::ValueCollection& taskSettings) const {
257  auto cout = _logger->output;
258  // Read and delete special settings
259  bool stopOnError = stopOnErrorExtraction(taskSettings);
260  std::shared_ptr<ElementaryStepOptimization::ElementaryStepOptimizerBase> optimizer;
261  if (optimizer_ == "steepestdescent" || optimizer_ == "sd") {
262  auto tmp = std::make_shared<ElementaryStepOptimization::ElementaryStepOptimizer<Utils::SteepestDescent>>(
263  *calculator, std::move(interpolatedProfile));
264  // The original ReaDuct implementation's settings converted into the new form.
265  tmp->check.requirement = 4;
266  tmp->check.gradRMS = 1.0e-3;
267  tmp->check.stepMaxCoeff = 1.0e+10;
268  tmp->check.stepRMS = 1.0e+10;
269  tmp->check.gradMaxCoeff = 1.0e+10;
270  tmp->check.deltaValue = 1.0e+10;
271  optimizer = std::move(tmp);
272  }
273  else if (optimizer_ == "lbfgs") {
274  auto tmp = std::make_shared<ElementaryStepOptimization::ElementaryStepOptimizer<Utils::Lbfgs>>(
275  *calculator, std::move(interpolatedProfile));
276  tmp->optimizer.linesearch = false;
277  // The original ReaDuct implementation's settings converted into the new form.
278  tmp->check.requirement = 4;
279  tmp->check.gradRMS = 1.0e-3;
280  tmp->check.stepMaxCoeff = 1.0e+10;
281  tmp->check.stepRMS = 1.0e+10;
282  tmp->check.gradMaxCoeff = 1.0e+10;
283  tmp->check.deltaValue = 1.0e+10;
284  optimizer = std::move(tmp);
285  }
286  else {
287  throw std::runtime_error("Optimizer not supported.");
288  }
289 
290  // Apply user settings
291  auto settings = optimizer->getSettings();
292  settings.merge(taskSettings);
293  if (!settings.valid()) {
294  settings.throwIncorrectSettings();
295  }
296  optimizer->setSettings(settings);
297  if (!Utils::GeometryOptimization::settingsMakeSense(*optimizer)) {
298  throw std::logic_error("The given calculator settings are too inaccurate for the given convergence criteria of "
299  "this optimization Task");
300  }
301 
302  const int cycles = optimizer->optimize(*_logger);
303  const int maxiter = settings.getInt("convergence_max_iterations");
304 
305  bool converged = cycles < maxiter;
306  if (converged) {
307  cout << Core::Log::endl
308  << " Converged after " << cycles << " iterations." << Core::Log::endl
309  << Core::Log::endl;
310  }
311  else {
312  cout << Core::Log::endl
313  << " Stopped after " << maxiter << " iterations." << Core::Log::endl
314  << Core::Log::endl;
315  if (stopOnError) {
316  throw std::runtime_error("Problem: Path optimization did not converge.");
317  }
318  }
319  const auto& optimizedProfile = optimizer->getReactionProfile();
320 
321  return std::make_pair(optimizedProfile, converged);
322  }
323 
324  std::vector<Utils::AtomCollection> extractTransitionStateGuessStructure(ElementaryStepOptimization::ReactionProfile profile,
325  std::shared_ptr<Core::Calculator> calculator,
326  const boost::filesystem::path& dir) const {
327  auto cout = _logger->output;
328  const auto& molecularSpline = profile.getMolecularSpline();
329 
330  // Generate an empty molecule with the correct element types
331  const Utils::ElementTypeCollection& ec = molecularSpline.getElements();
332  Utils::PositionCollection pc(ec.size(), 3);
333 
334  calculator->setStructure(Utils::AtomCollection(ec, pc));
335  calculator->setRequiredProperties(Utils::Property::Energy);
336 
337  std::vector<double> coordinates;
338  std::vector<double> energies;
339  int maxEnergyIndex;
340  getInitialValues(maxEnergyIndex, coordinates, energies, calculator, profile);
341 
342  while (pointDistance(coordinates, maxEnergyIndex) > coordinateThresholdForMaximumExtraction_) {
343  coordinates = getNewCoordinates(coordinates, maxEnergyIndex);
344  energies = getNewEnergies(energies, coordinates, maxEnergyIndex, calculator, molecularSpline);
345  maxEnergyIndex = getIndexForMaxEnergyAndCheckValidity(energies);
346  }
347 
348  /* Write out tangent */
349  if (!tangentFileName_.empty()) {
350  Utils::BSplines::BSpline spline = molecularSpline.getBSpline();
351  Utils::BSplines::BSpline derivative = spline.getDerivativeBSpline(1);
352  Utils::BSplines::MolecularSpline molecularDerivative(ec, derivative);
353  Utils::PositionCollection derivativePosition = molecularDerivative.getPositions(coordinates.at(maxEnergyIndex));
354  Eigen::Map<Eigen::VectorXd> tangent(derivativePosition.data(), derivativePosition.size());
355  writeTangentToFile(tangent, dir);
356  }
357 
358  std::vector<Utils::AtomCollection> result;
359  result.push_back(molecularSpline.at(coordinates.at(maxEnergyIndex)));
360  if (extractTsGuessNeighbours_) {
361  result.push_back(molecularSpline.at(coordinates.at(maxEnergyIndex - 1)));
362  result.push_back(molecularSpline.at(coordinates.at(maxEnergyIndex + 1)));
363  }
364 
365  return result;
366  }
367 
368  void writeTangentToFile(const Eigen::VectorXd& tangent, const boost::filesystem::path& generalOutputDir) const {
369  // if absolute path given by user, path is directly used
370  // else the relative path is used relative to the general output of the calculation
371  boost::filesystem::path tangentPath(tangentFileName_);
372  if (tangentFileName_.at(0) != '/') {
373  tangentPath = boost::filesystem::absolute(generalOutputDir / tangentFileName_);
374  }
375  // create necessary directories for file if necessary
376  boost::filesystem::path parent = tangentPath.parent_path();
377  if (!parent.empty()) {
378  // does not give error if directories already exist
379  boost::filesystem::detail::create_directories(parent);
380  }
381  std::ofstream fout(tangentPath.string());
382  if (!fout.is_open()) {
383  throw std::runtime_error("Problem when opening/creating file: " + tangentPath.string());
384  }
385  fout.imbue(std::locale("C"));
386  for (int i = 0; i < tangent.size(); ++i) {
387  fout << tangent.row(i) << '\n';
388  }
389  fout.close();
390  };
391 
392  Utils::MolecularTrajectory convertProfileToTrajectory(ElementaryStepOptimization::ReactionProfile profile) const {
393  auto elements = profile.getMolecularSpline().getElements();
394  auto spline = profile.getMolecularSpline().getBSpline();
395  Utils::MolecularTrajectory trajectory = discretizeSpline(elements, spline, numberStructuresForMolecularTrajectory_);
396  return trajectory;
397  }
398 
399  static Utils::MolecularTrajectory discretizeSpline(const Utils::ElementTypeCollection& elements,
400  const Utils::BSplines::BSpline& spline, int numberPoints) {
401  Utils::MolecularTrajectory t;
402  t.setElementTypes(elements);
403  double deltaU = 1.0 / (numberPoints - 1);
404  for (int i = 0; i < numberPoints; ++i) {
405  double u = i * deltaU;
406  Utils::PositionCollection pc =
407  Eigen::Map<const Utils::PositionCollection>(spline.evaluate(u).data(), elements.size(), 3);
408  t.push_back(std::move(pc));
409  }
410  return t;
411  }
412 
413  static void writeTrajectory(const Utils::MolecularTrajectory& trajectory, const std::string& filepath) {
414  std::ofstream ostream(filepath, std::ofstream::out);
416  ostream.close();
417  }
418 
419  static Utils::MolecularTrajectory readTrajectory(const std::string& filepath) {
420  Utils::MolecularTrajectory trajectory;
421  std::filebuf fb;
422  if (fb.open(filepath, std::istream::in)) {
423  std::istream is(&fb);
425  fb.close();
426  }
427  return trajectory;
428  }
429 
430  static void getInitialValues(int& maxEnergyIndex, std::vector<double>& coordinates, std::vector<double>& energies,
431  std::shared_ptr<Core::Calculator> calculator,
432  const ElementaryStepOptimization::ReactionProfile& profile) {
433  const auto& profileEnergies = profile.getProfileEnergies();
434  if (!profile.getProfileEnergies().empty()) {
435  coordinates = profileEnergies.getCoordinates();
436  energies = profileEnergies.getEnergies();
437  maxEnergyIndex = getIndexForMaxEnergyAndCheckValidity(energies);
438  }
439  else {
440  const auto& molecularSpline = profile.getMolecularSpline();
441  coordinates = {0.0, 0.5, 1.0};
442  energies.resize(3);
443 
444  calculator->modifyPositions(molecularSpline.getPositions(coordinates[0]));
445  energies[0] = calculator->calculate("").get<Utils::Property::Energy>();
446 
447  calculator->modifyPositions(molecularSpline.getPositions(coordinates[1]));
448  energies[1] = calculator->calculate("").get<Utils::Property::Energy>();
449 
450  calculator->modifyPositions(molecularSpline.getPositions(coordinates[2]));
451  energies[2] = calculator->calculate("").get<Utils::Property::Energy>();
452 
453  maxEnergyIndex = 1;
454  }
455  }
456 
457  static double pointDistance(const std::vector<double>& coordinates, int maxEnergyIndex) {
458  double diff = coordinates[maxEnergyIndex + 1] - coordinates[maxEnergyIndex - 1];
459  return diff / 2;
460  }
461 
462  static std::vector<double> getNewCoordinates(const std::vector<double>& oldCoordinates, int maxEnergyIndex) {
463  std::vector<double> u(5);
464  u[0] = oldCoordinates[maxEnergyIndex - 1];
465  u[2] = oldCoordinates[maxEnergyIndex];
466  u[4] = oldCoordinates[maxEnergyIndex + 1];
467  u[1] = 0.5 * (u[0] + u[2]);
468  u[3] = 0.5 * (u[2] + u[4]);
469  return u;
470  }
471 
472  static std::vector<double> getNewEnergies(const std::vector<double>& oldEnergies, const std::vector<double>& coordinates,
473  int maxEnergyIndex, std::shared_ptr<Core::Calculator> calculator,
474  const Utils::BSplines::MolecularSpline& spline) {
475  std::vector<double> e(5);
476  e[0] = oldEnergies[maxEnergyIndex - 1];
477  e[2] = oldEnergies[maxEnergyIndex];
478  e[4] = oldEnergies[maxEnergyIndex + 1];
479 
480  calculator->modifyPositions(spline.getPositions(coordinates[1]));
481  e[1] = calculator->calculate("").get<Utils::Property::Energy>();
482 
483  calculator->modifyPositions(spline.getPositions(coordinates[3]));
484  e[3] = calculator->calculate("").get<Utils::Property::Energy>();
485 
486  return e;
487  }
488 
489  static int getIndexForMaxEnergyAndCheckValidity(const std::vector<double>& energies) {
490  auto maxIt = std::max_element(energies.begin(), energies.end());
491  auto dist = std::distance(energies.begin(), maxIt);
492  if (dist == 0 || dist == static_cast<int>(energies.size()) - 1) {
493  throw std::runtime_error("Problem: End point has maximal energy.");
494  }
495  return static_cast<int>(dist);
496  }
497 
498  // Default settings
499  mutable bool alignStructuresBeforeInterpolation_ = true;
500  mutable int numberControlPointsForInterpolation_ = 5;
501  mutable int numberStructuresForMolecularTrajectory_ = 10;
502  mutable bool optimize_ = true;
503  mutable std::string optimizer_ = "lbfgs";
504  mutable bool extractTsGuess_ = false;
505  mutable bool extractTsGuessNeighbours_ = false;
506  mutable std::string tangentFileName_;
507  mutable double coordinateThresholdForMaximumExtraction_ = 1e-3;
508 };
509 
510 } // namespace Readuct
511 } // namespace Scine
512 
513 #endif // READUCT_BSPLINEINTERPOLATIONTASK_H_
void warningIfMultipleOutputsGiven() const
Warn if more than one output system was specified.
Definition: Task.h:99
static std::ostream & endl(std::ostream &os)
static MolecularTrajectory read(format f, const std::string &fileName)
const std::vector< std::string > & input() const
Getter for the expected names of the input systems.
Definition: Task.h:77
static std::ostream & nl(std::ostream &os)
Definition: BSplineInterpolationTask.h:30
static void write(format f, const std::string &fileName, const MolecularTrajectory &m)
void alignPositions(const PositionCollection &reference, PositionCollection &positions)
const std::vector< std::string > & output() const
Getter for the names of the output systems generated by this task.
Definition: Task.h:84
BSplineInterpolationTask(std::vector< std::string > input, std::vector< std::string > output, std::shared_ptr< Core::Log > logger=nullptr)
Construct a new BSplineInterpolationTask.
Definition: BSplineInterpolationTask.h:38
The base class for all tasks in Readuct.
Definition: Task.h:28
std::string name() const override
Getter for the tasks name.
Definition: BSplineInterpolationTask.h:43