Molassembler  3.0.0
Molecule graph and conformer library
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
TrustRegion.h
Go to the documentation of this file.
1 
8 #ifndef INCLUDE_TEMPLE_OPTIMIZATION_TRUST_REGION_NEWTON_H
9 #define INCLUDE_TEMPLE_OPTIMIZATION_TRUST_REGION_NEWTON_H
10 
13 #include "boost/math/tools/roots.hpp"
14 #include <Eigen/Eigenvalues>
15 
16 #include <iostream>
17 
18 namespace Scine {
19 namespace Molassembler {
20 namespace Temple {
21 namespace Detail {
22 
23 template<typename Derived>
24 auto resolve(const Eigen::MatrixBase<Derived>& matrix) -> typename Eigen::MatrixBase<Derived>::Scalar {
25  assert(matrix.rows() == 1 && matrix.cols() == 1);
26  return matrix(0, 0);
27 }
28 
29 template<typename FloatType>
30 struct TROFloatingPointTolerances {};
31 
32 template<>
33 struct TROFloatingPointTolerances<double> {
34  static constexpr unsigned rootBitAccuracy = 20;
35 };
36 
37 template<>
38 struct TROFloatingPointTolerances<float> {
39  static constexpr unsigned rootBitAccuracy = 10;
40 };
41 
42 } // namespace Detail
43 
49 template<typename FloatType = double>
51  using MatrixType = Eigen::Matrix<FloatType, Eigen::Dynamic, Eigen::Dynamic>;
52  using VectorType = Eigen::Matrix<FloatType, Eigen::Dynamic, 1>;
53 
57  unsigned iterations;
59  FloatType value;
61  VectorType gradient;
62  };
63 
64  template<
65  typename UpdateFunction,
66  typename Checker
67  > static OptimizationReturnType minimize(
68  Eigen::Ref<VectorType> parameters,
69  UpdateFunction&& function,
70  Checker&& check
71  ) {
72  // This is chosen arbitrarily
73  constexpr FloatType trustRadiusLimit = 4.0;
74  static_assert(trustRadiusLimit > 0, "Trust radius is not positive!");
75 
76  // This is a fixed constant of the algorithm
77  constexpr FloatType agreementContractionUpperBound = 0.25;
78  constexpr FloatType agreementExpansionLowerBound = 0.75;
79 
80  // This is chosen arbitrarily within [0, agreementContractionUpperBound)
81  constexpr FloatType agreementEta = 0.125;
82  static_assert(
83  0.0 <= agreementEta && agreementEta < 0.25,
84  "Eta not within bounds"
85  );
86 
87  // Loop variables initialization
88  // Choose initial trust radius within (0, trustRadiusLimit)
89  FloatType trustRadius = 1.0; // Δ
90  FloatType modelAgreement = 0; // ρ
91  unsigned iterations = 0;
92  StepValues step;
93  step.initialize(parameters, function, trustRadius);
94 
95  for(
96  ;
97  check.shouldContinue(
98  iterations,
99  std::as_const(step.values.current),
100  std::as_const(step.gradients.current)
101  );
102  ++iterations
103  ) {
104  modelAgreement = step.modelAgreement();
105 
106  /* Adjust trust radius if necessary */
107  if(modelAgreement < agreementContractionUpperBound) {
108  // Contract the trust radius
109  trustRadius = agreementContractionUpperBound * trustRadius;
110  } else if(
111  modelAgreement > agreementExpansionLowerBound
112  && std::fabs(step.direction.norm() - trustRadius) < 1e-5
113  ) {
114  // Expand the trust radius
115  trustRadius = std::min(2 * trustRadius, trustRadiusLimit);
116  }
117 
118  /* Decide what to do with the step */
119  if(modelAgreement > agreementEta) {
120  // Accept the step and prepare a new one
121  step.accept(function, trustRadius);
122  } else {
123  // Keep the current parameters, generate new prospective parameters
124  step.prepare(function, trustRadius);
125  }
126  }
127 
128  return {
129  iterations,
130  step.values.current,
131  std::move(step.gradients.current)
132  };
133  }
134 
135 private:
143  struct StepValues {
148 
149  VectorType direction;
150 
151  template<typename UpdateFunction>
152  void initialize(
153  Eigen::Ref<VectorType> initialParameters,
154  UpdateFunction&& function,
155  const FloatType trustRadius
156  ) {
157  parameters.current = std::move(initialParameters);
158  const unsigned P = parameters.current.size();
159  parameters.proposed.resize(P);
160  gradients.current.resize(P);
161  gradients.proposed.resize(P);
162  hessians.current.resize(P, P);
163  hessians.proposed.resize(P, P);
164 
165  function(parameters.current, values.current, gradients.current, hessians.current);
166  prepare(function, trustRadius);
167  }
168 
182  VectorType subspaceMinimize(
183  const FloatType trustRadius,
184  const VectorType& g,
185  const MatrixType& B
186  ) const {
187  using Vector2Type = Eigen::Matrix<FloatType, 2, 1>;
188  using Matrix2Type = Eigen::Matrix<FloatType, 2, 2>;
189 
190  assert(positiveDefinite(B));
191  const MatrixType Binverse = B.inverse();
192 
193  const Vector2Type gTilde {g.squaredNorm(), g.dot(Binverse * g)};
194  Matrix2Type BTilde;
195  BTilde << g.dot(B * g), gTilde(0),
196  gTilde(0), gTilde(1);
197 
198  const Vector2Type uStar = - BTilde.inverse() * gTilde;
199 
200  Matrix2Type BBar;
201  BBar << gTilde(0), gTilde(1),
202  gTilde(1), (Binverse * g).squaredNorm();
203  BBar *= 2;
204 
205  const FloatType J = FloatType {0.5} * uStar.transpose() * BBar * uStar;
206  const FloatType trustRadiusSquare = trustRadius * trustRadius;
207 
208  if(J <= trustRadiusSquare) {
209  return uStar(0) * g + uStar(1) * Binverse * g;
210  }
211 
212  // Now we have to find lambda
213  boost::uintmax_t iterations = 1000;
214  auto root_result = boost::math::tools::toms748_solve(
215  [&](const FloatType lambda) -> FloatType {
216  const Vector2Type intermediate = (BTilde + lambda * BBar).inverse() * gTilde;
217  return FloatType {0.5} * intermediate.transpose() * BBar * intermediate - trustRadiusSquare;
218  },
219  std::nextafter(FloatType {0.0}, FloatType {1.0}),
220  FloatType {100.0},
221  boost::math::tools::eps_tolerance<FloatType> {
222  Detail::TROFloatingPointTolerances<FloatType>::rootBitAccuracy
223  },
224  iterations
225  );
226 
227  if(iterations >= 1000) {
228  throw std::logic_error("Could not find lambda to satisfy equation");
229  }
230 
231  const FloatType lambda = (root_result.first + root_result.second) / 2;
232 
233  // Calculate uMin and calculate p from it
234  const Vector2Type uMin = -(BTilde + lambda * BBar).inverse() * gTilde;
235  return uMin(0) * g + uMin(1) * Binverse * g;
236  }
237 
238  VectorType determineDirection(const FloatType trustRadius) const {
239  if(positiveDefinite(hessians.current)) {
240  return subspaceMinimize(trustRadius, gradients.current, hessians.current);
241  }
242 
243  /* Now there are several possible cases for what the hessian is currently
244  * like, but we'll need the eigenvalues to determine what to do.
245  *
246  * The hessian is a symmetric real matrix, so it is also self-adjoint,
247  * which we exploit for faster eigenvalue calculation:
248  */
249  VectorType eigenvalues = hessians.current.template selfadjointView<Eigen::Lower>().eigenvalues();
250 
251  /* First case: there are negative eigenvalues. In this case we adjust
252  * the hessian with a unitary matrix to become positive definite:
253  */
254  if((eigenvalues.array() < FloatType {0.0}).any()) {
255  // Eigenvalues are ordered ascending
256  const FloatType mostNegativeEigenvalue = eigenvalues(0);
257  /* Nocedal & Wright say to choose alpha between (-λ,-2λ], so we
258  * go right in the middle because we don't know anything about the
259  * significance of the choice.
260  */
261  const FloatType alpha = FloatType {-1.5} * mostNegativeEigenvalue;
262  const unsigned P = parameters.current.size();
263  const Eigen::MatrixXd modifiedHessian = hessians.current + alpha * Eigen::MatrixXd::Identity(P, P);
264 
265  /* Subspace minimize the modified hessian
266  *
267  * NOTE: Nocedal & Wright say to differentiate some things further
268  * here, but I think the subspace minimization procedure itself
269  * differentiates those cases.
270  */
271  return subspaceMinimize(trustRadius, gradients.current, modifiedHessian);
272  }
273 
274  /* Remaining case: There are no negative eigenvalues, but the hessian is
275  * not positive definite. There must be eigenvalues of value zero. In
276  * this case, we use the cauchy point to make progress:
277  */
278  const FloatType gradientNorm = gradients.current.norm();
279  const FloatType tau = [&]() -> FloatType {
280  const FloatType cauchyIntermediate = gradients.current.transpose() * hessians.current * gradients.current;
281  if(cauchyIntermediate <= 0) {
282  return 1.0;
283  }
284 
285  return std::min(
286  std::pow(gradientNorm, 3) / (trustRadius * cauchyIntermediate),
287  1.0
288  );
289  }();
290 
291  return - (tau * trustRadius / gradientNorm) * gradients.current;
292  }
293 
294  template<typename UpdateFunction>
295  void prepare(UpdateFunction&& function, const FloatType trustRadius) {
296  direction = determineDirection(trustRadius);
297  parameters.proposed.noalias() = parameters.current + direction;
298  function(parameters.proposed, values.proposed, gradients.proposed, hessians.proposed);
299  }
300 
301  FloatType modelAgreement() const {
302  const FloatType predictedValue = (
303  values.current
304  + Detail::resolve(gradients.current.transpose() * direction)
305  + Detail::resolve(FloatType {0.5} * direction.transpose() * hessians.current * direction)
306  );
307 
308  return (
309  (values.current - values.proposed)
310  / (values.current - predictedValue)
311  );
312  }
313 
314  template<typename UpdateFunction>
315  void accept(UpdateFunction&& function, const FloatType trustRadius) {
316  parameters.propagate();
317  values.propagate();
318  gradients.propagate();
319  hessians.propagate();
320 
321  prepare(function, trustRadius);
322  }
323  };
324 };
325 
326 } // namespace Temple
327 } // namespace Molassembler
328 } // namespace Scine
329 
330 #endif
Sylvester&#39;s criterion for positive definiteness and positive semi-definiteness.
VectorType subspaceMinimize(const FloatType trustRadius, const VectorType &g, const MatrixType &B) const
Find the minimizer of the trust region subspace.
Definition: TrustRegion.h:182
unsigned iterations
Number of iterations.
Definition: TrustRegion.h:57
Trust region Newton-Raphson minimizer with 2D subspace minimization.
Definition: TrustRegion.h:50
Type returned from an optimization.
Definition: TrustRegion.h:55
bool positiveDefinite(const Eigen::MatrixBase< Derived > &matrix)
Determine whether a matrix is positive definite.
Definition: SylvestersCriterion.h:26
FloatType value
Final function value.
Definition: TrustRegion.h:59
Data types common to optimizers.
VectorType gradient
Final gradient.
Definition: TrustRegion.h:61
double tau(const std::vector< double > &angles)
Calculates the tau value for four and five angle symmetries.
Definition: TauCriteria.h:63
Stores all parameter, value, gradient and hessian data of two points.
Definition: TrustRegion.h:143