Molassembler  1.0.0
Molecule graph and conformer library
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
TrustRegion.h
Go to the documentation of this file.
1 
8 #ifndef INCLUDE_TEMPLE_OPTIMIZATION_TRUST_REGION_NEWTON_H
9 #define INCLUDE_TEMPLE_OPTIMIZATION_TRUST_REGION_NEWTON_H
10 
14 #include "boost/math/tools/roots.hpp"
15 #include <Eigen/Eigenvalues>
16 
17 #include <iostream>
18 
19 namespace Scine {
20 namespace Molassembler {
21 namespace Temple {
22 namespace Detail {
23 
24 template<typename Derived>
25 auto resolve(const Eigen::MatrixBase<Derived>& matrix) -> typename Eigen::MatrixBase<Derived>::Scalar {
26  assert(matrix.rows() == 1 && matrix.cols() == 1);
27  return matrix(0, 0);
28 }
29 
30 template<typename FloatType>
31 struct TROFloatingPointTolerances {};
32 
33 template<>
34 struct TROFloatingPointTolerances<double> {
35  static constexpr unsigned rootBitAccuracy = 20;
36 };
37 
38 template<>
39 struct TROFloatingPointTolerances<float> {
40  static constexpr unsigned rootBitAccuracy = 10;
41 };
42 
43 } // namespace Detail
44 
50 template<typename FloatType = double>
52  using MatrixType = Eigen::Matrix<FloatType, Eigen::Dynamic, Eigen::Dynamic>;
53  using VectorType = Eigen::Matrix<FloatType, Eigen::Dynamic, 1>;
54 
58  unsigned iterations;
60  FloatType value;
62  VectorType gradient;
63  };
64 
65  template<
66  typename UpdateFunction,
67  typename Checker
68  > static OptimizationReturnType minimize(
69  Eigen::Ref<VectorType> parameters,
70  UpdateFunction&& function,
71  Checker&& check
72  ) {
73  // This is chosen arbitrarily
74  constexpr FloatType trustRadiusLimit = 4.0;
75  static_assert(trustRadiusLimit > 0, "Trust radius is not positive!");
76 
77  // This is a fixed constant of the algorithm
78  constexpr FloatType agreementContractionUpperBound = 0.25;
79  constexpr FloatType agreementExpansionLowerBound = 0.75;
80 
81  // This is chosen arbitrarily within [0, agreementContractionUpperBound)
82  constexpr FloatType agreementEta = 0.125;
83  static_assert(
84  0.0 <= agreementEta && agreementEta < 0.25,
85  "Eta not within bounds"
86  );
87 
88  // Loop variables initialization
89  // Choose initial trust radius within (0, trustRadiusLimit)
90  FloatType trustRadius = 1.0; // Δ
91  FloatType modelAgreement = 0; // ρ
92  unsigned iterations = 0;
93  StepValues step;
94  step.initialize(parameters, function, trustRadius);
95 
96  for(
97  ;
98  check.shouldContinue(
99  iterations,
100  Stl17::as_const(step.values.current),
101  Stl17::as_const(step.gradients.current)
102  );
103  ++iterations
104  ) {
105  modelAgreement = step.modelAgreement();
106 
107  /* Adjust trust radius if necessary */
108  if(modelAgreement < agreementContractionUpperBound) {
109  // Contract the trust radius
110  trustRadius = agreementContractionUpperBound * trustRadius;
111  } else if(
112  modelAgreement > agreementExpansionLowerBound
113  && std::fabs(step.direction.norm() - trustRadius) < 1e-5
114  ) {
115  // Expand the trust radius
116  trustRadius = std::min(2 * trustRadius, trustRadiusLimit);
117  }
118 
119  /* Decide what to do with the step */
120  if(modelAgreement > agreementEta) {
121  // Accept the step and prepare a new one
122  step.accept(function, trustRadius);
123  } else {
124  // Keep the current parameters, generate new prospective parameters
125  step.prepare(function, trustRadius);
126  }
127  }
128 
129  return {
130  iterations,
131  step.values.current,
132  std::move(step.gradients.current)
133  };
134  }
135 
136 private:
144  struct StepValues {
149 
150  VectorType direction;
151 
152  template<typename UpdateFunction>
153  void initialize(
154  Eigen::Ref<VectorType> initialParameters,
155  UpdateFunction&& function,
156  const FloatType trustRadius
157  ) {
158  parameters.current = std::move(initialParameters);
159  const unsigned P = parameters.current.size();
160  parameters.proposed.resize(P);
161  gradients.current.resize(P);
162  gradients.proposed.resize(P);
163  hessians.current.resize(P, P);
164  hessians.proposed.resize(P, P);
165 
166  function(parameters.current, values.current, gradients.current, hessians.current);
167  prepare(function, trustRadius);
168  }
169 
183  VectorType subspaceMinimize(
184  const FloatType trustRadius,
185  const VectorType& g,
186  const MatrixType& B
187  ) const {
188  using Vector2Type = Eigen::Matrix<FloatType, 2, 1>;
189  using Matrix2Type = Eigen::Matrix<FloatType, 2, 2>;
190 
191  assert(positiveDefinite(B));
192  const MatrixType Binverse = B.inverse();
193 
194  const Vector2Type gTilde {g.squaredNorm(), g.dot(Binverse * g)};
195  Matrix2Type BTilde;
196  BTilde << g.dot(B * g), gTilde(0),
197  gTilde(0), gTilde(1);
198 
199  const Vector2Type uStar = - BTilde.inverse() * gTilde;
200 
201  Matrix2Type BBar;
202  BBar << gTilde(0), gTilde(1),
203  gTilde(1), (Binverse * g).squaredNorm();
204  BBar *= 2;
205 
206  const FloatType J = FloatType {0.5} * uStar.transpose() * BBar * uStar;
207  const FloatType trustRadiusSquare = trustRadius * trustRadius;
208 
209  if(J <= trustRadiusSquare) {
210  return uStar(0) * g + uStar(1) * Binverse * g;
211  }
212 
213  // Now we have to find lambda
214  boost::uintmax_t iterations = 1000;
215  auto root_result = boost::math::tools::toms748_solve(
216  [&](const FloatType lambda) -> FloatType {
217  const Vector2Type intermediate = (BTilde + lambda * BBar).inverse() * gTilde;
218  return FloatType {0.5} * intermediate.transpose() * BBar * intermediate - trustRadiusSquare;
219  },
220  std::nextafter(FloatType {0.0}, FloatType {1.0}),
221  FloatType {100.0},
222  boost::math::tools::eps_tolerance<FloatType> {
223  Detail::TROFloatingPointTolerances<FloatType>::rootBitAccuracy
224  },
225  iterations
226  );
227 
228  if(iterations >= 1000) {
229  throw std::logic_error("Could not find lambda to satisfy equation");
230  }
231 
232  const FloatType lambda = (root_result.first + root_result.second) / 2;
233 
234  // Calculate uMin and calculate p from it
235  const Vector2Type uMin = -(BTilde + lambda * BBar).inverse() * gTilde;
236  return uMin(0) * g + uMin(1) * Binverse * g;
237  }
238 
239  VectorType determineDirection(const FloatType trustRadius) const {
240  if(positiveDefinite(hessians.current)) {
241  return subspaceMinimize(trustRadius, gradients.current, hessians.current);
242  }
243 
244  /* Now there are several possible cases for what the hessian is currently
245  * like, but we'll need the eigenvalues to determine what to do.
246  *
247  * The hessian is a symmetric real matrix, so it is also self-adjoint,
248  * which we exploit for faster eigenvalue calculation:
249  */
250  VectorType eigenvalues = hessians.current.template selfadjointView<Eigen::Lower>().eigenvalues();
251 
252  /* First case: there are negative eigenvalues. In this case we adjust
253  * the hessian with a unitary matrix to become positive definite:
254  */
255  if((eigenvalues.array() < FloatType {0.0}).any()) {
256  // Eigenvalues are ordered ascending
257  const FloatType mostNegativeEigenvalue = eigenvalues(0);
258  /* Nocedal & Wright say to choose alpha between (-λ,-2λ], so we
259  * go right in the middle because we don't know anything about the
260  * significance of the choice.
261  */
262  const FloatType alpha = FloatType {-1.5} * mostNegativeEigenvalue;
263  const unsigned P = parameters.current.size();
264  const Eigen::MatrixXd modifiedHessian = hessians.current + alpha * Eigen::MatrixXd::Identity(P, P);
265 
266  /* Subspace minimize the modified hessian
267  *
268  * NOTE: Nocedal & Wright say to differentiate some things further
269  * here, but I think the subspace minimization procedure itself
270  * differentiates those cases.
271  */
272  return subspaceMinimize(trustRadius, gradients.current, modifiedHessian);
273  }
274 
275  /* Remaining case: There are no negative eigenvalues, but the hessian is
276  * not positive definite. There must be eigenvalues of value zero. In
277  * this case, we use the cauchy point to make progress:
278  */
279  const FloatType gradientNorm = gradients.current.norm();
280  const FloatType tau = [&]() -> FloatType {
281  const FloatType cauchyIntermediate = gradients.current.transpose() * hessians.current * gradients.current;
282  if(cauchyIntermediate <= 0) {
283  return 1.0;
284  }
285 
286  return std::min(
287  std::pow(gradientNorm, 3) / (trustRadius * cauchyIntermediate),
288  1.0
289  );
290  }();
291 
292  return - (tau * trustRadius / gradientNorm) * gradients.current;
293  }
294 
295  template<typename UpdateFunction>
296  void prepare(UpdateFunction&& function, const FloatType trustRadius) {
297  direction = determineDirection(trustRadius);
298  parameters.proposed.noalias() = parameters.current + direction;
299  function(parameters.proposed, values.proposed, gradients.proposed, hessians.proposed);
300  }
301 
302  FloatType modelAgreement() const {
303  const FloatType predictedValue = (
304  values.current
305  + Detail::resolve(gradients.current.transpose() * direction)
306  + Detail::resolve(FloatType {0.5} * direction.transpose() * hessians.current * direction)
307  );
308 
309  return (
310  (values.current - values.proposed)
311  / (values.current - predictedValue)
312  );
313  }
314 
315  template<typename UpdateFunction>
316  void accept(UpdateFunction&& function, const FloatType trustRadius) {
317  parameters.propagate();
318  values.propagate();
319  gradients.propagate();
320  hessians.propagate();
321 
322  prepare(function, trustRadius);
323  }
324  };
325 };
326 
327 } // namespace Temple
328 } // namespace Molassembler
329 } // namespace Scine
330 
331 #endif
Sylvester&#39;s criterion for positive definiteness and positive semi-definiteness.
VectorType subspaceMinimize(const FloatType trustRadius, const VectorType &g, const MatrixType &B) const
Find the minimizer of the trust region subspace.
Definition: TrustRegion.h:183
unsigned iterations
Number of iterations.
Definition: TrustRegion.h:58
Trust region Newton-Raphson minimizer with 2D subspace minimization.
Definition: TrustRegion.h:51
Type returned from an optimization.
Definition: TrustRegion.h:56
bool positiveDefinite(const Eigen::MatrixBase< Derived > &matrix)
Determine whether a matrix is positive definite.
Definition: SylvestersCriterion.h:26
FloatType value
Final function value.
Definition: TrustRegion.h:60
Data types common to optimizers.
VectorType gradient
Final gradient.
Definition: TrustRegion.h:62
double tau(const std::vector< double > &angles)
Calculates the tau value for four and five angle symmetries.
Definition: TauCriteria.h:63
Algorithms available in the C++17 STL.
Stores all parameter, value, gradient and hessian data of two points.
Definition: TrustRegion.h:144