Molassembler  2.0.0
Molecule graph and conformer library
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Lbfgs.h
Go to the documentation of this file.
1 
7 #ifndef INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
8 #define INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
9 
11 
12 /* TODO
13  * - Detect ill-conditioning of LBFGS? Doesn't do well at all near maxima
14  */
15 
16 namespace Scine {
17 namespace Molassembler {
18 namespace Temple {
19 namespace Detail {
20 
//! Constexpr absolute value (std::fabs is not constexpr in this standard)
template<typename FloatType>
constexpr FloatType cfabs(const FloatType x) noexcept {
  if(x >= FloatType {0}) {
    return x;
  }

  return -x;
}
24 
//! Default observer: accepts any argument and silently discards it
struct DoNothingObserver {
  template<typename T>
  void operator() (T&& /* unused */) {
    // Intentionally empty: used when the caller supplies no observer
  }
};
29 
30 template<typename FloatType, typename VectorType, typename UpdateFunction>
31 struct ClampFunction {
32  UpdateFunction function;
33  const VectorType& low;
34  const VectorType& high;
35 
36  ClampFunction(
37  UpdateFunction&& fn,
38  const VectorType& lo,
39  const VectorType& hi
40  ) : function(fn), low(lo), high(hi) {}
41 
42  void operator() (
43  const VectorType& parameters,
44  FloatType& value,
45  Eigen::Ref<VectorType> gradient
46  ) {
47  function(
48  parameters.cwiseMax(low).cwiseMin(high),
49  value,
50  gradient
51  );
52  }
53 };
54 
55 template<typename VectorType, typename UpdateFunction>
56 auto clampFunction(
57  UpdateFunction&& fn,
58  const VectorType& lo,
59  const VectorType& hi
60 ) {
61  using FloatType = typename VectorType::Scalar;
62  return ClampFunction<FloatType, VectorType, UpdateFunction>(
63  std::forward<UpdateFunction>(fn),
64  lo,
65  hi
66  );
67 }
68 
69 namespace Boxes {
70 
71 template<typename VectorType>
72 void clampToBounds(Eigen::Ref<VectorType> /* parameters */) {}
73 
74 template<typename VectorType, typename BoxType>
75 void clampToBounds(Eigen::Ref<VectorType> parameters, const BoxType& box) {
76  box.clamp(parameters);
77 }
78 
79 
//! @brief Box-free overload: unbounded parameters are always valid
template<typename VectorType>
bool validate(const VectorType& /* parameters */) {
  return true;
}

//! @brief Delegates the bounds check to the box instance
template<typename VectorType, typename BoxType>
bool validate(const VectorType& parameters, const BoxType& box) {
  return box.validate(parameters);
}
89 
90 template<typename VectorType>
91 void adjustGradient(
92  Eigen::Ref<VectorType> /* gradient */,
93  const VectorType& /* parameters */
94 ) {}
95 
96 template<typename VectorType, typename BoxType>
97 void adjustGradient(
98  Eigen::Ref<VectorType> gradient,
99  const VectorType& parameters,
100  const BoxType& box
101 ) {
102  box.adjustGradient(gradient, parameters);
103 }
104 
105 template<typename VectorType>
106 void adjustDirection(
107  Eigen::Ref<VectorType> /* direction */,
108  const VectorType& /* parameters */,
109  const VectorType& /* gradient */
110 ) {}
111 
112 template<typename VectorType, typename BoxType>
113 void adjustDirection(
114  Eigen::Ref<VectorType> direction,
115  const VectorType& parameters,
116  const VectorType& gradient,
117  const BoxType& box
118 ) {
119  box.adjustDirection(direction, parameters, gradient);
120 }
121 
122 } // namespace Boxes
123 } // namespace Detail
124 
// NOTE(review): This is a Doxygen source-listing export. Gaps in the embedded
// line numbers mark doc-comment lines — and in several places *signature*
// lines — that the export elided. Elisions are flagged below; reconstructed
// names are taken from the member index at the bottom of this page.
132 template<typename FloatType = double, unsigned ringBufferSize = 16>
133 class Lbfgs {
134 public:
// Power-of-two check used to make the ring-buffer modulo operations cheap
135  constexpr static bool isPowerOfTwo(unsigned x) {
136  return (x & (x - 1)) == 0;
137  }
138 
139  static_assert(
140  isPowerOfTwo(ringBufferSize) && ringBufferSize != 0,
141  "For good performance, the ring buffer size has to be a power of two"
142  );
143 
144  using MatrixType = Eigen::Matrix<FloatType, Eigen::Dynamic, Eigen::Dynamic>;
145  using VectorType = Eigen::Matrix<FloatType, Eigen::Dynamic, 1>;
146 
// NOTE(review): the 'struct OptimizationReturnType {' opener (original line
// ~148) was elided by the export; the three members below belong to it.
150  unsigned iterations;
152  FloatType value;
154  VectorType gradient;
155  };
156 
// Type storing elementwise lower/upper bounds for the parameters
160  struct Box {
162  VectorType minima;
164  VectorType maxima;
165 
166  Box() = default;
167  Box(const VectorType& passMinima, const VectorType& passMaxima)
168  : minima(passMinima),
169  maxima(passMaxima)
170  {
171  assert((minima.array() <= maxima.array()).all());
172  }
173 
// Clamp parameter values elementwise into [minima, maxima]
175  void clamp(Eigen::Ref<VectorType> x) const {
176  x = x.cwiseMax(minima).cwiseMin(maxima);
177  }
178 
// True iff every parameter lies within its bounds
180  bool validate(const VectorType& v) const {
181  return (
182  (minima.array() <= v.array()).all()
183  && (v.array() <= maxima.array()).all()
184  );
185  }
186 
// NOTE(review): elided signature — per the index this is
// 'void adjustGradient(' (Box member, original line 188): zeroes gradient
// components whose parameter sits at a bound and whose gradient points
// further out of the box.
189  Eigen::Ref<VectorType> gradient,
190  const VectorType& parameters
191  ) const {
192  constexpr FloatType gapEpsilon = 1e-8;
193  const Eigen::Index P = parameters.size();
194 
195  for(Eigen::Index i = 0; i < P; ++i) {
196  if(
197  (
198  minima[i] + gapEpsilon >= parameters[i]
199  && gradient[i] > 0
200  ) || (
201  maxima[i] - gapEpsilon <= parameters[i]
202  && gradient[i] < 0
203  )
204  ) {
205  gradient[i] = 0;
206  }
207  }
208  }
209 
// NOTE(review): elided signature — per the index this is
// 'void adjustDirection(' (Box member, original line 211): caps direction
// components so a step cannot exceed an active bound.
212  Eigen::Ref<VectorType> direction,
213  const VectorType& parameters,
214  const VectorType& gradient
215  ) const {
216  constexpr FloatType gapEpsilon = 1e-8;
217 
218  const Eigen::Index P = parameters.size();
219  for(Eigen::Index i = 0; i < P; ++i) {
220  if(
221  minima[i] + gapEpsilon >= parameters[i]
222  && gradient[i] > 0
223  ) {
224  direction[i] = minima[i] - parameters[i];
225  } else if(
226  maxima[i] - gapEpsilon <= parameters[i]
227  && gradient[i] < 0
228  ) {
229  direction[i] = maxima[i] - parameters[i];
230  }
231  }
232  }
233  };
234 
// Carries current/proposed parameter, value and gradient buffers during the
// optimization (the buffer member declarations around original lines 243-252
// are elided in this export).
238  struct StepValues {
242 
253  template<typename UpdateFunction, typename ... Boxes>
// NOTE(review): elided signature — per the index this is
// 'VectorType generateInitialDirection(' (original line 254): initializes
// state and returns the first (steepest-descent) direction.
255  UpdateFunction&& function,
256  Eigen::Ref<VectorType> initialParameters,
257  const FloatType multiplier,
258  const Boxes& ... boxes
259  ) {
260  const unsigned P = initialParameters.size();
261  parameters.current = std::move(initialParameters);
262  gradients.current.resize(P);
263  gradients.proposed.resize(P);
264 
265  // Initialize current values and gradients
266  function(parameters.current, values.current, gradients.current);
267  Detail::Boxes::adjustGradient<VectorType>(gradients.current, parameters.current, boxes ...);
268 
269  // Initialize new with a small steepest descent step
270  VectorType direction;
271  const FloatType gradientNorm = gradients.current.norm();
272  if(gradientNorm > FloatType {1e-4}) {
273  direction = -gradients.current / gradientNorm;
274  } else {
275  direction = FloatType {-1.0} * gradients.current;
276  }
277 
278  prepare(function, multiplier, direction, boxes ...);
279 
280  return direction;
281  }
282 
283  template<typename UpdateFunction, typename ... Boxes>
// Evaluate function/gradient at current + multiplier * direction (clamped)
284  void prepare(
285  UpdateFunction&& function,
286  const FloatType multiplier,
287  const VectorType& direction,
288  const Boxes& ... boxes
289  ) {
290  parameters.proposed.noalias() = parameters.current + multiplier * direction;
291  Detail::Boxes::clampToBounds<VectorType>(parameters.proposed, boxes ...);
292  function(parameters.proposed, values.proposed, gradients.proposed);
293  Detail::Boxes::adjustGradient<VectorType>(gradients.proposed, parameters.proposed, boxes ...);
294  }
295 
// Accept the proposed step (proposed -> current) and prepare the next one
305  template<typename UpdateFunction, typename ... Boxes>
306  void propagate(
307  UpdateFunction&& function,
308  const FloatType multiplier,
309  const VectorType& direction,
310  const Boxes& ... boxes
311  ) {
312  parameters.propagate();
313  values.propagate();
314  gradients.propagate();
315 
316  prepare(function, multiplier, direction, boxes ...);
317  }
318  };
319 
320 
338  template<
339  typename UpdateFunction,
340  typename Checker,
341  typename Observer = Detail::DoNothingObserver
// NOTE(review): elided — per the index this is
// '> OptimizationReturnType minimize(' (original line 342, unboxed variant).
343  Eigen::Ref<VectorType> parameters,
344  UpdateFunction&& function,
345  Checker&& check,
346  Observer&& observer = Observer {}
347  ) {
348  return minimizeBase(
349  parameters,
350  std::forward<UpdateFunction>(function),
351  std::forward<Checker>(check),
352  std::forward<Observer>(observer)
353  );
354  }
355 
373  template<
374  typename UpdateFunction,
375  typename Checker,
376  typename Observer = Detail::DoNothingObserver
// NOTE(review): elided — per the index this is
// '> OptimizationReturnType maximize(' (original line 377, unboxed variant),
// implemented as minimization of the negated function.
378  Eigen::Ref<VectorType> parameters,
379  UpdateFunction&& function,
380  Checker&& check,
381  Observer&& observer = Observer {}
382  ) {
383  OptimizationReturnType results = minimize(
384  parameters,
385  Optimization::negateFunction<VectorType>(
386  std::forward<UpdateFunction>(function)
387  ),
388  std::forward<Checker>(check),
389  std::forward<Observer>(observer)
390  );
391 
392  // Revert sign of negated function minimization
393  results.value *= -1;
394  results.gradient *= -1;
395 
396  return results;
397  }
398 
419  template<
420  typename UpdateFunction,
421  typename Checker,
422  typename Observer = Detail::DoNothingObserver
// NOTE(review): elided — per the index this is
// '> OptimizationReturnType minimize(' (original line 423, box-bounded).
424  Eigen::Ref<VectorType> parameters,
425  const Box& box,
426  UpdateFunction&& function,
427  Checker&& check,
428  Observer&& observer = Observer {}
429  ) {
430  return minimizeBase(
431  parameters,
432  std::forward<UpdateFunction>(function),
433  std::forward<Checker>(check),
434  std::forward<Observer>(observer),
435  box
436  );
437  }
438 
457  template<
458  typename UpdateFunction,
459  typename Checker,
460  typename Observer = Detail::DoNothingObserver
// NOTE(review): elided — per the index this is
// '> OptimizationReturnType maximize(' (original line 461, box-bounded).
462  Eigen::Ref<VectorType> parameters,
463  const Box& box,
464  UpdateFunction&& function,
465  Checker&& check,
466  Observer&& observer = Observer {}
467  ) {
468  OptimizationReturnType results = minimize(
469  parameters,
470  box,
471  Optimization::negateFunction<VectorType>(
472  std::forward<UpdateFunction>(function)
473  ),
474  std::forward<Checker>(check),
475  std::forward<Observer>(observer)
476  );
477 
478  // Revert sign of negated function minimization
479  results.value *= -1;
480  results.gradient *= -1;
481 
482  return results;
483  }
484 
// Wolfe condition parameters and initial step length (documented in index)
488  FloatType c1 = 0.0001;
494  FloatType c2 = 0.9;
500  FloatType stepLength = 1.0;
501 
502 private:
// NOTE(review): the 'struct CollectiveRingBuffer {' opener (original line
// ~506) was elided; this struct holds the s/y history for the inverse-Hessian
// approximation.
508  Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize> y;
510  Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize> s;
512  Eigen::Matrix<FloatType, ringBufferSize, 1> sDotY;
513  unsigned count = 0, offset = 0;
514 
515  CollectiveRingBuffer(const unsigned nParams)
516  : y(nParams, ringBufferSize),
517  s(nParams, ringBufferSize)
518  {}
519 
// Column index of the most recently stored entry
// NOTE(review): with count == 0 and offset == 0 this wraps; callers assert
// count > 0 before iterating — verify no other call sites exist.
520  unsigned newestOffset() const {
521  return (count + offset - 1) % ringBufferSize;
522  }
523 
524  unsigned oldestOffset() const {
525  return (count + offset) % ringBufferSize;
526  }
527 
// Visit stored entries newest -> oldest (two ranges because of wrap-around)
535  template<typename UnaryFn>
536  void newestToOldest(UnaryFn&& fn) const {
537  assert(count > 0);
538  const int newest = newestOffset();
539  for(int i = newest; i > - 1; --i) {
540  fn(i);
541  }
542 
543  const int countOrSizeLimit = std::min(count - 1, ringBufferSize - 1);
544  for(int i = countOrSizeLimit; i > newest; --i) {
545  fn(i);
546  }
547  }
548 
// Visit stored entries oldest -> newest (two ranges because of wrap-around)
556  template<typename UnaryFn>
557  void oldestToNewest(UnaryFn&& fn) const {
558  assert(count > 0);
559  const unsigned oldest = oldestOffset();
560  for(unsigned i = oldest; i < count; ++i) {
561  fn(i);
562  }
563 
564  for(unsigned i = 0; i < oldest; ++i) {
565  fn(i);
566  }
567  }
568 
// Store s_k / y_k differences; returns false if s.y == 0 (update unusable)
569  bool addInformation(
570  const Optimization::EigenUpdateBuffer<VectorType>& parameterBuffer,
571  const Optimization::EigenUpdateBuffer<VectorType>& gradientBuffer
572  ) {
573  bool dotProductNotZero;
574  if(count < ringBufferSize) {
575  y.col(count).noalias() = gradientBuffer.proposed - gradientBuffer.current;
576  s.col(count).noalias() = parameterBuffer.proposed - parameterBuffer.current;
577  sDotY(count) = s.col(count).dot(y.col(count));
578  dotProductNotZero = (sDotY(count) != 0);
579  ++count;
580  } else {
581  // Rotate columns without copying (introduce modulo offset)
582  const unsigned columnOffset = (count + offset) % ringBufferSize;
583  y.col(columnOffset).noalias() = gradientBuffer.proposed - gradientBuffer.current;
584  s.col(columnOffset).noalias() = parameterBuffer.proposed - parameterBuffer.current;
585  sDotY(columnOffset) = s.col(columnOffset).dot(y.col(columnOffset));
586  dotProductNotZero = (sDotY(columnOffset) != 0);
587  offset = (offset + 1) % ringBufferSize;
588  }
589 
590  return dotProductNotZero;
591  }
592 
// NOTE(review): elided signature — per the index this is
// 'void generateNewDirection(' (original line 600): the L-BFGS two-loop
// recursion producing a new search direction in q.
601  Eigen::Ref<VectorType> q,
602  const VectorType& proposedGradient
603  ) const {
604  /* Note: Naming follows Numerical Optimization (2006)'s naming scheme,
605  * p.178.
606  */
607  const unsigned kMinusOne = newestOffset();
608 
609  q = -proposedGradient;
610  VectorType alpha(count);
611 
// NOTE(review): the call line 'newestToOldest(' (original line 612) is elided
// here — this lambda is the first loop of the two-loop recursion.
613  [&](const unsigned i) {
614  alpha[i] = s.col(i).dot(q) / sDotY(i);
615  q.noalias() -= alpha[i] * y.col(i);
616  }
617  );
618 
619  /* gamma_k = s_{k-1}^T y_{k-1} / (y_{k-1}^T y_{k-1})
620  * = s_{k-1}.dot(y_{k-1}) / y_{k-1}.squaredNorm()
621  *
622  * H_k^0 = gamma_k * I (where I is the identity matrix for n dimensions)
623  * r = H_k^0 * q (where q is the proposed direction)
624  *
625  * q is not needed again later in the algorithm, so instead of defining
626  * a new vector r we just reuse q.
627  *
628  * -> q *= gamma_k;
629  */
630  q *= sDotY(kMinusOne) / y.col(kMinusOne).squaredNorm();
631 
// NOTE(review): the call line 'oldestToNewest(' (original line 632) is elided
// here — this lambda is the second loop of the two-loop recursion.
633  [&](const unsigned i) {
634  const FloatType beta = y.col(i).dot(q) / sDotY(i);
635  q.noalias() += (alpha[i] - beta) * s.col(i);
636  }
637  );
638  }
639 
640  template<typename ... Boxes>
641  bool updateAndGenerateNewDirection(
642  Eigen::Ref<VectorType> direction,
643  const StepValues& step,
644  const Boxes& ... boxes
645  ) {
646  /* L-BFGS update: Use current parameters and gradients to improve steepest
647  * descent direction
648  */
649  if(!addInformation(step.parameters, step.gradients)) {
650  return false;
651  }
652 
653  generateNewDirection(direction, step.gradients.proposed);
654  Detail::Boxes::adjustDirection(
655  direction,
656  step.parameters.proposed,
657  step.gradients.proposed,
658  boxes ...
659  );
660 
661  return true;
662  }
663  };
664 
682  template<
683  typename UpdateFunction,
684  typename Observer,
685  typename ... Boxes
// NOTE(review): elided — per the index this is
// '> unsigned adjustStepAlongDirection(' (original line 686): the line search
// adjusting the stepLength member via Wolfe conditions.
687  UpdateFunction&& function,
688  Observer&& observer,
689  StepValues& step,
690  const VectorType& direction,
691  const Boxes& ... boxes
692  ) {
693  unsigned iterations = 0;
694 
695  /* Numerical optimization p. 178, paraphrased: Generated search direction
696  * is typically well scaled, so that step length = 1 should be accepted in
697  * most iterations.
698  *
699  * Therefore, we only allow buildup of lengthening of the step length, but
700  * not shortening.
701  */
702  if(stepLength < 1) {
703  stepLength = 1;
704  }
705 
706  for(iterations = 0; stepLength > 0 && iterations < 100; ++iterations) {
707  const FloatType currentGradDotDirection = step.gradients.current.dot(direction);
708 
709  /* Armijo rule (Wolfe condition I) */
710  bool armijoRule = (
711  step.values.proposed
712  <= step.values.current + c1 * stepLength * currentGradDotDirection
713  );
714 
715  /* Curvature condition (Wolfe condition II) */
716  bool curvatureCondition = (
717  step.gradients.proposed.dot(direction)
718  >= c2 * currentGradDotDirection
719  );
720 
721  /* Backtracking condition */
722  bool backtrack = (step.values.proposed >= step.values.current);
723 
724  // Decide whether to shorten or extend the step length
725  constexpr FloatType shortenFactor = 0.5;
726  constexpr FloatType lengthenFactor = 1.5;
727  static_assert(
728  Detail::cfabs(shortenFactor * lengthenFactor - 1.0) > 0.1,
729  "Shortening and lengthening factor should not multiply to give "
730  "approximately one, this can cause oscillation"
731  );
732 
733  if(!armijoRule) {
734  stepLength *= shortenFactor;
735  } else if(!curvatureCondition) {
736  stepLength *= lengthenFactor;
737  }
738 
739  /* If we don't have to backtrack, then don't re-evaluate along this
740  * direction, but accept the step and perform the next evaluation at
741  * the next position. We have improved the function value with the step
742  * and possibly adjusted the step length using the Wolfe conditions,
743  * that's enough.
744  */
745  if(backtrack && (!armijoRule || !curvatureCondition)) {
746  // Retry shortened/lengthened step (no propagation needed)
747  step.prepare(function, stepLength, direction, boxes ...);
748  observer(step.parameters.proposed);
749 
750  // Handle encountered parameter boundaries
751  if(
752  sizeof...(boxes) > 0
753  && (step.parameters.proposed - step.parameters.current).squaredNorm() < FloatType {1e-8}
754  ) {
755  break;
756  }
757  } else {
758  break;
759  }
760  }
761 
762  if(stepLength == 0) {
763  throw std::logic_error("Could not find a step length that reduces objective function value.");
764  }
765 
766  return iterations;
767  }
768 
769  template<
770  typename UpdateFunction,
771  typename Checker,
772  typename Observer,
773  typename ... Boxes
774  > OptimizationReturnType minimizeBase(
775  Eigen::Ref<VectorType> parameters,
776  UpdateFunction&& function,
777  Checker&& check,
778  Observer&& observer,
779  Boxes&& ... boxes
780  ) {
781  // If there is a box, make sure the parameters are valid
782  assert(Detail::Boxes::validate(parameters, boxes ...));
783 
784  // Set up a first small conjugate gradient step
785  StepValues step;
786  VectorType direction = step.generateInitialDirection(function, parameters, stepLength, boxes ...);
787  observer(step.parameters.proposed);
788 
789  /* Set up ring buffer to keep changes in gradient and parameters to
790  * approximate the inverse Hessian with
791  */
792  CollectiveRingBuffer ringBuffer {static_cast<unsigned>(parameters.size())};
793 
794  // Begin optimization loop
795  unsigned iteration = 1;
796  for(
797  ;
798  check.shouldContinue(iteration, std::as_const(step));
799  ++iteration
800  ) {
801  // Line search the chosen direction
802  iteration += adjustStepAlongDirection(function, observer, step, direction, boxes ...);
803 
804  /* Add more information to approximate the inverse Hessian and use it to
805  * generate a new direction.
806  */
807  if(!ringBuffer.updateAndGenerateNewDirection(direction, step, boxes ...)) {
808  break;
809  }
810 
811  /* Accept the proposed step and prepare a new prospective step using the
812  * updated direction vector
813  */
814  step.propagate(function, stepLength, direction, boxes ...);
815  observer(step.parameters.proposed);
816  }
817 
818  // Copy the optimal parameters back into the in/out argument
819  parameters = std::move(step.parameters.current);
820  return {
821  iteration,
822  step.values.current,
823  std::move(step.gradients.current)
824  };
825  }
826 };
827 
828 } // namespace Temple
829 } // namespace Molassembler
830 } // namespace Scine
831 
832 #endif
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function within bounds.
Definition: Lbfgs.h:461
FloatType stepLength
The initial step length used in the L-BFGS.
Definition: Lbfgs.h:500
FloatType c2
2nd parameter for the Wolfe conditions.
Definition: Lbfgs.h:494
unsigned iterations
Number of iterations.
Definition: Lbfgs.h:150
void oldestToNewest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from oldest to newest entries...
Definition: Lbfgs.h:557
VectorType generateInitialDirection(UpdateFunction &&function, Eigen::Ref< VectorType > initialParameters, const FloatType multiplier, const Boxes &...boxes)
Initialize class state and generate first direction vector.
Definition: Lbfgs.h:254
Type returned from an optimization.
Definition: Lbfgs.h:148
LBFGS optimizer with optional boxing.
Definition: Lbfgs.h:133
VectorType gradient
Final gradient.
Definition: Lbfgs.h:154
void generateNewDirection(Eigen::Ref< VectorType > q, const VectorType &proposedGradient) const
Use stored gradient and parameter differences to generate a new direction.
Definition: Lbfgs.h:600
Eigen::Matrix< FloatType, ringBufferSize, 1 > sDotY
Ring buffered result of s_k.dot(y_k)
Definition: Lbfgs.h:512
void adjustDirection(Eigen::Ref< VectorType > direction, const VectorType &parameters, const VectorType &gradient) const
Adjust a direction vector to avoid exceeding the box.
Definition: Lbfgs.h:211
void clamp(Eigen::Ref< VectorType > x) const
Clamp parameter values to the bounds.
Definition: Lbfgs.h:175
Class carrying proposed parameter updates in an optimization.
Definition: Lbfgs.h:238
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function within bounds.
Definition: Lbfgs.h:423
void adjustGradient(Eigen::Ref< VectorType > gradient, const VectorType &parameters) const
Zero out those elements of a gradient which are bounded.
Definition: Lbfgs.h:188
VectorType maxima
Upper bounds for parameters.
Definition: Lbfgs.h:164
FloatType value
Final function value.
Definition: Lbfgs.h:152
bool validate(const VectorType &v) const
Test whether parameters are within the bounds.
Definition: Lbfgs.h:180
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > y
Ring buffer containing gradient differences: y_k = g_{k+1} - g_k.
Definition: Lbfgs.h:508
Data types common to optimizers.
FloatType c1
1st parameter for the Wolfe conditions.
Definition: Lbfgs.h:488
Type storing boundaries for parameter values.
Definition: Lbfgs.h:160
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > s
Ring buffer containing parameter differences: s_k = x_{k+1} - x_k.
Definition: Lbfgs.h:510
Class containing hessian information for LBFGS update.
Definition: Lbfgs.h:506
unsigned adjustStepAlongDirection(UpdateFunction &&function, Observer &&observer, StepValues &step, const VectorType &direction, const Boxes &...boxes)
Adjusts stepLength member and ensures the proposed parameter adjustment lowers the function value...
Definition: Lbfgs.h:686
void newestToOldest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from newest to oldest entries...
Definition: Lbfgs.h:536
VectorType minima
Lower bounds for parameters.
Definition: Lbfgs.h:162
void propagate(UpdateFunction &&function, const FloatType multiplier, const VectorType &direction, const Boxes &...boxes)
Accept a step, create new parameters and evaluate the function to get value and gradients.
Definition: Lbfgs.h:306
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function.
Definition: Lbfgs.h:377
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function.
Definition: Lbfgs.h:342