Molassembler  3.0.0
Molecule graph and conformer library
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Lbfgs.h
Go to the documentation of this file.
1 
7 #ifndef INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
8 #define INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
9 
11 #include <utility>
12 
13 /* TODO
14  * - Detect ill-conditioning of LBFGS? Doesn't do well at all near maxima
15  */
16 
17 namespace Scine {
18 namespace Molassembler {
19 namespace Temple {
20 namespace Detail {
21 
22 template<typename FloatType> constexpr FloatType cfabs(FloatType x) noexcept {
23  return (x >= 0) ? x : -x;
24 }
25 
26 struct DoNothingObserver {
27  template<typename T>
28  void operator() (T&& /* x */) {}
29 };
30 
31 template<typename FloatType, typename VectorType, typename UpdateFunction>
32 struct ClampFunction {
33  UpdateFunction function;
34  const VectorType& low;
35  const VectorType& high;
36 
37  ClampFunction(
38  UpdateFunction&& fn,
39  const VectorType& lo,
40  const VectorType& hi
41  ) : function(fn), low(lo), high(hi) {}
42 
43  void operator() (
44  const VectorType& parameters,
45  FloatType& value,
46  Eigen::Ref<VectorType> gradient
47  ) {
48  function(
49  parameters.cwiseMax(low).cwiseMin(high),
50  value,
51  gradient
52  );
53  }
54 };
55 
56 template<typename VectorType, typename UpdateFunction>
57 auto clampFunction(
58  UpdateFunction&& fn,
59  const VectorType& lo,
60  const VectorType& hi
61 ) {
62  using FloatType = typename VectorType::Scalar;
63  return ClampFunction<FloatType, VectorType, UpdateFunction>(
64  std::forward<UpdateFunction>(fn),
65  lo,
66  hi
67  );
68 }
69 
70 namespace Boxes {
71 
72 template<typename VectorType>
73 void clampToBounds(Eigen::Ref<VectorType> /* parameters */) {}
74 
75 template<typename VectorType, typename BoxType>
76 void clampToBounds(Eigen::Ref<VectorType> parameters, const BoxType& box) {
77  box.clamp(parameters);
78 }
79 
80 
81 template<typename VectorType>
82 bool validate(const VectorType& /* parameters */) {
83  return true;
84 }
85 
86 template<typename VectorType, typename BoxType>
87 bool validate(const VectorType& parameters, const BoxType& box) {
88  return box.validate(parameters);
89 }
90 
91 template<typename VectorType>
92 void adjustGradient(
93  Eigen::Ref<VectorType> /* gradient */,
94  const VectorType& /* parameters */
95 ) {}
96 
97 template<typename VectorType, typename BoxType>
98 void adjustGradient(
99  Eigen::Ref<VectorType> gradient,
100  const VectorType& parameters,
101  const BoxType& box
102 ) {
103  box.adjustGradient(gradient, parameters);
104 }
105 
106 template<typename VectorType>
107 void adjustDirection(
108  Eigen::Ref<VectorType> /* direction */,
109  const VectorType& /* parameters */,
110  const VectorType& /* gradient */
111 ) {}
112 
113 template<typename VectorType, typename BoxType>
114 void adjustDirection(
115  Eigen::Ref<VectorType> direction,
116  const VectorType& parameters,
117  const VectorType& gradient,
118  const BoxType& box
119 ) {
120  box.adjustDirection(direction, parameters, gradient);
121 }
122 
123 } // namespace Boxes
124 } // namespace Detail
125 
133 template<typename FloatType = double, unsigned ringBufferSize = 16>
134 class Lbfgs {
135 public:
136  constexpr static bool isPowerOfTwo(unsigned x) {
137  return (x & (x - 1)) == 0;
138  }
139 
140  static_assert(
141  isPowerOfTwo(ringBufferSize) && ringBufferSize != 0,
142  "For good performance, the ring buffer size has to be a power of two"
143  );
144 
145  using MatrixType = Eigen::Matrix<FloatType, Eigen::Dynamic, Eigen::Dynamic>;
146  using VectorType = Eigen::Matrix<FloatType, Eigen::Dynamic, 1>;
147 
151  unsigned iterations;
153  FloatType value;
155  VectorType gradient;
156  };
157 
161  struct Box {
163  VectorType minima;
165  VectorType maxima;
166 
167  Box() = default;
168  Box(const VectorType& passMinima, const VectorType& passMaxima)
169  : minima(passMinima),
170  maxima(passMaxima)
171  {
172  assert((minima.array() <= maxima.array()).all());
173  }
174 
176  void clamp(Eigen::Ref<VectorType> x) const {
177  x = x.cwiseMax(minima).cwiseMin(maxima);
178  }
179 
181  bool validate(const VectorType& v) const {
182  return (
183  (minima.array() <= v.array()).all()
184  && (v.array() <= maxima.array()).all()
185  );
186  }
187 
190  Eigen::Ref<VectorType> gradient,
191  const VectorType& parameters
192  ) const {
193  constexpr FloatType gapEpsilon = 1e-8;
194  const Eigen::Index P = parameters.size();
195 
196  for(Eigen::Index i = 0; i < P; ++i) {
197  if(
198  (
199  minima[i] + gapEpsilon >= parameters[i]
200  && gradient[i] > 0
201  ) || (
202  maxima[i] - gapEpsilon <= parameters[i]
203  && gradient[i] < 0
204  )
205  ) {
206  gradient[i] = 0;
207  }
208  }
209  }
210 
213  Eigen::Ref<VectorType> direction,
214  const VectorType& parameters,
215  const VectorType& gradient
216  ) const {
217  constexpr FloatType gapEpsilon = 1e-8;
218 
219  const Eigen::Index P = parameters.size();
220  for(Eigen::Index i = 0; i < P; ++i) {
221  if(
222  minima[i] + gapEpsilon >= parameters[i]
223  && gradient[i] > 0
224  ) {
225  direction[i] = minima[i] - parameters[i];
226  } else if(
227  maxima[i] - gapEpsilon <= parameters[i]
228  && gradient[i] < 0
229  ) {
230  direction[i] = maxima[i] - parameters[i];
231  }
232  }
233  }
234  };
235 
239  struct StepValues {
243 
254  template<typename UpdateFunction, typename ... Boxes>
256  UpdateFunction&& function,
257  Eigen::Ref<VectorType> initialParameters,
258  const FloatType multiplier,
259  const Boxes& ... boxes
260  ) {
261  const unsigned P = initialParameters.size();
262  parameters.current = std::move(initialParameters);
263  gradients.current.resize(P);
264  gradients.proposed.resize(P);
265 
266  // Initialize current values and gradients
267  function(parameters.current, values.current, gradients.current);
268  Detail::Boxes::adjustGradient<VectorType>(gradients.current, parameters.current, boxes ...);
269 
270  // Initialize new with a small steepest descent step
271  VectorType direction;
272  const FloatType gradientNorm = gradients.current.norm();
273  if(gradientNorm > FloatType {1e-4}) {
274  direction = -gradients.current / gradientNorm;
275  } else {
276  direction = FloatType {-1.0} * gradients.current;
277  }
278 
279  prepare(function, multiplier, direction, boxes ...);
280 
281  return direction;
282  }
283 
284  template<typename UpdateFunction, typename ... Boxes>
285  void prepare(
286  UpdateFunction&& function,
287  const FloatType multiplier,
288  const VectorType& direction,
289  const Boxes& ... boxes
290  ) {
291  parameters.proposed.noalias() = parameters.current + multiplier * direction;
292  Detail::Boxes::clampToBounds<VectorType>(parameters.proposed, boxes ...);
293  function(parameters.proposed, values.proposed, gradients.proposed);
294  Detail::Boxes::adjustGradient<VectorType>(gradients.proposed, parameters.proposed, boxes ...);
295  }
296 
306  template<typename UpdateFunction, typename ... Boxes>
307  void propagate(
308  UpdateFunction&& function,
309  const FloatType multiplier,
310  const VectorType& direction,
311  const Boxes& ... boxes
312  ) {
313  parameters.propagate();
314  values.propagate();
315  gradients.propagate();
316 
317  prepare(function, multiplier, direction, boxes ...);
318  }
319  };
320 
321 
339  template<
340  typename UpdateFunction,
341  typename Checker,
342  typename Observer = Detail::DoNothingObserver
344  Eigen::Ref<VectorType> parameters,
345  UpdateFunction&& function,
346  Checker&& check,
347  Observer&& observer = Observer {}
348  ) {
349  return minimizeBase(
350  parameters,
351  std::forward<UpdateFunction>(function),
352  std::forward<Checker>(check),
353  std::forward<Observer>(observer)
354  );
355  }
356 
374  template<
375  typename UpdateFunction,
376  typename Checker,
377  typename Observer = Detail::DoNothingObserver
379  Eigen::Ref<VectorType> parameters,
380  UpdateFunction&& function,
381  Checker&& check,
382  Observer&& observer = Observer {}
383  ) {
384  OptimizationReturnType results = minimize(
385  parameters,
386  Optimization::negateFunction<VectorType>(
387  std::forward<UpdateFunction>(function)
388  ),
389  std::forward<Checker>(check),
390  std::forward<Observer>(observer)
391  );
392 
393  // Revert sign of negated function minimization
394  results.value *= -1;
395  results.gradient *= -1;
396 
397  return results;
398  }
399 
420  template<
421  typename UpdateFunction,
422  typename Checker,
423  typename Observer = Detail::DoNothingObserver
425  Eigen::Ref<VectorType> parameters,
426  const Box& box,
427  UpdateFunction&& function,
428  Checker&& check,
429  Observer&& observer = Observer {}
430  ) {
431  return minimizeBase(
432  parameters,
433  std::forward<UpdateFunction>(function),
434  std::forward<Checker>(check),
435  std::forward<Observer>(observer),
436  box
437  );
438  }
439 
458  template<
459  typename UpdateFunction,
460  typename Checker,
461  typename Observer = Detail::DoNothingObserver
463  Eigen::Ref<VectorType> parameters,
464  const Box& box,
465  UpdateFunction&& function,
466  Checker&& check,
467  Observer&& observer = Observer {}
468  ) {
469  OptimizationReturnType results = minimize(
470  parameters,
471  box,
472  Optimization::negateFunction<VectorType>(
473  std::forward<UpdateFunction>(function)
474  ),
475  std::forward<Checker>(check),
476  std::forward<Observer>(observer)
477  );
478 
479  // Revert sign of negated function minimization
480  results.value *= -1;
481  results.gradient *= -1;
482 
483  return results;
484  }
485 
489  FloatType c1 = 0.0001;
495  FloatType c2 = 0.9;
501  FloatType stepLength = 1.0;
502 
503 private:
509  Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize> y;
511  Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize> s;
513  Eigen::Matrix<FloatType, ringBufferSize, 1> sDotY;
514  unsigned count = 0, offset = 0;
515 
516  CollectiveRingBuffer(const unsigned nParams)
517  : y(nParams, ringBufferSize),
518  s(nParams, ringBufferSize)
519  {}
520 
521  unsigned newestOffset() const {
522  return (count + offset - 1) % ringBufferSize;
523  }
524 
525  unsigned oldestOffset() const {
526  return (count + offset) % ringBufferSize;
527  }
528 
536  template<typename UnaryFn>
537  void newestToOldest(UnaryFn&& fn) const {
538  assert(count > 0);
539  const int newest = newestOffset();
540  for(int i = newest; i > - 1; --i) {
541  fn(i);
542  }
543 
544  const int countOrSizeLimit = std::min(count - 1, ringBufferSize - 1);
545  for(int i = countOrSizeLimit; i > newest; --i) {
546  fn(i);
547  }
548  }
549 
557  template<typename UnaryFn>
558  void oldestToNewest(UnaryFn&& fn) const {
559  assert(count > 0);
560  const unsigned oldest = oldestOffset();
561  for(unsigned i = oldest; i < count; ++i) {
562  fn(i);
563  }
564 
565  for(unsigned i = 0; i < oldest; ++i) {
566  fn(i);
567  }
568  }
569 
570  bool addInformation(
571  const Optimization::EigenUpdateBuffer<VectorType>& parameterBuffer,
572  const Optimization::EigenUpdateBuffer<VectorType>& gradientBuffer
573  ) {
574  bool dotProductNotZero;
575  if(count < ringBufferSize) {
576  y.col(count).noalias() = gradientBuffer.proposed - gradientBuffer.current;
577  s.col(count).noalias() = parameterBuffer.proposed - parameterBuffer.current;
578  sDotY(count) = s.col(count).dot(y.col(count));
579  dotProductNotZero = (sDotY(count) != 0);
580  ++count;
581  } else {
582  // Rotate columns without copying (introduce modulo offset)
583  const unsigned columnOffset = (count + offset) % ringBufferSize;
584  y.col(columnOffset).noalias() = gradientBuffer.proposed - gradientBuffer.current;
585  s.col(columnOffset).noalias() = parameterBuffer.proposed - parameterBuffer.current;
586  sDotY(columnOffset) = s.col(columnOffset).dot(y.col(columnOffset));
587  dotProductNotZero = (sDotY(columnOffset) != 0);
588  offset = (offset + 1) % ringBufferSize;
589  }
590 
591  return dotProductNotZero;
592  }
593 
602  Eigen::Ref<VectorType> q,
603  const VectorType& proposedGradient
604  ) const {
605  /* Note: Naming follows Numerical Optimization (2006)'s naming scheme,
606  * p.178.
607  */
608  const unsigned kMinusOne = newestOffset();
609 
610  q = -proposedGradient;
611  VectorType alpha(count);
612 
614  [&](const unsigned i) {
615  alpha[i] = s.col(i).dot(q) / sDotY(i);
616  q.noalias() -= alpha[i] * y.col(i);
617  }
618  );
619 
620  /* gamma_k = s_{k-1}^T y_{k-1} / (y_{k-1}^T y_{k-1})
621  * = s_{k-1}.dot(y_{k-1}) / y_{k-1}.squaredNorm()
622  *
623  * H_k^0 = y_k * I (where I is the identity matrix for n dimensions)
624  * r = H_k^0 * q (where q is the proposed direction)
625  *
626  * q is not needed again later in the algorithm, so instead of defining
627  * a new vector r we just reuse q.
628  *
629  * -> q *= gamma_k;
630  */
631  q *= sDotY(kMinusOne) / y.col(kMinusOne).squaredNorm();
632 
634  [&](const unsigned i) {
635  const FloatType beta = y.col(i).dot(q) / sDotY(i);
636  q.noalias() += (alpha[i] - beta) * s.col(i);
637  }
638  );
639  }
640 
641  template<typename ... Boxes>
642  bool updateAndGenerateNewDirection(
643  Eigen::Ref<VectorType> direction,
644  const StepValues& step,
645  const Boxes& ... boxes
646  ) {
647  /* L-BFGS update: Use current parameters and gradients to improve steepest
648  * descent direction
649  */
650  if(!addInformation(step.parameters, step.gradients)) {
651  return false;
652  }
653 
654  generateNewDirection(direction, step.gradients.proposed);
655  Detail::Boxes::adjustDirection(
656  direction,
657  step.parameters.proposed,
658  step.gradients.proposed,
659  boxes ...
660  );
661 
662  return true;
663  }
664  };
665 
683  template<
684  typename UpdateFunction,
685  typename Observer,
686  typename ... Boxes
688  UpdateFunction&& function,
689  Observer&& observer,
690  StepValues& step,
691  const VectorType& direction,
692  const Boxes& ... boxes
693  ) {
694  unsigned iterations = 0;
695 
696  /* Numerical optimization p. 178, paraphrased: Generated search direction
697  * is typically well scaled, so that step length = 1 should be accepted in
698  * most iterations.
699  *
700  * Therefore, we only allow buildup of lengthening of the step length, but
701  * not shortening.
702  */
703  if(stepLength < 1) {
704  stepLength = 1;
705  }
706 
707  for(iterations = 0; stepLength > 0 && iterations < 100; ++iterations) {
708  const FloatType currentGradDotDirection = step.gradients.current.dot(direction);
709 
710  /* Armijo rule (Wolfe condition I) */
711  bool armijoRule = (
712  step.values.proposed
713  <= step.values.current + c1 * stepLength * currentGradDotDirection
714  );
715 
716  /* Curvature condition (Wolfe condition II) */
717  bool curvatureCondition = (
718  step.gradients.proposed.dot(direction)
719  >= c2 * currentGradDotDirection
720  );
721 
722  /* Backtracking condition */
723  bool backtrack = (step.values.proposed >= step.values.current);
724 
725  // Decide whether to shorten or extend the step length
726  constexpr FloatType shortenFactor = 0.5;
727  constexpr FloatType lengthenFactor = 1.5;
728  static_assert(
729  Detail::cfabs(shortenFactor * lengthenFactor - 1.0) > 0.1,
730  "Shortening and lengthening factor should not multiply to give "
731  "approximately one, this can cause oscillation"
732  );
733 
734  if(!armijoRule) {
735  stepLength *= shortenFactor;
736  } else if(!curvatureCondition) {
737  stepLength *= lengthenFactor;
738  }
739 
740  /* If we don't have to backtrack, then don't re-evaluate along this
741  * direction, but accept the step and perform the next evaluation at
742  * the next position. We have improved the function value with the step
743  * and possibly adjusted the step length using the Wolfe conditions,
744  * that's enough.
745  */
746  if(backtrack && (!armijoRule || !curvatureCondition)) {
747  // Retry shortened/lengthened step (no propagation needed)
748  step.prepare(function, stepLength, direction, boxes ...);
749  observer(step.parameters.proposed);
750 
751  // Handle encountered parameter boundaries
752  if(
753  sizeof...(boxes) > 0
754  && (step.parameters.proposed - step.parameters.current).squaredNorm() < FloatType {1e-8}
755  ) {
756  break;
757  }
758  } else {
759  break;
760  }
761  }
762 
763  if(stepLength == 0) {
764  throw std::logic_error("Could not find a step length that reduces objective function value.");
765  }
766 
767  return iterations;
768  }
769 
770  template<
771  typename UpdateFunction,
772  typename Checker,
773  typename Observer,
774  typename ... Boxes
775  > OptimizationReturnType minimizeBase(
776  Eigen::Ref<VectorType> parameters,
777  UpdateFunction&& function,
778  Checker&& check,
779  Observer&& observer,
780  Boxes&& ... boxes
781  ) {
782  // If there is a box, make sure the parameters are valid
783  assert(Detail::Boxes::validate(parameters, boxes ...));
784 
785  // Set up a first small conjugate gradient step
786  StepValues step;
787  VectorType direction = step.generateInitialDirection(function, parameters, stepLength, boxes ...);
788  observer(step.parameters.proposed);
789 
790  /* Set up ring buffer to keep changes in gradient and parameters to
791  * approximate the inverse Hessian with
792  */
793  CollectiveRingBuffer ringBuffer {static_cast<unsigned>(parameters.size())};
794 
795  // Begin optimization loop
796  unsigned iteration = 1;
797  for(
798  ;
799  check.shouldContinue(iteration, std::as_const(step));
800  ++iteration
801  ) {
802  // Line search the chosen direction
803  iteration += adjustStepAlongDirection(function, observer, step, direction, boxes ...);
804 
805  /* Add more information to approximate the inverse Hessian and use it to
806  * generate a new direction.
807  */
808  if(!ringBuffer.updateAndGenerateNewDirection(direction, step, boxes ...)) {
809  break;
810  }
811 
812  /* Accept the proposed step and prepare a new prospective step using the
813  * updated direction vector
814  */
815  step.propagate(function, stepLength, direction, boxes ...);
816  observer(step.parameters.proposed);
817  }
818 
819  // Copy the optimal parameters back into the in/out argument
820  parameters = std::move(step.parameters.current);
821  return {
822  iteration,
823  step.values.current,
824  std::move(step.gradients.current)
825  };
826  }
827 };
828 
829 } // namespace Temple
830 } // namespace Molassembler
831 } // namespace Scine
832 
833 #endif
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function within bounds.
Definition: Lbfgs.h:462
FloatType stepLength
The initial step length used in the L-BFGS.
Definition: Lbfgs.h:501
FloatType c2
2nd parameter for the Wolfe conditions.
Definition: Lbfgs.h:495
unsigned iterations
Number of iterations.
Definition: Lbfgs.h:151
void oldestToNewest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from oldest to newest entries...
Definition: Lbfgs.h:558
VectorType generateInitialDirection(UpdateFunction &&function, Eigen::Ref< VectorType > initialParameters, const FloatType multiplier, const Boxes &...boxes)
Initialize class state and generate first direction vector.
Definition: Lbfgs.h:255
Type returned from an optimization.
Definition: Lbfgs.h:149
LBFGS optimizer with optional boxing.
Definition: Lbfgs.h:134
VectorType gradient
Final gradient.
Definition: Lbfgs.h:155
void generateNewDirection(Eigen::Ref< VectorType > q, const VectorType &proposedGradient) const
Use stored gradient and parameter differences to generate a new direction.
Definition: Lbfgs.h:601
Eigen::Matrix< FloatType, ringBufferSize, 1 > sDotY
Ring buffered result of s_k.dot(y_k)
Definition: Lbfgs.h:513
void adjustDirection(Eigen::Ref< VectorType > direction, const VectorType &parameters, const VectorType &gradient) const
Adjust a direction vector to avoid exceeding the box.
Definition: Lbfgs.h:212
void clamp(Eigen::Ref< VectorType > x) const
Clamp parameter values to the bounds.
Definition: Lbfgs.h:176
Class carrying proposed parameter updates in an optimization.
Definition: Lbfgs.h:239
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function within bounds.
Definition: Lbfgs.h:424
void adjustGradient(Eigen::Ref< VectorType > gradient, const VectorType &parameters) const
Zero out those elements of a gradient which are bounded.
Definition: Lbfgs.h:189
VectorType maxima
Upper bounds for parameters.
Definition: Lbfgs.h:165
FloatType value
Final function value.
Definition: Lbfgs.h:153
bool validate(const VectorType &v) const
Test whether parameters are within the bounds.
Definition: Lbfgs.h:181
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > y
Ring buffer containing gradient differences: y_k = g_{k+1} - g_k.
Definition: Lbfgs.h:509
Data types common to optimizers.
FloatType c1
1st parameter for the Wolfe conditions.
Definition: Lbfgs.h:489
Type storing boundaries for parameter values.
Definition: Lbfgs.h:161
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > s
Ring buffer containing parameter differnces: s_k = x_{k+1} - x_k.
Definition: Lbfgs.h:511
Class containing hessian information for LBFGS update.
Definition: Lbfgs.h:507
unsigned adjustStepAlongDirection(UpdateFunction &&function, Observer &&observer, StepValues &step, const VectorType &direction, const Boxes &...boxes)
Adjusts stepLength member and ensures the proposed parameter adjustment lowers the function value...
Definition: Lbfgs.h:687
void newestToOldest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from newest to oldest entries...
Definition: Lbfgs.h:537
VectorType minima
Lower bounds for parameters.
Definition: Lbfgs.h:163
void propagate(UpdateFunction &&function, const FloatType multiplier, const VectorType &direction, const Boxes &...boxes)
Accept a step, create new parameters and evaluate the function to get value and gradients.
Definition: Lbfgs.h:307
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function.
Definition: Lbfgs.h:378
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function.
Definition: Lbfgs.h:343