7 #ifndef INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
8 #define INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
18 namespace Molassembler {
22 template<
typename FloatType> constexpr FloatType cfabs(FloatType x) noexcept {
23 return (x >= 0) ? x : -x;
26 struct DoNothingObserver {
28 void operator() (T&& ) {}
31 template<
typename FloatType,
typename VectorType,
typename UpdateFunction>
32 struct ClampFunction {
33 UpdateFunction
function;
34 const VectorType& low;
35 const VectorType& high;
41 ) : function(fn), low(lo), high(hi) {}
44 const VectorType& parameters,
46 Eigen::Ref<VectorType> gradient
49 parameters.cwiseMax(low).cwiseMin(high),
56 template<
typename VectorType,
typename UpdateFunction>
62 using FloatType =
typename VectorType::Scalar;
63 return ClampFunction<FloatType, VectorType, UpdateFunction>(
64 std::forward<UpdateFunction>(fn),
72 template<
typename VectorType>
73 void clampToBounds(Eigen::Ref<VectorType> ) {}
75 template<
typename VectorType,
typename BoxType>
76 void clampToBounds(Eigen::Ref<VectorType> parameters,
const BoxType& box) {
77 box.clamp(parameters);
81 template<
typename VectorType>
82 bool validate(
const VectorType& ) {
86 template<
typename VectorType,
typename BoxType>
87 bool validate(
const VectorType& parameters,
const BoxType& box) {
88 return box.validate(parameters);
91 template<
typename VectorType>
93 Eigen::Ref<VectorType> ,
97 template<
typename VectorType,
typename BoxType>
99 Eigen::Ref<VectorType> gradient,
100 const VectorType& parameters,
103 box.adjustGradient(gradient, parameters);
106 template<
typename VectorType>
107 void adjustDirection(
108 Eigen::Ref<VectorType> ,
113 template<
typename VectorType,
typename BoxType>
114 void adjustDirection(
115 Eigen::Ref<VectorType> direction,
116 const VectorType& parameters,
117 const VectorType& gradient,
120 box.adjustDirection(direction, parameters, gradient);
133 template<
typename FloatType =
double,
unsigned ringBufferSize = 16>
136 constexpr
static bool isPowerOfTwo(
unsigned x) {
137 return (x & (x - 1)) == 0;
141 isPowerOfTwo(ringBufferSize) && ringBufferSize != 0,
142 "For good performance, the ring buffer size has to be a power of two"
145 using MatrixType = Eigen::Matrix<FloatType, Eigen::Dynamic, Eigen::Dynamic>;
146 using VectorType = Eigen::Matrix<FloatType, Eigen::Dynamic, 1>;
168 Box(
const VectorType& passMinima,
const VectorType& passMaxima)
176 void clamp(Eigen::Ref<VectorType> x)
const {
183 (
minima.array() <= v.array()).all()
184 && (v.array() <=
maxima.array()).all()
190 Eigen::Ref<VectorType> gradient,
191 const VectorType& parameters
193 constexpr FloatType gapEpsilon = 1e-8;
194 const Eigen::Index P = parameters.size();
196 for(Eigen::Index i = 0; i < P; ++i) {
199 minima[i] + gapEpsilon >= parameters[i]
202 maxima[i] - gapEpsilon <= parameters[i]
213 Eigen::Ref<VectorType> direction,
214 const VectorType& parameters,
215 const VectorType& gradient
217 constexpr FloatType gapEpsilon = 1e-8;
219 const Eigen::Index P = parameters.size();
220 for(Eigen::Index i = 0; i < P; ++i) {
222 minima[i] + gapEpsilon >= parameters[i]
225 direction[i] =
minima[i] - parameters[i];
227 maxima[i] - gapEpsilon <= parameters[i]
230 direction[i] =
maxima[i] - parameters[i];
254 template<
typename UpdateFunction,
typename ... Boxes>
256 UpdateFunction&&
function,
257 Eigen::Ref<VectorType> initialParameters,
258 const FloatType multiplier,
259 const Boxes& ... boxes
261 const unsigned P = initialParameters.size();
262 parameters.current = std::move(initialParameters);
263 gradients.current.resize(P);
264 gradients.proposed.resize(P);
267 function(parameters.current, values.current, gradients.current);
268 Detail::Boxes::adjustGradient<VectorType>(gradients.current, parameters.current, boxes ...);
271 VectorType direction;
272 const FloatType gradientNorm = gradients.current.norm();
273 if(gradientNorm > FloatType {1e-4}) {
274 direction = -gradients.current / gradientNorm;
276 direction = FloatType {-1.0} * gradients.current;
279 prepare(
function, multiplier, direction, boxes ...);
284 template<
typename UpdateFunction,
typename ... Boxes>
286 UpdateFunction&&
function,
287 const FloatType multiplier,
288 const VectorType& direction,
289 const Boxes& ... boxes
291 parameters.proposed.noalias() = parameters.current + multiplier * direction;
292 Detail::Boxes::clampToBounds<VectorType>(parameters.proposed, boxes ...);
293 function(parameters.proposed, values.proposed, gradients.proposed);
294 Detail::Boxes::adjustGradient<VectorType>(gradients.proposed, parameters.proposed, boxes ...);
306 template<
typename UpdateFunction,
typename ... Boxes>
308 UpdateFunction&&
function,
309 const FloatType multiplier,
310 const VectorType& direction,
311 const Boxes& ... boxes
313 parameters.propagate();
315 gradients.propagate();
317 prepare(
function, multiplier, direction, boxes ...);
340 typename UpdateFunction,
342 typename Observer = Detail::DoNothingObserver
344 Eigen::Ref<VectorType> parameters,
345 UpdateFunction&&
function,
347 Observer&& observer = Observer {}
351 std::forward<UpdateFunction>(
function),
352 std::forward<Checker>(check),
353 std::forward<Observer>(observer)
375 typename UpdateFunction,
377 typename Observer = Detail::DoNothingObserver
379 Eigen::Ref<VectorType> parameters,
380 UpdateFunction&&
function,
382 Observer&& observer = Observer {}
384 OptimizationReturnType results =
minimize(
386 Optimization::negateFunction<VectorType>(
387 std::forward<UpdateFunction>(
function)
389 std::forward<Checker>(check),
390 std::forward<Observer>(observer)
395 results.gradient *= -1;
421 typename UpdateFunction,
423 typename Observer = Detail::DoNothingObserver
425 Eigen::Ref<VectorType> parameters,
427 UpdateFunction&&
function,
429 Observer&& observer = Observer {}
433 std::forward<UpdateFunction>(
function),
434 std::forward<Checker>(check),
435 std::forward<Observer>(observer),
459 typename UpdateFunction,
461 typename Observer = Detail::DoNothingObserver
463 Eigen::Ref<VectorType> parameters,
465 UpdateFunction&&
function,
467 Observer&& observer = Observer {}
469 OptimizationReturnType results =
minimize(
472 Optimization::negateFunction<VectorType>(
473 std::forward<UpdateFunction>(
function)
475 std::forward<Checker>(check),
476 std::forward<Observer>(observer)
481 results.gradient *= -1;
489 FloatType
c1 = 0.0001;
509 Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize>
y;
511 Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize>
s;
513 Eigen::Matrix<FloatType, ringBufferSize, 1>
sDotY;
514 unsigned count = 0, offset = 0;
517 :
y(nParams, ringBufferSize),
518 s(nParams, ringBufferSize)
521 unsigned newestOffset()
const {
522 return (count + offset - 1) % ringBufferSize;
525 unsigned oldestOffset()
const {
526 return (count + offset) % ringBufferSize;
536 template<
typename UnaryFn>
539 const int newest = newestOffset();
540 for(
int i = newest; i > - 1; --i) {
544 const int countOrSizeLimit = std::min(count - 1, ringBufferSize - 1);
545 for(
int i = countOrSizeLimit; i > newest; --i) {
557 template<
typename UnaryFn>
560 const unsigned oldest = oldestOffset();
561 for(
unsigned i = oldest; i < count; ++i) {
565 for(
unsigned i = 0; i < oldest; ++i) {
574 bool dotProductNotZero;
575 if(count < ringBufferSize) {
576 y.col(count).noalias() = gradientBuffer.proposed - gradientBuffer.current;
577 s.col(count).noalias() = parameterBuffer.proposed - parameterBuffer.current;
578 sDotY(count) =
s.col(count).dot(
y.col(count));
579 dotProductNotZero = (
sDotY(count) != 0);
583 const unsigned columnOffset = (count + offset) % ringBufferSize;
584 y.col(columnOffset).noalias() = gradientBuffer.proposed - gradientBuffer.current;
585 s.col(columnOffset).noalias() = parameterBuffer.proposed - parameterBuffer.current;
586 sDotY(columnOffset) =
s.col(columnOffset).dot(
y.col(columnOffset));
587 dotProductNotZero = (
sDotY(columnOffset) != 0);
588 offset = (offset + 1) % ringBufferSize;
591 return dotProductNotZero;
602 Eigen::Ref<VectorType> q,
603 const VectorType& proposedGradient
608 const unsigned kMinusOne = newestOffset();
610 q = -proposedGradient;
611 VectorType alpha(count);
614 [&](
const unsigned i) {
615 alpha[i] =
s.col(i).dot(q) /
sDotY(i);
616 q.noalias() -= alpha[i] *
y.col(i);
631 q *=
sDotY(kMinusOne) /
y.col(kMinusOne).squaredNorm();
634 [&](
const unsigned i) {
635 const FloatType beta =
y.col(i).dot(q) /
sDotY(i);
636 q.noalias() += (alpha[i] - beta) *
s.col(i);
641 template<
typename ... Boxes>
642 bool updateAndGenerateNewDirection(
643 Eigen::Ref<VectorType> direction,
645 const Boxes& ... boxes
650 if(!addInformation(step.parameters, step.gradients)) {
655 Detail::Boxes::adjustDirection(
657 step.parameters.proposed,
658 step.gradients.proposed,
684 typename UpdateFunction,
688 UpdateFunction&&
function,
691 const VectorType& direction,
692 const Boxes& ... boxes
694 unsigned iterations = 0;
707 for(iterations = 0;
stepLength > 0 && iterations < 100; ++iterations) {
708 const FloatType currentGradDotDirection = step.gradients.current.dot(direction);
713 <= step.values.current +
c1 *
stepLength * currentGradDotDirection
717 bool curvatureCondition = (
718 step.gradients.proposed.dot(direction)
719 >=
c2 * currentGradDotDirection
723 bool backtrack = (step.values.proposed >= step.values.current);
726 constexpr FloatType shortenFactor = 0.5;
727 constexpr FloatType lengthenFactor = 1.5;
729 Detail::cfabs(shortenFactor * lengthenFactor - 1.0) > 0.1,
730 "Shortening and lengthening factor should not multiply to give "
731 "approximately one, this can cause oscillation"
736 }
else if(!curvatureCondition) {
746 if(backtrack && (!armijoRule || !curvatureCondition)) {
748 step.prepare(
function,
stepLength, direction, boxes ...);
749 observer(step.parameters.proposed);
754 && (step.parameters.proposed - step.parameters.current).squaredNorm() < FloatType {1e-8}
764 throw std::logic_error(
"Could not find a step length that reduces objective function value.");
771 typename UpdateFunction,
775 > OptimizationReturnType minimizeBase(
776 Eigen::Ref<VectorType> parameters,
777 UpdateFunction&&
function,
783 assert(Detail::Boxes::validate(parameters, boxes ...));
787 VectorType direction = step.generateInitialDirection(
function, parameters,
stepLength, boxes ...);
788 observer(step.parameters.proposed);
793 CollectiveRingBuffer ringBuffer {
static_cast<unsigned>(parameters.size())};
796 unsigned iteration = 1;
799 check.shouldContinue(iteration, std::as_const(step));
808 if(!ringBuffer.updateAndGenerateNewDirection(direction, step, boxes ...)) {
815 step.propagate(
function,
stepLength, direction, boxes ...);
816 observer(step.parameters.proposed);
820 parameters = std::move(step.parameters.current);
824 std::move(step.gradients.current)
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function within bounds.
Definition: Lbfgs.h:462
FloatType stepLength
The initial step length used in the L-BFGS.
Definition: Lbfgs.h:501
FloatType c2
2nd parameter for the Wolfe conditions.
Definition: Lbfgs.h:495
unsigned iterations
Number of iterations.
Definition: Lbfgs.h:151
void oldestToNewest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from oldest to newest entries...
Definition: Lbfgs.h:558
VectorType generateInitialDirection(UpdateFunction &&function, Eigen::Ref< VectorType > initialParameters, const FloatType multiplier, const Boxes &...boxes)
Initialize class state and generate first direction vector.
Definition: Lbfgs.h:255
Type returned from an optimization.
Definition: Lbfgs.h:149
LBFGS optimizer with optional boxing.
Definition: Lbfgs.h:134
VectorType gradient
Final gradient.
Definition: Lbfgs.h:155
void generateNewDirection(Eigen::Ref< VectorType > q, const VectorType &proposedGradient) const
Use stored gradient and parameter differences to generate a new direction.
Definition: Lbfgs.h:601
Eigen::Matrix< FloatType, ringBufferSize, 1 > sDotY
Ring buffered result of s_k.dot(y_k)
Definition: Lbfgs.h:513
void adjustDirection(Eigen::Ref< VectorType > direction, const VectorType ¶meters, const VectorType &gradient) const
Adjust a direction vector to avoid exceeding the box.
Definition: Lbfgs.h:212
void clamp(Eigen::Ref< VectorType > x) const
Clamp parameter values to the bounds.
Definition: Lbfgs.h:176
Class carrying proposed parameter updates in an optimization.
Definition: Lbfgs.h:239
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function within bounds.
Definition: Lbfgs.h:424
void adjustGradient(Eigen::Ref< VectorType > gradient, const VectorType ¶meters) const
Zero out those elements of a gradient which are bounded.
Definition: Lbfgs.h:189
VectorType maxima
Upper bounds for parameters.
Definition: Lbfgs.h:165
FloatType value
Final function value.
Definition: Lbfgs.h:153
bool validate(const VectorType &v) const
Test whether parameters are within the bounds.
Definition: Lbfgs.h:181
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > y
Ring buffer containing gradient differences: y_k = g_{k+1} - g_k.
Definition: Lbfgs.h:509
Data types common to optimizers.
FloatType c1
1st parameter for the Wolfe conditions.
Definition: Lbfgs.h:489
Type storing boundaries for parameter values.
Definition: Lbfgs.h:161
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > s
Ring buffer containing parameter differnces: s_k = x_{k+1} - x_k.
Definition: Lbfgs.h:511
Class containing hessian information for LBFGS update.
Definition: Lbfgs.h:507
unsigned adjustStepAlongDirection(UpdateFunction &&function, Observer &&observer, StepValues &step, const VectorType &direction, const Boxes &...boxes)
Adjusts stepLength member and ensures the proposed parameter adjustment lowers the function value...
Definition: Lbfgs.h:687
void newestToOldest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from newest to oldest entries...
Definition: Lbfgs.h:537
VectorType minima
Lower bounds for parameters.
Definition: Lbfgs.h:163
void propagate(UpdateFunction &&function, const FloatType multiplier, const VectorType &direction, const Boxes &...boxes)
Accept a step, create new parameters and evaluate the function to get value and gradients.
Definition: Lbfgs.h:307
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function.
Definition: Lbfgs.h:378
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function.
Definition: Lbfgs.h:343