7 #ifndef INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
8 #define INCLUDE_TEMPLE_OPTIMIZATION_LBFGS_H
17 namespace Molassembler {
21 template<
typename FloatType> constexpr FloatType cfabs(FloatType x) noexcept {
22 return (x >= 0) ? x : -x;
25 struct DoNothingObserver {
27 void operator() (T&& ) {}
30 template<
typename FloatType,
typename VectorType,
typename UpdateFunction>
31 struct ClampFunction {
32 UpdateFunction
function;
33 const VectorType& low;
34 const VectorType& high;
40 ) : function(fn), low(lo), high(hi) {}
43 const VectorType& parameters,
45 Eigen::Ref<VectorType> gradient
48 parameters.cwiseMax(low).cwiseMin(high),
55 template<
typename VectorType,
typename UpdateFunction>
61 using FloatType =
typename VectorType::Scalar;
62 return ClampFunction<FloatType, VectorType, UpdateFunction>(
63 std::forward<UpdateFunction>(fn),
71 template<
typename VectorType>
72 void clampToBounds(Eigen::Ref<VectorType> ) {}
74 template<
typename VectorType,
typename BoxType>
75 void clampToBounds(Eigen::Ref<VectorType> parameters,
const BoxType& box) {
76 box.clamp(parameters);
80 template<
typename VectorType>
81 bool validate(
const VectorType& ) {
85 template<
typename VectorType,
typename BoxType>
86 bool validate(
const VectorType& parameters,
const BoxType& box) {
87 return box.validate(parameters);
90 template<
typename VectorType>
92 Eigen::Ref<VectorType> ,
96 template<
typename VectorType,
typename BoxType>
98 Eigen::Ref<VectorType> gradient,
99 const VectorType& parameters,
102 box.adjustGradient(gradient, parameters);
105 template<
typename VectorType>
106 void adjustDirection(
107 Eigen::Ref<VectorType> ,
112 template<
typename VectorType,
typename BoxType>
113 void adjustDirection(
114 Eigen::Ref<VectorType> direction,
115 const VectorType& parameters,
116 const VectorType& gradient,
119 box.adjustDirection(direction, parameters, gradient);
132 template<
typename FloatType =
double,
unsigned ringBufferSize = 16>
135 constexpr
static bool isPowerOfTwo(
unsigned x) {
136 return (x & (x - 1)) == 0;
140 isPowerOfTwo(ringBufferSize) && ringBufferSize != 0,
141 "For good performance, the ring buffer size has to be a power of two"
144 using MatrixType = Eigen::Matrix<FloatType, Eigen::Dynamic, Eigen::Dynamic>;
145 using VectorType = Eigen::Matrix<FloatType, Eigen::Dynamic, 1>;
167 Box(
const VectorType& passMinima,
const VectorType& passMaxima)
175 void clamp(Eigen::Ref<VectorType> x)
const {
182 (
minima.array() <= v.array()).all()
183 && (v.array() <=
maxima.array()).all()
189 Eigen::Ref<VectorType> gradient,
190 const VectorType& parameters
192 constexpr FloatType gapEpsilon = 1e-8;
193 const Eigen::Index P = parameters.size();
195 for(Eigen::Index i = 0; i < P; ++i) {
198 minima[i] + gapEpsilon >= parameters[i]
201 maxima[i] - gapEpsilon <= parameters[i]
212 Eigen::Ref<VectorType> direction,
213 const VectorType& parameters,
214 const VectorType& gradient
216 constexpr FloatType gapEpsilon = 1e-8;
218 const Eigen::Index P = parameters.size();
219 for(Eigen::Index i = 0; i < P; ++i) {
221 minima[i] + gapEpsilon >= parameters[i]
224 direction[i] =
minima[i] - parameters[i];
226 maxima[i] - gapEpsilon <= parameters[i]
229 direction[i] =
maxima[i] - parameters[i];
253 template<
typename UpdateFunction,
typename ... Boxes>
255 UpdateFunction&&
function,
256 Eigen::Ref<VectorType> initialParameters,
257 const FloatType multiplier,
258 const Boxes& ... boxes
260 const unsigned P = initialParameters.size();
261 parameters.current = std::move(initialParameters);
262 gradients.current.resize(P);
263 gradients.proposed.resize(P);
266 function(parameters.current, values.current, gradients.current);
267 Detail::Boxes::adjustGradient<VectorType>(gradients.current, parameters.current, boxes ...);
270 VectorType direction;
271 const FloatType gradientNorm = gradients.current.norm();
272 if(gradientNorm > FloatType {1e-4}) {
273 direction = -gradients.current / gradientNorm;
275 direction = FloatType {-1.0} * gradients.current;
278 prepare(
function, multiplier, direction, boxes ...);
283 template<
typename UpdateFunction,
typename ... Boxes>
285 UpdateFunction&&
function,
286 const FloatType multiplier,
287 const VectorType& direction,
288 const Boxes& ... boxes
290 parameters.proposed.noalias() = parameters.current + multiplier * direction;
291 Detail::Boxes::clampToBounds<VectorType>(parameters.proposed, boxes ...);
292 function(parameters.proposed, values.proposed, gradients.proposed);
293 Detail::Boxes::adjustGradient<VectorType>(gradients.proposed, parameters.proposed, boxes ...);
305 template<
typename UpdateFunction,
typename ... Boxes>
307 UpdateFunction&&
function,
308 const FloatType multiplier,
309 const VectorType& direction,
310 const Boxes& ... boxes
312 parameters.propagate();
314 gradients.propagate();
316 prepare(
function, multiplier, direction, boxes ...);
339 typename UpdateFunction,
341 typename Observer = Detail::DoNothingObserver
343 Eigen::Ref<VectorType> parameters,
344 UpdateFunction&&
function,
346 Observer&& observer = Observer {}
350 std::forward<UpdateFunction>(
function),
351 std::forward<Checker>(check),
352 std::forward<Observer>(observer)
374 typename UpdateFunction,
376 typename Observer = Detail::DoNothingObserver
378 Eigen::Ref<VectorType> parameters,
379 UpdateFunction&&
function,
381 Observer&& observer = Observer {}
383 OptimizationReturnType results =
minimize(
385 Optimization::negateFunction<VectorType>(
386 std::forward<UpdateFunction>(
function)
388 std::forward<Checker>(check),
389 std::forward<Observer>(observer)
394 results.gradient *= -1;
420 typename UpdateFunction,
422 typename Observer = Detail::DoNothingObserver
424 Eigen::Ref<VectorType> parameters,
426 UpdateFunction&&
function,
428 Observer&& observer = Observer {}
432 std::forward<UpdateFunction>(
function),
433 std::forward<Checker>(check),
434 std::forward<Observer>(observer),
458 typename UpdateFunction,
460 typename Observer = Detail::DoNothingObserver
462 Eigen::Ref<VectorType> parameters,
464 UpdateFunction&&
function,
466 Observer&& observer = Observer {}
468 OptimizationReturnType results =
minimize(
471 Optimization::negateFunction<VectorType>(
472 std::forward<UpdateFunction>(
function)
474 std::forward<Checker>(check),
475 std::forward<Observer>(observer)
480 results.gradient *= -1;
488 FloatType
c1 = 0.0001;
508 Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize>
y;
510 Eigen::Matrix<FloatType, Eigen::Dynamic, ringBufferSize>
s;
512 Eigen::Matrix<FloatType, ringBufferSize, 1>
sDotY;
513 unsigned count = 0, offset = 0;
516 :
y(nParams, ringBufferSize),
517 s(nParams, ringBufferSize)
520 unsigned newestOffset()
const {
521 return (count + offset - 1) % ringBufferSize;
524 unsigned oldestOffset()
const {
525 return (count + offset) % ringBufferSize;
535 template<
typename UnaryFn>
538 const int newest = newestOffset();
539 for(
int i = newest; i > - 1; --i) {
543 const int countOrSizeLimit = std::min(count - 1, ringBufferSize - 1);
544 for(
int i = countOrSizeLimit; i > newest; --i) {
556 template<
typename UnaryFn>
559 const unsigned oldest = oldestOffset();
560 for(
unsigned i = oldest; i < count; ++i) {
564 for(
unsigned i = 0; i < oldest; ++i) {
573 bool dotProductNotZero;
574 if(count < ringBufferSize) {
575 y.col(count).noalias() = gradientBuffer.proposed - gradientBuffer.current;
576 s.col(count).noalias() = parameterBuffer.proposed - parameterBuffer.current;
577 sDotY(count) =
s.col(count).dot(
y.col(count));
578 dotProductNotZero = (
sDotY(count) != 0);
582 const unsigned columnOffset = (count + offset) % ringBufferSize;
583 y.col(columnOffset).noalias() = gradientBuffer.proposed - gradientBuffer.current;
584 s.col(columnOffset).noalias() = parameterBuffer.proposed - parameterBuffer.current;
585 sDotY(columnOffset) =
s.col(columnOffset).dot(
y.col(columnOffset));
586 dotProductNotZero = (
sDotY(columnOffset) != 0);
587 offset = (offset + 1) % ringBufferSize;
590 return dotProductNotZero;
601 Eigen::Ref<VectorType> q,
602 const VectorType& proposedGradient
607 const unsigned kMinusOne = newestOffset();
609 q = -proposedGradient;
610 VectorType alpha(count);
613 [&](
const unsigned i) {
614 alpha[i] =
s.col(i).dot(q) /
sDotY(i);
615 q.noalias() -= alpha[i] *
y.col(i);
630 q *=
sDotY(kMinusOne) /
y.col(kMinusOne).squaredNorm();
633 [&](
const unsigned i) {
634 const FloatType beta =
y.col(i).dot(q) /
sDotY(i);
635 q.noalias() += (alpha[i] - beta) *
s.col(i);
640 template<
typename ... Boxes>
641 bool updateAndGenerateNewDirection(
642 Eigen::Ref<VectorType> direction,
644 const Boxes& ... boxes
649 if(!addInformation(step.parameters, step.gradients)) {
654 Detail::Boxes::adjustDirection(
656 step.parameters.proposed,
657 step.gradients.proposed,
683 typename UpdateFunction,
687 UpdateFunction&&
function,
690 const VectorType& direction,
691 const Boxes& ... boxes
693 unsigned iterations = 0;
706 for(iterations = 0;
stepLength > 0 && iterations < 100; ++iterations) {
707 const FloatType currentGradDotDirection = step.gradients.current.dot(direction);
712 <= step.values.current +
c1 *
stepLength * currentGradDotDirection
716 bool curvatureCondition = (
717 step.gradients.proposed.dot(direction)
718 >=
c2 * currentGradDotDirection
722 bool backtrack = (step.values.proposed >= step.values.current);
725 constexpr FloatType shortenFactor = 0.5;
726 constexpr FloatType lengthenFactor = 1.5;
728 Detail::cfabs(shortenFactor * lengthenFactor - 1.0) > 0.1,
729 "Shortening and lengthening factor should not multiply to give "
730 "approximately one, this can cause oscillation"
735 }
else if(!curvatureCondition) {
745 if(backtrack && (!armijoRule || !curvatureCondition)) {
747 step.prepare(
function,
stepLength, direction, boxes ...);
748 observer(step.parameters.proposed);
753 && (step.parameters.proposed - step.parameters.current).squaredNorm() < FloatType {1e-8}
763 throw std::logic_error(
"Could not find a step length that reduces objective function value.");
770 typename UpdateFunction,
774 > OptimizationReturnType minimizeBase(
775 Eigen::Ref<VectorType> parameters,
776 UpdateFunction&&
function,
782 assert(Detail::Boxes::validate(parameters, boxes ...));
786 VectorType direction = step.generateInitialDirection(
function, parameters,
stepLength, boxes ...);
787 observer(step.parameters.proposed);
792 CollectiveRingBuffer ringBuffer {
static_cast<unsigned>(parameters.size())};
795 unsigned iteration = 1;
798 check.shouldContinue(iteration, std::as_const(step));
807 if(!ringBuffer.updateAndGenerateNewDirection(direction, step, boxes ...)) {
814 step.propagate(
function,
stepLength, direction, boxes ...);
815 observer(step.parameters.proposed);
819 parameters = std::move(step.parameters.current);
823 std::move(step.gradients.current)
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function within bounds.
Definition: Lbfgs.h:461
FloatType stepLength
The initial step length used in the L-BFGS.
Definition: Lbfgs.h:500
FloatType c2
2nd parameter for the Wolfe conditions.
Definition: Lbfgs.h:494
unsigned iterations
Number of iterations.
Definition: Lbfgs.h:150
void oldestToNewest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from oldest to newest entries...
Definition: Lbfgs.h:557
VectorType generateInitialDirection(UpdateFunction &&function, Eigen::Ref< VectorType > initialParameters, const FloatType multiplier, const Boxes &...boxes)
Initialize class state and generate first direction vector.
Definition: Lbfgs.h:254
Type returned from an optimization.
Definition: Lbfgs.h:148
LBFGS optimizer with optional boxing.
Definition: Lbfgs.h:133
VectorType gradient
Final gradient.
Definition: Lbfgs.h:154
void generateNewDirection(Eigen::Ref< VectorType > q, const VectorType &proposedGradient) const
Use stored gradient and parameter differences to generate a new direction.
Definition: Lbfgs.h:600
Eigen::Matrix< FloatType, ringBufferSize, 1 > sDotY
Ring buffered result of s_k.dot(y_k)
Definition: Lbfgs.h:512
void adjustDirection(Eigen::Ref< VectorType > direction, const VectorType ¶meters, const VectorType &gradient) const
Adjust a direction vector to avoid exceeding the box.
Definition: Lbfgs.h:211
void clamp(Eigen::Ref< VectorType > x) const
Clamp parameter values to the bounds.
Definition: Lbfgs.h:175
Class carrying proposed parameter updates in an optimization.
Definition: Lbfgs.h:238
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, const Box &box, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function within bounds.
Definition: Lbfgs.h:423
void adjustGradient(Eigen::Ref< VectorType > gradient, const VectorType ¶meters) const
Zero out those elements of a gradient which are bounded.
Definition: Lbfgs.h:188
VectorType maxima
Upper bounds for parameters.
Definition: Lbfgs.h:164
FloatType value
Final function value.
Definition: Lbfgs.h:152
bool validate(const VectorType &v) const
Test whether parameters are within the bounds.
Definition: Lbfgs.h:180
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > y
Ring buffer containing gradient differences: y_k = g_{k+1} - g_k.
Definition: Lbfgs.h:508
Data types common to optimizers.
FloatType c1
1st parameter for the Wolfe conditions.
Definition: Lbfgs.h:488
Type storing boundaries for parameter values.
Definition: Lbfgs.h:160
Eigen::Matrix< FloatType, Eigen::Dynamic, ringBufferSize > s
Ring buffer containing parameter differnces: s_k = x_{k+1} - x_k.
Definition: Lbfgs.h:510
Class containing hessian information for LBFGS update.
Definition: Lbfgs.h:506
unsigned adjustStepAlongDirection(UpdateFunction &&function, Observer &&observer, StepValues &step, const VectorType &direction, const Boxes &...boxes)
Adjusts stepLength member and ensures the proposed parameter adjustment lowers the function value...
Definition: Lbfgs.h:686
void newestToOldest(UnaryFn &&fn) const
Calls a unary function with the indices of the ring buffer in sequence from newest to oldest entries...
Definition: Lbfgs.h:536
VectorType minima
Lower bounds for parameters.
Definition: Lbfgs.h:162
void propagate(UpdateFunction &&function, const FloatType multiplier, const VectorType &direction, const Boxes &...boxes)
Accept a step, create new parameters and evaluate the function to get value and gradients.
Definition: Lbfgs.h:306
OptimizationReturnType maximize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to maximize an objective function.
Definition: Lbfgs.h:377
OptimizationReturnType minimize(Eigen::Ref< VectorType > parameters, UpdateFunction &&function, Checker &&check, Observer &&observer=Observer{})
Find parameters to minimize an objective function.
Definition: Lbfgs.h:342