Add LM, clean up README, a few fixes
JSzitas committed Mar 31, 2024
1 parent 42630a8 commit cce339a
Showing 6 changed files with 1,220 additions and 1,123 deletions.
8 changes: 0 additions & 8 deletions .idea/.gitignore

This file was deleted.

2 changes: 2 additions & 0 deletions .idea/nlsolver.iml

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions CMakeLists.txt
100644 → 100755
@@ -2,5 +2,7 @@ cmake_minimum_required(VERSION 3.26)
project(nlsolver)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_library(nlsolver example.cpp)

16 changes: 8 additions & 8 deletions README.md
@@ -21,14 +21,14 @@ Just copy the header into your project, include and use:
+ Vanilla
+ Accelerated
* Simulated Annealing
+ Currently without option for custom sample generators, only using the Markov Gaussian Kernel
+ Currently, without option for custom sample generators, only using the Markov Gaussian Kernel
* Nelder-Mead PSO hybrid
+ Might under-perform other solvers, but should do better than vanilla Nelder-Mead on problems with
many local minima where Nelder-Mead can get stuck
* Gradient Descent
+ Several flavours, including Vanilla with fixed steps, steps optimized using MoreThuente linesearch,
+ Several flavours, including Vanilla with fixed steps, optimized using MoreThuente linesearch,
and even an implementation of [Provably Faster Gradient Descent via Long Steps](https://arxiv.org/abs/2307.06324)
in case you have a particularly well behaved smooth convex problem.
in case you have a particularly well-behaved smooth convex problem.
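
To make the gradient-descent flavours listed above concrete, here is a minimal sketch based on the constructions used in `example.cpp` in this commit; the `Rosenbrock` body and the `"nlsolver.h"` header name are illustrative assumptions, not part of the library's documented API.

```cpp
// Sketch only: mirrors the GradientDescent constructions from example.cpp in
// this commit. The Rosenbrock body and the header name are assumptions.
#include <cmath>
#include <vector>

#include "nlsolver.h"

using nlsolver::GradientDescent;
using GDType = nlsolver::GradientStepType;

// illustrative 2D Rosenbrock; example.cpp uses its own variant
struct Rosenbrock {
  double operator()(std::vector<double> &x) {
    return 100.0 * std::pow(x[1] - x[0] * x[0], 2) + std::pow(1.0 - x[0], 2);
  }
};

int main() {
  Rosenbrock prob;
  // vanilla flavour with a fixed step size
  auto fixed = GradientDescent<Rosenbrock, double, GDType::Fixed>(prob, 0.0005);
  // step size chosen by MoreThuente line-search
  auto linesearch = GradientDescent<Rosenbrock, double, GDType::Linesearch>(prob);
  // 'long steps' variant, cycling through a pattern of step sizes
  auto bigstep = GradientDescent<Rosenbrock, double, GDType::Bigstep, 5, true>(prob);

  std::vector<double> init = {2, 2};
  auto result = linesearch.minimize(init);  // init is updated in place
  result.print();
  return 0;
}
```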

## Experimental

@@ -105,7 +105,7 @@ And run the examples from the command line

There are some design decisions in this library which warrant discussion:

* the objective functions to minimize/ maximize are passed as objects, prefering functors,
* the objective functions to minimize/ maximize are passed as objects, preferring functors,
requiring an overloaded **public** **()** operator which takes a **std::vector<T>**, e.g.
```cpp
struct example {
@@ -119,7 +119,7 @@ note that this will also work with lambda functions, and a struct/class is not s
* there are no virtual calls in this library - thus incurring no performance penalty
* each function exposes both a minimization and a maximization interface, and maximization is
always implemented as 'negative minimization', i.e. by multiplying the objective function by -1
* all optimizers come with default arguments that try to be sane and user friendly - but expert
* all optimizers come with default arguments that try to be sane and user-friendly - but expert
users are highly encouraged to supply their own values
* currently no multi-threading is supported - this is by design as functors are *potentially*
stateful objective functions and multi-threading would require ensuring no data races happen
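
A minimal sketch of the functor-based interface these points describe, assuming the single header is named `nlsolver.h` and that the maximization entry point mirrors `minimize` as `maximize` (the `Quadratic` objective is only an illustration):

```cpp
// Sketch of the functor interface described above; the header name and the
// maximize() call are assumptions based on the README's description.
#include <vector>

#include "nlsolver.h"

// the objective is an object with a public operator() taking std::vector<T>
struct Quadratic {
  double operator()(std::vector<double> &x) {
    double sum = 0.0;
    for (const auto v : x) sum += v * v;
    return sum;
  }
};

int main() {
  Quadratic prob;
  // the solver is templated on the callable type, so no virtual calls are made
  auto solver = nlsolver::NelderMead<Quadratic, double>(prob);
  std::vector<double> init = {1, -1};  // default solver settings are used
  auto res = solver.minimize(init);    // minimization interface
  res.print();
  // maximization is implemented as 'negative minimization' of the same objective:
  // auto max_res = solver.maximize(init);
  return 0;
}
```
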
@@ -137,10 +137,10 @@ impure objective functions).
# Notes
[^lambda_note]: This flexibility is included for cases where you want to implicitly bundle mutable data within
[^lambda_note] This flexibility is included for cases where you want to implicitly bundle mutable data within
the struct, and do not want to have to pass the data (e.g. through a pointer) to your objective function.
This makes the overall design cleaner - if your objective function needs data, mainstains state, or
This makes the overall design cleaner - if your objective function needs data, maintains state, or
does anything else on evaluation, you can keep the entirety of that within the struct (and even extract it
after the solver finishes). If you do not need the functionality and you simply want to optimize some ad-hoc function, using
after the solver finishes). If you do not need the functionality, and you simply want to optimize some ad-hoc function, using
a lambda is probably much simpler and cleaner.
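
As a hypothetical illustration of this footnote, a functor can bundle its data and any state it accumulates across evaluations, whereas an ad-hoc objective is most naturally a lambda; the `LeastSquares` struct below is not part of the library, only a sketch of the pattern.

```cpp
// Hypothetical example of the pattern the footnote describes: the functor owns
// its data and mutable state, both of which remain accessible after solving.
#include <cstddef>
#include <vector>

struct LeastSquares {
  std::vector<double> x, y;  // data bundled inside the functor
  int n_evals = 0;           // state updated on every evaluation

  double operator()(std::vector<double> &par) {
    ++n_evals;
    double sum = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
      const double resid = y[i] - (par[0] + par[1] * x[i]);
      sum += resid * resid;
    }
    return sum;
  }
};

int main() {
  LeastSquares prob{{0, 1, 2, 3}, {1, 3, 5, 7}};
  std::vector<double> par = {0, 0};
  // any of the solvers above could minimize prob(par); afterwards prob.n_evals
  // and prob.x / prob.y remain available for inspection.
  const double initial_loss = prob(par);
  (void)initial_loss;
  return 0;
}
```
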
93 changes: 33 additions & 60 deletions example.cpp
@@ -22,14 +22,15 @@ using nlsolver::ConjugatedGradientDescent;
using nlsolver::DE;
using nlsolver::GradientDescent;
using nlsolver::NelderMead;
using nlsolver::NelderMeadPSO;
using nlsolver::PSO;
using nlsolver::SANN;
// helper definition for GDType
using GDType = nlsolver::GradientStepType;
using nlsolver::BFGS;

// experimental solvers
using nlsolver::experimental::LevenbergMarquardt;
using nlsolver::experimental::NelderMeadPSO;

// RNG
using nlsolver::rng::xorshift;
Expand Down Expand Up @@ -72,6 +73,20 @@ struct std_MT {
};
*/

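// Small helpers used below: run a given solver from an initial point
// (optionally with box bounds), print the solver status via .print(), and
// print the estimated parameters via print_vector().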
template <typename T>
void run_solver(T &solver, std::vector<double> init = {2, 5}) {
auto de_res = solver.minimize(init);
de_res.print();
print_vector(init);
}
template <typename T>
void run_solver(T &solver, std::vector<double> lower, std::vector<double> upper,
std::vector<double> init = {2, 5}) {
auto de_res = solver.minimize(init, lower, upper);
de_res.print();
print_vector(init);
}

int main() {
// define problem functor - in our case a variant of the rosenbrock function
Rosenbrock prob;
@@ -95,15 +110,8 @@ int main() {
};
auto nm_solver_lambda =
NelderMead<decltype(RosenbrockLambda), double>(RosenbrockLambda);
// initialize function arguments
nm_init = {2, 7};
// nm_init[0] = 2;
// nm_init[1] = 7;
auto nm_res_lambda = nm_solver_lambda.minimize(nm_init);
// check solver status
nm_res_lambda.print();
// and estimated function parameters
print_vector(nm_init);
// repeat with lambda function, using a simple wrapper defined above:
run_solver(nm_solver_lambda, {2, 7});

// use recombination strategy
using DEStrat = nlsolver::RecombinationStrategy;
@@ -114,12 +122,7 @@ int main() {
// again initialize solver, this time also with the RNG
auto de_solver =
DE<Rosenbrock, xorshift<double>, double, DEStrat::best>(prob, gen);

std::vector<double> de_init = {2, 7};

auto de_res = de_solver.minimize(de_init);
de_res.print();
print_vector(de_init);
run_solver(de_solver, {2, 7});

std::cout << "Particle Swarm Optimization with xoshiro: " << std::endl;
// we also have a xoshiro generator
@@ -131,22 +134,12 @@ int main() {
auto pso_solver = PSO<Rosenbrock, xoshiro<double>, double>(prob, xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> pso_init = {3, 3};
auto pso_res = pso_solver.minimize(pso_init);
pso_res.print();
print_vector(pso_init);
run_solver(pso_solver, {3, 3});
std::cout << "Particle Swarm Optimization with xoshiro (and bounds): "
<< std::endl;
// this tends to be much worse than not specifying bounds for PSO - so
// we heavily recommend those:
pso_init[0] = 0;
pso_init[1] = 0;
std::vector<double> pso_lower = {-1, -1};
std::vector<double> pso_upper = {1, 1};

pso_res = pso_solver.minimize(pso_init, pso_lower, pso_upper);
pso_res.print();
print_vector(pso_init);
run_solver(pso_solver, {-1, -1}, {1, 1}, {0, 0});
std::cout << "Accelerated Particle Swarm Optimization with xoshiro: "
<< std::endl;
// we also have an accelerated version - we reset the RNG as well.
@@ -156,21 +149,15 @@ int main() {
xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> apso_init = {3, 3};
auto apso_res = apso_solver.minimize(apso_init);
apso_res.print();
print_vector(apso_init);
run_solver(apso_solver, {3, 3});

std::cout << "Simulated Annealing with xoshiro: " << std::endl;
// we also have an accelerated version - we reset the RNG as well.
xos_gen.reset();
auto sann_solver = SANN<Rosenbrock, xoshiro<double>, double>(prob, xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> sann_init = {3, 3};
auto sann_res = sann_solver.minimize(sann_init);
sann_res.print();
print_vector(sann_init);
run_solver(sann_solver, {5, 5});

std::cout << "NelderMead-PSO hybrid with xoshiro: " << std::endl;
// we also have an accelerated version - we reset the RNG as well.
@@ -179,53 +166,39 @@ int main() {
NelderMeadPSO<Rosenbrock, xoshiro<double>, double>(prob, xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> nm_pso_init = {3, 3};
auto nm_pso_res = nm_pso_solver.minimize(nm_pso_init);
nm_pso_res.print();
print_vector(nm_pso_init);
run_solver(nm_pso_solver, {3, 3});

std::cout << "Gradient Descent without line-search using fixed step size: "
<< std::endl;
auto gd_solver_fixed =
GradientDescent<Rosenbrock, double, GDType::Fixed>(prob, 0.0005);
std::vector<double> gd_init_fixed = {2, 2};
auto gd_res_fixed = gd_solver_fixed.minimize(gd_init_fixed);
gd_res_fixed.print();
print_vector(gd_init_fixed);
run_solver(gd_solver_fixed, {2, 2});

std::cout << "Gradient Descent with line-search: " << std::endl;
auto gd_solver_linesearch =
GradientDescent<Rosenbrock, double, GDType::Linesearch>(prob);
std::vector<double> gd_init_linesearch = {2, 2};
auto gd_res_linesearch = gd_solver_linesearch.minimize(gd_init_linesearch);
gd_res_linesearch.print();
print_vector(gd_init_linesearch);
run_solver(gd_solver_linesearch, {2, 2});

std::cout
<< "Gradient Descent without line-search using big steps,"
<< " cycling through step-sizes (and lipschitz constant eyeballing): "
<< std::endl;
auto gd_solver_bigstep =
GradientDescent<Rosenbrock, double, GDType::Bigstep, 5, true>(prob);
std::vector<double> gd_init_bigstep = {2, 2};
auto gd_res_bigstep = gd_solver_bigstep.minimize(gd_init_bigstep);
gd_res_bigstep.print();
print_vector(gd_init_bigstep);
run_solver(gd_solver_bigstep, {2, 2});

std::cout << "Conjugated Gradient Descent (always requires linesearch)"
<< std::endl;
auto cgd_solver = ConjugatedGradientDescent<Rosenbrock, double>(prob);
std::vector<double> cgd_init = {2, 2};
auto cgd_res = cgd_solver.minimize(cgd_init);
cgd_res.print();
print_vector(cgd_init);
run_solver(cgd_solver, {2, 2});

std::cout << "BFGS (always requires linesearch)" << std::endl;
auto bfgs_solver = BFGS<Rosenbrock, double>(prob);
std::vector<double> bfgs_init = {2, 2};
auto bfgs_res = bfgs_solver.minimize(bfgs_init);
bfgs_res.print();
print_vector(bfgs_init);
run_solver(bfgs_solver, {2, 2});

std::cout << "LevenbergMarquardt (always requires hessian)" << std::endl;
auto lm_solver = LevenbergMarquardt<Rosenbrock, double>(prob);
run_solver(lm_solver, {2, 2});

return 0;
}