Add LM, clean up README, a few fixes
JSzitas committed Mar 31, 2024
1 parent 42630a8 commit cce339a
Showing 6 changed files with 1,220 additions and 1,123 deletions.
8 changes: 0 additions & 8 deletions .idea/.gitignore

This file was deleted.

2 changes: 2 additions & 0 deletions .idea/nlsolver.iml

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions CMakeLists.txt
100644 → 100755
@@ -2,5 +2,7 @@ cmake_minimum_required(VERSION 3.26)
project(nlsolver)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_library(nlsolver example.cpp)

16 changes: 8 additions & 8 deletions README.md
@@ -21,14 +21,14 @@ Just copy the header into your project, include and use:
+ Vanilla
+ Accelerated
* Simulated Annealing
+ Currently without option for custom sample generators, only using the Markov Gaussian Kernel
+ Currently, without option for custom sample generators, only using the Markov Gaussian Kernel
* Nelder-Mead PSO hybrid
+ Might under-perform other solvers, but should do better than vanilla Nelder-Mead on problems with
many local minima where Nelder-Mead can get stuck
* Gradient Descent
+ Several flavours, including Vanilla with fixed steps, steps optimized using MoreThuente linesearch,
+ Several flavours, including Vanilla with fixed steps, optimized using MoreThuente linesearch,
and even an implementation of [Provably Faster Gradient Descent via Long Steps](https://arxiv.org/abs/2307.06324)
in case you have a particularly well behaved smooth convex problem.
in case you have a particularly well-behaved smooth convex problem.
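
To make the gradient-descent flavours listed above concrete, here is a minimal sketch based on the constructions used in `example.cpp` in this commit; the `Rosenbrock` body and the `"nlsolver.h"` header name are illustrative assumptions, not part of the library's documented API.

```cpp
// Sketch only: mirrors the GradientDescent constructions from example.cpp in
// this commit. The Rosenbrock body and the header name are assumptions.
#include <cmath>
#include <vector>

#include "nlsolver.h"

using nlsolver::GradientDescent;
using GDType = nlsolver::GradientStepType;

// illustrative 2D Rosenbrock; example.cpp uses its own variant
struct Rosenbrock {
  double operator()(std::vector<double> &x) {
    return 100.0 * std::pow(x[1] - x[0] * x[0], 2) + std::pow(1.0 - x[0], 2);
  }
};

int main() {
  Rosenbrock prob;
  // vanilla flavour with a fixed step size
  auto fixed = GradientDescent<Rosenbrock, double, GDType::Fixed>(prob, 0.0005);
  // step size chosen by MoreThuente line-search
  auto linesearch = GradientDescent<Rosenbrock, double, GDType::Linesearch>(prob);
  // 'long steps' variant, cycling through a pattern of step sizes
  auto bigstep = GradientDescent<Rosenbrock, double, GDType::Bigstep, 5, true>(prob);

  std::vector<double> init = {2, 2};
  auto result = linesearch.minimize(init);  // init is updated in place
  result.print();
  return 0;
}
```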

## Experimental

@@ -105,7 +105,7 @@ And run the examples from the command line

There are some design decisions in this library which warrant discussion:

* the objective functions to minimize/ maximize are passed as objects, prefering functors,
* the objective functions to minimize/ maximize are passed as objects, preferring functors,
requiring an overloaded **public** **()** operator which takes a **std::vector<T>**, e.g.
```cpp
struct example {
@@ -119,7 +119,7 @@ note that this will also work with lambda functions, and a struct/class is not s
* there are no virtual calls in this library - thus incurring no performance penalty
* each function exposes both a minimization and a maximization interface, and maximization is
always implemented as 'negative minimization', i.e. by multiplying the objective function by -1
* all optimizers come with default arguments that try to be sane and user friendly - but expert
* all optimizers come with default arguments that try to be sane and user-friendly - but expert
users are highly encouraged to supply their own values
* currently no multi-threading is supported - this is by design as functors are *potentially*
stateful objective functions and multi-threading would require ensuring no data races happen
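
A minimal sketch of the functor-based interface these points describe, assuming the single header is named `nlsolver.h` and that the maximization entry point mirrors `minimize` as `maximize` (the `Quadratic` objective is only an illustration):

```cpp
// Sketch of the functor interface described above; the header name and the
// maximize() call are assumptions based on the README's description.
#include <vector>

#include "nlsolver.h"

// the objective is an object with a public operator() taking std::vector<T>
struct Quadratic {
  double operator()(std::vector<double> &x) {
    double sum = 0.0;
    for (const auto v : x) sum += v * v;
    return sum;
  }
};

int main() {
  Quadratic prob;
  // the solver is templated on the callable type, so no virtual calls are made
  auto solver = nlsolver::NelderMead<Quadratic, double>(prob);
  std::vector<double> init = {1, -1};  // default solver settings are used
  auto res = solver.minimize(init);    // minimization interface
  res.print();
  // maximization is implemented as 'negative minimization' of the same objective:
  // auto max_res = solver.maximize(init);
  return 0;
}
```
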
@@ -137,10 +137,10 @@ impure objective functions).
# Notes
[^lambda_note]: This flexibility is included for cases where you want to implicitly bundle mutable data within
[^lambda_note] This flexibility is included for cases where you want to implicitly bundle mutable data within
the struct, and do not want to have to pass the data (e.g. through a pointer) to your objective function.
This makes the overall design cleaner - if your objective function needs data, mainstains state, or
This makes the overall design cleaner - if your objective function needs data, maintains state, or
does anything else on evaluation, you can keep the entirety of that within the struct (and even extract it
after the solver finishes). If you do not need the functionality and you simply want to optimize some ad-hoc function, using
after the solver finishes). If you do not need the functionality, and you simply want to optimize some ad-hoc function, using
a lambda is probably much simpler and cleaner.
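
As a hypothetical illustration of this footnote, a functor can bundle its data and any state it accumulates across evaluations, whereas an ad-hoc objective is most naturally a lambda; the `LeastSquares` struct below is not part of the library, only a sketch of the pattern.

```cpp
// Hypothetical example of the pattern the footnote describes: the functor owns
// its data and mutable state, both of which remain accessible after solving.
#include <cstddef>
#include <vector>

struct LeastSquares {
  std::vector<double> x, y;  // data bundled inside the functor
  int n_evals = 0;           // state updated on every evaluation

  double operator()(std::vector<double> &par) {
    ++n_evals;
    double sum = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
      const double resid = y[i] - (par[0] + par[1] * x[i]);
      sum += resid * resid;
    }
    return sum;
  }
};

int main() {
  LeastSquares prob{{0, 1, 2, 3}, {1, 3, 5, 7}};
  std::vector<double> par = {0, 0};
  // any of the solvers above could minimize prob(par); afterwards prob.n_evals
  // and prob.x / prob.y remain available for inspection.
  const double initial_loss = prob(par);
  (void)initial_loss;
  return 0;
}
```
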
93 changes: 33 additions & 60 deletions example.cpp
@@ -22,14 +22,15 @@ using nlsolver::ConjugatedGradientDescent;
using nlsolver::DE;
using nlsolver::GradientDescent;
using nlsolver::NelderMead;
using nlsolver::NelderMeadPSO;
using nlsolver::PSO;
using nlsolver::SANN;
// helper definition for GDType
using GDType = nlsolver::GradientStepType;
using nlsolver::BFGS;

// experimental solvers
using nlsolver::experimental::LevenbergMarquardt;
using nlsolver::experimental::NelderMeadPSO;

// RNG
using nlsolver::rng::xorshift;
Expand Down Expand Up @@ -72,6 +73,20 @@ struct std_MT {
};
*/

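// Small helpers used below: run a given solver from an initial point
// (optionally with box bounds), print the solver status via .print(), and
// print the estimated parameters via print_vector().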
template <typename T>
void run_solver(T &solver, std::vector<double> init = {2, 5}) {
auto de_res = solver.minimize(init);
de_res.print();
print_vector(init);
}
template <typename T>
void run_solver(T &solver, std::vector<double> lower, std::vector<double> upper,
std::vector<double> init = {2, 5}) {
auto de_res = solver.minimize(init, lower, upper);
de_res.print();
print_vector(init);
}

int main() {
// define problem functor - in our case a variant of the rosenbrock function
Rosenbrock prob;
@@ -95,15 +110,8 @@ int main() {
};
auto nm_solver_lambda =
NelderMead<decltype(RosenbrockLambda), double>(RosenbrockLambda);
// initialize function arguments
nm_init = {2, 7};
// nm_init[0] = 2;
// nm_init[1] = 7;
auto nm_res_lambda = nm_solver_lambda.minimize(nm_init);
// check solver status
nm_res_lambda.print();
// and estimated function parameters
print_vector(nm_init);
// repeat with lambda function, using a simple wrapper defined above:
run_solver(nm_solver_lambda, {2, 7});

// use recombination strategy
using DEStrat = nlsolver::RecombinationStrategy;
@@ -114,12 +122,7 @@ int main() {
// again initialize solver, this time also with the RNG
auto de_solver =
DE<Rosenbrock, xorshift<double>, double, DEStrat::best>(prob, gen);

std::vector<double> de_init = {2, 7};

auto de_res = de_solver.minimize(de_init);
de_res.print();
print_vector(de_init);
run_solver(de_solver, {2, 7});

std::cout << "Particle Swarm Optimization with xoshiro: " << std::endl;
// we also have a xoshiro generator
@@ -131,22 +134,12 @@ int main() {
auto pso_solver = PSO<Rosenbrock, xoshiro<double>, double>(prob, xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> pso_init = {3, 3};
auto pso_res = pso_solver.minimize(pso_init);
pso_res.print();
print_vector(pso_init);
run_solver(pso_solver, {3, 3});
std::cout << "Particle Swarm Optimization with xoshiro (and bounds): "
<< std::endl;
// this tends to be much worse than not specifying bounds for PSO - so
// we heavily recommend those:
pso_init[0] = 0;
pso_init[1] = 0;
std::vector<double> pso_lower = {-1, -1};
std::vector<double> pso_upper = {1, 1};

pso_res = pso_solver.minimize(pso_init, pso_lower, pso_upper);
pso_res.print();
print_vector(pso_init);
run_solver(pso_solver, {-1, -1}, {1, 1}, {0, 0});
std::cout << "Accelerated Particle Swarm Optimization with xoshiro: "
<< std::endl;
// we also have an accelerated version - we reset the RNG as well.
@@ -156,21 +149,15 @@ int main() {
xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> apso_init = {3, 3};
auto apso_res = apso_solver.minimize(apso_init);
apso_res.print();
print_vector(apso_init);
run_solver(apso_solver, {3, 3});

std::cout << "Simulated Annealing with xoshiro: " << std::endl;
// we also have an accelerated version - we reset the RNG as well.
xos_gen.reset();
auto sann_solver = SANN<Rosenbrock, xoshiro<double>, double>(prob, xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> sann_init = {3, 3};
auto sann_res = sann_solver.minimize(sann_init);
sann_res.print();
print_vector(sann_init);
run_solver(sann_solver, {5, 5});

std::cout << "NelderMead-PSO hybrid with xoshiro: " << std::endl;
// we also have an accelerated version - we reset the RNG as well.
@@ -179,53 +166,39 @@ int main() {
NelderMeadPSO<Rosenbrock, xoshiro<double>, double>(prob, xos_gen);
// set initial state - if no bounds are given, default initial parameters are
// taken roughly as the scale of the parameter space
std::vector<double> nm_pso_init = {3, 3};
auto nm_pso_res = nm_pso_solver.minimize(nm_pso_init);
nm_pso_res.print();
print_vector(nm_pso_init);
run_solver(nm_pso_solver, {3, 3});

std::cout << "Gradient Descent without line-search using fixed step size: "
<< std::endl;
auto gd_solver_fixed =
GradientDescent<Rosenbrock, double, GDType::Fixed>(prob, 0.0005);
std::vector<double> gd_init_fixed = {2, 2};
auto gd_res_fixed = gd_solver_fixed.minimize(gd_init_fixed);
gd_res_fixed.print();
print_vector(gd_init_fixed);
run_solver(gd_solver_fixed, {2, 2});

std::cout << "Gradient Descent with line-search: " << std::endl;
auto gd_solver_linesearch =
GradientDescent<Rosenbrock, double, GDType::Linesearch>(prob);
std::vector<double> gd_init_linesearch = {2, 2};
auto gd_res_linesearch = gd_solver_linesearch.minimize(gd_init_linesearch);
gd_res_linesearch.print();
print_vector(gd_init_linesearch);
run_solver(gd_solver_linesearch, {2, 2});

std::cout
<< "Gradient Descent without line-search using big steps,"
<< " cycling through step-sizes (and lipschitz constant eyeballing): "
<< std::endl;
auto gd_solver_bigstep =
GradientDescent<Rosenbrock, double, GDType::Bigstep, 5, true>(prob);
std::vector<double> gd_init_bigstep = {2, 2};
auto gd_res_bigstep = gd_solver_bigstep.minimize(gd_init_bigstep);
gd_res_bigstep.print();
print_vector(gd_init_bigstep);
run_solver(gd_solver_bigstep, {2, 2});

std::cout << "Conjugated Gradient Descent (always requires linesearch)"
<< std::endl;
auto cgd_solver = ConjugatedGradientDescent<Rosenbrock, double>(prob);
std::vector<double> cgd_init = {2, 2};
auto cgd_res = cgd_solver.minimize(cgd_init);
cgd_res.print();
print_vector(cgd_init);
run_solver(cgd_solver, {2, 2});

std::cout << "BFGS (always requires linesearch)" << std::endl;
auto bfgs_solver = BFGS<Rosenbrock, double>(prob);
std::vector<double> bfgs_init = {2, 2};
auto bfgs_res = bfgs_solver.minimize(bfgs_init);
bfgs_res.print();
print_vector(bfgs_init);
run_solver(bfgs_solver, {2, 2});

std::cout << "LevenbergMarquardt (always requires hessian)" << std::endl;
auto lm_solver = LevenbergMarquardt<Rosenbrock, double>(prob);
run_solver(lm_solver, {2, 2});

return 0;
}