Refacto api v3 with #![no_std]
geosarr committed Aug 2, 2024
1 parent fb29ec2 commit 81bf9b4
Showing 7 changed files with 251 additions and 251 deletions.
8 changes: 4 additions & 4 deletions benches/steepest_descent.rs
@@ -42,7 +42,7 @@ fn armijo_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_armijo(0.01, 0.01);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, 1000);
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, Some(1000));
});
}

@@ -53,7 +53,7 @@ fn powell_wolfe_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_powell_wolfe(0.01, 0.1);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, 1000);
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, Some(1000));
});
}

@@ -64,7 +64,7 @@ fn adagrad_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_adagrad(0.1, 0.0001);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, 1000);
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, Some(1000));
});
}

@@ -75,6 +75,6 @@ fn adadelta_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_adadelta(0.1, 0.0001);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, 1000);
let _solution = descent(&f, &gradf, &x0, &params, 1e-6, Some(1000));
});
}
2 changes: 1 addition & 1 deletion py-tuutal/src/first_order.rs
@@ -24,7 +24,7 @@ macro_rules! first_order_method {
&x0.as_array().to_owned(),
&DescentParameter::$name(gamma, beta),
gtol,
maxiter.unwrap_or(x0.len().unwrap() * 1000),
maxiter,
) {
Ok(value) => Ok(value.into_pyarray_bound(py)),
Err(error) => match error {
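With this change the Python binding no longer computes a default iteration budget itself; it forwards `maxiter` as an `Option<usize>` to the Rust core, matching the new `descent` signature in `src/first_order.rs` below. A minimal sketch of how such an optional bound might be resolved downstream, assuming the old dimension-based fallback is kept; the helper `resolve_maxiter` is hypothetical and not part of the crate:

```rust
// Hypothetical helper: how an Option-valued `maxiter` could be resolved by
// the core optimizer. The actual default used after this refactor is not
// shown in the diff; the fallback below is the one the Python binding used
// to apply (`x0.len() * 1000`).
fn resolve_maxiter(maxiter: Option<usize>, dim: usize) -> usize {
    maxiter.unwrap_or(dim * 1000)
}

fn main() {
    assert_eq!(resolve_maxiter(Some(500), 3), 500);
    assert_eq!(resolve_maxiter(None, 3), 3000);
}
```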
234 changes: 233 additions & 1 deletion src/first_order.rs
@@ -2,4 +2,236 @@ mod adaptive_descent;
mod macros;
mod steepest_descent;

pub use steepest_descent::{descent, Armijo, DescentParameter, PowellWolfe};
use core::ops::{Add, Mul};

use crate::{
traits::{VecDot, Vector},
Number, Optimizer, TuutalError,
};
pub use adaptive_descent::{AdaDelta, AdaGrad};
pub use steepest_descent::{Armijo, PowellWolfe};

/// Parameters used in a descent method.
///
/// The **gamma** parameter represents the:
/// - magnitude of decrease in the objective function in the negative gradient direction for the Armijo and Powell Wolfe rules.
/// - general step size for the AdaGrad rule.
/// - magnitude of decay for previous gradients in the AdaDelta algorithm.
///
/// The **beta** parameter controls the:
/// - magnitude of the step size decrease in the Armijo rule.
/// - descent steepness for the Powell Wolfe strategy.
/// - tolerance factor of the update denominator in the AdaGrad and AdaDelta algorithms.
///
/// Use the methods [`new_armijo`], [`new_powell_wolfe`], [`new_adagrad`] and [`new_adadelta`] to construct these parameters.
///
/// [`new_armijo`]: DescentParameter::new_armijo
///
/// [`new_powell_wolfe`]: DescentParameter::new_powell_wolfe
///
/// [`new_adagrad`]: DescentParameter::new_adagrad
///
/// [`new_adadelta`]: DescentParameter::new_adadelta
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DescentParameter<T> {
/// Armijo step size rule
///
/// At each step t, the step size is a scalar.
Armijo { gamma: T, beta: T },
/// Powell Wolfe step size rule
///
/// At each step t, the step size is a scalar.
PowellWolfe { gamma: T, beta: T },
/// Adaptive Gradient step size rule
///
/// At each step t, the vector step size is given by:
/// - adagrad_step_size<sub>t</sub> = gamma / ( sum<sub>k <= t</sub> g<sub>k</sub><sup>2</sup> + beta ).sqrt()
/// where g<sub>k</sub> is the gradient at step k.
AdaGrad { gamma: T, beta: T },
/// Adaptive Learning Rate (AdaDelta) step size rule
///
/// At each step t, the vector step size is given by:
/// - adadelta_step_size<sub>t</sub> = RMS(x<sub>t-1</sub>) / RMS(g<sub>t</sub>)
/// where:
/// - x<sub>k</sub> is the update at step k
/// - g<sub>k</sub> is the gradient of x<sub>k</sub>
/// - RMS(v<sub>t</sub>) = ( sum<sub>k <= t</sub> E[v<sup>2</sup>]<sub>k</sub> + beta ).sqrt()
/// - E[v<sup>2</sup>]<sub>k</sub> = gamma * E[v<sup>2</sup>]<sub>k-1</sub> + (1 - gamma) * v<sub>k</sub><sup>2</sup>
/// with E[v<sup>2</sup>]<sub>0</sub> = 0
AdaDelta { gamma: T, beta: T },
}

impl<T> Default for DescentParameter<T>
where
T: Number,
{
fn default() -> Self {
Self::Armijo {
gamma: T::exp_base(10, -3),
beta: T::cast_from_f32(0.5),
}
}
}

impl<T> DescentParameter<T>
where
T: Number,
{
/// Constructs an Armijo rule parameter.
///
/// # Panics
/// When one of these conditions is not satisfied:
/// - 0. < gamma < 1.
/// - 0. < beta < 1.
///
/// ```
/// use tuutal::DescentParameter;
/// let param = DescentParameter::new_armijo(0.1f32, 0.016f32);
/// ```
pub fn new_armijo(gamma: T, beta: T) -> Self {
assert!(
(T::zero() < gamma) && (gamma < T::one()),
"gamma should satisfy: 0. < gamma < 1."
);
assert!(
(T::zero() < beta) && (beta < T::one()),
"beta should satisfy: 0. < beta < 1."
);
Self::Armijo { gamma, beta }
}
/// Constructs a Powell-Wolfe rule parameter.
///
/// # Panics
/// When one of these conditions is not satisfied:
/// - 0. < gamma < 1/2
/// - gamma < beta < 1.
///
/// ```
/// use tuutal::DescentParameter;
/// let param = DescentParameter::new_powell_wolfe(0.01f32, 0.75f32);
/// ```
pub fn new_powell_wolfe(gamma: T, beta: T) -> Self {
assert!(
(T::zero() < gamma) && (gamma < T::cast_from_f32(0.5)),
"gamma should satisfy: 0 < gamma < 1/2"
);
assert!(
(gamma < beta) && (beta < T::one()),
"beta should satisfy: gamma < beta < 1."
);
Self::PowellWolfe { gamma, beta }
}
/// Constructs an AdaGrad rule parameter.
///
/// # Panics
/// When one of these conditions is not satisfied:
/// - gamma > 0.
/// - beta > 0.
/// ```
/// use tuutal::DescentParameter;
/// let param = DescentParameter::new_adagrad(0.01f32, 0.0001);
/// ```
pub fn new_adagrad(gamma: T, beta: T) -> Self {
assert!(gamma > T::zero());
assert!(beta > T::zero());
Self::AdaGrad { gamma, beta }
}
/// Constructs an AdaDelta rule parameter.
///
/// # Panics
/// When one of these conditions is not satisfied:
/// - 0. < gamma < 1.
/// - beta > 0.
/// ```
/// use tuutal::DescentParameter;
/// let param = DescentParameter::new_adadelta(0.2f32, 0.04);
/// ```
pub fn new_adadelta(gamma: T, beta: T) -> Self {
assert!(gamma > T::zero() && gamma < T::one());
assert!(beta > T::zero());
Self::AdaDelta { gamma, beta }
}
}

/// A descent algorithm using some step size method.
///
/// It requires an initial guess x<sub>0</sub>.
/// ```
/// use tuutal::{array, descent, DescentParameter, Array1};
/// // Example from python scipy.optimize.minimize_scalar
/// let f = |x: &Array1<f32>| (x[0] - 2.) * x[0] * (x[0] + 2.).powi(2);
/// let gradf = |x: &Array1<f32>| array![2. * (x[0] + 2.) * (2. * x[0].powi(2) - x[0] - 1.)];
/// let x0 = &array![-1.];
///
/// let x_star = descent(
/// f,
/// gradf,
/// &x0,
/// &DescentParameter::new_armijo(1e-2, 0.25),
/// 1e-3,
/// Some(10),
/// ).unwrap();
/// assert!((-2. - x_star[0]).abs() < 1e-10);
///
/// let x_star = descent(
/// &f,
/// &gradf,
/// &x0,
/// &DescentParameter::new_powell_wolfe(1e-2, 0.9),
/// 1e-3,
/// Some(10),
/// ).unwrap();
/// assert!((-2. - x_star[0]).abs() < 1e-10);
///
/// let x0 = &array![-0.5];
/// let x_star = descent(f, gradf, &x0, &Default::default(), 1e-3, Some(10)).unwrap();
/// assert!((-0.5 - x_star[0]).abs() < 1e-10);
///
/// let x0 = &array![0.];
/// let x_star = descent(f, gradf, &x0, &Default::default(), 1e-3, Some(10)).unwrap();
/// assert!((1. - x_star[0]).abs() < 1e-10);
///
/// // It also takes multivariate objective functions
/// let f =
/// |arr: &Array1<f32>| 100. * (arr[1] - arr[0].powi(2)).powi(2) + (1. - arr[0]).powi(2);
/// let gradf = |arr: &Array1<f32>| {
/// array![
/// -400. * arr[0] * (arr[1] - arr[0].powi(2)) - 2. * (1. - arr[0]),
/// 200. * (arr[1] - arr[0].powi(2))
/// ]
/// };
/// let x = array![1f32, -0.5f32];
/// let opt = descent(f, gradf, &x, &Default::default(), 1e-3, Some(10000)).unwrap();
/// assert!((opt[0] - 1.).abs() <= 1e-2);
/// assert!((opt[1] - 1.).abs() <= 1e-2);
/// ```
pub fn descent<X, F, G>(
f: F,
gradf: G,
x0: &X,
params: &DescentParameter<X::Elem>,
gtol: X::Elem,
maxiter: Option<usize>,
) -> Result<X, TuutalError<X>>
where
X: Vector + Clone + VecDot<Output = X::Elem>,
for<'a> &'a X: Add<X, Output = X> + Mul<&'a X, Output = X> + Mul<X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
{
match params {
DescentParameter::Armijo { gamma, beta } => {
Armijo::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(maxiter)
}
DescentParameter::PowellWolfe { gamma, beta } => {
PowellWolfe::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(maxiter)
}
DescentParameter::AdaDelta { gamma, beta } => {
AdaDelta::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(maxiter)
}
DescentParameter::AdaGrad { gamma, beta } => {
AdaGrad::new(f, gradf, x0.clone(), *gamma, *beta, gtol).optimize(maxiter)
}
}
}
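To make the AdaGrad formula documented on the `DescentParameter::AdaGrad` variant concrete, here is a standalone numerical sketch of adagrad_step_size<sub>t</sub> = gamma / ( sum<sub>k <= t</sub> g<sub>k</sub><sup>2</sup> + beta ).sqrt() in the scalar case. It only illustrates the documented formula and is not the crate's implementation, which, per the doc comment, applies the rule as a vector step size:

```rust
// Standalone illustration of the documented AdaGrad step size,
//   step_t = gamma / (sum_{k <= t} g_k^2 + beta).sqrt(),
// shown for a scalar sequence of gradients. Not the crate's implementation.
fn main() {
    let (gamma, beta) = (0.1_f64, 1e-4_f64);
    let gradients = [0.5_f64, -0.3, 0.2];

    let mut accum_grad = 0.0_f64; // running sum of squared gradients
    for (t, g) in gradients.iter().enumerate() {
        accum_grad += g * g;
        let step = gamma / (accum_grad + beta).sqrt();
        println!("t = {t}: step size = {step:.6}");
    }
}
```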
8 changes: 4 additions & 4 deletions src/first_order/adaptive_descent.rs
@@ -1,8 +1,8 @@
mod adadelta;
mod adagrad;

pub(crate) use adadelta::AdaDelta;
pub(crate) use adagrad::AdaGrad;
pub use adadelta::AdaDelta;
pub use adagrad::AdaGrad;

pub(crate) const ACCUM_GRAD: &str = "accum_grad";
pub(crate) const ACCUM_UPDATE: &str = "accum_update";
pub const ACCUM_GRAD: &str = "accum_grad";
pub const ACCUM_UPDATE: &str = "accum_update";
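
Since `AdaDelta` and `AdaGrad` (and the accumulator keys) are now `pub`, and `src/first_order.rs` re-exports them via `pub use adaptive_descent::{AdaDelta, AdaGrad};`, the adaptive optimizers can presumably be driven directly rather than only through `descent`. Below is a hedged sketch mirroring the constructor and `optimize` calls visible inside `descent`; the import paths (`tuutal::first_order::AdaGrad`, `tuutal::Optimizer`) are assumptions, as the diff does not show the crate-root re-exports.

```rust
// Sketch only: drives AdaGrad directly, mirroring the call made inside
// `descent`. Import paths are assumptions; adjust them to the actual
// re-export locations in the crate.
use tuutal::first_order::AdaGrad; // assumed path
use tuutal::{array, Array1, Optimizer};

fn main() {
    // Same univariate example as in the `descent` doc test.
    let f = |x: &Array1<f32>| (x[0] - 2.) * x[0] * (x[0] + 2.).powi(2);
    let gradf =
        |x: &Array1<f32>| array![2. * (x[0] + 2.) * (2. * x[0].powi(2) - x[0] - 1.)];
    let x0 = array![-1_f32];

    // Argument order as used by `descent`: f, gradf, x0, gamma, beta, gtol.
    let result = AdaGrad::new(f, gradf, x0, 0.1, 1e-4, 1e-3).optimize(Some(1000));
    if let Ok(x_star) = result {
        println!("AdaGrad solution: {:?}", x_star);
    }
}
```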