Added *args-like extra arguments for the objective function and its gradient.
geosarr committed Aug 17, 2024
1 parent 12d4167 commit c8e3bc3
Showing 9 changed files with 114 additions and 85 deletions.
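
The commit threads two extra values, `farg` and `garg`, through `descent` and the descent-rule types, and forwards them by reference to the objective and gradient closures on every evaluation. A minimal sketch of the new call shape (the quadratic objective and its parameter tuple below are illustrative, not taken from the repository):

```rust
use tuutal::{array, descent, Array1, DescentParameter};

fn main() {
    // Objective a * (x - c)^2 and its gradient, parameterised by (a, c)
    // passed through the new `farg`/`garg` slots instead of being captured.
    let f = |x: &Array1<f32>, p: &(f32, f32)| p.0 * (x[0] - p.1).powi(2);
    let gradf = |x: &Array1<f32>, p: &(f32, f32)| array![2. * p.0 * (x[0] - p.1)];
    let (x_star, f_star) = descent(
        f,
        gradf,
        &array![5_f32],
        DescentParameter::new_armijo(1e-2, 0.25),
        1e-3,
        Some(1000),
        (3., 1.), // farg, borrowed by `f` on every evaluation
        (3., 1.), // garg, borrowed by `gradf` on every evaluation
    )
    .unwrap();
    println!("x* = {}, f* = {}", x_star[0], f_star); // expected near x* = 1
}
```
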
16 changes: 8 additions & 8 deletions benches/steepest_descent.rs
@@ -6,18 +6,18 @@ use test::Bencher;
use tuutal::{descent, s, Array1, DescentParameter};

fn rosenbrock_nd() -> (
impl Fn(&Array1<f32>) -> f32,
impl Fn(&Array1<f32>) -> Array1<f32>,
impl Fn(&Array1<f32>, &()) -> f32,
impl Fn(&Array1<f32>, &()) -> Array1<f32>,
) {
// Rosenbrock function and its gradient function for arrays of size >= 3.
let f = |x: &Array1<f32>| {
let f = |x: &Array1<f32>, _: &()| {
let xi = x.slice(s![0..x.len() - 1]);
let xi_plus1 = x.slice(s![1..]);
let term = 100. * (xi_plus1.to_owned() - xi.map(|x| x.powi(2))).map(|x| x.powi(2))
+ (1. - xi.to_owned()).map(|x| x.powi(2));
term.sum()
};
let gradf = |x: &Array1<f32>| {
let gradf = |x: &Array1<f32>, _: &()| {
let n = x.len();
let first_deriv = |i: usize| -400. * x[i] * (x[i + 1] - x[i].powi(2)) - 2. * (1. - x[i]);
let last_deriv = |i: usize| 200. * (x[i] - x[i - 1].powi(2));
@@ -42,7 +42,7 @@ fn armijo_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_armijo(0.01, 0.01);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000));
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000), (), ());
});
}

@@ -53,7 +53,7 @@ fn powell_wolfe_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_powell_wolfe(0.01, 0.1);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000));
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000), (), ());
});
}

@@ -64,7 +64,7 @@ fn adagrad_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_adagrad(0.1, 0.0001);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000));
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000), (), ());
});
}

@@ -75,6 +75,6 @@ fn adadelta_bench(bench: &mut Bencher) {
let x0 = Array1::from_vec(vec![0_f32; LENGTH]);
let params = DescentParameter::new_adadelta(0.1, 0.0001);
bench.iter(|| {
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000));
let _solution = descent(&f, &gradf, &x0, params, 1e-6, Some(1000), (), ());
});
}
34 changes: 24 additions & 10 deletions src/first_order.rs
@@ -167,8 +167,8 @@ where
/// ```
/// use tuutal::{array, descent, DescentParameter, Array1};
/// // Example from python scipy.optimize.minimize_scalar
/// let f = |x: &Array1<f32>| (x[0] - 2.) * x[0] * (x[0] + 2.).powi(2);
/// let gradf = |x: &Array1<f32>| array![2. * (x[0] + 2.) * (2. * x[0].powi(2) - x[0] - 1.)];
/// let f = |x: &Array1<f32>, _: &()| (x[0] - 2.) * x[0] * (x[0] + 2.).powi(2);
/// let gradf = |x: &Array1<f32>, _: &()| array![2. * (x[0] + 2.) * (2. * x[0].powi(2) - x[0] - 1.)];
/// let x0 = &array![-1.];
///
/// let (x_star, f_star) = descent(
@@ -178,6 +178,8 @@ where
/// DescentParameter::new_armijo(1e-2, 0.25),
/// 1e-3,
/// Some(10),
/// (),
/// ()
/// ).unwrap();
/// assert!((-2. - x_star[0]).abs() < 1e-10);
///
@@ -188,44 +190,48 @@ where
/// DescentParameter::new_powell_wolfe(1e-2, 0.9),
/// 1e-3,
/// Some(10),
/// (),
/// ()
/// ).unwrap();
/// assert!((-2. - x_star[0]).abs() < 1e-10);
///
/// let x0 = &array![-0.5];
/// let (x_star, f_star) = descent(f, gradf, &x0, Default::default(), 1e-3, Some(10)).unwrap();
/// let (x_star, f_star) = descent(f, gradf, &x0, Default::default(), 1e-3, Some(10), (), ()).unwrap();
/// assert!((-0.5 - x_star[0]).abs() < 1e-10);
///
/// let x0 = &array![0.];
/// let (x_star, f_star) = descent(f, gradf, &x0, Default::default(), 1e-3, Some(10)).unwrap();
/// let (x_star, f_star) = descent(f, gradf, &x0, Default::default(), 1e-3, Some(10), (), ()).unwrap();
/// assert!((1. - x_star[0]).abs() < 1e-10);
///
/// // It also takes multivariate objective functions
/// let f =
/// |arr: &Array1<f32>| 100. * (arr[1] - arr[0].powi(2)).powi(2) + (1. - arr[0]).powi(2);
/// let gradf = |arr: &Array1<f32>| {
/// |arr: &Array1<f32>, _: &()| 100. * (arr[1] - arr[0].powi(2)).powi(2) + (1. - arr[0]).powi(2);
/// let gradf = |arr: &Array1<f32>, _: &()| {
/// array![
/// -400. * arr[0] * (arr[1] - arr[0].powi(2)) - 2. * (1. - arr[0]),
/// 200. * (arr[1] - arr[0].powi(2))
/// ]
/// };
/// let x = array![1f32, -0.5f32];
/// let (x_star, f_star) = descent(f, gradf, &x, Default::default(), 1e-3, Some(10000)).unwrap();
/// let (x_star, f_star) = descent(f, gradf, &x, Default::default(), 1e-3, Some(10000), (), ()).unwrap();
/// assert!((x_star[0] - 1.).abs() <= 1e-2);
/// assert!((x_star[1] - 1.).abs() <= 1e-2);
/// ```
pub fn descent<X, F, G>(
pub fn descent<X, F, G, Farg, Garg>(
f: F,
gradf: G,
x0: &X,
params: DescentParameter<X::Elem>,
gtol: X::Elem,
maxiter: Option<usize>,
farg: Farg,
garg: Garg,
) -> Result<(X, X::Elem), TuutalError<X>>
where
X: Vector + Clone + VecDot<Output = X::Elem>,
for<'a> &'a X: Add<X, Output = X> + Mul<&'a X, Output = X> + Mul<X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
match params {
DescentParameter::Armijo { gamma, beta } => Armijo::new(
@@ -237,6 +243,8 @@ where
beta,
epsilon: gtol,
},
farg,
garg,
)
.optimize(maxiter),
DescentParameter::PowellWolfe { gamma, beta } => PowellWolfe::new(
@@ -248,6 +256,8 @@ where
beta,
epsilon: gtol,
},
farg,
garg,
)
.optimize(maxiter),
DescentParameter::AdaDelta { gamma, beta } => AdaDelta::new(
@@ -259,6 +269,8 @@ where
beta,
epsilon: gtol,
},
farg,
garg,
)
.optimize(maxiter),
DescentParameter::AdaGrad { gamma, beta } => AdaGrad::new(
@@ -270,6 +282,8 @@ where
beta,
epsilon: gtol,
},
farg,
garg,
)
.optimize(maxiter),
}
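
For a data-driven objective, the new slots can carry the dataset itself, so the closures stay free of captured state. A hedged sketch of a least-squares use of the updated `descent` signature (the data, closure bodies, and hyper-parameters below are illustrative and not part of this commit):

```rust
use tuutal::{array, descent, Array1, DescentParameter};

type Data = (Array1<f32>, Array1<f32>); // (inputs, targets)

fn main() {
    // Fit y ≈ w * x by minimising squared residuals over the data in `farg`/`garg`.
    let f = |w: &Array1<f32>, data: &Data| {
        (&data.1 - &(&data.0 * w[0])).map(|r| r.powi(2)).sum()
    };
    let gradf = |w: &Array1<f32>, data: &Data| {
        let residuals = &data.1 - &(&data.0 * w[0]);
        array![-2. * (&data.0 * &residuals).sum()]
    };
    let data: Data = (array![0., 1., 2., 3.], array![0., 2., 4., 6.]);
    let (w_star, _) = descent(
        f,
        gradf,
        &array![0_f32],
        DescentParameter::new_armijo(1e-3, 0.25),
        1e-4,
        Some(10_000),
        data.clone(), // farg: dataset seen by the objective
        data,         // garg: dataset seen by the gradient
    )
    .unwrap();
    println!("fitted slope: {}", w_star[0]); // expected near 2.0
}
```
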
7 changes: 4 additions & 3 deletions src/first_order/adaptive_descent/adadelta.rs
@@ -34,12 +34,13 @@ descent_rule!(

impl_optimizer_descent!(AdaDelta, X, AdaDeltaHyperParameter, AdaDeltaAccumulators<X>);

impl<X, F, G> AdaDelta<X, F, G, X, AdaDeltaHyperParameter<X::Elem>, AdaDeltaAccumulators<X>>
impl<X, F, G, Farg, Garg>
AdaDelta<X, F, G, X, AdaDeltaHyperParameter<X::Elem>, AdaDeltaAccumulators<X>, Farg, Garg>
where
X: Vector,
for<'b> &'b X: Add<X, Output = X> + Mul<&'b X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
pub(crate) fn step(&mut self) {
let (gamma, beta) = (self.hyper_params.gamma, self.hyper_params.beta);
7 changes: 4 additions & 3 deletions src/first_order/adaptive_descent/adagrad.rs
@@ -30,12 +30,13 @@ descent_rule!(
);
impl_optimizer_descent!(AdaGrad, X, AdaGradHyperParameter, AdaGradAccumulators<X>);

impl<X, F, G> AdaGrad<X, F, G, X, AdaGradHyperParameter<X::Elem>, AdaGradAccumulators<X>>
impl<X, F, G, Farg, Garg>
AdaGrad<X, F, G, X, AdaGradHyperParameter<X::Elem>, AdaGradAccumulators<X>, Farg, Garg>
where
X: Vector,
for<'b> &'b X: Add<X, Output = X> + Mul<&'b X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
pub(crate) fn step(&mut self) {
let (gamma, beta) = (self.hyper_params.gamma, self.hyper_params.beta);
75 changes: 41 additions & 34 deletions src/first_order/macros.rs
@@ -2,9 +2,11 @@ macro_rules! descent_rule {
($rule:ident, $step:ty, $sigma:expr, $hp:ident, $accum:ty, $accumnew:expr) => {
#[derive(Debug)]
#[allow(dead_code)]
pub struct $rule<X, F, G, S, H, A>
pub struct $rule<X, F, G, S, H, A, Farg = (), Garg = ()>
where
X: Vector,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
f: F, // objective function
gradf: G, // gradient of the objective function
@@ -15,47 +17,38 @@ macro_rules! descent_rule {
counter: Counter<usize>, // [nb of iterations, number of f calls, nb of gradf calls]
stop_metrics: X::Elem, // metrics used to stop the algorithm,
accumulators: A, // accumulators during corresponding algorithms.
farg: Farg,
garg: Garg,
}

impl<X, F, G> $rule<X, F, G, $step, $hp<X::Elem>, $accum>
impl<X, F, G, Farg, Garg> $rule<X, F, G, $step, $hp<X::Elem>, $accum, Farg, Garg>
where
X: Vector,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
#[allow(dead_code)]
pub(crate) fn func(&self, x: &X) -> X::Elem
where
F: Fn(&X) -> X::Elem,
{
let f = &self.f;
f(x)
}
pub(crate) fn grad(&self, x: &X) -> X
where
G: Fn(&X) -> X,
{
let g = &self.gradf;
g(x)
}
pub(crate) fn stop(&self) -> bool {
self.stop_metrics <= self.hyper_params.epsilon.powi(2)
}

pub fn new(f: F, gradf: G, x: X, hyper_params: $hp<X::Elem>) -> Self
where
X: Vector,
G: Fn(&X) -> X,
{
let neg_gradfx = -gradf(&x);
/// New algorithm
pub fn new(
f: F,
gradf: G,
x: X,
hyper_params: $hp<X::Elem>,
farg: Farg,
garg: Garg,
) -> Self {
let neg_gradfx = -gradf(&x, &garg);
let mut optimizer = Self {
f,
gradf,
x,
neg_gradfx: neg_gradfx,
neg_gradfx,
sigma: $sigma,
hyper_params,
counter: Counter::new(),
stop_metrics: X::Elem::infinity(),
accumulators: $accumnew,
farg,
garg,
};
optimizer.counter.gcalls += 1;
// Not needed when broadcasting is allowed ?
@@ -68,18 +61,31 @@ macro_rules! descent_rule {
// }
optimizer
}
#[allow(dead_code)]
pub(crate) fn func(&self, x: &X) -> X::Elem {
let f = &self.f;
f(x, &self.farg)
}
pub(crate) fn grad(&self, x: &X) -> X {
let g = &self.gradf;
g(x, &self.garg)
}
pub(crate) fn stop(&self) -> bool {
self.stop_metrics <= self.hyper_params.epsilon.powi(2)
}
}
};
}

macro_rules! impl_optimizer_descent {
($rule:ident, $step:ty, $hp:ident, $accum:ty) => {
impl<X, F, G> core::iter::Iterator for $rule<X, F, G, $step, $hp<X::Elem>, $accum>
impl<X, F, G, Farg, Garg> core::iter::Iterator
for $rule<X, F, G, $step, $hp<X::Elem>, $accum, Farg, Garg>
where
X: Vector + VecDot<Output = X::Elem> + Clone,
for<'b> &'b X: Add<X, Output = X> + Mul<&'b X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
type Item = X::Elem;
fn next(&mut self) -> Option<Self::Item> {
@@ -95,12 +101,13 @@ macro_rules! impl_optimizer_descent {
}
}
}
impl<X, F, G> Optimizer for $rule<X, F, G, $step, $hp<X::Elem>, $accum>
impl<X, F, G, Farg, Garg> Optimizer
for $rule<X, F, G, $step, $hp<X::Elem>, $accum, Farg, Garg>
where
X: Vector + VecDot<Output = X::Elem> + Clone,
for<'b> &'b X: Add<X, Output = X> + Mul<&'b X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
type Iterate = X;
type Intermediate = $step;
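
Conceptually, `descent_rule!` now generates a struct that owns the extra arguments next to the closures and lends them out on every evaluation. A hand-written, non-macro analogue (illustrative only, not the actual expansion; the scalar types and names are simplified):

```rust
// Simplified stand-in for the state generated by `descent_rule!`.
struct Rule<X, F, G, Farg = (), Garg = ()>
where
    F: Fn(&X, &Farg) -> f64,
    G: Fn(&X, &Garg) -> X,
{
    f: F,       // objective function
    gradf: G,   // gradient of the objective function
    x: X,       // current iterate
    farg: Farg, // extra data forwarded to `f`
    garg: Garg, // extra data forwarded to `gradf`
}

impl<X, F, G, Farg, Garg> Rule<X, F, G, Farg, Garg>
where
    F: Fn(&X, &Farg) -> f64,
    G: Fn(&X, &Garg) -> X,
{
    fn func(&self, x: &X) -> f64 {
        (self.f)(x, &self.farg) // objective data borrowed from the optimizer
    }
    fn grad(&self, x: &X) -> X {
        (self.gradf)(x, &self.garg) // gradient data borrowed from the optimizer
    }
}

fn main() {
    let rule = Rule {
        f: |x: &f64, scale: &f64| scale * x * x,
        gradf: |x: &f64, scale: &f64| 2. * scale * x,
        x: 2.0,
        farg: 3.0, // plays the role of the new `farg` field
        garg: 3.0, // plays the role of the new `garg` field
    };
    assert_eq!(rule.func(&rule.x), 12.0); // 3 * 2 * 2
    assert_eq!(rule.grad(&rule.x), 12.0); // 2 * 3 * 2
}
```
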
8 changes: 4 additions & 4 deletions src/first_order/steepest_descent/armijo.rs
@@ -6,7 +6,6 @@ use crate::{
Counter, Optimizer,
};
use num_traits::{Float, One, Zero};

/// Hyperparameters used to compute step sizes in Armijo rule.
#[derive(Debug)]
pub struct ArmijoHyperParameter<T> {
@@ -25,12 +24,13 @@ descent_rule!(
);
impl_optimizer_descent!(Armijo, [<X as Vector>::Elem; 1], ArmijoHyperParameter, ());

impl<X, F, G> Armijo<X, F, G, [X::Elem; 1], ArmijoHyperParameter<X::Elem>, ()>
impl<X, F, G, Farg, Garg>
Armijo<X, F, G, [X::Elem; 1], ArmijoHyperParameter<X::Elem>, (), Farg, Garg>
where
X: Vector + VecDot<X, Output = X::Elem>,
for<'b> &'b X: Add<X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
pub(crate) fn step(&mut self) {
let mut sigma = X::Elem::one();
7 changes: 4 additions & 3 deletions src/first_order/steepest_descent/powell_wolfe.rs
@@ -24,12 +24,13 @@ descent_rule!(
);
impl_optimizer_descent!(PowellWolfe, [X::Elem; 1], PowellWolfeHyperParameter, ());

impl<X, F, G> PowellWolfe<X, F, G, [X::Elem; 1], PowellWolfeHyperParameter<X::Elem>, ()>
impl<X, F, G, Farg, Garg>
PowellWolfe<X, F, G, [X::Elem; 1], PowellWolfeHyperParameter<X::Elem>, (), Farg, Garg>
where
X: Vector + VecDot<X, Output = X::Elem> + Add<X, Output = X>,
for<'b> &'b X: Add<X, Output = X>,
F: Fn(&X) -> X::Elem,
G: Fn(&X) -> X,
F: Fn(&X, &Farg) -> X::Elem,
G: Fn(&X, &Garg) -> X,
{
pub(crate) fn step(&mut self) {
let mut sigma_minus = X::Elem::one();
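
Because `Farg` and `Garg` are independent type parameters, the objective and gradient closures can take different extra data. A sketch of that (the 1-D objective mirrors the crate's doc example; the finite-difference gradient and its step size are made up for illustration):

```rust
use tuutal::{array, descent, Array1, DescentParameter};

// Scalar objective as in the crate's doc example; it needs no extra data (Farg = ()).
fn objective(x: &Array1<f32>, _: &()) -> f32 {
    (x[0] - 2.) * x[0] * (x[0] + 2.).powi(2)
}

fn main() {
    // Central-difference gradient parameterised by its own step size `h` (Garg = f32).
    let gradf = |x: &Array1<f32>, h: &f32| {
        let (mut xp, mut xm) = (x.clone(), x.clone());
        xp[0] += h;
        xm[0] -= h;
        array![(objective(&xp, &()) - objective(&xm, &())) / (2. * h)]
    };
    let (x_star, _) = descent(
        objective,
        gradf,
        &array![-1_f32],
        DescentParameter::new_armijo(1e-2, 0.25),
        1e-3,
        Some(100),
        (),   // farg: nothing for the objective
        1e-4, // garg: finite-difference step size
    )
    .unwrap();
    println!("x* ≈ {}", x_star[0]); // the analytic-gradient doc example lands near -2 here
}
```
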