Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unroll search #2034

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
7 changes: 4 additions & 3 deletions include/eve/module/algo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@
#include <eve/module/algo/algo/copy_if.hpp>
#include <eve/module/algo/algo/equal.hpp>
#include <eve/module/algo/algo/fill.hpp>
#include <eve/module/algo/algo/find.hpp>
#include <eve/module/algo/algo/find_last.hpp>
#include <eve/module/algo/algo/for_each.hpp>
#include <eve/module/algo/algo/for_each_iteration.hpp>
#include <eve/module/algo/algo/find.hpp>
#include <eve/module/algo/algo/for_each_iteration_fixed_overflow.hpp>
#include <eve/module/algo/algo/for_each_iteration_with_expensive_optional_part.hpp>
#include <eve/module/algo/algo/for_each_iteration.hpp>
#include <eve/module/algo/algo/for_each_selected.hpp>
#include <eve/module/algo/algo/for_each.hpp>
#include <eve/module/algo/algo/inclusive_scan.hpp>
#include <eve/module/algo/algo/iota.hpp>
#include <eve/module/algo/algo/iterator_helpers.hpp>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
//==================================================================================================
/*
EVE - Expressive Vector Engine
Copyright : EVE Project Contributors
SPDX-License-Identifier: BSL-1.0
*/
//==================================================================================================
#pragma once

#include <eve/module/algo/algo/concepts.hpp>
#include <eve/module/algo/algo/traits.hpp>
#include <eve/module/core.hpp>

namespace eve::algo
{

namespace detail
{
struct for_each_iteration_with_expensive_optional_part_common
{
template<typename Traits, typename I, typename S> auto unroll_l(Traits, I f, S l)
{
return eve::unalign(f) + (l - f - get_unrolling<Traits>() * iterator_cardinal_v<I>);
}

// Walks from `f` towards `l` in strides of iterator_cardinal_v<I>, calling
// delegate.step(f, eve::ignore_none) at every position.
//
// Returns true as soon as a step returns true; `f` (taken by reference) is then left at
// the position of that step. Returns false once `f < l` no longer holds.
template<typename Traits, typename I, typename S, typename Delegate>
EVE_FORCEINLINE bool no_unrolling_loop(Traits, I& f, S l, Delegate& delegate) const
{
  for( ; f < l; f += iterator_cardinal_v<I> )
  {
    if( delegate.step(f, eve::ignore_none) ) return true;
  }
  return false;
}

template<typename Traits, typename I, typename S, typename Delegate>
EVE_FORCEINLINE bool main_loop(Traits tr, I& f, auto unroll_l, S l, Delegate& delegate) const
requires(get_unrolling<Traits>() == 1)
{
(void)unroll_l;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can mark the parameter [[maybe_unused]]; it is better than a random cast to void IMO.

EVE_FORCEINLINE bool main_loop(Traits tr, I& f, [[maybe_unused]] auto unroll_l, S l, Delegate& delegate) const

return no_unrolling_loop(tr, f, l, delegate);
}

// Callable passed to eve::detail::for_until_ by the unrolled main_loop.
// Invoked with std::integral_constant<int, i>, it runs delegate.step on the position
// `i` registers past `f` (i * iterator_cardinal_v<I> elements).
//
// When that step returns true, `f` is moved to the position that was just checked and
// true is returned; otherwise `f` is left untouched and false is returned.
// `S` is not used inside the struct; it is kept so existing instantiations stay valid.
template<typename I, typename S, typename Delegate> struct unrolled_steps_lambda
{
  I&        f;
  Delegate& delegate;

  template<int i> EVE_FORCEINLINE bool operator()(std::integral_constant<int, i>)
  {
    const auto offset = i * iterator_cardinal_v<I>;

    if( !delegate.step(f + offset, eve::ignore_none) ) return false;

    // Report the exact register that triggered, not the start of the unrolled group.
    f += offset;
    return true;
  }
};

// Unrolled main loop (the overload without a `requires` clause).
//
// While `f <= unroll_l`, runs get_unrolling<Traits>() consecutive steps through
// eve::detail::for_until_ and unrolled_steps_lambda; if any of them returns true this
// function returns true immediately (unrolled_steps_lambda has already moved `f` to the
// triggering position). Otherwise `f` advances by one whole unrolled stride.
//
// Once the unrolled part is exhausted, the remaining positions up to `l` are handled one
// register at a time by no_unrolling_loop, whose result is returned.
template<typename Traits, typename I, typename S, typename Delegate>
EVE_FORCEINLINE bool main_loop(Traits tr, I& f, auto unroll_l, S l, Delegate& delegate) const
{
  const auto stride = get_unrolling<Traits>() * iterator_cardinal_v<I>;

  for( ; f <= unroll_l; f += stride )
  {
    const bool triggered = eve::detail::for_until_<0, 1, get_unrolling<Traits>()>(
        unrolled_steps_lambda<I, S, Delegate> {f, delegate});
    if( triggered ) return true;
  }

  return no_unrolling_loop(tr, f, l, delegate);
}
};

template<typename Traits, iterator I, sentinel_for<I> S>
struct for_each_iteration_with_expensive_optional_part_precise_f_l
: for_each_iteration_with_expensive_optional_part_common
{
Traits traits;
I base;
I f;
S l;

// Stores the traits and the range; `base` and `f` both start at `i`, `l` is the sentinel.
// Asserts that the range length (l - f) is a multiple of iterator_cardinal_v<I>.
for_each_iteration_with_expensive_optional_part_precise_f_l(Traits t, I i, S s)
    : traits(t)
    , base(i)
    , f(i)
    , l(s)
{
  EVE_ASSERT(((l - f) % iterator_cardinal_v<I> == 0),
             "length of the range is not divisible by cardinal "
                 << "when `divisible_by_cardinal` is passed: " << "l - f: " << (l - f)
                 << " iterator_cardinal_v<I>: " << iterator_cardinal_v<I>);
}

template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate)
{
auto unroll_l = this->unroll_l(traits, f, l);
goto main_loop;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, goto? Care to explain the rationale here? Is it codegen-motivated?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure.

There are two reasons for goto in this file.

  1. This is "for each with expensive optional part".
    In case of search "expensive optional part" is a full needle check.

That code is quite large and we want to have exactly one copy of it in the binary output.

Otherwise you'd have multiple copies: for tails and for the main part.

  2. To make "expensive part triggers every time" better.

It is entirely possible that the "expensive part" (i.e. needle check) triggers almost every 32 bytes.

In which case the default code would be:

main_part:
if (!precheck) goto expensive_part;
if (!precheck) goto expensive_part;
if (!precheck) goto expensive_part;
if (!precheck) goto expensive_part;
expensive_part:
  expensive_check();
  goto main_part;

while we do:

goto main_part;
expensive_part:
  expensive_check();
main_part:
if (!precheck) goto expensive_part;
if (!precheck) goto expensive_part;
if (!precheck) goto expensive_part;
if (!precheck) goto expensive_part;

This way we effectively wrote

do {
  expensive_check()
} while (precheck);

Which is -1 jmp and is nicer.

I have a "trigger expensive check all the time" measurement where this branch is better than main. I can't tell you if it's because of this structure or other things. But I like it.

I originally found this loop form when working on merge and I think it's cute.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me!


while( true )
{
// Placing the expensive part before the main loop helps when the expensive part
// triggers often: it then forms a separate while loop.
if( delegate.expensive_part(f) ) return;
f += iterator_cardinal_v<I>;
main_loop:
if( !this->main_loop(traits, f, unroll_l, l, delegate) ) return;
}
}
};

// Iteration driver that starts exactly at `f` (no aligning of the start) and does not
// require the range length to be a multiple of iterator_cardinal_v<I>: whole registers go
// through the shared main_loop, a remainder (if any) through one extra step with an
// ignore mask.
//
// The delegate is used through two calls whose results are tested as bool:
//   - delegate.step(it, ignore): a true result sends control to the expensive part;
//   - delegate.expensive_part(it): a true result ends the whole iteration.
template<typename Traits, iterator I, sentinel_for<I> S>
struct for_each_iteration_with_expensive_optional_part_precise_f
: for_each_iteration_with_expensive_optional_part_common
{
Traits traits;
I base;
I f;
S l;

// Stores the traits and the range; `base` and `f` both start at `i`.
for_each_iteration_with_expensive_optional_part_precise_f(Traits t, I i, S s)
: traits(t)
, base(i)
, f(i)
, l(s)
{}

// Runs the iteration. Mutates `f` as it advances and may overwrite `l` (see the tail).
template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate)
{
// `f` advanced by the largest multiple of the cardinal that fits in (l - f):
// the end of the part that can be processed with whole registers.
I precise_l = f + (((l - f) / iterator_cardinal_v<I>)*iterator_cardinal_v<I>);
// Bound for the unrolled part of main_loop, computed from the original `l`.
auto unroll_l = this->unroll_l(traits, f, l);
// Skip over the expensive part on entry: it must only run after a step returned true.
goto main_loop;

// Placing the expensive part before the main loop helps when the expensive part
// triggers often: it then forms a separate while loop.
expensive_part:
if( delegate.expensive_part(f) ) return;
// The expensive part did not stop the iteration: move past the register it examined.
f += iterator_cardinal_v<I>;
main_loop:
// main_loop returns true when a step returned true, with `f` left at that position.
if( this->main_loop(traits, f, unroll_l, precise_l, delegate) ) { goto expensive_part; }

// No remainder (or the remainder was already handled, see below): done.
if( precise_l == l ) return;
{
// Remainder: one more step at `f` with a keep_first mask built from the leftover
// element count (l - precise_l). Presumably this restricts the step to the lanes
// inside the range - confirm against eve::keep_first.
eve::keep_first ignore {l - precise_l};
if( !delegate.step(f, ignore) ) { return; }

// Hack to exit after the `expensive_part` without any extra checks: with `l` set to
// `precise_l`, the `precise_l == l` test above returns if control comes back here.
l = precise_l;
goto expensive_part;
}
}
};

// Iteration driver that works on aligned positions: it starts from
// `i.previous_partially_aligned()` and uses ignore masks on the first and last registers
// instead of touching them at their exact boundaries.
//
// The delegate is used through two calls whose results are tested as bool:
//   - delegate.step(it, ignore): a true result sends control to the expensive part;
//   - delegate.expensive_part(it): a true result ends the whole iteration.
template<typename Traits, iterator I, sentinel_for<I> S>
struct for_each_iteration_with_expensive_optional_part_aligning
: for_each_iteration_with_expensive_optional_part_common
{
Traits traits;
I base;
I f;
S l;

// Stores the traits and the range; `base` is `i.previous_partially_aligned()`, `f` is `i`.
for_each_iteration_with_expensive_optional_part_aligning(Traits t, I i, S s)
: traits(t)
, base(i.previous_partially_aligned())
, f(i)
, l(s)
{}

// Runs the iteration. Works on a local copy of `base`; may overwrite `l` (see the tail).
template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate)
{
auto aligned_f = base;
// `(f + (l - f))` re-expresses `l` in terms of `f` before calling
// previous_partially_aligned() on it.
auto aligned_l = (f + (l - f)).previous_partially_aligned();
// Bound for the unrolled part of main_loop, computed from the original `f` and `l`.
auto unroll_l = this->unroll_l(traits, f, l);

// Mask built from the distance between the aligned start and `f`; presumably it
// excludes the lanes that precede `f` - confirm against eve::ignore_first.
eve::ignore_first ignore_first {f - aligned_f};

if( aligned_f != aligned_l )
{
{
bool first_step_res = delegate.step(aligned_f, ignore_first);
// The leading mask has been consumed; reset it so the tail step below only combines
// a non-zero ignore_first when this first step never ran (aligned_f == aligned_l).
ignore_first = eve::ignore_first {0};
if( !first_step_res )
{
aligned_f += iterator_cardinal_v<I>;
goto main_loop;
}
}

// Placing the expensive part before the main loop helps when the expensive part
// triggers often: it then forms a separate while loop.
expensive_part:
if( delegate.expensive_part(aligned_f) ) return;
// The expensive part did not stop the iteration: move past the register it examined.
aligned_f += iterator_cardinal_v<I>;
main_loop:
// handles aligned_f == aligned_l
// main_loop returns true when a step returned true, with `aligned_f` at that position.
if( this->main_loop(traits, aligned_f, unroll_l, aligned_l, delegate) ) goto expensive_part;
}

// No partial last register (or it was already handled, see below): done.
if( aligned_l == l ) return;
{
// Tail: one step on the last aligned register with a mask built from the distance
// between the end of that register and `l`, combined with `ignore_first`.
eve::ignore_last ignore_last {aligned_l + iterator_cardinal_v<I> - l};
if( !delegate.step(aligned_l, ignore_first && ignore_last) ) return;
l = aligned_l; // hack that prevents coming back here after the expensive part
goto expensive_part;
}
}
};
}

//================================================================================================
//! @addtogroup algos
//! @{
//! @var for_each_iteration_with_expensive_optional_part
//!
//! @brief low level util for writing algorithms. A variation on for_each_iteration that has a
//! place for work we don't want duplicated in assembly.
//!
//! **Defined in Header**
//!
//! @code
//! #include <eve/module/algo.hpp>
//! @endcode
//!
//! `for_each_iteration`, even if not unrolled, generates a few copies of the
//! callback code. For some algorithms we want to move out a piece of callback code
//! but we still don't want a function call. Think search: we want to move the more
//! expensive part of validating match outside.
//!
//! You can find example usage in the search implementation.
//! @}
//================================================================================================
struct
{
template<typename Traits, iterator I, sentinel_for<I> S>
auto operator()(Traits traits, I f, S l) const
{
EVE_ASSERT(f != l,
"for_each_iteration_with_expensive_optional_part requires a non-empty range");
if constexpr( !Traits::contains(no_aligning) && !partially_aligned_iterator<I> )
{
return detail::for_each_iteration_with_expensive_optional_part_aligning {traits, f, l};
}
else if constexpr( Traits::contains(divisible_by_cardinal) )
{
return detail::for_each_iteration_with_expensive_optional_part_precise_f_l {traits, f, l};
}
else
{
return detail::for_each_iteration_with_expensive_optional_part_precise_f {traits, f, l};
}
}
} inline constexpr for_each_iteration_with_expensive_optional_part;

}
Loading
Loading