-
Notifications
You must be signed in to change notification settings - Fork 59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
unroll search #2034
base: main
Are you sure you want to change the base?
unroll search #2034
Changes from all commits
0ec5998
302e338
8608f27
4a77d21
56d779b
114ba84
258ba9f
1dd186b
7fab58c
97b8e32
50a7110
7d2c7c4
4d816ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,259 @@ | ||
//================================================================================================== | ||
/* | ||
EVE - Expressive Vector Engine | ||
Copyright : EVE Project Contributors | ||
SPDX-License-Identifier: BSL-1.0 | ||
*/ | ||
//================================================================================================== | ||
#pragma once | ||
|
||
#include <eve/module/algo/algo/concepts.hpp> | ||
#include <eve/module/algo/algo/traits.hpp> | ||
#include <eve/module/core.hpp> | ||
|
||
namespace eve::algo | ||
{ | ||
|
||
namespace detail | ||
{ | ||
struct for_each_iteration_with_expensive_optional_part_common | ||
{ | ||
template<typename Traits, typename I, typename S> auto unroll_l(Traits, I f, S l) | ||
{ | ||
return eve::unalign(f) + (l - f - get_unrolling<Traits>() * iterator_cardinal_v<I>); | ||
} | ||
|
||
template<typename Traits, typename I, typename S, typename Delegate> | ||
EVE_FORCEINLINE bool no_unrolling_loop(Traits, I& f, S l, Delegate& delegate) const | ||
{ | ||
while( f < l ) | ||
{ | ||
if( delegate.step(f, eve::ignore_none) ) return true; | ||
f += iterator_cardinal_v<I>; | ||
} | ||
return false; | ||
} | ||
|
||
template<typename Traits, typename I, typename S, typename Delegate> | ||
EVE_FORCEINLINE bool main_loop(Traits tr, I& f, auto unroll_l, S l, Delegate& delegate) const | ||
requires(get_unrolling<Traits>() == 1) | ||
{ | ||
(void)unroll_l; | ||
return no_unrolling_loop(tr, f, l, delegate); | ||
} | ||
|
||
template<typename I, typename S, typename Delegate> struct unrolled_steps_lambda | ||
{ | ||
I& f; | ||
Delegate& delegate; | ||
|
||
template<int i> EVE_FORCEINLINE bool operator()(std::integral_constant<int, i>) | ||
{ | ||
if( delegate.step(f + i * iterator_cardinal_v<I>, eve::ignore_none) ) | ||
{ | ||
f += i * iterator_cardinal_v<I>; | ||
return true; | ||
} | ||
return false; | ||
} | ||
}; | ||
|
||
template<typename Traits, typename I, typename S, typename Delegate> | ||
EVE_FORCEINLINE bool main_loop(Traits tr, I& f, auto unroll_l, S l, Delegate& delegate) const | ||
{ | ||
while( f <= unroll_l ) | ||
{ | ||
if( eve::detail::for_until_<0, 1, get_unrolling<Traits>()>( | ||
unrolled_steps_lambda<I, S, Delegate> {f, delegate}) ) | ||
{ | ||
return true; | ||
} | ||
f += get_unrolling<Traits>() * iterator_cardinal_v<I>; | ||
} | ||
|
||
return no_unrolling_loop(tr, f, l, delegate); | ||
} | ||
}; | ||
|
||
  // Strategy for ranges whose length is a multiple of iterator_cardinal_v<I> | ||
  // (asserted in the constructor): there is no partial register to handle, so | ||
  // delegate.step is only reached through the shared main_loop. | ||
  template<typename Traits, iterator I, sentinel_for<I> S> | ||
  struct for_each_iteration_with_expensive_optional_part_precise_f_l | ||
  : for_each_iteration_with_expensive_optional_part_common | ||
  { | ||
    Traits traits; | ||
    // Copy of the initial `f`; not read by the code visible in this struct. | ||
    I base; | ||
    // Current position, advanced as the iteration proceeds. | ||
    I f; | ||
    // End of the range. | ||
    S l; | ||
|
||
    // Stores the traits and the range [i, s). | ||
    for_each_iteration_with_expensive_optional_part_precise_f_l(Traits t, I i, S s) | ||
      : traits(t) | ||
      , base(i) | ||
      , f(i) | ||
      , l(s) | ||
    { | ||
      // NOTE(review): the message reads "no divisible"; "not divisible" was presumably meant. | ||
      EVE_ASSERT(((l - f) % iterator_cardinal_v<I> == 0), | ||
                 " len of the range is no divisible by cardinal " | ||
                     << "when `divisible by cardinal is passed`: " << "l - f: " << (l - f) | ||
                     << " iterator_cardinal_v<I>: " << iterator_cardinal_v<I>); | ||
    } | ||
|
||
    // Runs the iteration, alternating between the shared main loop and | ||
    // delegate.expensive_part: | ||
    //  - main_loop returning false (no step returned true before `l`) ends the iteration; | ||
    //  - main_loop returning true leaves `f` on the register whose step returned true, | ||
    //    delegate.expensive_part(f) is then called and a true result ends the iteration; | ||
    //  - otherwise `f` moves one register forward and the main loop resumes. | ||
    template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate) | ||
    { | ||
      auto unroll_l = this->unroll_l(traits, f, l); | ||
      // Enter the loop below at the `main_loop` label, so the first pass skips the | ||
      // expensive part. | ||
      goto main_loop; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wow, goto ? Care to explain the rationale here ? Is it codegen motivated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure. There are two reasons for goto in this file.
That code is quite large and we want to have exactly one copy of it in the binary ouptut. Otherwise you'd have multiple copies: for tails and for the main part.
It is entierly possible that the "expensive part" (i.e. needle check) triggers almost every 32 bytes. In which case the default code would be:
while we do:
This way we effectively wrote
Which is -1 jmp and is nicer. I have a "trigger expensive check all the time" measurement where this branch is better than main. I can't tell you if it's because of this structure or other things. But I like it. I originally found this loop form when workin on merge and I think it's cute. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sound good to me ! |
||
|
||
      while( true ) | ||
      { | ||
        // Having the expensive part before the main loop should help when the | ||
        // expensive part is hit often: it then forms a separate while loop. | ||
        if( delegate.expensive_part(f) ) return; | ||
        f += iterator_cardinal_v<I>; | ||
      main_loop: | ||
        if( !this->main_loop(traits, f, unroll_l, l, delegate) ) return; | ||
      } | ||
    } | ||
  }; | ||
|
||
  // Strategy that starts exactly at `f` and accepts a range whose length is not | ||
  // necessarily a multiple of iterator_cardinal_v<I>: whole registers go through the | ||
  // shared main_loop, a trailing partial register gets one extra masked step. | ||
  template<typename Traits, iterator I, sentinel_for<I> S> | ||
  struct for_each_iteration_with_expensive_optional_part_precise_f | ||
  : for_each_iteration_with_expensive_optional_part_common | ||
  { | ||
    Traits traits; | ||
    // Copy of the initial `f`; not read by the code visible in this struct. | ||
    I base; | ||
    // Current position, advanced as the iteration proceeds. | ||
    I f; | ||
    // End of the range; overwritten with `precise_l` once the tail has been handled. | ||
    S l; | ||
|
||
    // Stores the traits and the range [i, s). | ||
    for_each_iteration_with_expensive_optional_part_precise_f(Traits t, I i, S s) | ||
      : traits(t) | ||
      , base(i) | ||
      , f(i) | ||
      , l(s) | ||
    {} | ||
|
||
    // Runs the iteration. A step returning true (in the main loop or on the tail) | ||
    // leads to delegate.expensive_part(f); a true result from expensive_part ends | ||
    // the iteration, otherwise `f` moves one register forward and iteration resumes. | ||
    template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate) | ||
    { | ||
      // End of the last whole register: (l - f) rounded down to a multiple of the cardinal. | ||
      I precise_l = f + (((l - f) / iterator_cardinal_v<I>)*iterator_cardinal_v<I>); | ||
      auto unroll_l = this->unroll_l(traits, f, l); | ||
      // Start with the main loop; the expensive part is only reached through a goto. | ||
      goto main_loop; | ||
|
||
      // Having the expensive part before the main loop should help when the | ||
      // expensive part is hit often: it then forms a separate while loop. | ||
    expensive_part: | ||
      if( delegate.expensive_part(f) ) return; | ||
      f += iterator_cardinal_v<I>; | ||
    main_loop: | ||
      if( this->main_loop(traits, f, unroll_l, precise_l, delegate) ) { goto expensive_part; } | ||
|
||
      // No partial register left (or it was already handled, see the hack below). | ||
      if( precise_l == l ) return; | ||
      { | ||
        // Tail step over the last `l - precise_l` elements. | ||
        // NOTE(review): keep_first presumably masks out every lane past that count - confirm. | ||
        eve::keep_first ignore {l - precise_l}; | ||
        if( !delegate.step(f, ignore) ) { return; } | ||
|
||
        // Hack to exit after the `expensive_part` without any extra checks: | ||
        // with `l == precise_l`, the check above returns the next time it is reached. | ||
        l = precise_l; | ||
        goto expensive_part; | ||
      } | ||
    } | ||
  }; | ||
|
||
  // Strategy that iterates from the partially aligned position preceding `f`: | ||
  // a masked first step, the shared main_loop over the aligned middle, and a | ||
  // masked last step when `l` is not on an aligned position. | ||
  template<typename Traits, iterator I, sentinel_for<I> S> | ||
  struct for_each_iteration_with_expensive_optional_part_aligning | ||
  : for_each_iteration_with_expensive_optional_part_common | ||
  { | ||
    Traits traits; | ||
    // i.previous_partially_aligned(): where the first (masked) step is issued. | ||
    I base; | ||
    // Real start of the range. | ||
    I f; | ||
    // End of the range; overwritten with `aligned_l` once the tail has been handled. | ||
    S l; | ||
|
||
    // Stores the traits, the range [i, s) and the aligned position preceding `i`. | ||
    for_each_iteration_with_expensive_optional_part_aligning(Traits t, I i, S s) | ||
      : traits(t) | ||
      , base(i.previous_partially_aligned()) | ||
      , f(i) | ||
      , l(s) | ||
    {} | ||
|
||
    // Runs the iteration. A step returning true (first step, main loop or tail) | ||
    // leads to delegate.expensive_part(aligned_f); a true result from expensive_part | ||
    // ends the iteration, otherwise iteration resumes one register further. | ||
    template<typename Delegate> EVE_FORCEINLINE void operator()(Delegate& delegate) | ||
    { | ||
      auto aligned_f = base; | ||
      // Position of `l` rounded down with previous_partially_aligned(). | ||
      // NOTE(review): `f + (l - f)` presumably just turns the sentinel into an iterator - confirm. | ||
      auto aligned_l = (f + (l - f)).previous_partially_aligned(); | ||
      auto unroll_l = this->unroll_l(traits, f, l); | ||
|
||
      // Number of elements between `aligned_f` and `f`, to be ignored by the first step. | ||
      eve::ignore_first ignore_first {f - aligned_f}; | ||
|
||
      if( aligned_f != aligned_l ) | ||
      { | ||
        { | ||
          bool first_step_res = delegate.step(aligned_f, ignore_first); | ||
          // The leading elements are dealt with; later steps must not ignore anything | ||
          // at the front (this matters for the tail step below). | ||
          ignore_first = eve::ignore_first {0}; | ||
          if( !first_step_res ) | ||
          { | ||
            aligned_f += iterator_cardinal_v<I>; | ||
            goto main_loop; | ||
          } | ||
        } | ||
|
||
        // Having the expensive part before the main loop should help when the | ||
        // expensive part is hit often: it then forms a separate while loop. | ||
      expensive_part: | ||
        if( delegate.expensive_part(aligned_f) ) return; | ||
        aligned_f += iterator_cardinal_v<I>; | ||
      main_loop: | ||
        // handles aligned_f == aligned_l | ||
        if( this->main_loop(traits, aligned_f, unroll_l, aligned_l, delegate) ) goto expensive_part; | ||
      } | ||
|
||
      // No partial register left (or it was already handled, see the hack below). | ||
      if( aligned_l == l ) return; | ||
      { | ||
        // Tail step at `aligned_l`, ignoring the elements at or after `l`. When the whole | ||
        // range sits in one register, `ignore_first` still holds its initial value. | ||
        eve::ignore_last ignore_last {aligned_l + iterator_cardinal_v<I> - l}; | ||
        if( !delegate.step(aligned_l, ignore_first && ignore_last) ) return; | ||
        l = aligned_l; // hack that prevents coming back here after the expensive part | ||
        goto expensive_part; | ||
      } | ||
    } | ||
  }; | ||
} | ||
|
||
//================================================================================================ | ||
//! @addtogroup algos | ||
//! @{ | ||
//! @var for_each_iteration_with_expensive_optional_part | ||
//! | ||
//! @brief low level util for writing algorithms. A variation on for_each_iteration that has a | ||
//! place for work we don't want duplicated in assembly. | ||
//! | ||
//! **Defined in Header** | ||
//! | ||
//! @code | ||
//! #include <eve/module/algo.hpp> | ||
//! @endcode | ||
//! | ||
//! `for_each_iteration`, even if not unrolled, generates a few copies of the | ||
//! callback code. For some algorithms we want to move out a piece of callback code | ||
//! but we still don't want a function call. Think search: we want to move the more | ||
//! expensive part of validating a match out of the main loop. | ||
//! | ||
//! You can find example usage in the search implementation. | ||
//! @} | ||
//================================================================================================ | ||
struct | ||
{ | ||
template<typename Traits, iterator I, sentinel_for<I> S> | ||
auto operator()(Traits traits, I f, S l) const | ||
{ | ||
EVE_ASSERT(f != l, | ||
"for_each_iteration_with_expensive_optional_part requires a non-empty range"); | ||
if constexpr( !Traits::contains(no_aligning) && !partially_aligned_iterator<I> ) | ||
{ | ||
return detail::for_each_iteration_with_expensive_optional_part_aligning {traits, f, l}; | ||
} | ||
else if constexpr( Traits::contains(divisible_by_cardinal) ) | ||
{ | ||
return detail::for_each_iteration_with_expensive_optional_part_precise_f_l {traits, f, l}; | ||
} | ||
else | ||
{ | ||
return detail::for_each_iteration_with_expensive_optional_part_precise_f {traits, f, l}; | ||
} | ||
} | ||
} inline constexpr for_each_iteration_with_expensive_optional_part; | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can mark the parameter `[[maybe_unused]]`; IMO it is better than a random cast to void.