Skip to content

Commit

Permalink
*improve AVX2 optimizations of class ResizerFloatBilinear (part 3: case of small scale, channels = [4..7]).
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Jan 16, 2025
1 parent 2eb0085 commit c8b0b06
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
26 changes: 23 additions & 3 deletions src/Simd/SimdAvx2ResizerBilinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,7 @@ namespace Simd
if (_rowBuf)
{
size_t rs = _param.dstW * cn, rsH = AlignLo(rs, HF), rsF = AlignLo(rs, F);
size_t rs3 = rs - 3, rs6 = AlignLoAny(rs3, 6), rscn = rs - cn, cnHF = cn - HF;
float* pbx[2] = { _bx[0].data, _bx[1].data };
int32_t prev = -2;
for (size_t dy = 0; dy < _param.dstH; dy++, dst += dstStride)
Expand Down Expand Up @@ -900,10 +901,8 @@ namespace Simd
_mm_store_ps(pb + dx, _mm_add_ps(m0, m1));
}
}
else if (cn == 3 && rs > 3)
else if (cn == 3)
{
size_t rs3 = rs - 3;
size_t rs6 = AlignLoAny(rs3, 6);
for (; dx < rs6; dx += 6)
{
__m256 s0 = Load<false>(ps + _ix[dx + 0] + 0, ps + _ix[dx + 3] + 0);
Expand Down Expand Up @@ -940,6 +939,27 @@ namespace Simd
_mm_storeu_ps(pb + dx, _mm_add_ps(_mm_mul_ps(fx0, s0), _mm_mul_ps(fx1, s1)));
}
}
// Horizontal interpolation for channel counts below 8 that were not taken by
// the earlier arms (the commit message names channels = [4..7]; the
// `cnHF = cn - HF` offset used below presumably relies on cn >= HF - TODO confirm).
// Each output pixel is produced with a single 256-bit blend of two source
// pixels: pb[dx..] = fx0 * s0 + fx1 * s1, where fx0 = _1 - fx1.
else if (cn < 8)
{
// Main loop: every pixel whose start index is below rscn (rs - cn), i.e. all
// pixels before the last one (assuming dx enters at the start of the row -
// its initialization is outside this view).
for (; dx < rscn; dx += cn)
{
// Source position of this output pixel, taken from the index table.
const float* ps0 = ps + _ix[dx];
// Full 8-float unaligned loads of the source pixel and of the one cn floats
// further; since cn < 8 they also read floats beyond the pixel's cn channels.
__m256 s0 = _mm256_loadu_ps(ps0);
__m256 s1 = _mm256_loadu_ps(ps0 + cn);
// Weights read from _ax at the same offset dx; fx0 is derived from them as
// _1 - fx1 (_1 is presumably a vector of 1.0f - defined outside this view).
__m256 fx1 = _mm256_loadu_ps(_ax.data + dx);
__m256 fx0 = _mm256_sub_ps(_1, fx1);
// 8-float store at pb + dx: the lanes past cn land in the next pixel's slot
// and are overwritten by the following iteration (or by the tail loop below).
_mm256_storeu_ps(pb + dx, _mm256_fmadd_ps(fx0, s0, _mm256_mul_ps(fx1, s1)));
}
// Tail loop: the remaining pixel(s) up to rs. Instead of one 8-wide access it
// uses the two-pointer Load/Store helpers with the second pointer shifted by
// cnHF (cn - HF). NOTE(review): these helpers presumably read/write two
// half-width (128-bit) parts, so that the accessed range ends exactly at
// dx + cn and the two halves overlap when cn < 8 - confirm against their
// definitions, which are not visible here.
for (; dx < rs; dx += cn)
{
const float* ps0 = ps + _ix[dx], * ps1 = ps0 + cn;
__m256 s0 = Load<false>(ps0, ps0 + cnHF);
__m256 s1 = Load<false>(ps1, ps1 + cnHF);
__m256 fx1 = Load<false>(_ax.data + dx, _ax.data + dx + cnHF);
__m256 fx0 = _mm256_sub_ps(_1, fx1);
// Same blend as in the main loop, written back through the split store.
Store<false>(pb + dx, pb + dx + cnHF, _mm256_fmadd_ps(fx0, s0, _mm256_mul_ps(fx1, s1)));
}
}
else if (!Avx2::SlowGather)
{
__m256i _cn = _mm256_set1_epi32((int)cn);
Expand Down
2 changes: 1 addition & 1 deletion src/Test/TestResize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ namespace Test

bool ResizerAutoTest(const FuncRS & f1, const FuncRS & f2)
{
//return ResizerAutoTest(SimdResizeMethodBilinear, SimdResizeChannelFloat, 2, f1, f2);
//return ResizerAutoTest(SimdResizeMethodBilinear, SimdResizeChannelFloat, 6, f1, f2);
bool result = true;

#if 1
Expand Down

0 comments on commit c8b0b06

Please sign in to comment.