Skip to content

Commit

Permalink
*improve AVX2 optimizations of class ResizerFloatBilinear (part 10: case of large scale, channels >= 8).
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Jan 17, 2025
1 parent 87bb494 commit 4edcec4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
28 changes: 23 additions & 5 deletions src/Simd/SimdAvx2ResizerBilinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1009,11 +1009,6 @@ namespace Simd
}
else
{
if (cn > 7)
{
Sse41::ResizerFloatBilinear::Run(src, srcStride, dst, dstStride);
return;
}
for (size_t dy = 0; dy < _param.dstH; dy++, dst += dstStride)
{
__m256 fy1 = _mm256_set1_ps(_ay[dy]);
Expand Down Expand Up @@ -1212,6 +1207,29 @@ namespace Simd
}
}
}
else
{
for (size_t dx = 0; dx < dw; dx++)
{
size_t os = _ix[dx], eF = os + cnF, od = dx * cn;
__m256 fx1 = _mm256_set1_ps(_ax[dx]);
__m256 fx0 = _mm256_sub_ps(_1, fx1);
for (; os < eF; os += F, od += F)
{
__m256 r0 = _mm256_fmadd_ps(_mm256_loadu_ps(src0 + os), fx0, _mm256_mul_ps(_mm256_loadu_ps(src0 + os + cn), fx1));
__m256 r1 = _mm256_fmadd_ps(_mm256_loadu_ps(src1 + os), fx0, _mm256_mul_ps(_mm256_loadu_ps(src1 + os + cn), fx1));
_mm256_storeu_ps(dst + od, _mm256_fmadd_ps(r0, fy0, _mm256_mul_ps(r1, fy1)));
}
if (cnTF)
{
os += cnLF;
od += cnLF;
__m256 r0 = _mm256_fmadd_ps(_mm256_loadu_ps(src0 + os), fx0, _mm256_mul_ps(_mm256_loadu_ps(src0 + os + cn), fx1));
__m256 r1 = _mm256_fmadd_ps(_mm256_loadu_ps(src1 + os), fx0, _mm256_mul_ps(_mm256_loadu_ps(src1 + os + cn), fx1));
_mm256_storeu_ps(dst + od, _mm256_fmadd_ps(r0, fy0, _mm256_mul_ps(r1, fy1)));
}
}
}
}
}
}
Expand Down
8 changes: 6 additions & 2 deletions src/Simd/SimdAvx512bwResizerBilinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,9 +732,9 @@ namespace Simd

void ResizerFloatBilinear::Run(const float* src, size_t srcStride, float* dst, size_t dstStride)
{
size_t cn = _param.channels;
if (_rowBuf)
{
size_t cn = _param.channels;
size_t rs = _param.dstW * cn;
float* pbx[2] = { _bx[0].data, _bx[1].data };
int32_t prev = -2;
Expand Down Expand Up @@ -861,7 +861,11 @@ namespace Simd
}
else
{
Avx2::ResizerFloatBilinear::Run(src, srcStride, dst, dstStride);
if (cn > 0)
{
Avx2::ResizerFloatBilinear::Run(src, srcStride, dst, dstStride);
return;
}
}
}

Expand Down

0 comments on commit 4edcec4

Please sign in to comment.