Skip to content

Commit

Permalink
*fix bug: Error in AVX-512BW optimizations of function SynetSoftmaxLayerForward.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Nov 20, 2023
1 parent 42b7706 commit e6f080d
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 6 deletions.
1 change: 1 addition & 0 deletions docs/2023.html
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ <h4>Algorithms</h4>
<h5>Bug fixing</h5>
<ul>
<li>Wrong order of SIMD_DEPRECATED macro.</li>
<li>Error in AVX-512BW optimizations of function SynetSoftmaxLayerForward.</li>
</ul>

<a href="#HOME">Home</a>
Expand Down
1 change: 1 addition & 0 deletions src/Simd/SimdAvx512bwSynetSoftmax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ namespace Simd
max = _mm512_max_ps(max, a4);
max = _mm512_max_ps(max, a5);
max = _mm512_max_ps(max, a6);
max = _mm512_max_ps(max, a7);
max = _mm512_max_ps(max, a8);
max = _mm512_max_ps(max, a9);
max = _mm512_max_ps(max, aA);
Expand Down
8 changes: 2 additions & 6 deletions src/Simd/SimdExp.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespace Simd
{
class Exp
{
__m128i _exponent, _mantissa, _127;
__m128i _127;
__m128 _1_0, _0_5, _min, _max, _exp0, _exp1, _exp2, _exp3, _exp4, _exp5, _k;

SIMD_INLINE __m128 Poly5(__m128 x) const
Expand Down Expand Up @@ -75,8 +75,6 @@ namespace Simd

SIMD_INLINE Exp(float k = 1.0f)
{
_exponent = _mm_set1_epi32(0x7F800000);
_mantissa = _mm_set1_epi32(0x007FFFFF);
_127 = _mm_set1_epi32(127);
_1_0 = _mm_set1_ps(1.0f);
_0_5 = _mm_set1_ps(0.5f);
Expand Down Expand Up @@ -212,7 +210,7 @@ namespace Simd
{
class Exp
{
__m256i _exponent, _mantissa, _127;
__m256i _127;
__m256 _1_0, _0_5, _min, _max, _exp0, _exp1, _exp2, _exp3, _exp4, _exp5, _k;

SIMD_INLINE __m256 Poly5(__m256 x) const
Expand Down Expand Up @@ -240,8 +238,6 @@ namespace Simd

SIMD_INLINE Exp(float k = 1.0f)
{
_exponent = _mm256_set1_epi32(0x7F800000);
_mantissa = _mm256_set1_epi32(0x007FFFFF);
_127 = _mm256_set1_epi32(127);
_1_0 = _mm256_set1_ps(1.0f);
_0_5 = _mm256_set1_ps(0.5f);
Expand Down

0 comments on commit e6f080d

Please sign in to comment.