Skip to content

Commit

Permalink
AVX-512 implementation of h_sum, h_sqr_sum and h_abs_sum
Browse files Browse the repository at this point in the history
  • Loading branch information
sadko4u committed Dec 11, 2024
1 parent 7dd57a1 commit 5ddbbfb
Show file tree
Hide file tree
Showing 9 changed files with 435 additions and 12 deletions.
33 changes: 33 additions & 0 deletions include/private/dsp/arch/x86/avx512/hmath.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 11 дек. 2024 г.
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

/* Include guard: makes repeated inclusion of this header a no-op. */
#ifndef PRIVATE_DSP_ARCH_X86_AVX512_HMATH_H_
#define PRIVATE_DSP_ARCH_X86_AVX512_HMATH_H_

/*
 * Abort compilation unless the including translation unit has defined
 * PRIVATE_DSP_ARCH_X86_AVX512_IMPL beforehand, so this header cannot be
 * included directly by code that did not opt in.
 */
#ifndef PRIVATE_DSP_ARCH_X86_AVX512_IMPL
#error "This header should not be included directly"
#endif /* PRIVATE_DSP_ARCH_X86_AVX512_IMPL */


/*
 * The only content of this header is the hmath/hsum.h include below.
 * NOTE(review): hsum.h is expected to provide h_sum, h_sqr_sum and
 * h_abs_sum; its contents are not visible from here - confirm.
 */
#include <private/dsp/arch/x86/avx512/hmath/hsum.h>


#endif /* PRIVATE_DSP_ARCH_X86_AVX512_HMATH_H_ */
351 changes: 351 additions & 0 deletions include/private/dsp/arch/x86/avx512/hmath/hsum.h

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions src/main/x86/avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include <private/dsp/arch/x86/avx512/dynamics.h>
#include <private/dsp/arch/x86/avx512/float.h>
#include <private/dsp/arch/x86/avx512/graphics/axis.h>
#include <private/dsp/arch/x86/avx512/hmath.h>
#include <private/dsp/arch/x86/avx512/msmatrix.h>
#include <private/dsp/arch/x86/avx512/pcomplex.h>
#include <private/dsp/arch/x86/avx512/pmath.h>
Expand Down Expand Up @@ -315,14 +316,16 @@
CEXPORT1(vl, mix2);
CEXPORT1(vl, mix_copy2);
CEXPORT1(vl, mix_add2);

CEXPORT1(vl, mix3);
CEXPORT1(vl, mix_copy3);
CEXPORT1(vl, mix_add3);

CEXPORT1(vl, mix4);
CEXPORT1(vl, mix_copy4);
CEXPORT1(vl, mix_add4);

CEXPORT1(vl, h_sum);
CEXPORT1(vl, h_sqr_sum);
CEXPORT1(vl, h_abs_sum);
}
} /* namespace avx512 */
} /* namespace lsp */
Expand Down
10 changes: 8 additions & 2 deletions src/test/ptest/hmath/h_abs_sum.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
Expand Down Expand Up @@ -44,6 +44,11 @@ namespace lsp
{
float h_abs_sum(const float *src, size_t count);
}

/*
 * Forward declaration of the AVX-512 variant of h_abs_sum so that this
 * performance test can reference avx512::h_abs_sum. The definition is not
 * part of the visible portion of this file.
 */
namespace avx512
{
float h_abs_sum(const float *src, size_t count);
}
)

IF_ARCH_ARM(
Expand Down Expand Up @@ -98,6 +103,7 @@ PTEST_BEGIN("dsp.hmath", hsum, 5, 10000)
CALL(generic::h_abs_sum);
IF_ARCH_X86(CALL(sse::h_abs_sum));
IF_ARCH_X86(CALL(avx::h_abs_sum));
IF_ARCH_X86(CALL(avx512::h_abs_sum));
IF_ARCH_ARM(CALL(neon_d32::h_abs_sum));
IF_ARCH_AARCH64(CALL(asimd::h_abs_sum));
PTEST_SEPARATOR;
Expand Down
10 changes: 8 additions & 2 deletions src/test/ptest/hmath/h_sqr_sum.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
Expand Down Expand Up @@ -45,6 +45,11 @@ namespace lsp
float h_sqr_sum(const float *src, size_t count);
float h_sqr_sum_fma3(const float *src, size_t count);
}

/*
 * Forward declaration of the AVX-512 variant of h_sqr_sum so that this
 * performance test can reference avx512::h_sqr_sum. The definition is not
 * part of the visible portion of this file.
 */
namespace avx512
{
float h_sqr_sum(const float *src, size_t count);
}
)

IF_ARCH_ARM(
Expand Down Expand Up @@ -100,6 +105,7 @@ PTEST_BEGIN("dsp.hmath", h_sqr_sum, 5, 10000)
IF_ARCH_X86(CALL(sse::h_sqr_sum));
IF_ARCH_X86(CALL(avx::h_sqr_sum));
IF_ARCH_X86(CALL(avx::h_sqr_sum_fma3));
IF_ARCH_X86(CALL(avx512::h_sqr_sum));
IF_ARCH_ARM(CALL(neon_d32::h_sqr_sum));
IF_ARCH_AARCH64(CALL(asimd::h_sqr_sum));
PTEST_SEPARATOR;
Expand Down
10 changes: 8 additions & 2 deletions src/test/ptest/hmath/h_sum.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
Expand Down Expand Up @@ -44,6 +44,11 @@ namespace lsp
{
float h_sum(const float *src, size_t count);
}

/*
 * Forward declaration of the AVX-512 variant of h_sum so that this
 * performance test can reference avx512::h_sum. The definition is not
 * part of the visible portion of this file.
 */
namespace avx512
{
float h_sum(const float *src, size_t count);
}
)

IF_ARCH_ARM(
Expand Down Expand Up @@ -98,6 +103,7 @@ PTEST_BEGIN("dsp.hmath", h_sum, 5, 10000)
CALL(generic::h_sum);
IF_ARCH_X86(CALL(sse::h_sum));
IF_ARCH_X86(CALL(avx::h_sum));
IF_ARCH_X86(CALL(avx512::h_sum));
IF_ARCH_ARM(CALL(neon_d32::h_sum));
IF_ARCH_AARCH64(CALL(asimd::h_sum));
PTEST_SEPARATOR;
Expand Down
10 changes: 8 additions & 2 deletions src/test/utest/hmath/h_abs_sum.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
Expand Down Expand Up @@ -49,6 +49,11 @@ namespace lsp
{
float h_abs_sum(const float *src, size_t count);
}

/*
 * Forward declaration of the AVX-512 variant of h_abs_sum so that this
 * unit test can pass avx512::h_abs_sum alongside generic::h_abs_sum.
 * The definition is not part of the visible portion of this file.
 */
namespace avx512
{
float h_abs_sum(const float *src, size_t count);
}
)

IF_ARCH_ARM(
Expand Down Expand Up @@ -110,6 +115,7 @@ UTEST_BEGIN("dsp.hmath", h_abs_sum)

IF_ARCH_X86(CALL(generic::h_abs_sum, sse::h_abs_sum, 16));
IF_ARCH_X86(CALL(generic::h_abs_sum, avx::h_abs_sum, 32));
IF_ARCH_X86(CALL(generic::h_abs_sum, avx512::h_abs_sum, 64));
IF_ARCH_ARM(CALL(generic::h_abs_sum, neon_d32::h_abs_sum, 16));
IF_ARCH_AARCH64(CALL(generic::h_abs_sum, asimd::h_abs_sum, 16));
}
Expand Down
6 changes: 6 additions & 0 deletions src/test/utest/hmath/h_sqr_sum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ namespace lsp
float h_sqr_sum(const float *src, size_t count);
float h_sqr_sum_fma3(const float *src, size_t count);
}

/*
 * Forward declaration of the AVX-512 variant of h_sqr_sum so that this
 * unit test can pass avx512::h_sqr_sum alongside generic::h_sqr_sum.
 * The definition is not part of the visible portion of this file.
 */
namespace avx512
{
float h_sqr_sum(const float *src, size_t count);
}
)

IF_ARCH_ARM(
Expand Down Expand Up @@ -114,6 +119,7 @@ UTEST_BEGIN("dsp.hmath", h_sqr_sum)
IF_ARCH_X86(CALL(generic::h_sqr_sum, sse::h_sqr_sum, 16));
IF_ARCH_X86(CALL(generic::h_sqr_sum, avx::h_sqr_sum, 32));
IF_ARCH_X86(CALL(generic::h_sqr_sum, avx::h_sqr_sum_fma3, 32));
IF_ARCH_X86(CALL(generic::h_sqr_sum, avx512::h_sqr_sum, 64));
IF_ARCH_ARM(CALL(generic::h_sqr_sum, neon_d32::h_sqr_sum, 16));
IF_ARCH_AARCH64(CALL(generic::h_sqr_sum, asimd::h_sqr_sum, 16));
}
Expand Down
10 changes: 8 additions & 2 deletions src/test/utest/hmath/h_sum.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
Expand Down Expand Up @@ -49,6 +49,11 @@ namespace lsp
{
float h_sum(const float *src, size_t count);
}

/*
 * Forward declaration of the AVX-512 variant of h_sum so that this
 * unit test can pass avx512::h_sum alongside generic::h_sum.
 * The definition is not part of the visible portion of this file.
 */
namespace avx512
{
float h_sum(const float *src, size_t count);
}
)

IF_ARCH_ARM(
Expand Down Expand Up @@ -110,6 +115,7 @@ UTEST_BEGIN("dsp.hmath", h_sum)

IF_ARCH_X86(CALL(generic::h_sum, sse::h_sum, 16));
IF_ARCH_X86(CALL(generic::h_sum, avx::h_sum, 32));
IF_ARCH_X86(CALL(generic::h_sum, avx512::h_sum, 64));
IF_ARCH_ARM(CALL(generic::h_sum, neon_d32::h_sum, 16));
IF_ARCH_AARCH64(CALL(generic::h_sum, asimd::h_sum, 16));
}
Expand Down

0 comments on commit 5ddbbfb

Please sign in to comment.