Skip to content

Commit

Permalink
AVX-512 implementation of h_dotp, h_sqr_dotp, h_abs_dotp, updated per…
Browse files Browse the repository at this point in the history
…formance tests
  • Loading branch information
sadko4u committed Dec 11, 2024
1 parent 847a2a5 commit f5a1538
Show file tree
Hide file tree
Showing 12 changed files with 873 additions and 63 deletions.
1 change: 1 addition & 0 deletions include/private/dsp/arch/x86/avx512/hmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#endif /* PRIVATE_DSP_ARCH_X86_AVX512_IMPL */


#include <private/dsp/arch/x86/avx512/hmath/hdotp.h>
#include <private/dsp/arch/x86/avx512/hmath/hsum.h>


Expand Down
354 changes: 354 additions & 0 deletions include/private/dsp/arch/x86/avx512/hmath/hdotp.h

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions src/main/x86/avx512.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2023 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 24 мая 2023 г.
Expand Down Expand Up @@ -326,6 +326,10 @@
CEXPORT1(vl, h_sum);
CEXPORT1(vl, h_sqr_sum);
CEXPORT1(vl, h_abs_sum);

CEXPORT1(vl, h_dotp);
CEXPORT1(vl, h_sqr_dotp);
CEXPORT1(vl, h_abs_dotp);
}
} /* namespace avx512 */
} /* namespace lsp */
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
/*
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2020 Vladimir Sadovnikov <[email protected]>
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
* Created on: 11 дек. 2024 г.
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
Expand All @@ -25,56 +25,51 @@
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 8
#define MIN_RANK 5
#define MAX_RANK 16

namespace lsp
{
namespace generic
{
float h_dotp(const float *a, const float *b, size_t count);
float h_sqr_dotp(const float *a, const float *b, size_t count);
float h_abs_dotp(const float *a, const float *b, size_t count);
}

IF_ARCH_X86(
namespace sse
{
float h_dotp(const float *a, const float *b, size_t count);
float h_sqr_dotp(const float *a, const float *b, size_t count);
float h_abs_dotp(const float *a, const float *b, size_t count);
}

namespace avx
{
float h_dotp(const float *a, const float *b, size_t count);
float h_sqr_dotp(const float *a, const float *b, size_t count);
float h_abs_dotp(const float *a, const float *b, size_t count);
}

namespace avx512
{
float h_abs_dotp(const float *a, const float *b, size_t count);
}
)

IF_ARCH_ARM(
namespace neon_d32
{
float h_dotp(const float *a, const float *b, size_t count);
float h_sqr_dotp(const float *a, const float *b, size_t count);
float h_abs_dotp(const float *a, const float *b, size_t count);
}
)

IF_ARCH_AARCH64(
namespace asimd
{
float h_dotp(const float *a, const float *b, size_t count);
float h_sqr_dotp(const float *a, const float *b, size_t count);
float h_abs_dotp(const float *a, const float *b, size_t count);
}
)

typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
}

PTEST_BEGIN("dsp.hmath", hdotp, 5, 10000)
PTEST_BEGIN("dsp.hmath", h_abs_dotp, 5, 5000)

void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
{
Expand Down Expand Up @@ -106,26 +101,13 @@ PTEST_BEGIN("dsp.hmath", hdotp, 5, 10000)
{
size_t count = 1 << i;

CALL(generic::h_dotp);
IF_ARCH_X86(CALL(sse::h_dotp));
IF_ARCH_X86(CALL(avx::h_dotp));
IF_ARCH_ARM(CALL(neon_d32::h_dotp));
IF_ARCH_AARCH64(CALL(asimd::h_dotp));
PTEST_SEPARATOR;

CALL(generic::h_sqr_dotp);
IF_ARCH_X86(CALL(sse::h_sqr_dotp));
IF_ARCH_X86(CALL(avx::h_sqr_dotp));
IF_ARCH_ARM(CALL(neon_d32::h_sqr_dotp));
IF_ARCH_AARCH64(CALL(asimd::h_sqr_dotp));
PTEST_SEPARATOR;

CALL(generic::h_abs_dotp);
IF_ARCH_X86(CALL(sse::h_abs_dotp));
IF_ARCH_X86(CALL(avx::h_abs_dotp));
IF_ARCH_X86(CALL(avx512::h_abs_dotp));
IF_ARCH_ARM(CALL(neon_d32::h_abs_dotp));
IF_ARCH_AARCH64(CALL(asimd::h_abs_dotp));
PTEST_SEPARATOR2;
PTEST_SEPARATOR;
}

free_aligned(data);
Expand Down
4 changes: 2 additions & 2 deletions src/test/ptest/hmath/h_abs_sum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/common/alloc.h>

#define MIN_RANK 8
#define MIN_RANK 5
#define MAX_RANK 16

namespace lsp
Expand Down Expand Up @@ -68,7 +68,7 @@ namespace lsp
typedef float (* h_sum_t)(const float *src, size_t count);
}

PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 10000)
PTEST_BEGIN("dsp.hmath", h_abs_sum, 5, 5000)

void call(const char *label, float *src, size_t count, h_sum_t func)
{
Expand Down
119 changes: 119 additions & 0 deletions src/test/ptest/hmath/h_dotp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 31 мар. 2020 г.
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

#include <lsp-plug.in/common/alloc.h>
#include <lsp-plug.in/common/types.h>
#include <lsp-plug.in/dsp/dsp.h>
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 5
#define MAX_RANK 16

// Hand-written prototypes of the h_dotp variants that this performance test
// calls directly. All variants share one signature: two const float buffers
// and an element count, returning a single float.
// NOTE(review): the definitions are not visible in this file; they are
// presumably provided by the library at link time - confirm.
namespace lsp
{
// Variant declared unconditionally, for every target architecture.
namespace generic
{
float h_dotp(const float *a, const float *b, size_t count);
}

// Variants wrapped in IF_ARCH_X86 (presumably compiled in only for x86
// targets - confirm against the macro definition).
IF_ARCH_X86(
namespace sse
{
float h_dotp(const float *a, const float *b, size_t count);
}

namespace avx
{
float h_dotp(const float *a, const float *b, size_t count);
}

namespace avx512
{
float h_dotp(const float *a, const float *b, size_t count);
}
)

// Variant wrapped in IF_ARCH_ARM (presumably 32-bit ARM only - confirm).
IF_ARCH_ARM(
namespace neon_d32
{
float h_dotp(const float *a, const float *b, size_t count);
}
)

// Variant wrapped in IF_ARCH_AARCH64 (presumably AArch64 only - confirm).
IF_ARCH_AARCH64(
namespace asimd
{
float h_dotp(const float *a, const float *b, size_t count);
}
)

// Function pointer type matching every prototype above; lets the test body
// pass any implementation to a single helper.
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
}

PTEST_BEGIN("dsp.hmath", h_dotp, 5, 5000)

    /**
     * Run the timed loop for one h_dotp implementation.
     * Nothing is printed or measured when PTEST_SUPPORTED() rejects the pointer.
     *
     * @param label implementation name; printed and used to build the loop key
     * @param a first buffer handed to func
     * @param b second buffer handed to func
     * @param count element count handed to func
     * @param func implementation to measure
     */
    void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
    {
        if (PTEST_SUPPORTED(func))
        {
            // Loop key has the form "<label> x <count>"
            char title[80];
            snprintf(title, sizeof(title), "%s x %d", label, int(count));
            printf("Testing %s numbers...\n", title);

            PTEST_LOOP(title,
                func(a, b, count);
            );
        }
    }

    PTEST_MAIN
    {
        // A single allocation backs both operands: 'a' is the first half,
        // 'b' starts right after it. 64 is passed as the alignment argument.
        const size_t max_count = 1 << MAX_RANK;
        uint8_t *storage = NULL;
        float *a = alloc_aligned<float>(storage, max_count * 2, 64);
        float *b = &a[max_count];

        randomize_sign(a, max_count * 2);

        // Stringifies the qualified function name so it doubles as the label
        #define CALL(func) \
            call(#func, a, b, count, func)

        // Element count doubles from 2^MIN_RANK up to 2^MAX_RANK inclusive
        for (size_t count = 1 << MIN_RANK; count <= max_count; count <<= 1)
        {
            CALL(generic::h_dotp);
            IF_ARCH_X86(CALL(sse::h_dotp));
            IF_ARCH_X86(CALL(avx::h_dotp));
            IF_ARCH_X86(CALL(avx512::h_dotp));
            IF_ARCH_ARM(CALL(neon_d32::h_dotp));
            IF_ARCH_AARCH64(CALL(asimd::h_dotp));
            PTEST_SEPARATOR;
        }

        free_aligned(storage);
    }

PTEST_END



119 changes: 119 additions & 0 deletions src/test/ptest/hmath/h_sqr_dotp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
* (C) 2024 Vladimir Sadovnikov <[email protected]>
*
* This file is part of lsp-dsp-lib
* Created on: 11 дек. 2024 г.
*
* lsp-dsp-lib is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* lsp-dsp-lib is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
*/

#include <lsp-plug.in/common/alloc.h>
#include <lsp-plug.in/common/types.h>
#include <lsp-plug.in/dsp/dsp.h>
#include <lsp-plug.in/test-fw/helpers.h>
#include <lsp-plug.in/test-fw/ptest.h>

#define MIN_RANK 5
#define MAX_RANK 16

// Hand-written prototypes of the h_sqr_dotp variants that this performance
// test calls directly. All variants share one signature: two const float
// buffers and an element count, returning a single float.
// NOTE(review): the definitions are not visible in this file; they are
// presumably provided by the library at link time - confirm.
namespace lsp
{
// Variant declared unconditionally, for every target architecture.
namespace generic
{
float h_sqr_dotp(const float *a, const float *b, size_t count);
}

// Variants wrapped in IF_ARCH_X86 (presumably compiled in only for x86
// targets - confirm against the macro definition).
IF_ARCH_X86(
namespace sse
{
float h_sqr_dotp(const float *a, const float *b, size_t count);
}

namespace avx
{
float h_sqr_dotp(const float *a, const float *b, size_t count);
}

namespace avx512
{
float h_sqr_dotp(const float *a, const float *b, size_t count);
}
)

// Variant wrapped in IF_ARCH_ARM (presumably 32-bit ARM only - confirm).
IF_ARCH_ARM(
namespace neon_d32
{
float h_sqr_dotp(const float *a, const float *b, size_t count);
}
)

// Variant wrapped in IF_ARCH_AARCH64 (presumably AArch64 only - confirm).
IF_ARCH_AARCH64(
namespace asimd
{
float h_sqr_dotp(const float *a, const float *b, size_t count);
}
)

// Function pointer type matching every prototype above; lets the test body
// pass any implementation to a single helper.
typedef float (* h_dotp_t)(const float *a, const float *b, size_t count);
}

PTEST_BEGIN("dsp.hmath", h_sqr_dotp, 5, 5000)

    /**
     * Measure a single h_sqr_dotp implementation on the given buffers.
     * Implementations rejected by PTEST_SUPPORTED() are silently skipped.
     *
     * @param label implementation name; printed and used to build the loop key
     * @param a first buffer forwarded to func
     * @param b second buffer forwarded to func
     * @param count element count forwarded to func
     * @param func implementation to measure
     */
    void call(const char *label, float *a, float *b, size_t count, h_dotp_t func)
    {
        const bool usable = PTEST_SUPPORTED(func);
        if (!usable)
            return;

        // Loop key has the form "<label> x <count>"
        char key[80];
        snprintf(key, sizeof(key), "%s x %d", label, int(count));
        printf("Testing %s numbers...\n", key);

        PTEST_LOOP(key,
            func(a, b, count);
        );
    }

    PTEST_MAIN
    {
        // One block of 2 * 2^MAX_RANK floats is requested (64 is passed as the
        // alignment argument); 'b' points at its second half.
        uint8_t *block = NULL;
        const size_t half = 1 << MAX_RANK;
        const size_t total = half * 2;
        float *a = alloc_aligned<float>(block, total, 64);
        float *b = &a[half];

        randomize_sign(a, total);

        // Stringifies the qualified function name so it doubles as the label
        #define CALL(func) \
            call(#func, a, b, count, func)

        // One pass per power of two between MIN_RANK and MAX_RANK inclusive
        for (size_t rank = MIN_RANK; rank <= MAX_RANK; ++rank)
        {
            const size_t count = 1 << rank;

            CALL(generic::h_sqr_dotp);
            IF_ARCH_X86(CALL(sse::h_sqr_dotp));
            IF_ARCH_X86(CALL(avx::h_sqr_dotp));
            IF_ARCH_X86(CALL(avx512::h_sqr_dotp));
            IF_ARCH_ARM(CALL(neon_d32::h_sqr_dotp));
            IF_ARCH_AARCH64(CALL(asimd::h_sqr_dotp));
            PTEST_SEPARATOR;
        }

        free_aligned(block);
    }

PTEST_END



Loading

0 comments on commit f5a1538

Please sign in to comment.