Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Surround "#pragma clang" with checks for Clang #192

Open
wants to merge 2 commits into
base: main-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions include/simsimd/binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,11 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_serial(simsimd_b8_t const* a, simsimd_b8_
#if SIMSIMD_TARGET_NEON
#pragma GCC push_options
#pragma GCC target("arch=armv8.2-a+simd")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+simd"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_hamming_b8_neon(simsimd_b8_t const* a, simsimd_b8_t const* b, simsimd_size_t n_words,
simsimd_distance_t* result) {
simsimd_i32_t differences = 0;
Expand Down Expand Up @@ -113,15 +116,20 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_neon(simsimd_b8_t const* a, simsimd_b8_t
*result = (union_ != 0) ? 1 - (simsimd_f64_t)intersection / (simsimd_f64_t)union_ : 1;
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_NEON

#if SIMSIMD_TARGET_SVE
#pragma GCC push_options
#pragma GCC target("arch=armv8.2-a+sve")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+sve"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_hamming_b8_sve(simsimd_b8_t const* a, simsimd_b8_t const* b, simsimd_size_t n_words,
simsimd_distance_t* result) {
simsimd_size_t i = 0;
Expand Down Expand Up @@ -151,7 +159,9 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_sve(simsimd_b8_t const* a, simsimd_b8_t c
*result = (union_ != 0) ? 1 - (simsimd_f64_t)intersection / (simsimd_f64_t)union_ : 1;
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_SVE
#endif // SIMSIMD_TARGET_ARM
Expand All @@ -160,9 +170,12 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_sve(simsimd_b8_t const* a, simsimd_b8_t c
#if SIMSIMD_TARGET_ICE
#pragma GCC push_options
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "avx512bw", "avx512vpopcntdq")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2,avx512bw,avx512vpopcntdq"))), \
apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_hamming_b8_ice(simsimd_b8_t const* a, simsimd_b8_t const* b, simsimd_size_t n_words,
simsimd_distance_t* result) {
__m512i differences_vec = _mm512_setzero_si512();
Expand Down Expand Up @@ -216,15 +229,20 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_ice(simsimd_b8_t const* a, simsimd_b8_t c
*result = (union_ != 0) ? 1 - (simsimd_f64_t)intersection / (simsimd_f64_t)union_ : 1;
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_ICE

#if SIMSIMD_TARGET_HASWELL
#pragma GCC push_options
#pragma GCC target("popcnt")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("popcnt"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_hamming_b8_haswell(simsimd_b8_t const* a, simsimd_b8_t const* b, simsimd_size_t n_words,
simsimd_distance_t* result) {
// x86 supports unaligned loads and works just fine with the scalar version for small vectors.
Expand All @@ -248,7 +266,9 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_haswell(simsimd_b8_t const* a, simsimd_b8
*result = (union_ != 0) ? 1 - (simsimd_f64_t)intersection / (simsimd_f64_t)union_ : 1;
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_HASWELL
#endif // SIMSIMD_TARGET_X86
Expand Down
35 changes: 35 additions & 0 deletions include/simsimd/curved.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,11 @@ SIMSIMD_MAKE_MAHALANOBIS(accurate, bf16, f64, SIMSIMD_BF16_TO_F32) // simsimd_ma
#if SIMSIMD_TARGET_NEON
#pragma GCC push_options
#pragma GCC target("arch=armv8.2-a+simd")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+simd"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_bilinear_f32_neon(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_f32_t const* c,
simsimd_size_t n, simsimd_distance_t* result) {
float32x4_t sum_vec = vdupq_n_f32(0);
Expand Down Expand Up @@ -220,15 +223,20 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f32_neon(simsimd_f32_t const* a, simsimd
*result = _simsimd_sqrt_f64_neon(sum);
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_NEON

#if SIMSIMD_TARGET_NEON_F16
#pragma GCC push_options
#pragma GCC target("arch=armv8.2-a+simd+fp16")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+simd+fp16"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_bilinear_f16_neon(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_f16_t const* c,
simsimd_size_t n, simsimd_distance_t* result) {
float32x4_t sum_vec = vdupq_n_f32(0);
Expand Down Expand Up @@ -301,15 +309,20 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f16_neon(simsimd_f16_t const* a, simsimd
*result = _simsimd_sqrt_f32_neon(sum);
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_NEON_F16

#if SIMSIMD_TARGET_NEON_BF16
#pragma GCC push_options
#pragma GCC target("arch=armv8.6-a+simd+bf16")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("arch=armv8.6-a+simd+bf16"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_bilinear_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16_t const* b,
simsimd_bf16_t const* c, simsimd_size_t n, simsimd_distance_t* result) {
float32x4_t sum_vec = vdupq_n_f32(0);
Expand Down Expand Up @@ -401,7 +414,9 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_neon(simsimd_bf16_t const* a, simsi
*result = _simsimd_sqrt_f32_neon(sum);
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_NEON_BF16

Expand All @@ -411,8 +426,11 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_neon(simsimd_bf16_t const* a, simsi
#if SIMSIMD_TARGET_HASWELL
#pragma GCC push_options
#pragma GCC target("avx2", "f16c", "fma")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("avx2,f16c,fma"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_bilinear_f16_haswell(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_f16_t const* c,
simsimd_size_t n, simsimd_distance_t* result) {
__m256 sum_vec = _mm256_setzero_ps();
Expand Down Expand Up @@ -558,15 +576,20 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_haswell(simsimd_bf16_t const* a, si
*result = _simsimd_sqrt_f32_haswell(sum);
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_HASWELL

#if SIMSIMD_TARGET_SKYLAKE
#pragma GCC push_options
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2"))), apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_bilinear_f32_skylake(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_f32_t const* c,
simsimd_size_t n, simsimd_distance_t* result) {
simsimd_size_t tail_length = n % 16;
Expand Down Expand Up @@ -634,16 +657,21 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f32_skylake(simsimd_f32_t const* a, sims
*result = _simsimd_sqrt_f64_haswell(_mm512_reduce_add_ps(sum_vec));
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_SKYLAKE

#if SIMSIMD_TARGET_GENOA
#pragma GCC push_options
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "avx512bw", "avx512bf16")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2,avx512bw,avx512bf16"))), \
apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_bilinear_bf16_genoa(simsimd_bf16_t const* a, simsimd_bf16_t const* b,
simsimd_bf16_t const* c, simsimd_size_t n, simsimd_distance_t* result) {
simsimd_size_t tail_length = n % 32;
Expand Down Expand Up @@ -711,16 +739,21 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_bf16_genoa(simsimd_bf16_t const* a, sims
*result = _simsimd_sqrt_f32_haswell(_mm512_reduce_add_ps(sum_vec));
}

#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_GENOA

#if SIMSIMD_TARGET_SAPPHIRE
#pragma GCC push_options
#pragma GCC target("avx2", "avx512f", "avx512vl", "bmi2", "avx512bw", "avx512fp16")
#ifdef __clang__
#pragma clang attribute push(__attribute__((target("avx2,avx512f,avx512vl,bmi2,avx512bw,avx512fp16"))), \
apply_to = function)

#endif

SIMSIMD_PUBLIC void simsimd_bilinear_f16_sapphire(simsimd_f16_t const* a, simsimd_f16_t const* b,
simsimd_f16_t const* c, simsimd_size_t n,
simsimd_distance_t* result) {
Expand Down Expand Up @@ -792,7 +825,9 @@ SIMSIMD_PUBLIC void simsimd_mahalanobis_f16_sapphire(simsimd_f16_t const* a, sim
*result = _simsimd_sqrt_f32_haswell(_mm512_reduce_add_ph(sum_vec));
}


// Clang-only: undo the `#pragma clang attribute push` issued at the top of this section.
// The guard must mirror the `#ifdef __clang__` around the push so the conditionals stay balanced.
#ifdef __clang__
#pragma clang attribute pop
#endif
#pragma GCC pop_options
#endif // SIMSIMD_TARGET_SAPPHIRE
#endif // SIMSIMD_TARGET_X86
Expand Down
Loading
Loading