Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arm: Rename internal ARM ROL/ROR/LSR/LSL functions with a SIMDE prefix. #1252

Merged
merged 3 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions simde/arm/neon/sha256.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

#define ROR32(operand, shift) (((operand) >> (shift)) | ((operand) << (32-shift)))
#define ROL32(operand, shift) (((operand) >> (32-shift)) | ((operand) << (shift)))
#define LSR(operand, shift) ((operand) >> (shift))
#define LSL(operand, shift) ((operand) << (shift))
/* Bit-manipulation helpers private to this header; they are #undef'd again
 * before the header ends.
 *
 * SIMDE_ROR32 / SIMDE_ROL32 rotate `operand` right / left by `shift` bits,
 * built from two opposite shifts OR'd together.  They assume a 32-bit
 * unsigned operand and a shift count in 1..31: a count of 0 (or 32) would
 * make one of the two shifts a full-width shift, which is undefined in C.
 * `operand` and `shift` are each expanded twice, so pass side-effect-free
 * expressions only.
 *
 * Every use of `shift` is parenthesised so that an expression argument such
 * as `a + b` is subtracted from 32 as a whole. */
#define SIMDE_ROR32(operand, shift) (((operand) >> (shift)) | ((operand) << (32 - (shift))))
#define SIMDE_ROL32(operand, shift) (((operand) >> (32 - (shift))) | ((operand) << (shift)))
/* Plain logical shifts right / left (logical as long as `operand` is unsigned). */
#define SIMDE_LSR(operand, shift) ((operand) >> (shift))
#define SIMDE_LSL(operand, shift) ((operand) << (shift))

static uint32_t simde_SHAchoose(uint32_t x, uint32_t y, uint32_t z) {
return (((y ^ z) & x) ^ z);
Expand All @@ -47,11 +47,11 @@ static uint32_t simde_SHAmajority(uint32_t x, uint32_t y, uint32_t z) {
}

static uint32_t simde_SHAhashSIGMA0(uint32_t x) {
return ROR32(x, 2) ^ ROR32(x, 13) ^ ROR32(x, 22);
return SIMDE_ROR32(x, 2) ^ SIMDE_ROR32(x, 13) ^ SIMDE_ROR32(x, 22);
}

static uint32_t simde_SHAhashSIGMA1(uint32_t x) {
return ROR32(x, 6) ^ ROR32(x, 11) ^ ROR32(x, 25);
return SIMDE_ROR32(x, 6) ^ SIMDE_ROR32(x, 11) ^ SIMDE_ROR32(x, 25);
}

static simde_uint32x4_t
Expand Down Expand Up @@ -127,7 +127,7 @@ simde_vsha256su0q_u32(simde_uint32x4_t w0_3, simde_uint32x4_t w4_7) {
uint32_t elt;
for(int i = 0; i < 4; ++i) {
elt = T_.values[i];
elt = ROR32(elt, 7) ^ ROR32(elt, 18) ^ LSR(elt, 3);
elt = SIMDE_ROR32(elt, 7) ^ SIMDE_ROR32(elt, 18) ^ SIMDE_LSR(elt, 3);
r_.values[i] = elt + x_.values[i];
}
return simde_uint32x4_from_private(r_);
Expand Down Expand Up @@ -161,15 +161,15 @@ simde_vsha256su1q_u32(simde_uint32x4_t tw0_3, simde_uint32x4_t w8_11, simde_uint
T1_.values[0] = z_.values[2];
for(int i = 0; i < 2; ++i) {
elt = T1_.values[i];
elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ LSR(elt, 10);
elt = SIMDE_ROR32(elt, 17) ^ SIMDE_ROR32(elt, 19) ^ SIMDE_LSR(elt, 10);
elt = elt + x_.values[i] + T0_.values[i];
r_.values[i] = elt;
}
T1_.values[1] = r_.values[1];
T1_.values[0] = r_.values[0];
for(int i = 2; i < 4; ++i) {
elt = T1_.values[i-2];
elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ LSR(elt, 10);
elt = SIMDE_ROR32(elt, 17) ^ SIMDE_ROR32(elt, 19) ^ SIMDE_LSR(elt, 10);
elt = elt + x_.values[i] + T0_.values[i];
r_.values[i] = elt;
}
Expand All @@ -182,10 +182,10 @@ simde_vsha256su1q_u32(simde_uint32x4_t tw0_3, simde_uint32x4_t w8_11, simde_uint
#define vsha256su1q_u32(tw0_3, w8_11, w12_15) simde_vsha256su1q_u32((tw0_3), (w8_11), (w12_15))
#endif

#undef ROR32
#undef ROL32
#undef LSR
#undef LSL
#undef SIMDE_ROR32
#undef SIMDE_ROL32
#undef SIMDE_LSR
#undef SIMDE_LSL

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
Expand Down
32 changes: 16 additions & 16 deletions simde/arm/neon/sha512.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

#define ROR64(operand, shift) (((operand) >> (shift)) | ((operand) << (64-shift)))
#define ROL64(operand, shift) (((operand) >> (64-shift)) | ((operand) << (shift)))
#define LSR(operand, shift) ((operand) >> (shift))
#define LSL(operand, shift) ((operand) << (shift))
/* Bit-manipulation helpers private to this header; they are #undef'd again
 * before the header ends.
 *
 * SIMDE_ROR64 / SIMDE_ROL64 rotate `operand` right / left by `shift` bits,
 * built from two opposite shifts OR'd together.  They assume a 64-bit
 * unsigned operand and a shift count in 1..63: a count of 0 (or 64) would
 * make one of the two shifts a full-width shift, which is undefined in C.
 * `operand` and `shift` are each expanded twice, so pass side-effect-free
 * expressions only.
 *
 * Every use of `shift` is parenthesised so that an expression argument such
 * as `a + b` is subtracted from 64 as a whole. */
#define SIMDE_ROR64(operand, shift) (((operand) >> (shift)) | ((operand) << (64 - (shift))))
#define SIMDE_ROL64(operand, shift) (((operand) >> (64 - (shift))) | ((operand) << (shift)))
/* Plain logical shifts right / left (logical as long as `operand` is unsigned). */
#define SIMDE_LSR(operand, shift) ((operand) >> (shift))
#define SIMDE_LSL(operand, shift) ((operand) << (shift))

SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
Expand All @@ -51,11 +51,11 @@ simde_vsha512hq_u64(simde_uint64x2_t w, simde_uint64x2_t x, simde_uint64x2_t y)
y_ = simde_uint64x2_to_private(y);
uint64_t Msigma1;
uint64_t tmp;
Msigma1 = ROR64(y_.values[1], 14) ^ ROR64(y_.values[1], 18) ^ ROR64(y_.values[1], 41);
Msigma1 = SIMDE_ROR64(y_.values[1], 14) ^ SIMDE_ROR64(y_.values[1], 18) ^ SIMDE_ROR64(y_.values[1], 41);
r_.values[1] = (y_.values[1] & x_.values[0]) ^ (~(y_.values[1]) & x_.values[1]);
r_.values[1] = (r_.values[1] + Msigma1 + w_.values[1]);
tmp = r_.values[1] + y_.values[0];
Msigma1 = ROR64(tmp, 14) ^ ROR64(tmp, 18) ^ ROR64(tmp, 41);
Msigma1 = SIMDE_ROR64(tmp, 14) ^ SIMDE_ROR64(tmp, 18) ^ SIMDE_ROR64(tmp, 41);
r_.values[0] = (tmp & y_.values[1]) ^ (~(tmp) & x_.values[0]);
r_.values[0] = (r_.values[0] + Msigma1 + w_.values[0]);
return simde_uint64x2_from_private(r_);
Expand All @@ -79,10 +79,10 @@ simde_vsha512h2q_u64(simde_uint64x2_t w, simde_uint64x2_t x, simde_uint64x2_t y)
x_ = simde_uint64x2_to_private(x),
y_ = simde_uint64x2_to_private(y);
uint64_t Msigma0;
Msigma0 = ROR64(y_.values[0], 28) ^ ROR64(y_.values[0], 34) ^ ROR64(y_.values[0], 39);
Msigma0 = SIMDE_ROR64(y_.values[0], 28) ^ SIMDE_ROR64(y_.values[0], 34) ^ SIMDE_ROR64(y_.values[0], 39);
r_.values[1] = (y_.values[1] & x_.values[0]) ^ (y_.values[0] & x_.values[0]) ^ (y_.values[1] & y_.values[0]);
r_.values[1] = (r_.values[1] + Msigma0 + w_.values[1]);
Msigma0 = ROR64(r_.values[1], 28) ^ ROR64(r_.values[1], 34) ^ ROR64(r_.values[1], 39);
Msigma0 = SIMDE_ROR64(r_.values[1], 28) ^ SIMDE_ROR64(r_.values[1], 34) ^ SIMDE_ROR64(r_.values[1], 39);
r_.values[0] = (r_.values[1] & y_.values[0]) ^ (r_.values[1] & y_.values[1]) ^ (y_.values[1] & y_.values[0]);
r_.values[0] = (r_.values[0] + Msigma0 + w_.values[0]);
return simde_uint64x2_from_private(r_);
Expand All @@ -104,9 +104,9 @@ simde_vsha512su0q_u64(simde_uint64x2_t w, simde_uint64x2_t x) {
w_ = simde_uint64x2_to_private(w),
x_ = simde_uint64x2_to_private(x);
uint64_t sig0;
sig0 = ROR64(w_.values[1], 1) ^ ROR64(w_.values[1], 8) ^ (w_.values[1] >> 7);
sig0 = SIMDE_ROR64(w_.values[1], 1) ^ SIMDE_ROR64(w_.values[1], 8) ^ (w_.values[1] >> 7);
r_.values[0] = w_.values[0] + sig0;
sig0 = ROR64(x_.values[0], 1) ^ ROR64(x_.values[0], 8) ^ (x_.values[0] >> 7);
sig0 = SIMDE_ROR64(x_.values[0], 1) ^ SIMDE_ROR64(x_.values[0], 8) ^ (x_.values[0] >> 7);
r_.values[1] = w_.values[1] + sig0;
return simde_uint64x2_from_private(r_);

Expand All @@ -129,9 +129,9 @@ simde_vsha512su1q_u64(simde_uint64x2_t w, simde_uint64x2_t x, simde_uint64x2_t y
x_ = simde_uint64x2_to_private(x),
y_ = simde_uint64x2_to_private(y);
uint64_t sig1;
sig1 = ROR64(x_.values[1], 19) ^ ROR64(x_.values[1], 61) ^ (x_.values[1] >> 6);
sig1 = SIMDE_ROR64(x_.values[1], 19) ^ SIMDE_ROR64(x_.values[1], 61) ^ (x_.values[1] >> 6);
r_.values[1] = w_.values[1] + sig1 + y_.values[1];
sig1 = ROR64(x_.values[0], 19) ^ ROR64(x_.values[0], 61) ^ (x_.values[0] >> 6);
sig1 = SIMDE_ROR64(x_.values[0], 19) ^ SIMDE_ROR64(x_.values[0], 61) ^ (x_.values[0] >> 6);
r_.values[0] = w_.values[0] + sig1 + y_.values[0];
return simde_uint64x2_from_private(r_);

Expand All @@ -142,10 +142,10 @@ simde_vsha512su1q_u64(simde_uint64x2_t w, simde_uint64x2_t x, simde_uint64x2_t y
#define vsha512su1q_u64(w, x, y) simde_vsha512su1q_u64((w), (x), (y))
#endif

#undef ROR64
#undef ROL64
#undef LSR
#undef LSL
#undef SIMDE_ROR64
#undef SIMDE_ROL64
#undef SIMDE_LSR
#undef SIMDE_LSL

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
Expand Down
69 changes: 37 additions & 32 deletions simde/arm/neon/sm3.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,12 @@

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
#if HEDLEY_GCC_VERSION_CHECK(14,0,0)
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
#endif

SIMDE_BEGIN_DECLS_

#define ROR32(operand, shift) (((operand) >> (shift)) | ((operand) << (32-shift)))
#define ROL32(operand, shift) (((operand) >> (32-shift)) | ((operand) << (shift)))
#define LSR(operand, shift) ((operand) >> (shift))
#define LSL(operand, shift) ((operand) << (shift))
/* Bit-manipulation helpers private to this header; they are #undef'd again
 * before the header ends.
 *
 * SIMDE_ROR32 / SIMDE_ROL32 rotate `operand` right / left by `shift` bits,
 * built from two opposite shifts OR'd together.  They assume a 32-bit
 * unsigned operand and a shift count in 1..31: a count of 0 (or 32) would
 * make one of the two shifts a full-width shift, which is undefined in C.
 * `operand` and `shift` are each expanded twice, so pass side-effect-free
 * expressions only.
 *
 * Every use of `shift` is parenthesised so that an expression argument such
 * as `a + b` is subtracted from 32 as a whole. */
#define SIMDE_ROR32(operand, shift) (((operand) >> (shift)) | ((operand) << (32 - (shift))))
#define SIMDE_ROL32(operand, shift) (((operand) >> (32 - (shift))) | ((operand) << (shift)))
/* Plain logical shifts right / left (logical as long as `operand` is unsigned). */
#define SIMDE_LSR(operand, shift) ((operand) >> (shift))
#define SIMDE_LSL(operand, shift) ((operand) << (shift))

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
Expand All @@ -53,7 +49,7 @@ simde_vsm3ss1q_u32(simde_uint32x4_t n, simde_uint32x4_t m, simde_uint32x4_t a) {
n_ = simde_uint32x4_to_private(n),
m_ = simde_uint32x4_to_private(m),
a_ = simde_uint32x4_to_private(a);
r_.values[3] = ROL32((ROL32(n_.values[3], 12) + m_.values[3] + a_.values[3]), 7);
r_.values[3] = SIMDE_ROL32((SIMDE_ROL32(n_.values[3], 12) + m_.values[3] + a_.values[3]), 7);
r_.values[2] = 0;
r_.values[1] = 0;
r_.values[0] = 0;
Expand All @@ -65,6 +61,11 @@ simde_vsm3ss1q_u32(simde_uint32x4_t n, simde_uint32x4_t m, simde_uint32x4_t a) {
#define vsm3ss1q_u32(n, m, a) simde_vsm3ss1q_u32((n), (m), (a))
#endif

#if defined(SIMDE_ARCH_RISCV64) && HEDLEY_GCC_VERSION_CHECK(14,0,0)
HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vsm3tt1aq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c, const int imm2)
Expand All @@ -78,11 +79,11 @@ simde_vsm3tt1aq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c,
uint32_t WjPrime, TT1, SS2;

WjPrime = c_.values[imm2];
SS2 = b_.values[3] ^ ROL32(a_.values[3], 12);
SS2 = b_.values[3] ^ SIMDE_ROL32(a_.values[3], 12);
TT1 = a_.values[1] ^ (a_.values[3] ^ a_.values[2]);
TT1 = (TT1 + a_.values[0] + SS2 + WjPrime);
r_.values[0] = a_.values[1];
r_.values[1] = ROL32(a_.values[2], 9);
r_.values[1] = SIMDE_ROL32(a_.values[2], 9);
r_.values[2] = a_.values[3];
r_.values[3] = TT1;
return simde_uint32x4_from_private(r_);
Expand All @@ -108,11 +109,11 @@ simde_vsm3tt1bq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c,
uint32_t WjPrime, TT1, SS2;

WjPrime = c_.values[imm2];
SS2 = b_.values[3] ^ ROL32(a_.values[3], 12);
SS2 = b_.values[3] ^ SIMDE_ROL32(a_.values[3], 12);
TT1 = (a_.values[3] & a_.values[1]) | (a_.values[3] & a_.values[2]) | (a_.values[1] & a_.values[2]);
TT1 = (TT1 + a_.values[0] + SS2 + WjPrime);
r_.values[0] = a_.values[1];
r_.values[1] = ROL32(a_.values[2], 9);
r_.values[1] = SIMDE_ROL32(a_.values[2], 9);
r_.values[2] = a_.values[3];
r_.values[3] = TT1;
return simde_uint32x4_from_private(r_);
Expand Down Expand Up @@ -141,9 +142,9 @@ simde_vsm3tt2aq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c,
TT2 = a_.values[1] ^ (a_.values[3] ^ a_.values[2]);
TT2 = (TT2 + a_.values[0] + b_.values[3] + Wj);
r_.values[0] = a_.values[1];
r_.values[1] = ROL32(a_.values[2], 19);
r_.values[1] = SIMDE_ROL32(a_.values[2], 19);
r_.values[2] = a_.values[3];
r_.values[3] = TT2 ^ ROL32(TT2, 9) ^ ROL32(TT2, 17);
r_.values[3] = TT2 ^ SIMDE_ROL32(TT2, 9) ^ SIMDE_ROL32(TT2, 17);
return simde_uint32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_SM3)
Expand All @@ -170,9 +171,9 @@ simde_vsm3tt2bq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c,
TT2 = (a_.values[3] & a_.values[2]) | (~(a_.values[3]) & a_.values[1]);
TT2 = (TT2 + a_.values[0] + b_.values[3] + Wj);
r_.values[0] = a_.values[1];
r_.values[1] = ROL32(a_.values[2], 19);
r_.values[1] = SIMDE_ROL32(a_.values[2], 19);
r_.values[2] = a_.values[3];
r_.values[3] = TT2 ^ ROL32(TT2, 9) ^ ROL32(TT2, 17);
r_.values[3] = TT2 ^ SIMDE_ROL32(TT2, 9) ^ SIMDE_ROL32(TT2, 17);
return simde_uint32x4_from_private(r_);
}
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_SM3)
Expand All @@ -183,6 +184,10 @@ simde_vsm3tt2bq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c,
#define vsm3tt2bq_u32(a, b, c, imm2) simde_vsm3tt2bq_u32((a), (b), (c), (imm2))
#endif

#if defined(SIMDE_ARCH_RISCV64) && HEDLEY_GCC_VERSION_CHECK(14,0,0)
HEDLEY_DIAGNOSTIC_POP
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vsm3partw1q_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) {
Expand All @@ -194,14 +199,14 @@ simde_vsm3partw1q_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b),
c_ = simde_uint32x4_to_private(c);
r_.values[2] = (a_.values[2] ^ b_.values[2]) ^ (ROL32(c_.values[3], 15));
r_.values[1] = (a_.values[1] ^ b_.values[1]) ^ (ROL32(c_.values[2], 15));
r_.values[0] = (a_.values[0] ^ b_.values[0]) ^ (ROL32(c_.values[1], 15));
r_.values[2] = (a_.values[2] ^ b_.values[2]) ^ (SIMDE_ROL32(c_.values[3], 15));
r_.values[1] = (a_.values[1] ^ b_.values[1]) ^ (SIMDE_ROL32(c_.values[2], 15));
r_.values[0] = (a_.values[0] ^ b_.values[0]) ^ (SIMDE_ROL32(c_.values[1], 15));
for(int i = 0; i < 4; ++i) {
if (i == 3) {
r_.values[3] = (a_.values[3] ^ b_.values[3]) ^ (ROL32(r_.values[0], 15));
r_.values[3] = (a_.values[3] ^ b_.values[3]) ^ (SIMDE_ROL32(r_.values[0], 15));
}
r_.values[i] = r_.values[i] ^ ROL32(r_.values[i], 15) ^ ROL32(r_.values[i], 23);
r_.values[i] = r_.values[i] ^ SIMDE_ROL32(r_.values[i], 15) ^ SIMDE_ROL32(r_.values[i], 23);
}
return simde_uint32x4_from_private(r_);
#endif
Expand All @@ -224,16 +229,16 @@ simde_vsm3partw2q_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c
b_ = simde_uint32x4_to_private(b),
c_ = simde_uint32x4_to_private(c);
uint32_t tmp2;
tmp_.values[3] = b_.values[3] ^ (ROL32(c_.values[3], 7));
tmp_.values[2] = b_.values[2] ^ (ROL32(c_.values[2], 7));
tmp_.values[1] = b_.values[1] ^ (ROL32(c_.values[1], 7));
tmp_.values[0] = b_.values[0] ^ (ROL32(c_.values[0], 7));
tmp_.values[3] = b_.values[3] ^ (SIMDE_ROL32(c_.values[3], 7));
tmp_.values[2] = b_.values[2] ^ (SIMDE_ROL32(c_.values[2], 7));
tmp_.values[1] = b_.values[1] ^ (SIMDE_ROL32(c_.values[1], 7));
tmp_.values[0] = b_.values[0] ^ (SIMDE_ROL32(c_.values[0], 7));
r_.values[3] = a_.values[3] ^ tmp_.values[3];
r_.values[2] = a_.values[2] ^ tmp_.values[2];
r_.values[1] = a_.values[1] ^ tmp_.values[1];
r_.values[0] = a_.values[0] ^ tmp_.values[0];
tmp2 = ROL32(tmp_.values[0], 15);
tmp2 = tmp2 ^ ROL32(tmp2, 15) ^ ROL32(tmp2, 23);
tmp2 = SIMDE_ROL32(tmp_.values[0], 15);
tmp2 = tmp2 ^ SIMDE_ROL32(tmp2, 15) ^ SIMDE_ROL32(tmp2, 23);
r_.values[3] = r_.values[3] ^ tmp2;

return simde_uint32x4_from_private(r_);
Expand All @@ -244,10 +249,10 @@ simde_vsm3partw2q_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c
#define vsm3partw2q_u32(a, b, c) simde_vsm3partw2q_u32((a), (b), (c))
#endif

#undef ROR32
#undef ROL32
#undef LSR
#undef LSL
#undef SIMDE_ROR32
#undef SIMDE_ROL32
#undef SIMDE_LSR
#undef SIMDE_LSL

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
Expand Down
20 changes: 10 additions & 10 deletions simde/arm/neon/sm4.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

#define ROR32(operand, shift) (((operand) >> (shift)) | ((operand) << (32-shift)))
#define ROL32(operand, shift) (((operand) >> (32-shift)) | ((operand) << (shift)))
#define LSR(operand, shift) ((operand) >> (shift))
#define LSL(operand, shift) ((operand) << (shift))
/* Bit-manipulation helpers private to this header; they are #undef'd again
 * before the header ends.
 *
 * SIMDE_ROR32 / SIMDE_ROL32 rotate `operand` right / left by `shift` bits,
 * built from two opposite shifts OR'd together.  They assume a 32-bit
 * unsigned operand and a shift count in 1..31: a count of 0 (or 32) would
 * make one of the two shifts a full-width shift, which is undefined in C.
 * `operand` and `shift` are each expanded twice, so pass side-effect-free
 * expressions only.
 *
 * Every use of `shift` is parenthesised so that an expression argument such
 * as `a + b` is subtracted from 32 as a whole. */
#define SIMDE_ROR32(operand, shift) (((operand) >> (shift)) | ((operand) << (32 - (shift))))
#define SIMDE_ROL32(operand, shift) (((operand) >> (32 - (shift))) | ((operand) << (shift)))
/* Plain logical shifts right / left (logical as long as `operand` is unsigned). */
#define SIMDE_LSR(operand, shift) ((operand) >> (shift))
#define SIMDE_LSL(operand, shift) ((operand) << (shift))

static const uint8_t simde_sbox_sm4[256] = {
0xd6,0x90,0xe9,0xfe,0xcc,0xe1,0x3d,0xb7,0x16,0xb6,0x14,0xc2,0x28,0xfb,0x2c,0x05,
Expand Down Expand Up @@ -91,7 +91,7 @@ simde_vsm4eq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
_intval[i] = simde_sbox_sm4[_intval[i]];
}
simde_u32_from_u8x4(_intval, &intval);
intval = intval ^ ROL32(intval, 2) ^ ROL32(intval, 10) ^ ROL32(intval, 18) ^ ROL32(intval, 24);
intval = intval ^ SIMDE_ROL32(intval, 2) ^ SIMDE_ROL32(intval, 10) ^ SIMDE_ROL32(intval, 18) ^ SIMDE_ROL32(intval, 24);
intval = intval ^ a_.values[0];

a_.values[0] = a_.values[1];
Expand Down Expand Up @@ -128,7 +128,7 @@ simde_vsm4ekeyq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
_intval[i] = simde_sbox_sm4[_intval[i]];
}
simde_u32_from_u8x4(_intval, &intval);
intval = intval ^ ROL32(intval, 13) ^ ROL32(intval, 23);
intval = intval ^ SIMDE_ROL32(intval, 13) ^ SIMDE_ROL32(intval, 23);
intval = intval ^ a_.values[0];

a_.values[0] = a_.values[1];
Expand All @@ -144,10 +144,10 @@ simde_vsm4ekeyq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
#define vsm4ekeyq_u32(a, b) simde_vsm4ekeyq_u32((a), (b))
#endif

#undef ROR32
#undef ROL32
#undef LSR
#undef LSL
#undef SIMDE_ROR32
#undef SIMDE_ROL32
#undef SIMDE_LSR
#undef SIMDE_LSL

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP
Expand Down
2 changes: 1 addition & 1 deletion simde/x86/sse2.h
Original file line number Diff line number Diff line change
Expand Up @@ -5150,7 +5150,7 @@ simde_mm_pause (void) {
#elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
__asm__ __volatile ("dbar 0");
#elif defined(HEDLEY_GCC_VERSION)
#if defined(SIMDE_ARCH_RISCV)
#if defined(SIMDE_ARCH_RISCV32) || defined(SIMDE_ARCH_RISCV64)
__builtin_riscv_pause();
#else
__asm__ __volatile__ ("nop" ::: "memory");
Expand Down
Loading