From 1075472af267695730adba03311d5b62b266cdf9 Mon Sep 17 00:00:00 2001 From: Rafael Gago Date: Tue, 31 Aug 2021 20:49:25 +0200 Subject: [PATCH] nearbyint: Fix for ffast-math The fix in 45cad81a305b09c4b1c9db55c23fcbb0f4e01ee4 wasn't working on Clang. Under ffast-math the compiler is free to assume that "x + v - v = x". 45cad81a305 worked around this fact by storing "x + v" in a volatile variable. For Clang this wasn't enough to stop optimizing, as it correctly detected that the variable is local-scope, so no one can take a reference to it. This commit reworks the fix by defining a function to do the operation and disabling optimizations on that function for all supported compilers (and those using the same frontend). For unsupported compilers an #error is emitted, as the workaround wasn't safe enough. It could even break between compiler versions. This avoids potentially weird behaviour in the future. --- .../xsimd/arch/generic/xsimd_generic_math.hpp | 46 ++++++++++++++----- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/include/xsimd/arch/generic/xsimd_generic_math.hpp b/include/xsimd/arch/generic/xsimd_generic_math.hpp index 56e4d98bb..a5d2b57a6 100644 --- a/include/xsimd/arch/generic/xsimd_generic_math.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_math.hpp @@ -1707,6 +1707,40 @@ namespace xsimd { } +#if !defined(__FAST_MATH__) + template + T conformant_add_then_sub (T x, T v) + { + return x + v - v; + } +#else +#if defined(__clang__) + // available on clang 4 + #define XSIMD_NO_OPTIMIZATION_ATTRIBUTE __attribute__((optnone)) + #define XSIMD_NO_OPTIMIZATION_PRAGMA +#elif defined(__GNUC__) + // available on GCC 4.9 + #define XSIMD_NO_OPTIMIZATION_ATTRIBUTE __attribute__((optimize("O0"))) + #define XSIMD_NO_OPTIMIZATION_PRAGMA +#elif defined(_MSC_VER) + // available on Visual Studio 2015 + #define XSIMD_NO_OPTIMIZATION_ATTRIBUTE + #define XSIMD_NO_OPTIMIZATION_PRAGMA __pragma(optimize("", off)) +#else + // Under fast-math, the compiler will
assume (x + v - v = x). + // + // This error is hit because you are using an unsupported compiler. + // Consider submitting a patch, as working around it is easy. + #error "Unoptimized version of x + y - y required. See the code for details." +#endif + XSIMD_NO_OPTIMIZATION_PRAGMA + template + XSIMD_NO_OPTIMIZATION_ATTRIBUTE T conformant_add_then_sub (T x, T v) + { + return x + v - v; + } +#endif + // nearbyint template::value, void>::type> batch nearbyint(batch const& self, requires_arch) { @@ -1718,16 +1752,7 @@ namespace xsimd { batch_type s = bitofsign(self); batch_type v = self ^ s; batch_type t2n = constants::twotonmb(); - // Under fast-math, reordering is possible and the compiler optimizes d - // to v. That's not what we want, so prevent compiler optimization here. - // FIXME: it may be better to emit a memory barrier here (?). -#ifdef __FAST_MATH__ - volatile batch_type d0 = v + t2n; - batch_type d = *(batch_type*)(void*)(&d0) - t2n; -#else - batch_type d0 = v + t2n; - batch_type d = d0 - t2n; -#endif + batch_type d = conformant_add_then_sub (v, t2n); return s ^ select(v < t2n, d, v); } } @@ -2199,4 +2224,3 @@ namespace xsimd { } #endif -