-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #88 from JuliaMath/better-sin_sum
better sin_sum
- Loading branch information
Showing
3 changed files
with
107 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,92 @@ | ||
# function to more accurately compute cos(x + xn) | ||
# see https://github.com/heltonmc/Bessels.jl/pull/13 | ||
# written by @oscardssmith | ||
function cos_sum(x, xn) | ||
s = x + xn | ||
n, r = reduce_pi02_med(s) | ||
lo = r.lo - ((s - x) - xn) | ||
hi = r.hi + lo | ||
y = Base.Math.DoubleFloat64(hi, r.hi-hi+lo) | ||
n = n&3 | ||
using Base.Math: sin_kernel, cos_kernel, sincos_kernel, rem_pio2_kernel, DoubleFloat64, DoubleFloat32 | ||
|
||
""" | ||
computes sin(sum(xs)) where xs are sorted by absolute value | ||
Doing this is much more accurate than the naive sin(sum(xs)) | ||
""" | ||
function sin_sum(xs::Vararg{T})::T where T<:Base.IEEEFloat
    # Reduce the sum of all arguments to `n` quarter turns plus a remainder `y`
    # (a Base.Math double-word value produced by `rem_pio2_sum`).
    n, y = rem_pio2_sum(xs...)
    # Only the quadrant (n mod 4) matters; `&` also maps negative `n` into 0:3.
    n &= 3
    # sin(y + n*pi/2) cycles through sin, cos, -sin, -cos as n goes 0, 1, 2, 3.
    if n == 0
        return sin_kernel(y)
    elseif n == 1
        return cos_kernel(y)
    elseif n == 2
        return -sin_kernel(y)
    else
        return -cos_kernel(y)
    end
end
# function to more accurately compute sin(x + xn) | ||
function sin_sum(x, xn) | ||
s = x + xn | ||
n, r = reduce_pi02_med(s) | ||
lo = r.lo - ((s - x) - xn) | ||
hi = r.hi + lo | ||
y = Base.Math.DoubleFloat64(hi, r.hi-hi+lo) | ||
n = n&3 | ||
|
||
""" | ||
computes sincos(sum(xs)) where xs are sorted by absolute value | ||
Doing this is much more accurate than the naive sincos(sum(xs)) | ||
""" | ||
function sincos_sum(xs::Vararg{T})::Tuple{T,T} where T<:Base.IEEEFloat
    # The result is a `(sin, cos)` pair, so the return annotation must be a tuple
    # type: annotating it as a bare `T` would make every return fail in `convert`.
    #
    # Reduce the sum of all arguments to `n` quarter turns plus a remainder `y`.
    n, y = rem_pio2_sum(xs...)
    # Only the quadrant (n mod 4) matters; `&` also maps negative `n` into 0:3.
    n &= 3
    si, co = sincos_kernel(y)
    # Rotate the (sin, cos) pair of the remainder by n quarter turns.
    if n == 0
        return si, co
    elseif n == 1
        return co, -si
    elseif n == 2
        return -si, -co
    else
        return -co, si
    end
end
|
||
# Reduce `sum(xs)` modulo pi/2 for Float64 arguments.
#
# Returns `(n, y)` where `n` is an integer count of quarter turns and `y` is a
# `DoubleFloat64` (hi/lo pair) such that `sum(xs) ≈ n*pi/2 + (y.hi + y.lo)` with
# `y.hi` brought back into [-pi/4, pi/4] by the trailing loops.
function rem_pio2_sum(xs::Vararg{Float64})
    # Low-order part of pi/2: the amount by which the Float64 value of `pi/2`
    # falls short of the true constant.
    pio2_lo = 6.123233995736766e-17
    n = 0
    hi, lo = 0.0, 0.0
    for x in xs
        if abs(x) <= pi/4
            # Already inside the primary interval: use the argument as-is.
            term, term_lo = x, 0.0
        else
            # Reduce each large argument on its own and keep its quadrant count.
            n_i, y = rem_pio2_kernel(x)
            n += n_i
            term, term_lo = y.hi, y.lo
        end
        # Branch-free TwoSum: recovers the exact rounding error of `hi + term`
        # regardless of which operand is larger. Reduced remainders are not
        # ordered by magnitude, so the cheaper Fast2Sum is not safe here.
        s = term + hi
        t = s - hi
        lo += ((hi - (s - t)) + (term - t)) + term_lo
        hi = s
    end
    # The accumulated remainder may have drifted outside [-pi/4, pi/4];
    # move whole quarter turns from it into `n`.
    while hi > pi/4
        hi -= pi/2
        lo -= pio2_lo
        n += 1
    end
    while hi < -pi/4
        hi += pi/2
        lo += pio2_lo
        n -= 1
    end
    return n, DoubleFloat64(hi, lo)
end
|
||
# Reduce `sum(xs)` modulo pi/2 for Float32 arguments, accumulating in Float64.
#
# Returns `(n, y)` where `n` is an integer count of quarter turns and `y` is a
# `DoubleFloat32` whose `hi` field holds the remainder, so that
# `sum(xs) ≈ n*pi/2 + y.hi`.
function rem_pio2_sum(xs::Vararg{Float32})
    acc = 0.0
    n = 0
    # The minimum cosine or sine of any Float32 that gets reduced is 1.6e-9
    # so reducing at 2^22 prevents catastrophic loss of precision.
    # There probably is a case where this loses some digits but it is a decent
    # tradeoff between accuracy and speed.
    @fastmath for x in xs
        # Compare magnitudes so that large negative arguments are reduced too;
        # otherwise they would swallow the smaller terms of the sum.
        if abs(x) > 0x1p22
            n_i, y_i = rem_pio2_kernel(x)
            n += n_i
            acc += y_i.hi
        else
            acc += x
        end
    end
    # A sum already inside [-pi/4, pi/4] needs no further reduction; handing it
    # to `rem_pio2_kernel` would shift it by a quarter turn and return a
    # remainder outside the interval the sin/cos kernels are meant for.
    abs(acc) <= pi/4 && return n, DoubleFloat32(acc)
    n_last, r = rem_pio2_kernel(acc)
    return n + n_last, DoubleFloat32(r.hi)
end
@inline function reduce_pi02_med(x::Float64) | ||
pio2_1 = 1.57079632673412561417e+00 | ||
|
||
fn = round(x*(2/pi)) | ||
r = muladd(-fn, pio2_1, x) | ||
w = fn * 6.07710050650619224932e-11 | ||
y = r-w | ||
return unsafe_trunc(Int, fn), Base.Math.DoubleFloat64(y, (r-y)-w) | ||
# Reduce `sum(xs)` modulo pi/2 for Float16 arguments.
#
# Returns `(n, y)` where `n` is an integer count of quarter turns and `y` is a
# `DoubleFloat32` whose `hi` field holds the remainder, so that
# `sum(xs) ≈ n*pi/2 + y.hi`.
function rem_pio2_sum(xs::Vararg{Float16})
    total = sum(Float64, xs) # Float16 can be losslessly accumulated in Float64
    # A sum already inside [-pi/4, pi/4] needs no reduction; handing it to
    # `rem_pio2_kernel` would shift it by a quarter turn and return a remainder
    # outside the interval the sin/cos kernels are meant for.
    abs(total) <= pi/4 && return 0, DoubleFloat32(total)
    n, r = rem_pio2_kernel(total)
    return n, DoubleFloat32(r.hi)
end