Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EnzymeCore weakdep and an extension with a custom rule for the Levin transformation #97

Merged
merged 17 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2021-2022 Michael Helton, Oscar Smith, and contributors
Copyright (c) 2021-2023 Michael Helton, Oscar Smith, Chris Geoga, and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
8 changes: 7 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@ version = "0.3.0-DEV"
SIMDMath = "5443be0b-e40a-4f70-a07e-dcd652efc383"

[compat]
julia = "1.8"
SIMDMath = "0.2.5"
julia = "1.8"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[weakdeps]
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"

[extensions]
BesselsEnzymeCoreExt = "EnzymeCore"

[targets]
test = ["Test"]
38 changes: 38 additions & 0 deletions ext/BesselsEnzymeCoreExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
module BesselsEnzymeCoreExt

# TODO (cg 2023/05/08 10:02): Compat of any kind.

using Bessels, EnzymeCore
using EnzymeCore.EnzymeRules
using Bessels.Math

# A manual method that separately transforms the `val` and `dval`, because
# sometimes the `val` can converge while the `dval` hasn't, so just using an
# early return or something can give incorrect derivatives in edge cases.
#
# https://github.com/JuliaMath/Bessels.jl/issues/96
#
# and the links within for discussion.
#
# TODO (cg 2023/05/08 10:00): I'm not entirely sure how best to "generalize"
# this to cases like a return type of DuplicatedNoNeed, or something being a
# `Enzyme.Const`. These shouldn't in principle affect the "point" of this
# function (which is just to check for convergence before applying a
# function), but on its face this approach would mean I need a lot of
# hand-written extra methods. I have an open issue on the Enzyme.jl repo at
#
# https://github.com/EnzymeAD/Enzyme.jl/issues/786
#
# that gets at this problem a bit. But it's a weird request and I'm sure Billy
# has a lot of asks on his time.
# Forward-mode rule for `levin_transform` when both the sequence `s` and the
# weights `w` are `Duplicated`.
#
# The primal tuples (`s.val`, `w.val`) and the tangent tuples (`s.dval`,
# `w.dval`) are each passed through `levin_transform` on their own, and the two
# results are returned together as a `Duplicated`. Transforming the tangent
# separately (rather than differentiating through the primal code path) is the
# point of this rule: the primal may satisfy an early-exit condition while the
# tangent has not converged yet.
#
# `func` is only used for dispatch on `typeof(levin_transform)`.
function EnzymeRules.forward(func::Const{typeof(levin_transform)},
                             ::Type{<:Duplicated},
                             s::Duplicated,
                             w::Duplicated)
    primal = levin_transform(s.val, w.val)
    tangent = levin_transform(s.dval, w.dval)
    return Duplicated(primal, tangent)
end

end
129 changes: 91 additions & 38 deletions src/BesselFunctions/besselk.jl
Original file line number Diff line number Diff line change
Expand Up @@ -499,38 +499,25 @@ besselk_power_series(v, x::Float32) = Float32(besselk_power_series(v, Float64(x)
besselk_power_series(v, x::ComplexF32) = ComplexF32(besselk_power_series(v, ComplexF64(x)))

# NOTE(review): this span is the body of diff hunk `@@ -499,38 +499,25 @@` with
# its +/- markers and leading whitespace stripped, so the removed and the added
# implementation of `besselk_power_series` appear back to back inside a single
# `function ... end`. The marker comments below separate the two sides; the
# split is consistent with the hunk's line counts (31 removed, 18 added,
# 7 context lines).
function besselk_power_series(v, x::ComplexOrReal{T}) where T
# ---- removed side of the hunk (pre-change body) -----------------------------
MaxIter = 1000
S = eltype(x)
v, x = S(v), S(x)

z = x / 2
zz = z * z
logz = log(z)
xd2_v = exp(v*logz)
xd2_nv = inv(xd2_v)

# use the reflection identity to calculate gamma(-v)
# use relation gamma(v)*v = gamma(v+1) to avoid two gamma calls
gam_v = gamma(v)
gam_nv = π / (sinpi(-abs(v)) * gam_v * v)
gam_1mv = -gam_nv * v
gam_1mnv = gam_v * v

_t1 = gam_v * xd2_nv * gam_1mv
_t2 = gam_nv * xd2_v * gam_1mnv
(xd2_pow, fact_k, out) = (one(S), one(S), zero(S))
for k in 0:MaxIter
t1 = xd2_pow * T(0.5)
tmp = muladd(_t1, gam_1mnv, _t2 * gam_1mv)
tmp *= inv(gam_1mv * gam_1mnv * fact_k)
term = t1 * tmp
out += term
# stop once the newest term is negligible relative to the running sum
abs(term / out) < eps(T) && break
(gam_1mnv, gam_1mv) = (gam_1mnv*(one(S) + v + k), gam_1mv*(one(S) - v + k))
xd2_pow *= zz
fact_k *= k + one(S)
# ---- added side of the hunk (post-change body) ------------------------------
# orders within `Math.isnearint` of an integer are handed to the dedicated
# `besselk_power_series_int` routine before any gamma(v)-based work is done
Math.isnearint(v) && return besselk_power_series_int(v, x)
MaxIter = 5000
gam = gamma(v)
# same expression the removed code labels as gamma(-v) via the reflection identity
ngam = π / (sinpi(-abs(v)) * gam * v)

# s1/s2 accumulate the two series; t1/t2 hold the current term of each
s1, s2 = zero(T), zero(T)
t1, t2 = one(T), one(T)

for k in 1:MaxIter
s1 += t1
s2 += t2
t1 *= x^2 / (4k * (k - v))
t2 *= x^2 / (4k * (k + v))
# convergence is tested on t1 only (absolute size against eps(T))
abs(t1) < eps(T) && break
# context line shared by both sides: closes the summation loop
end
# removed side: return of the pre-change accumulator
return out

# added side: combine the two partial sums with the (x/2)^v prefactor
xpv = (x/2)^v
s = gam * s1 + xpv^2 * ngam * s2
return s / (2*xpv)
end
# Predicate on `(nu, x)`: true when `x` is below a fixed threshold, or when `nu`
# lies above a line in `x`. Thresholds are precision-specific, one method each
# for Float64 and Float32.
function besselk_power_series_cutoff(nu, x::Float64)
    x < 2.0 && return true
    return nu > 1.6 * x - 1.0
end
function besselk_power_series_cutoff(nu, x::Float32)
    x < 10.0f0 && return true
    return nu > 1.65f0 * x - 8.0f0
end
Expand Down Expand Up @@ -578,15 +565,16 @@ end
@generated function besselkx_levin(v, x::T, ::Val{N}) where {T <: FloatTypes, N}
:(
begin
s_0 = zero(T)
s = zero(T)
t = one(T)
@nexprs $N i -> begin
s_{i} = s_{i-1} + t
t *= (4*v^2 - (2i - 1)^2) / (8 * x * i)
w_{i} = 1 / t
end
sequence = @ntuple $N i -> s_{i}
weights = @ntuple $N i -> w_{i}
s += t
t *= (4*v^2 - (2i - 1)^2) / (8 * x * i)
s_{i} = s
w_{i} = t
heltonmc marked this conversation as resolved.
Show resolved Hide resolved
end
sequence = @ntuple $N i -> s_{i}
weights = @ntuple $N i -> w_{i}
heltonmc marked this conversation as resolved.
Show resolved Hide resolved
return levin_transform(sequence, weights) * sqrt(π / 2x)
end
)
Expand Down Expand Up @@ -614,3 +602,68 @@ end
end
)
end

# This is an expansion of the function
#
# f_0(v, x) = (x^v)*gamma(-v) + (x^(-v))*gamma(v)
# = (x^v)*(gamma(-v) + (x^(-2*v))*gamma(v))
#
# around v ∼ 0. As you can see by plugging that second form into Wolfram Alpha
# and getting an expansion back, this is actually a bivariate polynomial in
# (v^2, log(x)). So that's how this is structured.
# Evaluate the local expansion described in the comment above: three
# polynomials in `log(x)` (of degree 1, 3 and 5) serve as the coefficients of
# a quadratic polynomial in `v^2`, and the result is halved.
@inline function f0_local_expansion_v0(v, x)
    logx = log(x)
    vsq = v * v
    # coefficient tuples are in ascending powers of log(x)
    k0 = (-1.1544313298030657, -2.0)
    k2 = (1.4336878944573288, -1.978111990655945, -0.5772156649015329,
          -0.3333333333333333)
    k4 = (-0.6290784463642211, -1.4584260788225176, -0.23263776388631713,
          -0.32968533177599085, -0.048101305408461074, -0.016666666666666666)
    coeffs = (evalpoly(logx, k0), evalpoly(logx, k2), evalpoly(logx, k4))
    return evalpoly(vsq, coeffs) / 2
end

# This function assumes |v| < 1e-6 or 1e-7!
#
# TODO (cg 2023/05/16 18:07): lots of micro-optimizations.
# Sum two coupled series for a small order `v` and return them as a tuple
# `(out_v, out_vp1 / (x/2))`. The caller in this file binds the result to
# `(kv, kvp1)`, i.e. presumably the values at orders `v` and `v + 1`.
#
# Each pass adds `ck * fk` to the first sum and `ck * (pk - (k-1)*fk)` to the
# second, where `ck = (x/2)^(2(k-1)) / (k-1)!`, then advances `fk`, `pk`, `qk`
# by their recurrences. At most 50 terms are summed; the loop stops early once
# both newly added terms are below `eps(T)` in absolute value.
function besselk_power_series_temme_basal(v::V, x::Float64) where {V}
    T = promote_type(V, Float64)
    half_x = x / 2
    half_x_sq = half_x * half_x
    v_sq = v * v
    tol = eps(T)

    f = f0_local_expansion_v0(v, half_x)
    pow_pos = half_x^v
    pow_neg = inv(pow_pos)
    # NOTE(review, heltonmc): make sure these two `gamma_near_1` calls
    # auto-vectorize; an `@inline` on their definition may help SIMD. Evaluating
    # both directly is expected to be faster than using the reflection formula.
    p = pow_neg * GammaFunctions.gamma_near_1(1 + v) / 2
    q = pow_pos * GammaFunctions.gamma_near_1(1 - v) / 2

    numer = one(T)    # running (x/2)^(2(k-1))
    denom = one(T)    # running (k-1)!
    sum_v = zero(T)
    sum_vp1 = zero(T)
    for k in 1:50
        # add the current terms to both series
        coef = numer / denom
        add_v = coef * f
        add_vp1 = coef * (p - (k - 1) * f)
        sum_v += add_v
        sum_vp1 += add_vp1
        # converged only when BOTH new terms are negligible
        if abs(add_v) < tol && abs(add_vp1) < tol
            break
        end
        # advance the recurrences; `f` must be updated before `p` and `q`
        f = (k * f + p + q) / (k^2 - v_sq)
        p /= (k - v)
        q /= (k + v)
        numer *= half_x_sq
        denom *= k
    end
    return (sum_v, sum_vp1 / half_x)
end

# Evaluate the power-series routine for an order `v` that is close to an
# integer. `v` is split into its nearest integer and a small signed remainder;
# `besselk_power_series_temme_basal` is evaluated at the remainder, and the
# pair it returns is stepped upward with the three-term recurrence
# `new = (2/x) * order * current + previous` until the requested order is
# reached.
#
# Negative orders are mapped to `-v` first.
#
# NOTE(review, heltonmc): an unconditional `v = abs(v)` at the top and `modf`
# for the integer/remainder split were suggested as alternatives; the sign
# check is redundant if callers already guarantee a non-negative order.
function besselk_power_series_int(v, x::Float64)
    v < zero(v) && return besselk_power_series_int(-v, x)
    # Split at the NEAREST integer, not the floor. With `floor`, an order just
    # below an integer (e.g. 2 - 1e-8) leaves a remainder close to 1, which
    # breaks the small-|v| assumption of the basal routine and, for
    # 1/2 < v < 1, made this function return the value for the wrong order.
    nearest = Int(round(v))
    order = v - nearest
    (kv, kvp1) = besselk_power_series_temme_basal(order, x)
    # order rounds to zero: the first component is already the answer
    iszero(nearest) && return kv
    twodx = 2 / x
    # `kvp1` starts at order `order + 1`; each pass raises it by one
    for _ in 1:(nearest - 1)
        order += 1
        (kv, kvp1) = (kvp1, muladd(twodx * order, kvp1, kv))
    end
    return kvp1
end

2 changes: 2 additions & 0 deletions src/GammaFunctions/gamma.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,5 @@ function gamma(n::Integer)
n > 20 && return gamma(float(n))
@inbounds return Float64(factorial(n-1))
end

"""
    gamma_near_1(x)

Approximate `gamma(x)` for `x` very close to 1 with the cubic Taylor polynomial
of the gamma function about 1, evaluated in `x - 1`.

The coefficients are `1`, `-γ`, `(γ^2 + π^2/6)/2` and
`-(γ^3 + γ*π^2/2 + 2ζ(3))/6`, where `γ` is the Euler–Mascheroni constant. The
truncation error is of order `(x - 1)^4`, so this is only meant for arguments a
tiny distance from 1.
"""
@inline gamma_near_1(x) = evalpoly(x - one(x), (1.0, -0.5772156649015329, 0.9890559953279725, -0.9074790760808863))
8 changes: 6 additions & 2 deletions src/Math/Math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,12 @@ end
#@inline levin_scale(B::T, n, k) where T = -(B + n) * (B + n + k)^(k - one(T)) / (B + n + k + one(T))^k
# Scale factor applied to `a_{i}` in the recursion of `levin_transform`:
#
#     -(B+n+k)(B+n+k-1) / ((B+n+2k)(B+n+2k-1))
@inline function levin_scale(B::T, n, k) where T
    upper = B + n + k
    lower = B + n + 2k
    return -upper * (upper - 1) / (lower * (lower - 1))
end

@inline @generated function levin_transform(s::NTuple{N, T}, w::NTuple{N, T}) where {N, T <: FloatTypes}
@inline @generated function levin_transform(s::NTuple{N, T},
w::NTuple{N, T}) where {N, T <: FloatTypes}
len = N - 1
:(
begin
@nexprs $N i -> a_{i} = Vec{2, T}((s[i] * w[i], w[i]))
@nexprs $N i -> a_{i} = iszero(w[i]) ? (return s[i]) : Vec{2, T}((s[i] / w[i], 1 / w[i]))
@nexprs $len k -> (@nexprs ($len-k) i -> a_{i} = fmadd(a_{i}, levin_scale(one(T), i, k-1), a_{i+1}))
return (a_1[1] / a_1[2])
end
Expand All @@ -153,4 +154,7 @@ end
)
end

# TODO (cg 2023/05/16 18:09): dispute this cutoff.
cgeoga marked this conversation as resolved.
Show resolved Hide resolved
# Return `true` when `x` lies within 1e-7 (absolute) of its nearest integer.
function isnearint(x)
    nearest = round(x)
    return abs(x - nearest) < 1e-7
end

end
Loading