Skip to content

Commit

Permalink
Remove some comptime stuff that wasn't really necessary for perf in the
Browse files Browse the repository at this point in the history
rchol.
  • Loading branch information
Chris Geoga committed Apr 3, 2023
1 parent 55a7b47 commit 3143d9d
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
7 changes: 3 additions & 4 deletions src/rcholesky.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,15 @@ end
# TODO (cg 2022/05/30 11:05): Continually look to squeeze allocations out of
# here. Maybe I can pre-allocate things for the BLAS calls, even?
function rchol_instantiate!(strbuf::RCholesky{T}, V::VecchiaConfig{H,D,F},
params::AbstractVector{T},
::Val{Z}, ::Val{N}) where{H,D,F,T,Z,N}
params::AbstractVector{T}, ::Val{Z}) where{H,D,F,T,Z}
checkthreads()
@assert !strbuf.is_instantiated[] RCHOL_INSTANTIATE_ERROR
strbuf.is_instantiated[] = true
kernel = V.kernel
cpts_sz = V.chunksize*V.blockrank
pts_sz = V.chunksize
# allocate three buffers:
bufs = allocate_crchol_bufs(Val(N), Val(D), Val(Z), cpts_sz, pts_sz)
bufs = allocate_crchol_bufs(Threads.nthreads(), Val(D), Val(Z), cpts_sz, pts_sz)
# do the main loop:
m = cld(length(V.condix), Threads.nthreads())
@sync for (i, chunk) in enumerate(Iterators.partition(1:length(V.condix), m))
Expand Down Expand Up @@ -115,7 +114,7 @@ function rchol(V::VecchiaConfig{H,D,F}, params::AbstractVector{T};
# compute the out type to pass in as a Val (the thread count is no longer a Val):
Z = promote_type(H, T)
N = Threads.nthreads()
rchol_instantiate!(out, V, params, Val(Z), Val(N))
rchol_instantiate!(out, V, params, Val(Z))
out
end

Expand Down
8 changes: 8 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,14 @@ end
end
end

# I think I went a little overboard with the compile-time stuff above. This
# allocation happens once in a function call that will take a long time for real
# problems, so it doesn't seem worth the compiler stress.
#
# Returns a Vector with `n` entries, each one the result of its own call to
# `crcholbuf(Val(D), Val(Z), cpts_sz, pts_sz)`. Here `n` is an ordinary runtime
# integer (the caller in rcholesky.jl passes `Threads.nthreads()`), while `D`
# and `Z` are still lifted to the type domain via `Val` and forwarded unchanged.
#
# `n` is annotated as `Integer` rather than `Int64` so that the method also
# applies when the count is a native `Int` on a 32-bit platform (`Int32`).
function allocate_crchol_bufs(n::Integer, ::Val{D}, ::Val{Z},
                              cpts_sz, pts_sz) where{D,Z}
  [crcholbuf(Val(D), Val(Z), cpts_sz, pts_sz) for _ in 1:n]
end

function pretty_print_number(x)
if x < zero(x)
@printf "-%06.3f " abs(x)
Expand Down

0 comments on commit 3143d9d

Please sign in to comment.