Skip to content

Commit

Permalink
variability change to gpu code
Browse files Browse the repository at this point in the history
  • Loading branch information
palumbom committed Nov 15, 2024
1 parent 5ebc758 commit 287fec6
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 55 deletions.
20 changes: 11 additions & 9 deletions src/GRASS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,12 @@ import Polynomials: fit as pfit, coeffs
import Base: AbstractArray as AA
import Base: AbstractFloat as AF

# configure directories
include("config.jl")

# get kernels for SPICE stuff
include("get_kernels.jl")

#set required body paramters as global variables
#E,S,M radii (units:km)
earth_radius = bodvrd("EARTH", "RADII")[1]
Expand All @@ -45,12 +50,12 @@ sun_radius = bodvrd("SUN","RADII")[1]
moon_radius = bodvrd("MOON", "RADII")[1]

#collect LD info as global variables - (units: nm)
quad_ld_coeff_SSD = CSV.read("data/quad_ld_coeff_SSD.csv", DataFrame)
quad_ld_coeff_300 = CSV.read("data/quad_ld_coeff_300.csv", DataFrame)
quad_ld_coeff_HD = CSV.read("data/quad_ld_coeff_HD.csv", DataFrame)
quad_ld_coeff_NL94 = CSV.read("data/quad_ld_coeff_NL94.csv", DataFrame)
quad_ld_coeff_SSD = CSV.read(joinpath(datdir, "quad_ld_coeff_SSD.csv"), DataFrame)
quad_ld_coeff_300 = CSV.read(joinpath(datdir, "quad_ld_coeff_300.csv"), DataFrame)
quad_ld_coeff_HD = CSV.read(joinpath(datdir, "quad_ld_coeff_HD.csv"), DataFrame)
quad_ld_coeff_NL94 = CSV.read(joinpath(datdir, "quad_ld_coeff_NL94.csv"), DataFrame)

NL94_coeff = CSV.read("data/NL94_coeff.csv", DataFrame)
NL94_coeff = CSV.read(joinpath(datdir, "NL94_coeff.csv"), DataFrame)
lambda_nm = NL94_coeff.lambda_nm
a0 = NL94_coeff.a0
a1 = NL94_coeff.a1
Expand All @@ -59,10 +64,7 @@ a3 = NL94_coeff.a3
a4 = NL94_coeff.a4
a5 = NL94_coeff.a5

extinction_coeff = DataFrame(CSV.File("data/NEID_three_extinction.csv"))

# configure directories
include("config.jl")
extinction_coeff = DataFrame(CSV.File(joinpath(datdir, "NEID_three_extinction.csv")))

# ancillary functions + constants
include("utils.jl")
Expand Down
39 changes: 16 additions & 23 deletions src/get_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,28 @@ const BPC = "https://naif.jpl.nasa.gov/pub/naif/generic_kernels/pck/moon_pa_de44
const EARTH_BPC = "https://naif.jpl.nasa.gov/pub/naif/generic_kernels/pck/earth_latest_high_prec.bpc"
const EARTH_default = "https://naif.jpl.nasa.gov/pub/naif/generic_kernels/fk/planets/earth_assoc_itrf93.tf"
const TPC = "https://naif.jpl.nasa.gov/pub/naif/generic_kernels/pck/pck00010.tpc"
const moddir = abspath(joinpath(@__DIR__, ".."))
const datdir = joinpath(moddir, "data/")

if !isdir(datdir)
mkdir(datdir)
end

# function to download and load kernels
function get_kernels()
# Download kernels
if !isfile(joinpath(datdir, "de440.bsp")); download(SPK, datdir * "de440.bsp"); end
if !isfile(datdir * "naif0012.tls"); download(LSK, datdir * "naif0012.tls"); end
if !isfile(datdir * "pck00010.tpc"); download(TPC, datdir * "pck00010.tpc"); end
if !isfile(datdir * "jup365.bsp"); download(SPK_JUP, datdir * "jup365.bsp"); end
if !isfile(datdir * "moon_pa_de440_200625.bpc"); download(BPC, datdir * "moon_pa_de440_200625.bpc"); end
if !isfile(datdir * "earth_latest_high_prec.bpc"); download(EARTH_BPC, datdir * "earth_latest_high_prec.bpc"); end
if !isfile(datdir * "earth_assoc_itrf93.tf"); download(EARTH_default, datdir * "earth_assoc_itrf93.tf"); end

if !isfile(joinpath(datdir, "de440.bsp")); download(SPK, joinpath(datdir, "de440.bsp")); end
if !isfile(joinpath(datdir, "naif0012.tls")); download(LSK, joinpath(datdir, "naif0012.tls")); end
if !isfile(joinpath(datdir, "pck00010.tpc")); download(TPC, joinpath(datdir, "pck00010.tpc")); end
if !isfile(joinpath(datdir, "jup365.bsp")); download(SPK_JUP, joinpath(datdir, "jup365.bsp")); end
if !isfile(joinpath(datdir, "moon_pa_de440_200625.bpc")); download(BPC, joinpath(datdir, "moon_pa_de440_200625.bpc")); end
if !isfile(joinpath(datdir, "earth_latest_high_prec.bpc")); download(EARTH_BPC, joinpath(datdir, "earth_latest_high_prec.bpc")); end
if !isfile(joinpath(datdir, "earth_assoc_itrf93.tf")); download(EARTH_default, joinpath(datdir, "earth_assoc_itrf93.tf")); end

# load into memory
furnsh(datdir * "naif0012.tls")
furnsh(datdir * "de440.bsp")
furnsh(datdir * "moon_pa_de440_200625.bpc")
furnsh(datdir * "pck00010.tpc")
furnsh(datdir * "earth_latest_high_prec.bpc")
furnsh(datdir * "earth_assoc_itrf93.tf")
furnsh(datdir * "jup365.bsp")
# furnsh(datdir * "eclipse_SPK.bsp")
furnsh(datdir * "ID_mapping.txt")
furnsh(joinpath(datdir, "naif0012.tls"))
furnsh(joinpath(datdir, "de440.bsp"))
furnsh(joinpath(datdir, "moon_pa_de440_200625.bpc"))
furnsh(joinpath(datdir, "pck00010.tpc"))
furnsh(joinpath(datdir, "earth_latest_high_prec.bpc"))
furnsh(joinpath(datdir, "earth_assoc_itrf93.tf"))
furnsh(joinpath(datdir, "jup365.bsp"))
# furnsh(joinpath(datdir, "eclipse_SPK.bsp"))
furnsh(joinpath(datdir, "ID_mapping.txt"))

return nothing
end
Expand Down
33 changes: 21 additions & 12 deletions src/gpu/gpu_sim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,27 @@ function disk_sim_gpu(spec::SpecParams{T1}, disk::DiskParams{T1}, soldata::GPUSo
threads5 = 1024
blocks5 = cld(CUDA.length(prof), prod(threads5))

# allocate arrays for fresh copy of input data to copy to each loop
# allocate destinations for interpolations
@cusync begin
bisall_gpu_loop = CUDA.zeros(T2, CUDA.size(bisall_gpu))
intall_gpu_loop = CUDA.zeros(T2, CUDA.size(intall_gpu))
widall_gpu_loop = CUDA.zeros(T2, CUDA.size(widall_gpu))
bisall_gpu_loop = CUDA.copy(bisall_gpu)
intall_gpu_loop = CUDA.copy(intall_gpu)
widall_gpu_loop = CUDA.copy(widall_gpu)
end

# allocate memory for means
@cusync begin
bisall_mean = CUDA.zeros(CUDA.eltype(bisall_gpu_loop), 100, CUDA.size(bisall_gpu_loop, 3))
intall_mean = CUDA.zeros(CUDA.eltype(intall_gpu_loop), 100, CUDA.size(intall_gpu_loop, 3))
widall_mean = CUDA.zeros(CUDA.eltype(widall_gpu_loop), 100, CUDA.size(widall_gpu_loop, 3))
end

threads6 = (4, 16)
blocks6 = cld(length(lenall_gpu) * 100, prod(threads6))

@cusync @cuda threads=threads6 blocks=blocks6 time_average_bis!(lenall_gpu, bisall_mean, intall_mean,
widall_mean, bisall_gpu, intall_gpu,
widall_gpu)

# get weighted disk average cbs
@cusync sum_wts = CUDA.sum(wts)
@cusync z_cbs_avg = CUDA.sum(z_cbs .* wts) / sum_wts
Expand All @@ -75,19 +89,14 @@ function disk_sim_gpu(spec::SpecParams{T1}, disk::DiskParams{T1}, soldata::GPUSo

# loop over lines to synthesize
for l in eachindex(spec.lines)
# get a fresh copy of the untrimmed bisector + width data
@cusync begin
CUDA.copyto!(bisall_gpu_loop, bisall_gpu)
CUDA.copyto!(intall_gpu_loop, intall_gpu)
CUDA.copyto!(widall_gpu_loop, widall_gpu)
end

# trim all the bisector data
@cusync @cuda threads=threads2 blocks=blocks2 trim_bisector_gpu!(spec.depths[l], spec.variability[l],
depcontrast_gpu, lenall_gpu,
bisall_gpu_loop, intall_gpu_loop,
widall_gpu_loop, bisall_gpu,
intall_gpu, widall_gpu)
intall_gpu, widall_gpu,
bisall_mean, intall_mean,
widall_mean)

# assemble line shape on even int grid
@cusync @cuda threads=threads3 blocks=blocks3 fill_workspaces!(spec.lines[l], spec.variability[l],
Expand Down
75 changes: 64 additions & 11 deletions src/gpu/gpu_trim.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
function trim_bisector_gpu!(depth, variability, depcontrast, lenall, bisall_out,
intall_out, widall_out, bisall_in, intall_in, widall_in)
intall_out, widall_out, bisall_in, intall_in, widall_in,
bisall_mean, intall_mean, widall_mean)
# get indices from GPU blocks + threads
idx = threadIdx().x + blockDim().x * (blockIdx().x-1)
sdx = blockDim().x * gridDim().x
Expand All @@ -20,18 +21,19 @@ function trim_bisector_gpu!(depth, variability, depcontrast, lenall, bisall_out,
continue
end

# get views of the correct time slice in input
bist_in = CUDA.view(bisall_in, :, j, i)
intt_in = CUDA.view(intall_in, :, j, i)

# get views of the correct time slice in output
bist_out = CUDA.view(bisall_out, :, j, i)
intt_out = CUDA.view(intall_out, :, j, i)

# get step size for loop over length of bisector
step = dtrim/(CUDA.length(intt_in) - 1)
widt_out = CUDA.view(widall_out, :, j, i)

if variability
# get views of the correct time slice in input
bist_in = CUDA.view(bisall_in, :, j, i)
intt_in = CUDA.view(intall_in, :, j, i)

# get step size for loop over length of bisector
step = dtrim/(CUDA.length(intt_in) - 1)

# set up interpolator
itp = linear_interp_gpu(intt_in, bist_in)

Expand All @@ -44,13 +46,64 @@ function trim_bisector_gpu!(depth, variability, depcontrast, lenall, bisall_out,
@inbounds intt_out[k] = new_intt
end
else
# get views of the correct time slice in input
bist_in = CUDA.view(bisall_mean, :, i)
intt_in = CUDA.view(intall_mean, :, i)
widt_in = CUDA.view(widall_mean, :, i)

# get step size for loop over length of bisector
step = dtrim/(CUDA.length(intt_in) - 1)

# set up interpolator
itp = linear_interp_gpu(intt_in, bist_in)

# loop over the length of the bisector
for k in idz:sdz:CUDA.size(bisall_in, 1)
@inbounds intt_out[k] = (1.0 - dtrim) + (k-1) * step
@inbounds bist_out[k] = 0.0
@inbounds widall_out[k,j,i] = widall_in[k,1,i]
new_intt = (1.0 - dtrim) + (k-1) * step
if (1.0 - dtrim) >= CUDA.first(intt_in)
@inbounds bist_out[k] = itp(new_intt)
end
@inbounds intt_out[k] = new_intt
@inbounds widt_out[k] = widt_in[k]
end
end
end
end
return nothing
end

function time_average_bis!(lenall, bisall_mean, intall_mean, widall_mean, bisall_in, intall_in, widall_in)
# get indices from GPU blocks + threads
idx = threadIdx().x + blockDim().x * (blockIdx().x-1)
sdx = blockDim().x * gridDim().x
idy = threadIdx().y + blockDim().y * (blockIdx().y-1)
sdy = blockDim().y * gridDim().y
# idz = threadIdx().z + blockDim().z * (blockIdx().z-1)
# sdz = blockDim().z * gridDim().z

# loop over disk positions for bisectors
for i in idx:sdx:CUDA.length(lenall)

# loop over intensity in bisector
for k in idy:sdy:CUDA.size(bisall_in, 1)

# holder for mean
bis_sum = CUDA.zero(eltype(bisall_mean))
int_sum = CUDA.zero(eltype(intall_mean))
wid_sum = CUDA.zero(eltype(widall_mean))

# loop over epochs of bisectors
for j in 1:lenall[i]
bis_sum += bisall_in[k, j, i]
int_sum += intall_in[k, j, i]
wid_sum += widall_in[k, j, i]
end

# take the mean and allocate
@inbounds bisall_mean[k, i] = bis_sum / lenall[i]
@inbounds intall_mean[k, i] = int_sum / lenall[i]
@inbounds widall_mean[k, i] = wid_sum / lenall[i]
end
end
return nothing
end

0 comments on commit 287fec6

Please sign in to comment.