From 391edf49e5caf44a9397f45c37358a02492593bb Mon Sep 17 00:00:00 2001 From: Chris Geoga Date: Wed, 4 Jan 2023 16:22:50 -0500 Subject: [PATCH] Change the SAA vectors in the EM demo to being stored as a BitArray, reducing 450MiB of serialized data to 5MiB. --- paperscripts/EM/fit.jl | 2 +- paperscripts/EM/setup.jl | 9 ++++----- paperscripts/EM/shared.jl | 5 +++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/paperscripts/EM/fit.jl b/paperscripts/EM/fit.jl index 925db92..4354eb9 100644 --- a/paperscripts/EM/fit.jl +++ b/paperscripts/EM/fit.jl @@ -17,7 +17,7 @@ end function extract_saa(j) _saa = deserialize("./data/saa.jls") # v1.7 serialization! - _saa[:,:,j] + booltosgn.(_saa[:,:,j]) end # Wrapping in a function just to be obviously sure that there is no global scope diff --git a/paperscripts/EM/setup.jl b/paperscripts/EM/setup.jl index 226f511..b495511 100644 --- a/paperscripts/EM/setup.jl +++ b/paperscripts/EM/setup.jl @@ -15,13 +15,12 @@ function simulate(init_seed, n, true_parms, nsamp) (pts=points, data=data) end -# TODO (cg 2023/01/04 10:25): What I should have done here is saved this as a -# BitArray and then converted on the fly to random signs. That would have turned -# 400 MiB of disk to 4 MiB of disk. But at this point I am not interested to -# play with this code. +# Note that this comes out as a BitArray now to save space. Since all we need is +# the sign (each entry is +1 or -1), we can do that with one bit. And so using a +# BitArray instead of storing them as floats turns 450 MiB into 5 MiB. 
function generate_saa(init_seed, n, m, l) rng = StableRNG(init_seed) - rand(rng, (-1.0, 1.0), n, m, l) + BitArray(sgntobool.(rand(rng, (-1.0, 1.0), n, m, l))) end if !isinteractive() diff --git a/paperscripts/EM/shared.jl b/paperscripts/EM/shared.jl index b1ccb3d..e998b49 100644 --- a/paperscripts/EM/shared.jl +++ b/paperscripts/EM/shared.jl @@ -11,6 +11,11 @@ if !isinteractive() end end +# Just two little trick functions to work with the SAA vectors that have been +# stored in a BitArray. Your computer's allocator will thank me. +sgntobool(x) = x > zero(x) +booltosgn(x) = x ? 1.0 : -1.0 + function kernel_nonugget(x, y, p) (sg2, rho, nu, nug2) = p scaledist = norm(x-y)/rho