From 391edf49e5caf44a9397f45c37358a02492593bb Mon Sep 17 00:00:00 2001
From: Chris Geoga <cgeoga@protonmail.com>
Date: Wed, 4 Jan 2023 16:22:50 -0500
Subject: [PATCH] Change the SAA vectors in the EM demo to be stored as a
 BitArray, reducing 450MiB of serialized data to 5MiB.

---
 paperscripts/EM/fit.jl    | 2 +-
 paperscripts/EM/setup.jl  | 9 ++++-----
 paperscripts/EM/shared.jl | 5 +++++
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/paperscripts/EM/fit.jl b/paperscripts/EM/fit.jl
index 925db92..4354eb9 100644
--- a/paperscripts/EM/fit.jl
+++ b/paperscripts/EM/fit.jl
@@ -17,7 +17,7 @@ end
 
 function extract_saa(j)
   _saa  = deserialize("./data/saa.jls") # v1.7 serialization!
-  _saa[:,:,j]
+  booltosgn.(_saa[:,:,j])
 end
 
 # Wrapping in a function just to be obviously sure that there is no global scope
diff --git a/paperscripts/EM/setup.jl b/paperscripts/EM/setup.jl
index 226f511..b495511 100644
--- a/paperscripts/EM/setup.jl
+++ b/paperscripts/EM/setup.jl
@@ -15,13 +15,12 @@ function simulate(init_seed, n, true_parms, nsamp)
   (pts=points, data=data)
 end
 
-# TODO (cg 2023/01/04 10:25): What I should have done here is saved this as a
-# BitArray and then converted on the fly to random signs. That would have turned
-# 400 MiB of disk to 4 MiB of disk. But at this point I am not interested to
-# play with this code. 
+# Note that this comes out as a BitArray now to save space. Since all we need is
+# the sign (each entry is +1 or -1), we can do that with one bit. And so using a
+# BitArray instead of storing them as floats turns 450 MiB into 5 MiB. 
 function generate_saa(init_seed, n, m, l)
   rng = StableRNG(init_seed)
-  rand(rng, (-1.0, 1.0), n, m, l)
+  BitArray(sgntobool.(rand(rng, (-1.0, 1.0), n, m, l)))
 end
 
 if !isinteractive()
diff --git a/paperscripts/EM/shared.jl b/paperscripts/EM/shared.jl
index b1ccb3d..e998b49 100644
--- a/paperscripts/EM/shared.jl
+++ b/paperscripts/EM/shared.jl
@@ -11,6 +11,11 @@ if !isinteractive()
   end
 end
 
+# Just two little trick functions to work with the SAA vectors that have been
+# stored in a BitArray. Your computer's allocator will thank me.
+sgntobool(x) = x > zero(x)
+booltosgn(x) = x ? 1.0 : -1.0
+
 function kernel_nonugget(x, y, p)
   (sg2, rho, nu, nug2) = p
   scaledist  = norm(x-y)/rho