Skip to content

Commit

Permalink
Agressive debugging, especially of GC
Browse files Browse the repository at this point in the history
  • Loading branch information
lukstafi committed Dec 14, 2024
1 parent 7d3eeba commit 586c9d4
Show file tree
Hide file tree
Showing 10 changed files with 34 additions and 24 deletions.
6 changes: 3 additions & 3 deletions arrayjit/lib/assignments.ml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ let sequence l =
{ asgns = sts; embedded_nodes = embs } { asgns = another_st; embedded_nodes = emb } ->
{ asgns = Seq (sts, another_st); embedded_nodes = Set.union embs emb })

let%diagn1_sexp to_low_level code =
let%diagn2_sexp to_low_level code =
let open Indexing in
let get buffer idcs =
let tn = match buffer with Node tn -> tn | Merge_buffer tn -> tn in
Expand Down Expand Up @@ -362,9 +362,9 @@ let fprint_hum ?name ?static_indices () ppf c =
loop c;
fprintf ppf "@]"

let lower ~unoptim_ll_source ~ll_source ~cd_source ~name static_indices (proc : t) :
let%track6_sexp lower ~unoptim_ll_source ~ll_source ~cd_source ~name static_indices (proc : t) :
Low_level.optimized =
let llc = to_low_level proc in
let llc: Low_level.t = to_low_level proc in
(* Generate the low-level code before outputting the assignments, to force projections. *)
(match cd_source with
| None -> ()
Expand Down
4 changes: 3 additions & 1 deletion arrayjit/lib/backend_impl.ml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ module No_device_buffer_and_copying () :
let get_used_memory () = Atomic.get used_memory

let alloc_impl ~size_in_bytes =
let finalize _ptr = ignore (Atomic.fetch_and_add used_memory ~-size_in_bytes : int) in
let%track7_l_sexp finalize (_ptr : buffer_ptr) : unit =
ignore (Atomic.fetch_and_add used_memory ~-size_in_bytes : int)
in
let ptr = Ctypes.(to_voidp @@ allocate_n int8_t ~count:size_in_bytes) in
let _ : int = Atomic.fetch_and_add used_memory size_in_bytes in
Stdlib.Gc.finalise finalize ptr;
Expand Down
16 changes: 10 additions & 6 deletions arrayjit/lib/backends.ml
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,10 @@ module Add_buffer_retrieval_and_syncing (Backend : No_buffer_retrieval_or_syncin
Hashtbl.clear s.updating_for)
end

let lower_assignments ?name bindings asgns =
let name = Option.value_or_thunk name ~default:(fun () -> Assignments.get_name_exn asgns) in
let%track6_sexp lower_assignments ?name bindings asgns =
let name : string =
Option.value_or_thunk name ~default:(fun () -> Assignments.get_name_exn asgns)
in
let unoptim_ll_source = Utils.get_debug_formatter ~fname:(name ^ "-unoptimized.ll") in
let ll_source = Utils.get_debug_formatter ~fname:(name ^ ".ll") in
let cd_source = Utils.get_debug_formatter ~fname:(name ^ ".cd") in
Expand Down Expand Up @@ -319,9 +321,11 @@ module Raise_backend (Device : Lowered_backend) : Backend = struct
[@@deriving sexp_of]

let%debug3_sexp compile ?name bindings (comp : Assignments.comp) : code =
let name, lowered = lower_assignments ?name bindings comp.Assignments.asgns in
let code = compile ~name bindings lowered in
let from_prior_context =
let (name : string), (lowered : Low_level.optimized) =
lower_assignments ?name bindings comp.Assignments.asgns
in
let code : Device.code = compile ~name bindings lowered in
let from_prior_context : Tn.t_set =
Set.diff (Assignments.context_nodes ~use_host_memory comp.asgns) comp.embedded_nodes
in
{ from_prior_context; name; lowered; code; expected_merge_node = lowered.Low_level.merge_node }
Expand Down Expand Up @@ -500,7 +504,7 @@ let finalize (type buffer_ptr dev runner event)
&& not (Hashtbl.mem ctx.stream.device.cross_stream_candidates key)
then mem_free ctx.stream data)))

let fresh_backend ?backend_name ?(config = Only_devices_parallel) () =
let%track5_sexp fresh_backend ?backend_name ?(config = Only_devices_parallel) () =
let backend =
match
Option.value_or_thunk backend_name ~default:(fun () ->
Expand Down
3 changes: 2 additions & 1 deletion arrayjit/lib/cc_backend.ml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ let c_compile_and_load ~f_name =
invalid_arg errors);
(* Note: RTLD_DEEPBIND not available on MacOS. *)
let result = { lib = Dl.dlopen ~filename:libname ~flags:[ RTLD_NOW ]; libname } in
Stdlib.Gc.finalise (fun lib -> Dl.dlclose ~handle:lib.lib) result;
let%track7_l_sexp finalize (lib : library) : unit = Dl.dlclose ~handle:lib.lib in
Stdlib.Gc.finalise finalize result;
result

module C_syntax_config (Input : sig
Expand Down
2 changes: 1 addition & 1 deletion arrayjit/lib/cuda_backend.cudajit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ let%track3_sexp cleanup_device (device : device) =
Hashtbl.iter device.cross_stream_candidates ~f:(fun buffer_ptr ->
Cu.Deviceptr.mem_free buffer_ptr)

let%track5_sexp finalize_device device =
let%track5_l_sexp finalize_device (device : device) =
if Atomic.compare_and_set device.released false true then cleanup_device device

let%track3_sexp get_device ~(ordinal : int) : device =
Expand Down
3 changes: 2 additions & 1 deletion arrayjit/lib/gcc_backend.gccjit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -601,8 +601,9 @@ let compile ~(name : string) bindings (lowered : Low_level.optimized) =
(if Utils.settings.output_debug_files_in_build_directory then
let f_name = Utils.build_file @@ name ^ "-gccjit-debug.c" in
Context.dump_to_file ctx ~update_locs:true f_name);
let%track7_l_sexp finalize result = Result.release result in
let result = Context.compile ctx in
Stdlib.Gc.finalise Result.release result;
Stdlib.Gc.finalise finalize result;
Context.release ctx;
{ info; result; bindings; name; params = List.map ~f:snd params }

Expand Down
2 changes: 1 addition & 1 deletion arrayjit/lib/low_level.ml
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ let visit_llc traced_store ~merge_node_id reverse_node_map ~max_visits llc =
Tn.update_memory_mode tn (Hosted (Changed_on_devices Unset)) 38
else Tn.update_memory_mode tn Materialized 36))

let%diagn_sexp check_and_store_virtual traced static_indices top_llc =
let%diagn2_sexp check_and_store_virtual traced static_indices top_llc =
let exception Non_virtual of int in
let static_indices =
Set.of_list (module Indexing.Symbol)
Expand Down
6 changes: 3 additions & 3 deletions arrayjit/lib/ndarray.ml
Original file line number Diff line number Diff line change
Expand Up @@ -386,15 +386,15 @@ let create_array ~debug:_debug prec ~dims init_op =
let size_in_bytes =
(if Array.length dims = 0 then 0 else Array.reduce_exn dims ~f:( * )) * Ops.prec_in_bytes prec
in
let%diagn2_sexp finalizer _result =
let%track7_l_sexp finalizer (_result : t) =
let _ : int = Atomic.fetch_and_add used_memory size_in_bytes in
[%log "Deleting", _debug, ptr_to_string_hum _result]
[%log3 "Deleting", _debug, ptr_to_string_hum _result]
in
let f prec = as_array prec @@ create_bigarray prec ~dims init_op in
let result = Ops.map_prec { f } prec in
Stdlib.Gc.finalise finalizer result;
let _ : int = Atomic.fetch_and_add used_memory size_in_bytes in
[%debug2_sexp
[%debug3_l_sexp
[%log_block
"create_array";
[%log _debug, ptr_to_string_hum result]]];
Expand Down
14 changes: 8 additions & 6 deletions arrayjit/lib/schedulers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -191,25 +191,27 @@ module Multicore (Backend : For_add_scheduler) :
reader_streams = Hashtbl.create (module Tnode);
})

module Dynarr = Stdlib.Dynarray

let num_devices () = 1
let suggested_num_streams _device = Domain.recommended_domain_count () - 1

let cleanup_stream (stream : stream) =
assert (Domain.is_main_domain ());
let%track7_l_sexp cleanup_stream (stream : stream) : unit =
(* Allow running in parallel. *)
(* assert (Domain.is_main_domain ()); *)
[%log "cleanup_stream: await stream"];
await stream;
let r = stream.runner in
let r : runner = stream.runner in
r.state.keep_spinning <- false;
[%log "cleanup_stream: broadcasting r.state.dev_wait_for_work to wake up the worker"];
Stdlib.Condition.broadcast r.state.dev_wait_for_work;
[%log "cleanup_stream: joining the domain"];
Domain.join r.domain

let get_device ~ordinal =
if ordinal <> 0 then
invalid_arg [%string "Multicore_scheduler.get_device %{ordinal#Int}: only device 0 exists"];
device

let new_stream _device =
let%track5_sexp new_stream _device =
assert (Domain.is_main_domain ());
let stream = spinup_stream () in
Stdlib.Gc.finalise cleanup_stream stream;
Expand Down
2 changes: 1 addition & 1 deletion lib/tensor.ml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ type session_state = {
let session_state =
{ next_id = 0; forward_roots = Map.empty (module Int); backprop_roots = Map.empty (module Int) }

let unsafe_reinitialize () =
let%track5_sexp unsafe_reinitialize () =
session_state.next_id <- 0;
session_state.forward_roots <- Map.empty (module Int);
session_state.backprop_roots <- Map.empty (module Int);
Expand Down

0 comments on commit 586c9d4

Please sign in to comment.