-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcompilation_speed.ml
88 lines (81 loc) · 3.07 KB
/
compilation_speed.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
open Base
open Ocannl
module Tn = Arrayjit.Tnode
module IDX = Train.IDX
module CDSL = Train.CDSL
module TDSL = Operation.TDSL
module NTDSL = Operation.NTDSL
module Rand = Arrayjit.Rand.Lib
module type Backend = Arrayjit.Backend_intf.Backend
(* Local shorthand for [Arrayjit.Backends.fresh_backend]; the [benchmarks] list in this file
   calls it with [~backend_name] to obtain the backend module handed to
   [benchmark_overhead]. *)
let fresh_backend = Arrayjit.Backends.fresh_backend
(** [benchmark_overhead backend ()] runs one time/memory benchmark on the first-class
    [Backend] module [backend] and returns a [PrintBox_utils.Benchmark] record.

    The benchmark defines f(x) = 3x^2 - 4x + 5, turns its [fwd_bprop] update into a routine
    once, and then, for each of [n_data] sample values of x, builds a new "assign_x" routine,
    runs it followed by the f routine, and collects the value read from f.

    Side effects: calls [CDSL.disable_all_debugs], calls [Rand.init 0], writes a banner to
    stderr, calls [Tensor.print_tree] on f, and prints a scatter plot of the samples to
    stdout. The trailing [()] parameter lets callers partially apply the backend and run the
    benchmark later. *)
let benchmark_overhead backend () =
(* Number of x samples, and therefore the number of "assign_x" routines built in the loop. *)
let n_data = 20 in
(* Unpack the first-class module so that its members can be referenced as [Backend.xxx]. *)
let module Backend = (val backend : Backend) in
(* Utils.settings.with_debug <- true; *)
(* Utils.settings.output_debug_files_in_build_directory <- true; *)
(* Utils.settings.debug_log_from_routines <- true; *)
CDSL.disable_all_debugs ();
Stdio.prerr_endline @@ "\n\n****** Benchmarking " ^ Backend.name ^ " ******";
Rand.init 0;
(* Start of the timed region. Everything from here up to [final_time] is included in the
   reported time: tensor definition, stream/context setup, every [Train.to_routine] call,
   every routine run, and the construction of [plot_box]. *)
let init_time = Time_now.nanoseconds_since_unix_epoch () in
(* NOTE(review): the inline "x" [ 5 ] presumably declares the tensor x that the rest of this
   function refers to (with 5 as a dimension) -- confirm against the %op extension. *)
let%op f = (3 *. ("x" [ 5 ] **. 2)) - (4 *. x) + 5 in
(* NOTE(review): presumably needed so that the value of f can be read back with f.@[0] in
   the loop below -- confirm. *)
Train.set_hosted f.value;
(* Train.every_non_literal_on_host f; *)
let stream = Backend.(new_stream @@ get_device ~ordinal:0) in
let ctx = Backend.make_context stream in
(* Memory baseline for the device of [stream]; subtracted from the final reading below. *)
let init_mem = Backend.(get_used_memory stream.device) in
(* Only the [fwd_bprop] component of this update is used (see [f_routine]). *)
let update_f = Train.grad_update f in
(* Initialize the context with a mock update of x to ensure that it is not optimized as a
constant. *)
let%cd mock_update_x = x =: 42 in
(* This routine is only built, never run: what matters is its resulting [context], which
   [f_routine] is built against. *)
let init_assign_x =
Train.to_routine (module Backend) ctx ~name:"init_assign_x" IDX.empty mock_update_x
in
let f_routine =
Train.to_routine (module Backend) init_assign_x.context IDX.empty update_f.fwd_bprop
in
Tensor.print_tree ~with_grad:true ~with_backend_info:true ~depth:9 f;
(* Sample points i - n_data / 2 for i = 0 .. n_data - 1, i.e. -10.0 .. 9.0 for n_data = 20. *)
let xs = Array.init n_data ~f:Float.(fun i -> of_int i - (of_int n_data /. 2.)) in
(* NOTE(review): presumably brings the [.@[]] accessor used on f below into scope --
   confirm. *)
let open Operation.At in
(* Note: this compiles entirely fresh code for each step of the loop. *)
let ys =
Array.map xs ~f:(fun v ->
let%cd update_x = x =: !.v in
(* Each "assign_x" routine is built against [f_routine.context], not against the context of
   the previous iteration. *)
let assign_x =
Train.to_routine (module Backend) f_routine.context ~name:"assign_x" IDX.empty update_x
in
(* Set x first, then recompute f, then read the result. *)
Train.run assign_x;
Train.run f_routine;
f.@[0])
in
(* The plot is built inside the timed region but only printed after the measurements. *)
let plot_box =
PrintBox_utils.plot ~small:true ~x_label:"x" ~y_label:"f(x)"
[ Scatterplot { points = Array.zip_exn xs ys; content = PrintBox.line "#" } ]
in
(* End of the timed region. *)
let final_time = Time_now.nanoseconds_since_unix_epoch () in
(* Nanoseconds converted to seconds. *)
let time_in_sec = Int63.(to_float @@ (final_time - init_time)) /. 1000_000_000. in
(* Change in [Backend.get_used_memory] for the device since [init_mem] was taken. *)
let mem_in_bytes = Backend.(get_used_memory stream.device) - init_mem in
let result =
PrintBox_utils.Benchmark
{
bench_title = Backend.name ^ " overhead";
time_in_sec;
mem_in_bytes;
result_label = "x, f(x)";
(* Two (x, f(x)) pairs are reported: the first sample (index 0) and the middle sample
   (index n_data / 2, where x = 0 for the sample grid above). *)
result =
[%sexp_of: (float * float) list]
@@ [ (xs.(0), ys.(0)); (xs.(n_data / 2), ys.(n_data / 2)) ];
}
in
PrintBox_text.output Stdio.stdout plot_box;
Stdio.print_endline "\n";
result
(* The benchmark thunks to run, one per backend, listed in the order in which their results
   are tabulated. Each entry is [benchmark_overhead] partially applied to a backend obtained
   from [fresh_backend]; the benchmark itself only runs once the thunk is given [()]. *)
let benchmarks =
  let on_backend backend_name = benchmark_overhead (fresh_backend ~backend_name ()) in
  [
    (* The "gccjit" backend is currently left out of the run. *)
    on_backend "cc";
    on_backend "cuda";
  ]
(* Entry point: run every benchmark in list order, then print the collected results as a
   single table on stdout. *)
let () =
  let results = List.map benchmarks ~f:(fun run_benchmark -> run_benchmark ()) in
  let summary_table = PrintBox_utils.table results in
  PrintBox_text.output Stdio.stdout summary_table