diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 8ce84acb30901..b96d054651785 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -999,8 +999,7 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN
     return load;
 }
 
-static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false);
-
+static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false, jl_count_box_type log_reason=JL_DONT_LOG_BOX);
 static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull);
 
 static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull)
@@ -2672,6 +2671,7 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
         }
     }
     ++EmittedArraylen;
+    // TODO(PR): decide whether this box should be counted; as written it uses the default JL_DONT_LOG_BOX and is not logged.
     Value *t = boxed(ctx, tinfo);
     Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
             emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
@@ -2723,6 +2723,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo,
 
 static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false)
 {
+    // TODO(PR): decide whether this box should be counted; as written it uses the default JL_DONT_LOG_BOX and is not logged.
     Value *t = boxed(ctx, tinfo);
     return emit_arrayptr_internal(ctx, tinfo, decay_derived(ctx, t), AddressSpace::Loaded, isboxed);
 }
@@ -2809,6 +2810,7 @@ static Value *emit_array_nd_index(
         const jl_cgval_t *argv, size_t nidxs, jl_value_t *inbounds)
 {
     ++EmittedArrayNdIndex;
+    // TODO(PR): decide whether this box should be counted; as written it uses the default JL_DONT_LOG_BOX and is not logged.
     Value *a = boxed(ctx, ainfo);
     Value *i = Constant::getNullValue(getSizeTy(ctx.builder.getContext()));
     Value *stride = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
@@ -3303,7 +3305,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig
 // dynamically-typed value is required (e.g. argument to unknown function).
 // if it's already a pointer it's left alone.
// Returns ctx.types().T_prjlvalue
-static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable)
+static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable, jl_count_box_type log_reason)
 {
     jl_value_t *jt = vinfo.typ;
     if (jt == jl_bottom_type || jt == NULL)
@@ -3320,6 +3322,23 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
         return vinfo.V;
     }
 
+    // Optionally emit a call to a runtime hook that counts this box; the hook
+    // receives the value's Julia type (`jt`). JL_COUNT_BOX_INPUTS selects the
+    // "inputs" hook, any other non-default reason selects the "returns" hook.
+    if (log_reason != JL_DONT_LOG_BOX) {
+        Function *F;
+        if (log_reason == JL_COUNT_BOX_INPUTS) {
+            F = prepare_call(jl_log_box_func_INPUTS);
+        } else {
+            F = prepare_call(jl_log_box_func_RETURNS);
+        }
+        // Both hooks are declared as taking an i8*, so cast the type pointer
+        // to that type to keep the call consistent with the callee signature.
+        ctx.builder.CreateCall(F,
+                emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)jt),
+                             getInt8PtrTy(ctx.builder.getContext())));
+    }
+
     Value *box;
     if (vinfo.TIndex) {
         SmallBitVector skip_none;
diff --git a/src/codegen.cpp b/src/codegen.cpp
index a4773acb3fbea..d2f6d1cce9a47 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -889,6 +889,20 @@ static const auto jlegalx_func = new JuliaFunction{
             AttributeSet(),
             None); },
 };
+static const auto jl_log_box_func_INPUTS = new JuliaFunction{
+    XSTR(jl_nhd_log_box_input),
+    [](LLVMContext &C) {
+        return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false);
+    },
+    nullptr,
+};
+static const auto jl_log_box_func_RETURNS = new JuliaFunction{
+    XSTR(jl_nhd_log_box_return),
+    [](LLVMContext &C) {
+        return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false);
+    },
+    nullptr,
+};
 static const auto jl_alloc_obj_func = new JuliaFunction{
     "julia.gc_alloc_obj",
     [](LLVMContext &C) {
@@ -4072,7 +4086,7 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *t
     if (theF)
         theArgs.push_back(theF);
     for (size_t i = 0; i < nargs; i++) {
-        Value *arg = boxed(ctx, argv[i]);
+        Value *arg = boxed(ctx, argv[i], false, JL_COUNT_BOX_INPUTS); // log the boxes
         theArgs.push_back(arg);
     }
     CallInst *result = ctx.builder.CreateCall(TheTrampoline, theArgs);
@@ -4144,6 +4158,9 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
jl_cgval_t arg = argv[i];
         if (isboxed) {
             assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue);
+            // Note(NHD): I don't think this needs a log: the argument is only boxed when the
+            // callee actually needs a boxed value, for already resolved static dispatch, so
+            // this box cannot be avoided.
             argvals[idx] = boxed(ctx, arg);
         }
         else if (et->isAggregateType()) {
@@ -6723,7 +6740,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
             break;
         }
     }
-    ctx.builder.CreateRet(boxed(ctx, retval));
+    // NOTE(PR): count this box as a boxed return value (JL_COUNT_BOX_RETURNS).
+    ctx.builder.CreateRet(boxed(ctx, retval, false, JL_COUNT_BOX_RETURNS));
     return w;
 }
 
@@ -7618,7 +7636,11 @@ static jl_llvm_functions_t
                 }
             }
             else {
-                Value *argp = boxed(ctx, theArg);
+                // This boxes the args?
+                // TODO(PR): What's this? Logging it didn't seem to have any effect on hash.
+                // I *think* this function is also for static, already resolved dispatch,
+                // so there's nothing that could be avoided here, and nothing to log.
+                Value *argp = boxed(ctx, theArg); // not logged: uses the default JL_DONT_LOG_BOX
                 ctx.builder.CreateStore(argp, vi.boxroot);
             }
         }
@@ -7996,7 +8018,8 @@ static jl_llvm_functions_t
         Type *retty = f->getReturnType();
         switch (returninfo.cc) {
         case jl_returninfo_t::Boxed:
-            retval = boxed(ctx, retvalinfo); // skip the gcroot on the return path
+            // NOTE(PR): counted as a boxed return value; confirm this is the right place to log it.
+            retval = boxed(ctx, retvalinfo, false, JL_COUNT_BOX_RETURNS); // skip the gcroot on the return path
             break;
         case jl_returninfo_t::Register:
             if (type_is_ghost(retty))
@@ -8224,7 +8247,8 @@ static jl_llvm_functions_t
                 else if (VN->getType() == ctx.types().T_prjlvalue) {
                     // Includes the jl_is_uniontype(phiType) && !TindexN case
                     // TODO: if convert_julia_type says it is wasted effort and to skip it, is it worth using Constant::getNullValue(ctx.types().T_prjlvalue) (dynamically)?
-                    V = boxed(ctx, val);
+                    // NOTE(PR): This one is the boxing for hash; it is counted as JL_COUNT_BOX_INPUTS.
+                    V = boxed(ctx, val, false, JL_COUNT_BOX_INPUTS);
                 }
                 else {
                     // must be careful to emit undef here (rather than a bitcast or
diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp
index 1bcbeb2189f5f..ed5c0ddf578d2 100644
--- a/src/gc-alloc-profiler.cpp
+++ b/src/gc-alloc-profiler.cpp
@@ -7,6 +7,7 @@
 
 #include
 #include
+#include
 
 using std::string;
 using std::vector;
@@ -74,10 +75,80 @@ jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT {
     };
 }
 
+#include
+#include
+#include
+
 // == exported interface ==
 
 extern "C" {  // Needed since these functions doesn't take any arguments.
 
+// Running totals of the boxes reported by generated code through the
+// jl_nhd_log_box_input / jl_nhd_log_box_return hooks below.
+// NOTE(review): plain, unsynchronized counters -- if the hooks can run on
+// several threads at once, increments may be lost; confirm whether exact
+// totals are required.
+uint64_t num_boxes_inputs = 0;
+uint64_t num_boxes_returns = 0;
+
+// Returns the sum of both box counters.
+JL_DLLEXPORT uint64_t jl_nhd_total_boxes() {
+    return num_boxes_inputs + num_boxes_returns;
+}
+
+// Returns the number of boxes logged through jl_nhd_log_box_input.
+JL_DLLEXPORT uint64_t jl_nhd_boxes_inputs() {
+    return num_boxes_inputs;
+}
+
+// Returns the number of boxes logged through jl_nhd_log_box_return.
+JL_DLLEXPORT uint64_t jl_nhd_boxes_returns() {
+    return num_boxes_returns;
+}
+
+// Average number of throw-away objects jl_nhd_log_box_input allocates per
+// call; the default of 0 disables the extra allocations.
+static float extra_allocs_rate = 0.0f;
+
+// Sets the extra-allocation rate read by jl_nhd_log_box_input.
+JL_DLLEXPORT void jl_nhd_set_extra_allocs_rate(float rate) {
+    extra_allocs_rate = rate;
+}
+
+// Logging hook for boxes counted as JL_COUNT_BOX_INPUTS: codegen's boxed()
+// emits a call to it, passing the type of the value being boxed.
+// Besides bumping num_boxes_inputs it can allocate extra objects of `type`
+// whose results are discarded (on average extra_allocs_rate per call), to
+// measure the impact of changing the number of allocations.
+JL_DLLEXPORT void jl_nhd_log_box_input(jl_datatype_t* type) {
+    // TODO(PR): ... Dunno why sometimes we get an invalid type in here....
+    // NOTE(review): boxed() passes `vinfo.typ`, a jl_value_t* that may not be
+    // a DataType -- presumably why the jl_is_datatype check is needed.
+    if (extra_allocs_rate > 0.0f && jl_is_datatype(type)) {
+        jl_task_t *ct = jl_current_task;
+        float num_extra_allocs = extra_allocs_rate;
+        // Whole part of the rate: allocate unconditionally, once per unit.
+        while (num_extra_allocs >= 1) {
+            num_extra_allocs--;
+            jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+        }
+        // Fractional remainder: allocate once more with that probability.
+        // The sample lies in [0, 1), so a zero remainder never allocates.
+        double sample = rand() / (RAND_MAX + 1.0);
+        if (sample < num_extra_allocs) {
+            jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+        }
+    }
+    num_boxes_inputs++;
+}
+
+// Logging hook for boxes counted as JL_COUNT_BOX_RETURNS; only bumps
+// num_boxes_returns.
+JL_DLLEXPORT void jl_nhd_log_box_return(jl_value_t* _type) {
+    (void)_type; // the type argument is currently unused
+    num_boxes_returns++;
+}
+
 JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) {
     // We only need to do this once, the first time this is called.
     size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h
index fcd8e45caa2d8..a20c304d43021 100644
--- a/src/gc-alloc-profiler.h
+++ b/src/gc-alloc-profiler.h
@@ -17,6 +17,13 @@ extern "C" {
 // Forward-declaration to avoid dependency in header file.
 struct jl_raw_alloc_t;  // Defined in gc-alloc-profiler.cpp
 
+// Tells codegen's boxed() whether, and under which counter, a box is logged.
+enum jl_count_box_type {
+    JL_DONT_LOG_BOX,      // emit no logging call
+    JL_COUNT_BOX_INPUTS,  // log through jl_nhd_log_box_input
+    JL_COUNT_BOX_RETURNS, // log through jl_nhd_log_box_return
+};
+
 typedef struct {
     struct jl_raw_alloc_t *allocs;
     size_t num_allocs;
diff --git a/src/julia.h b/src/julia.h
index a34c1f06d0cc1..3ea503af568d2 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -1439,6 +1439,13 @@ JL_DLLEXPORT const char *jl_typename_str(jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT const char *jl_typeof_str(jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b);
 
+// Box-logging hooks called from generated code (defined in gc-alloc-profiler.cpp).
+// NOTE(review): no definition of jl_nhd_log_box is visible in this patch --
+// confirm it exists elsewhere or drop the declaration.
+JL_DLLEXPORT void jl_nhd_log_box(const char* key);
+JL_DLLEXPORT void jl_nhd_log_box_input(jl_datatype_t *type);
+JL_DLLEXPORT void jl_nhd_log_box_return(jl_value_t *_type);
+
 STATIC_INLINE int jl_is_dispatch_tupletype(jl_value_t *v) JL_NOTSAFEPOINT
 {
     return jl_is_datatype(v) && ((jl_datatype_t*)v)->isdispatchtuple;