diff --git a/examples/vtune_integration/basic_usage_vtune_profiler.py b/examples/vtune_integration/basic_usage_vtune_profiler.py new file mode 100644 index 000000000..36f0734b9 --- /dev/null +++ b/examples/vtune_integration/basic_usage_vtune_profiler.py @@ -0,0 +1,22 @@ +import pandas as pd +from numba import njit +import sdc +import vtune as vt +from vtune import task_begin, task_end, domain, string_handle_create +import ctypes +import itt + +handle = string_handle_create("Function") + + +@njit +def dataframe_head(df): + task_begin(domain, handle) + series = df['A'].head(n=4) + task_end(domain) + return series + + +df = pd.DataFrame({'A': [1, 2, 4, 6, 4, 2], 'B': [3., 2., 77., 2., 5., 6.5]}) + +print(dataframe_head(df)) diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py index 696544ce1..816fdc5f0 100644 --- a/sdc/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -67,6 +67,7 @@ from sdc.functions import numpy_like from sdc.hiframes.api import isna from sdc.datatypes.hpat_pandas_groupby_functions import init_series_groupby +from vtune import vtune_profiling_overload from .pandas_series_functions import apply from .pandas_series_functions import map as _map @@ -2268,6 +2269,7 @@ def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None @sdc_overload_method(SeriesType, 'head') +@vtune_profiling_overload(name_handle="Series_head") def hpat_pandas_series_head(self, n=5): """ Intel Scalable Dataframe Compiler User Guide diff --git a/sdc/hiframes/boxing.py b/sdc/hiframes/boxing.py index 26dbb9904..ed17c62ab 100644 --- a/sdc/hiframes/boxing.py +++ b/sdc/hiframes/boxing.py @@ -45,6 +45,7 @@ box_categorical_array, unbox_categorical_array) from sdc.hiframes.pd_series_ext import SeriesType from sdc.hiframes.pd_series_type import _get_series_array_type +from vtune import vtune_profiling_boxing from .. 
import hstr_ext import llvmlite.binding as ll @@ -74,6 +75,7 @@ def typeof_pd_str_series(val, c): @unbox(DataFrameType) +@vtune_profiling_boxing(name_handle="Dataframe_unbox") def unbox_dataframe(typ, val, c): """unbox dataframe to an empty DataFrame struct columns will be extracted later if necessary. @@ -196,6 +198,7 @@ def _infer_index_type(index): @box(DataFrameType) +@vtune_profiling_boxing(name_handle="Dataframe_box") def box_dataframe(typ, val, c): context = c.context builder = c.builder @@ -286,6 +289,7 @@ def codegen(context, builder, sig, args): @unbox(SeriesType) +@vtune_profiling_boxing(name_handle="Series_unbox") def unbox_series(typ, val, c): arr_obj = c.pyapi.object_getattr_string(val, "values") series = cgutils.create_struct_proxy(typ)(c.context, c.builder) @@ -322,6 +326,7 @@ def _unbox_series_data(dtype, data_typ, arr_obj, c): @box(SeriesType) +@vtune_profiling_boxing(name_handle="Series_box") def box_series(typ, val, c): """ """ diff --git a/vtune.py b/vtune.py new file mode 100644 index 000000000..40f60a411 --- /dev/null +++ b/vtune.py @@ -0,0 +1,97 @@ +import ctypes +import itt +import llvmlite.binding as ll +from llvmlite.llvmpy.core import Type as LLType +from llvmlite import ir as lir +from llvmlite.llvmpy.core import Constant +from inspect import signature +import numba + +functype_domain = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_char_p) +ctypes_domain = functype_domain(itt.__itt_domain_create) +domain = ctypes_domain(b"VTune.Profiling.SDC\0") + +functype_string_handle_create = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_char_p) +ctypes_string_handle_create = functype_string_handle_create(itt.__itt_string_handle_create) + +functype_task_begin = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p) +task_begin = functype_task_begin(itt.__itt_task_begin) + +functype_task_end = ctypes.CFUNCTYPE(None, ctypes.c_void_p) +task_end = functype_task_end(itt.__itt_task_end) + +ll.add_symbol('__itt_task_begin_new', itt.__itt_task_begin) 
# Expose the ITT "task end" entry point to LLVM under the symbol name that
# the boxing/unboxing instrumentation below looks up.
ll.add_symbol('__itt_task_end_new', itt.__itt_task_end)


def string_handle_create(string):
    """Create an ITT string handle for ``string``.

    The text is encoded with ``str.encode()`` and handed to the ctypes
    wrapper around ``itt.__itt_string_handle_create``; the wrapper's result
    (declared as ``c_void_p``) is returned unchanged.
    """
    return ctypes_string_handle_create(string.encode())


def vtune_profiling_boxing(name_handle):
    """Build a decorator that brackets a boxing/unboxing routine with an ITT task.

    The decorated function must have the ``(typ, val, c)`` signature.  The
    wrapper emits a call to ``__itt_task_begin_new`` before invoking the
    wrapped code generator and a call to ``__itt_task_end_new`` after it, so
    the IR produced by ``func`` sits between the two calls.

    Parameters
    ----------
    name_handle : str
        Name used to create the ITT string handle passed to task-begin.

    Returns
    -------
    callable
        Decorator accepting ``func(typ, val, c)`` and returning the
        instrumented wrapper; the wrapper returns whatever ``func`` returns.
    """
    import functools

    def task(func):
        # The handle is created once, at decoration time, and baked into the
        # generated IR as an integer constant.
        handle = string_handle_create(name_handle)

        @functools.wraps(func)  # keep the wrapped routine's name/docstring
        def wrapper(typ, val, c):
            voidptr = c.pyapi.voidptr

            fnty = LLType.function(LLType.void(), [voidptr, voidptr])
            fn = c.pyapi._get_function(fnty, name="__itt_task_begin_new")
            # Domain and handle are embedded as 64-bit integers and cast
            # to void* for the call.
            domain_const = lir.Constant(LLType.int(64), domain)
            handle_const = lir.Constant(LLType.int(64), handle)
            c.builder.call(fn, [Constant.inttoptr(domain_const, voidptr),
                                Constant.inttoptr(handle_const, voidptr)])

            return_value = func(typ, val, c)

            fnty_end = LLType.function(LLType.void(), [voidptr])
            fn_end = c.pyapi._get_function(fnty_end, name="__itt_task_end_new")
            c.builder.call(fn_end, [Constant.inttoptr(domain_const, voidptr)])

            return return_value

        return wrapper

    return task


def vtune_profiling_overload(name_handle):
    """Build a decorator that wraps an overload function with ITT task calls.

    Parameters
    ----------
    name_handle : str
        Name used to create the ITT string handle passed to ``task_begin``.

    Returns
    -------
    callable
        Decorator that takes the overload function and returns the wrapper
        produced by ``exec_impl``.
    """
    def task(func):
        handle = string_handle_create(name_handle)
        return exec_impl(func, handle)

    return task


def codegen_exec_impl(func, handle):
    """Generate source text and globals for an instrumented overload wrapper.

    The generated ``wrapper`` has the same signature as ``func``.  It calls
    ``func`` to obtain the implementation, compiles it with ``numba.njit``
    and returns an inner ``for_jit`` function (again with the same
    signature) that calls ``task_begin(domain, handle)``, runs the compiled
    implementation and then calls ``task_end(domain)``.

    If ``func`` returns ``None`` the generated wrapper returns ``None`` as
    well instead of passing it to ``numba.njit``; ``njit(None)`` yields a
    decorator object, which would make a declined overload look applicable.

    Parameters
    ----------
    func : callable
        The overload function being instrumented.
    handle
        ITT string handle passed to ``task_begin``.

    Returns
    -------
    tuple
        ``(func_text, global_vars)`` suitable for ``exec``.
    """
    sig = signature(func)
    sig_str = str(sig)
    args_str = ', '.join(sig.parameters.keys())
    func_lines = [
        f"def wrapper{sig_str}:",
        f"    overload_result = func({args_str})",
        "    if overload_result is None:",
        "        return None",
        "    result = numba.njit(overload_result)",
        f"    def for_jit{sig_str}:",
        "        task_begin(domain, handle)",
        f"        return_value = result({args_str})",
        "        task_end(domain)",
        "        return return_value",
        "    return for_jit",
    ]

    func_text = '\n'.join(func_lines)
    global_vars = {
        "func": func,
        "numba": numba,
        "domain": domain,
        "handle": handle,
        "task_begin": task_begin,
        "task_end": task_end,
    }

    return func_text, global_vars


def exec_impl(func, handle):
    """Materialise the instrumented wrapper described by ``codegen_exec_impl``.

    Executes the generated source and returns the resulting ``wrapper``
    function, carrying over the name and docstring of ``func``.
    """
    import functools

    func_text, global_vars = codegen_exec_impl(func, handle)
    loc_vars = {}
    # The executed text is produced entirely by codegen_exec_impl from the
    # signature of ``func``; no external input is evaluated here.
    exec(func_text, global_vars, loc_vars)
    _impl = loc_vars['wrapper']
    # Without this the overload would be registered under the generic name
    # "wrapper" and lose the documentation attached to ``func``.
    functools.update_wrapper(_impl, func)

    return _impl