Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

impl loc callable #501

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,12 @@ def hpat_pandas_series_iloc_impl(self, idx):

return hpat_pandas_series_iloc_impl

def hpat_pandas_series_iloc_callable_impl(self, idx):
index = numpy.asarray(list(map(idx, self._series._data)))
return pandas.Series(self._series._data[index], self._series.index[index], self._series._name)
if isinstance(idx, types.Callable):
def hpat_pandas_series_iloc_callable_impl(self, idx):
index = numpy.asarray(list(map(idx, self._series._data)))
return pandas.Series(self._series._data[index], self._series.index[index], self._series._name)

return hpat_pandas_series_iloc_callable_impl
return hpat_pandas_series_iloc_callable_impl

raise TypingError('{} The index must be an Integer, Slice or List of Integer or a callable.\
Given: {}'.format(_func_name, idx))
Expand All @@ -124,6 +125,8 @@ def hpat_pandas_series_iat_impl(self, idx):
# Note: Loc returns a Series
# Note: Index 0 in a slice is not supported
# Note: Loc slice and callable with String are not implemented
# Note: Loc callable returns a float Series
series_dtype = self.series.data.dtype
index_is_none = (self.series.index is None or
isinstance(self.series.index, numba.types.misc.NoneType))
if isinstance(idx, types.SliceType) and index_is_none:
Expand All @@ -139,7 +142,7 @@ def hpat_pandas_series_loc_slice_noidx_impl(self, idx):

return hpat_pandas_series_loc_slice_noidx_impl

if isinstance(idx, (int, types.Integer, types.UnicodeType, types.StringLiteral)):
if isinstance(idx, (int, types.Number, types.UnicodeType, types.StringLiteral)):
def hpat_pandas_series_loc_impl(self, idx):
index = self._series.index
mask = numpy.empty(len(self._series._data), numpy.bool_)
Expand All @@ -149,11 +152,28 @@ def hpat_pandas_series_loc_impl(self, idx):

return hpat_pandas_series_loc_impl

if isinstance(idx, types.Callable):
def hpat_pandas_series_loc_callable_impl(self, idx):
    """Label-based lookup where the labels are produced by a callable.

    ``idx`` is applied to every value of the underlying series data; each
    result is then used as a label for ``series.loc``. Matching rows are
    appended to the output in order. A label with no match contributes a
    single NaN row indexed by that label.

    Returns a Series that starts from an empty float64 data array, with the
    original series name.
    """
    series = self._series
    # Labels to look up: the callable applied to each data value.
    labels = numpy.asarray(list(map(idx, series._data)))
    new_series = pandas.Series(numpy.empty(0, numpy.float64), numpy.empty(0, series_dtype), series._name)
    # Use a sequential range, not prange: each iteration appends to the
    # Series built by the previous one, which is a cross-iteration dependency
    # and not a supported parallel reduction. Row order must also be kept.
    for i in range(len(labels)):
        tmp = series.loc[labels[i]]
        if len(tmp) > 0:
            new_series = new_series.append(tmp)
        else:
            # No row carries this label: emit a NaN placeholder for it.
            new_series = new_series.append(pandas.Series(numpy.array([numpy.nan]), numpy.array([labels[i]])))

    return new_series

return hpat_pandas_series_loc_callable_impl

raise TypingError('{} The index must be an Number, Slice, String, List, Array or a callable.\
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we need to remove the exception raising.

Given: {}'.format(_func_name, idx))

if accessor == 'at':
if isinstance(idx, (int, types.Integer, types.UnicodeType, types.StringLiteral)):
if isinstance(idx, (int, types.Number, types.UnicodeType, types.StringLiteral)):
def hpat_pandas_series_at_impl(self, idx):
index = self._series.index
mask = numpy.empty(len(self._series._data), numpy.bool_)
Expand Down
18 changes: 18 additions & 0 deletions sdc/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,24 @@ def test_impl(A):
S = pd.Series([2, 4, 6], ['1', '3', '5'])
np.testing.assert_array_equal(hpat_func(S), test_impl(S))

@skip_parallel
@skip_sdc_jit('Not impl in old style')
def test_series_loc_callable(self):
def test_impl(S):
return S.loc[lambda a: a ** 2]
hpat_func = self.jit(test_impl)
S = pd.Series([0, 6, 4, 7, 8], [0, 6, 66, 6, 8])
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

# Loc callable return float Series
@unittest.expectedFailure
def test_series_loc_callable2(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about merging test_series_loc_callable and test_series_loc_callable2?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not do that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the implementation is limited and always returns a float Series. In this case pandas returns an int Series, whereas we return a float Series. I described this in the notes for loc.
The test is marked as an expected failure.

def test_impl(S):
return S.loc[lambda a: a]
hpat_func = self.jit(test_impl)
S = pd.Series([0, 6, 8, 8, 8], [0, 6, 66, 6, 8])
pd.testing.assert_series_equal(hpat_func(S), test_impl(S))

@skip_sdc_jit('Not impl in old style')
def test_series_at_str(self):
def test_impl(A):
Expand Down