Skip to content

Commit

Permalink
ci: auto fixes from pre-commit.com hooks
Browse files (browse the repository at this point in the history)
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Mar 12, 2024
1 parent b81daf3 commit 18d3376
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 16 deletions.
8 changes: 5 additions & 3 deletions src/sk_transformers/generic_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:

         agg_df = X.groupby(groupby_columns).agg(
             [
-                getattr(pl, agg_func)(agg_column).alias(agg_new_column)
-                if isinstance(agg_func, str)
-                else pl.col(agg_column).apply(agg_func).alias(agg_new_column)
+                (
+                    getattr(pl, agg_func)(agg_column).alias(agg_new_column)
+                    if isinstance(agg_func, str)
+                    else pl.col(agg_column).apply(agg_func).alias(agg_new_column)
+                )
                 for (agg_column, agg_func, agg_new_column) in agg_features
             ]
         )
Expand Down
30 changes: 17 additions & 13 deletions src/sk_transformers/string_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,9 +489,11 @@ def __init__(
     ) -> None:
         super().__init__()
         self.features = [
-            (split_tuple[0], split_tuple[1], split_tuple[2])
-            if len(split_tuple) == 3
-            else (split_tuple[0], split_tuple[1], -1)
+            (
+                (split_tuple[0], split_tuple[1], split_tuple[2])
+                if len(split_tuple) == 3
+                else (split_tuple[0], split_tuple[1], -1)
+            )
             for split_tuple in features
         ]

Expand All @@ -515,20 +517,22 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         ]

         select_with_expr = [
-            pl.col(column)
-            .str.splitn(by=separator, n=max_possible_split + 1)
-            .struct.rename_fields(
-                [column + f"_part_{i}" for i in range(1, max_possible_split + 2)]
-            )
-            .alias(column + "_alias")
-            if maxsplit in [0, -1] or maxsplit > max_possible_split
-            else (
+            (
                 pl.col(column)
-                .str.splitn(by=separator, n=maxsplit + 1)
+                .str.splitn(by=separator, n=max_possible_split + 1)
                 .struct.rename_fields(
-                    [column + f"_part_{i}" for i in range(1, maxsplit + 2)]
+                    [column + f"_part_{i}" for i in range(1, max_possible_split + 2)]
                 )
                 .alias(column + "_alias")
+                if maxsplit in [0, -1] or maxsplit > max_possible_split
+                else (
+                    pl.col(column)
+                    .str.splitn(by=separator, n=maxsplit + 1)
+                    .struct.rename_fields(
+                        [column + f"_part_{i}" for i in range(1, maxsplit + 2)]
+                    )
+                    .alias(column + "_alias")
+                )
             )
             for (column, separator, maxsplit), max_possible_split in zip(
                 self.features, max_possible_splits_list
Expand Down

0 comments on commit 18d3376

Please sign in to comment.