diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f821d4b..b67bd20 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ ci: repos: - repo: https://github.com/compilerla/conventional-pre-commit - rev: v3.1.0 + rev: v3.2.0 hooks: - id: conventional-pre-commit stages: [commit-msg] @@ -36,7 +36,7 @@ repos: ] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -70,7 +70,7 @@ repos: - id: text-unicode-replacement-char - repo: https://github.com/Lucas-C/pre-commit-hooks-safety - rev: v1.3.2 + rev: v1.3.3 hooks: - id: python-safety-dependencies-check files: pyproject.toml @@ -81,7 +81,7 @@ repos: - id: docformatter - repo: https://github.com/nbQA-dev/nbQA - rev: 1.7.1 + rev: 1.8.5 hooks: - id: nbqa-check-ast - id: nbqa-mypy @@ -89,17 +89,17 @@ repos: - id: nbqa-pyupgrade - repo: https://github.com/bwhmather/ssort - rev: 0.12.2 + rev: 0.12.4 hooks: - id: ssort - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 + rev: v3.15.2 hooks: - id: pyupgrade - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.4.0 hooks: - id: black - id: black-jupyter @@ -112,13 +112,13 @@ repos: args: ["--profile=black"] - repo: https://github.com/PyCQA/pylint - rev: v3.0.3 + rev: v3.1.0 hooks: - id: pylint args: ["--rcfile=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.8.0 + rev: v1.9.0 hooks: - id: mypy args: @@ -126,7 +126,7 @@ repos: exclude: "tests" - repo: https://github.com/PyCQA/bandit - rev: 1.7.6 + rev: 1.7.8 hooks: - id: bandit exclude: "^tests/" @@ -134,12 +134,12 @@ repos: - "-r" - repo: https://github.com/kynan/nbstripout - rev: 0.6.1 + rev: 0.7.1 hooks: - id: nbstripout - repo: https://github.com/python-poetry/poetry - rev: 1.7.0 + rev: 1.8.0 hooks: - id: poetry-check - id: poetry-lock diff --git a/src/sk_transformers/generic_transformer.py b/src/sk_transformers/generic_transformer.py index c145d45..e882950 100644 --- a/src/sk_transformers/generic_transformer.py +++ b/src/sk_transformers/generic_transformer.py @@ -291,9 +291,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: agg_df = X.groupby(groupby_columns).agg( [ - getattr(pl, agg_func)(agg_column).alias(agg_new_column) - if isinstance(agg_func, str) - else pl.col(agg_column).apply(agg_func).alias(agg_new_column) + ( + getattr(pl, agg_func)(agg_column).alias(agg_new_column) + if isinstance(agg_func, str) + else pl.col(agg_column).apply(agg_func).alias(agg_new_column) + ) for (agg_column, agg_func, agg_new_column) in agg_features ] ) diff --git a/src/sk_transformers/string_transformer.py b/src/sk_transformers/string_transformer.py index d7bb218..22e064e 100644 --- a/src/sk_transformers/string_transformer.py +++ b/src/sk_transformers/string_transformer.py @@ -489,9 +489,11 @@ def __init__( ) -> None: super().__init__() self.features = [ - (split_tuple[0], split_tuple[1], split_tuple[2]) - if len(split_tuple) == 3 - else (split_tuple[0], split_tuple[1], -1) + ( + (split_tuple[0], split_tuple[1], split_tuple[2]) + if len(split_tuple) == 3 + else (split_tuple[0], split_tuple[1], -1) + ) for split_tuple in features ] @@ -515,20 +517,22 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: ] select_with_expr = [ - pl.col(column) - .str.splitn(by=separator, n=max_possible_split + 1) - .struct.rename_fields( - [column + f"_part_{i}" for i in range(1, max_possible_split + 2)] - ) - .alias(column + "_alias") - if maxsplit in [0, -1] or maxsplit > max_possible_split - else ( + ( pl.col(column) - .str.splitn(by=separator, n=maxsplit + 1) + .str.splitn(by=separator, n=max_possible_split + 1) .struct.rename_fields( - [column + f"_part_{i}" for i in range(1, maxsplit + 2)] + [column + f"_part_{i}" for i in range(1, max_possible_split + 2)] ) .alias(column + "_alias") + if maxsplit in [0, -1] or maxsplit > max_possible_split + else ( + pl.col(column) + .str.splitn(by=separator, n=maxsplit + 1) + .struct.rename_fields( + [column + f"_part_{i}" for i in range(1, maxsplit + 2)] + ) + .alias(column + "_alias") + ) ) for (column, separator, maxsplit), max_possible_split in zip( self.features, max_possible_splits_list