From 1b58b40dce90aecc9620a0dd4854b305f7d234e8 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Tue, 19 Mar 2024 20:29:03 -0300 Subject: [PATCH 1/8] issue #439 --- verde/tests/test_utils.py | 13 +++++++++++++ verde/utils.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/verde/tests/test_utils.py b/verde/tests/test_utils.py index b39cf70ea..4a883de3b 100644 --- a/verde/tests/test_utils.py +++ b/verde/tests/test_utils.py @@ -27,9 +27,22 @@ meshgrid_to_1d, parse_engine, partition_by_sum, + fill_nans ) +def test_fill_nans(): + """ + This function tests the fill_nans function. + """ + + grid = np.array([[1, np.nan, 3], + [4, 5, np.nan], + [np.nan, 7, 8]]) + filled_grid = fill_nans(grid) + assert np.isnan(filled_grid).sum() == 0 + + def test_parse_engine(): "Check that it works for common input" assert parse_engine("numba") == "numba" diff --git a/verde/utils.py b/verde/utils.py index 2761aea04..44c7ef6aa 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -14,6 +14,7 @@ import pandas as pd import xarray as xr from scipy.spatial import cKDTree +from sklearn.impute import KNNImputer try: from pykdtree.kdtree import KDTree as pyKDTree @@ -681,6 +682,36 @@ def kdtree(coordinates, use_pykdtree=True, **kwargs): return tree +def fill_nans(grid, n_neighbors=1): + """ + This methos is responsible for fill the NaN values in the grid using the KNN algorithm. + + Parameters + ---------- + grid : :class:`xarray.Dataset` or :class:`xarray.DataArray` + A 2D grid with one or more data variables. + n_neighbors : int + Number of nearest neighbors to use to fill the NaN values in the grid. + The greater the quantity, the longer the processing time, depending on the size of the matrix + + Returns + ------- + grid : :class:`xarray.Dataset` or :class:`xarray.DataArray` + A 2D grid with the NaN values filled. + """ + + not_nan_values = np.argwhere(~np.isnan(grid)).reshape(-1, 1) + unknown_indices = np.argwhere(np.isnan(grid)) + + knn_imputer = KNNImputer(n_neighbors=n_neighbors) + knn_imputer.fit(not_nan_values) + + predicted_values = knn_imputer.transform(not_nan_values) + for i, idx in enumerate(unknown_indices): + grid[tuple(idx)] = predicted_values[i] + + return grid + def partition_by_sum(array, parts): """ Partition an array into parts of approximately equal sum. From 3cdc02469310ad24077a5055631ca47bc76f8d66 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Sun, 24 Mar 2024 21:27:36 -0300 Subject: [PATCH 2/8] Resolved the requested points in the pull request review. --- verde/tests/test_utils.py | 16 ++++++++++----- verde/utils.py | 42 ++++++++++++++++++++------------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/verde/tests/test_utils.py b/verde/tests/test_utils.py index 4a883de3b..8893dd084 100644 --- a/verde/tests/test_utils.py +++ b/verde/tests/test_utils.py @@ -35,13 +35,19 @@ def test_fill_nans(): """ This function tests the fill_nans function. """ - - grid = np.array([[1, np.nan, 3], - [4, 5, np.nan], - [np.nan, 7, 8]]) + + grid = xr.DataArray([[1, np.nan, 3], + [4, 5, np.nan], + [np.nan, 7, 8]]) + filled_grid = fill_nans(grid) + expected_values = xr.DataArray([[1, 1, 3], + [4, 5, 3], + [4, 7, 8]]) + assert np.isnan(filled_grid).sum() == 0 - + assert np.allclose(filled_grid, expected_values) + def test_parse_engine(): "Check that it works for common input" diff --git a/verde/utils.py b/verde/utils.py index 44c7ef6aa..8d4ac1eb4 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -13,8 +13,9 @@ import numpy as np import pandas as pd import xarray as xr +import verde as vd from scipy.spatial import cKDTree -from sklearn.impute import KNNImputer + try: from pykdtree.kdtree import KDTree as pyKDTree @@ -682,35 +683,36 @@ def kdtree(coordinates, use_pykdtree=True, **kwargs): return tree -def fill_nans(grid, n_neighbors=1): +def fill_nans(grid): """ This methos is responsible for fill the NaN values in the grid using the KNN algorithm. - + Parameters ---------- - grid : :class:`xarray.Dataset` or :class:`xarray.DataArray` + grid : :class:`xarray.DataArray` A 2D grid with one or more data variables. - n_neighbors : int - Number of nearest neighbors to use to fill the NaN values in the grid. - The greater the quantity, the longer the processing time, depending on the size of the matrix - Returns ------- - grid : :class:`xarray.Dataset` or :class:`xarray.DataArray` + grid : :class:`xarray.DataArray` A 2D grid with the NaN values filled. """ - - not_nan_values = np.argwhere(~np.isnan(grid)).reshape(-1, 1) - unknown_indices = np.argwhere(np.isnan(grid)) - - knn_imputer = KNNImputer(n_neighbors=n_neighbors) - knn_imputer.fit(not_nan_values) - - predicted_values = knn_imputer.transform(not_nan_values) + + filled_grid = grid.copy() + + not_nan_values = np.argwhere(~np.isnan(grid.values)) + unknown_indices = np.argwhere(np.isnan(grid.values)) + + knn_imputer = vd.KNeighbors() + easting, northing = not_nan_values[:, 0], not_nan_values[:, 1] + knn_imputer.fit((easting, northing), grid.values[not_nan_values[:, 0], + not_nan_values[:, 1]]) + predicted_values = knn_imputer.predict((easting, northing)) + for i, idx in enumerate(unknown_indices): - grid[tuple(idx)] = predicted_values[i] - - return grid + filled_grid[tuple(idx)] = predicted_values[i] + + return filled_grid + def partition_by_sum(array, parts): """ From dae71a041371bacd54bd648cde51fccc1b505d4f Mon Sep 17 00:00:00 2001 From: Pedro Henrique Silva <82620183+Phssilva@users.noreply.github.com> Date: Thu, 11 Apr 2024 19:57:01 -0300 Subject: [PATCH 3/8] Update verde/tests/test_utils.py Co-authored-by: Leonardo Uieda --- verde/tests/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/verde/tests/test_utils.py b/verde/tests/test_utils.py index 8893dd084..3fb2485d7 100644 --- a/verde/tests/test_utils.py +++ b/verde/tests/test_utils.py @@ -35,7 +35,6 @@ def test_fill_nans(): """ This function tests the fill_nans function. """ - grid = xr.DataArray([[1, np.nan, 3], [4, 5, np.nan], [np.nan, 7, 8]]) From b547d407ee222357c1be3721bfa7b670c1d20c8f Mon Sep 17 00:00:00 2001 From: Pedro Henrique Silva <82620183+Phssilva@users.noreply.github.com> Date: Thu, 11 Apr 2024 19:57:10 -0300 Subject: [PATCH 4/8] Update verde/tests/test_utils.py Co-authored-by: Leonardo Uieda --- verde/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verde/tests/test_utils.py b/verde/tests/test_utils.py index 3fb2485d7..7b891881c 100644 --- a/verde/tests/test_utils.py +++ b/verde/tests/test_utils.py @@ -33,7 +33,7 @@ def test_fill_nans(): """ - This function tests the fill_nans function. + Test filling NaNs on a small sample grid """ grid = xr.DataArray([[1, np.nan, 3], [4, 5, np.nan], From cfd2b319c75a8961bcade3f06be2cc98a202e4e4 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Silva <82620183+Phssilva@users.noreply.github.com> Date: Thu, 11 Apr 2024 19:57:20 -0300 Subject: [PATCH 5/8] Update verde/tests/test_utils.py Co-authored-by: Leonardo Uieda --- verde/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verde/tests/test_utils.py b/verde/tests/test_utils.py index 7b891881c..27b5ed82b 100644 --- a/verde/tests/test_utils.py +++ b/verde/tests/test_utils.py @@ -44,7 +44,7 @@ def test_fill_nans(): [4, 5, 3], [4, 7, 8]]) - assert np.isnan(filled_grid).sum() == 0 + assert np.any(np.isnan(filled_grid)) assert np.allclose(filled_grid, expected_values) From 40e92902e6aa47f722848eb8234ea1c965ab1d41 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Silva <82620183+Phssilva@users.noreply.github.com> Date: Thu, 11 Apr 2024 19:57:30 -0300 Subject: [PATCH 6/8] Update verde/utils.py Co-authored-by: Leonardo Uieda --- verde/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verde/utils.py b/verde/utils.py index 8d4ac1eb4..57764fb17 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -685,7 +685,7 @@ def kdtree(coordinates, use_pykdtree=True, **kwargs): def fill_nans(grid): """ - This methos is responsible for fill the NaN values in the grid using the KNN algorithm. + Fill missing values in a grid by nearest neighbor interpolation Parameters ---------- From 05fe6c74039189993374de0a35e5aba90e6f8101 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Silva <82620183+Phssilva@users.noreply.github.com> Date: Thu, 11 Apr 2024 19:57:51 -0300 Subject: [PATCH 7/8] Update verde/utils.py Co-authored-by: Leonardo Uieda --- verde/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verde/utils.py b/verde/utils.py index 57764fb17..85a87706b 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -702,7 +702,7 @@ def fill_nans(grid): not_nan_values = np.argwhere(~np.isnan(grid.values)) unknown_indices = np.argwhere(np.isnan(grid.values)) - knn_imputer = vd.KNeighbors() + knn = vd.KNeighbors() easting, northing = not_nan_values[:, 0], not_nan_values[:, 1] knn_imputer.fit((easting, northing), grid.values[not_nan_values[:, 0], not_nan_values[:, 1]]) From cb812cc66950ee95f90cf12b16c3b84c85ce4dec Mon Sep 17 00:00:00 2001 From: Pedro Henrique Silva <82620183+Phssilva@users.noreply.github.com> Date: Thu, 11 Apr 2024 19:58:04 -0300 Subject: [PATCH 8/8] Update verde/utils.py Co-authored-by: Leonardo Uieda --- verde/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verde/utils.py b/verde/utils.py index 85a87706b..3930f11b2 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -704,7 +704,7 @@ def fill_nans(grid): knn = vd.KNeighbors() easting, northing = not_nan_values[:, 0], not_nan_values[:, 1] - knn_imputer.fit((easting, northing), grid.values[not_nan_values[:, 0], + knn.fit((easting, northing), grid.values[not_nan_values[:, 0], not_nan_values[:, 1]]) predicted_values = knn_imputer.predict((easting, northing))