diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index ddd2def..9693786 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,8 @@ v1.2.1.5
 
 - Changed docstrings style to numpydoc.
 - Added default class_ref DataFrame as a built-in object. User now has the
   option to use this new default or continue to load a .csv file.
+- Added voxels.py module to create voxels from point clouds.
+- Added voxelization step in automated_separation.large_tree_1 to improve performance in path_detection.
 
 v1.2.1.4
diff --git a/tlseparation/classification/path_detection.py b/tlseparation/classification/path_detection.py
index 46a7738..3369d8f 100644
--- a/tlseparation/classification/path_detection.py
+++ b/tlseparation/classification/path_detection.py
@@ -25,17 +25,15 @@
 __email__ = "matheus.boni.vicari@gmail.com"
 __status__ = "Development"
 
-import numpy as np
+
 import datetime
-import pandas as pd
-import struct
+import numpy as np
 from sklearn.neighbors import NearestNeighbors
 from ..utility.shortpath import (array_to_graph, extract_path_info)
 
-
-def detect_main_pathways(point_cloud, k_retrace, knn, nbrs_threshold, voxel=.1, verbose=False):
-
+def detect_main_pathways(point_cloud, k_retrace, knn, nbrs_threshold,
+                         verbose=False):
     """
     Detects the main pathways of an unordered 3D point cloud. Set as true
@@ -82,38 +80,19 @@ def detect_main_pathways(point_cloud, k_retrace, knn, nbrs_threshold, voxel=.1,
     assert point_cloud.shape[1] == 3, "point_cloud must be a 3D point cloud.\
     Make sure it has the shape n_points x 3 (x, y, z)."
 
-    # voxelise the data
-    point_cloud_v = pd.DataFrame(point_cloud, columns=['x', 'y', 'z'])
-    point_cloud_v.loc[:, 'xx'] = (point_cloud_v.x // voxel) * voxel
-    point_cloud_v.loc[:, 'yy'] = (point_cloud_v.y // voxel) * voxel
-    point_cloud_v.loc[:, 'zz'] = (point_cloud_v.z // voxel) * voxel
-
-    point_cloud_v.loc[:, 'xxb'] = point_cloud_v.xx.apply(lambda x: struct.pack('f', x))
-    point_cloud_v.loc[:, 'yyb'] = point_cloud_v.yy.apply(lambda x: struct.pack('f', x))
-    point_cloud_v.loc[:, 'zzb'] = point_cloud_v.zz.apply(lambda x: struct.pack('f', x))
-    point_cloud_v.loc[:, 'I'] = point_cloud_v.xxb + point_cloud_v.yyb + point_cloud_v.zzb
-
-    point_cloud_w = point_cloud_v.groupby(['xx', 'yy', 'zz']).size().reset_index()
-    point_cloud_w.loc[:, 'xxb'] = point_cloud_w.xx.apply(lambda x: struct.pack('f', x))
-    point_cloud_w.loc[:, 'yyb'] = point_cloud_w.yy.apply(lambda x: struct.pack('f', x))
-    point_cloud_w.loc[:, 'zzb'] = point_cloud_w.zz.apply(lambda x: struct.pack('f', x))
-    point_cloud_w.loc[:, 'I'] = point_cloud_w.xxb + point_cloud_w.yyb + point_cloud_w.zzb
-
     # Getting root index (base_id) from point cloud.
-    base_id = point_cloud_w.zz.idxmin()
+    base_id = np.argmin(point_cloud[:, 2])
 
     # Generating graph from point cloud and extracting shortest path
     # information.
-
     if verbose:
         print(str(datetime.datetime.now()) + ' | >>> generating graph from \
 point cloud and extracting shortest path information')
-    G = array_to_graph(point_cloud_w[['xx', 'yy', 'zz']], base_id, 3, 100, 0.05, 0.02)
-
+    G = array_to_graph(point_cloud, base_id, 3, knn, nbrs_threshold, 0.02)
     nodes_ids, D, path_list = extract_path_info(G, base_id, return_path=True)
 
     # Obtaining nodes coordinates from shortest path information.
-    nodes = point_cloud_w.loc[nodes_ids]
+    nodes = point_cloud[nodes_ids]
 
     # Converting list of shortest path distances to array.
     D = np.asarray(D)
@@ -132,8 +111,8 @@ def detect_main_pathways(point_cloud, k_retrace, knn, nbrs_threshold, voxel=.1,
     # Generating array of all indices from 'arr' and all indices to process
     # 'idx'.
-    idx_base = np.arange(point_cloud_w.shape[0], dtype=int)
-    idx = np.arange(point_cloud_w.shape[0], dtype=int)
+    idx_base = np.arange(point_cloud.shape[0], dtype=int)
+    idx = np.arange(point_cloud.shape[0], dtype=int)
 
     # Initializing NearestNeighbors search and searching for all 'knn'
     # neighboring points around each point in 'arr'.
@@ -142,8 +121,8 @@ def detect_main_pathways(point_cloud, k_retrace, knn, nbrs_threshold, voxel=.1,
 NearestNeighbors search and searching for all knn neighboring points \
 around each point in arr')
     nbrs = NearestNeighbors(n_neighbors=knn, metric='euclidean',
-                            leaf_size=15, n_jobs=-1).fit(point_cloud_w[['xx', 'yy', 'zz']])
-    distances, indices = nbrs.kneighbors(point_cloud_w[['xx', 'yy', 'zz']])
+                            leaf_size=15, n_jobs=-1).fit(point_cloud)
+    distances, indices = nbrs.kneighbors(point_cloud)
     indices = indices.astype(int)
 
     # Initializing variables for current ids being processed (current_idx)
@@ -272,14 +251,14 @@ def detect_main_pathways(point_cloud, k_retrace, knn, nbrs_threshold, voxel=.1,
     processed_idx = np.unique(processed_idx).astype(int)
 
     # Generating list of remaining points to process.
-    path_mask = np.zeros(point_cloud_w.shape[0], dtype=bool)
+    idx = idx_base[np.in1d(idx_base, processed_idx, invert=True)]
+
+    # Generating final path mask and setting processed indices as True.
+    path_mask = np.zeros(point_cloud.shape[0], dtype=bool)
     path_mask[processed_idx] = True
-
-    # identifying points in stem voxels and attributing True
-    path_mask_all = np.zeros(point_cloud_v.shape[0], dtype=bool)
-    path_mask_all[point_cloud_v[point_cloud_v.I.isin(point_cloud_w.loc[path_mask].I)].index] = True
-    return path_mask_all
+    return path_mask
+
 
 def get_base(point_cloud, base_height):
@@ -302,4 +281,4 @@ def get_base(point_cloud, base_height):
 
     """
 
-    return point_cloud[:, 2] <= base_height
+    return point_cloud[:, 2] <= base_height
\ No newline at end of file
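A minimal usage sketch for the slimmed-down detect_main_pathways above (the synthetic cloud is a stand-in for a real tree scan, and the parameter values mirror the large_tree_1 call site rather than being mandated defaults):

    import numpy as np
    from tlseparation.classification.path_detection import detect_main_pathways

    # Hypothetical input: an n_points x 3 array of (x, y, z) coordinates.
    point_cloud = np.random.uniform(0, 10, size=(10000, 3))

    # k_retrace=80, knn=100 and nbrs_threshold=0.15 follow the values used
    # in automated_separation.large_tree_1; tune them per dataset.
    path_mask = detect_main_pathways(point_cloud, 80, 100, 0.15,
                                     verbose=False)

    # 'True' marks points on the detected main pathways (trunk and larger
    # branches); 'False' marks everything else.
    trunk_points = point_cloud[path_mask]
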
diff --git a/tlseparation/future_code/path_detection_voxel.py b/tlseparation/future_code/path_detection_voxel.py
new file mode 100644
index 0000000..cc70bdc
--- /dev/null
+++ b/tlseparation/future_code/path_detection_voxel.py
@@ -0,0 +1,308 @@
+# Copyright (c) 2017, Matheus Boni Vicari, TLSeparation Project
+# All rights reserved.
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+__author__ = "Matheus Boni Vicari"
+__copyright__ = "Copyright 2017, TLSeparation Project"
+__credits__ = ["Matheus Boni Vicari", "Phil Wilkes"]
+__license__ = "GPL3"
+__version__ = "1.2.1.5"
+__maintainer__ = "Matheus Boni Vicari"
+__email__ = "matheus.boni.vicari@gmail.com"
+__status__ = "Development"
+
+import numpy as np
+import datetime
+import pandas as pd
+import struct
+from sklearn.neighbors import NearestNeighbors
+from ..utility.shortpath import (array_to_graph, extract_path_info)
+
+
+def detect_main_pathways(point_cloud, k_retrace, knn, nbrs_threshold,
+                         voxel=.1, verbose=False):
+
+    """
+    Detects the main pathways of an unordered 3D point cloud. Set as true
+    all points detected as part of all detected pathways that lead down to
+    the base of the graph.
+
+    Parameters
+    ----------
+    point_cloud : array
+        Three-dimensional point cloud of a single tree to perform the
+        wood-leaf separation. This should be an n-dimensional array (m x n)
+        containing a set of coordinates (n) over a set of points (m).
+    k_retrace : int
+        Number of steps in the graph to retrace back to graph's base. Every
+        node in graph will be moved k_retrace steps from the extremities
+        towards the base.
+    knn : int
+        Number of neighbors to fill gaps in detected paths. The larger the
+        better. A large knn will increase memory usage. Recommended value
+        between 50 and 150.
+    nbrs_threshold : float
+        Maximum distance to valid neighboring points used to fill gaps in
+        detected paths.
+    voxel : float
+        Edge length of the voxels used to downsample the point cloud before
+        building the graph.
+    verbose : bool
+        Option to set verbose on/off.
+
+    Returns
+    -------
+    path_mask : array
+        Boolean mask where 'True' represents points detected as part of the
+        main pathways and 'False' represents points not part of the pathways.
+
+    Raises
+    ------
+    AssertionError:
+        point_cloud has the wrong shape or number of dimensions.
+
+    """
+
+    # Making sure input point cloud has the right shape and number of
+    # dimensions.
+    assert point_cloud.ndim == 2, "point_cloud must be an array with 2\
+    dimensions, n_points x 3 (x, y, z)."
+    assert point_cloud.shape[1] == 3, "point_cloud must be a 3D point cloud.\
+    Make sure it has the shape n_points x 3 (x, y, z)."
+
+    # Voxelise the data by snapping each coordinate to a grid of edge
+    # length 'voxel'.
+    pcloud_v = pd.DataFrame(point_cloud, columns=['x', 'y', 'z'])
+    pcloud_v.loc[:, 'xx'] = (pcloud_v.x // voxel) * voxel
+    pcloud_v.loc[:, 'yy'] = (pcloud_v.y // voxel) * voxel
+    pcloud_v.loc[:, 'zz'] = (pcloud_v.z // voxel) * voxel
+
+    # Packing each voxel coordinate into bytes and concatenating x, y and z
+    # into a single hashable voxel index (I) per point.
+    pcloud_v.loc[:, 'xxb'] = pcloud_v.xx.apply(lambda x: struct.pack('f', x))
+    pcloud_v.loc[:, 'yyb'] = pcloud_v.yy.apply(lambda x: struct.pack('f', x))
+    pcloud_v.loc[:, 'zzb'] = pcloud_v.zz.apply(lambda x: struct.pack('f', x))
+    pcloud_v.loc[:, 'I'] = pcloud_v.xxb + pcloud_v.yyb + pcloud_v.zzb
+
+    # Reducing the cloud to one entry per occupied voxel and rebuilding the
+    # same voxel index (I) for the reduced set.
+    pcloud_w = pcloud_v.groupby(['xx', 'yy', 'zz']).size().reset_index()
+    pcloud_w.loc[:, 'xxb'] = pcloud_w.xx.apply(lambda x: struct.pack('f', x))
+    pcloud_w.loc[:, 'yyb'] = pcloud_w.yy.apply(lambda x: struct.pack('f', x))
+    pcloud_w.loc[:, 'zzb'] = pcloud_w.zz.apply(lambda x: struct.pack('f', x))
+    pcloud_w.loc[:, 'I'] = pcloud_w.xxb + pcloud_w.yyb + pcloud_w.zzb
+
+    # Getting root index (base_id) from point cloud.
+    base_id = pcloud_w.zz.idxmin()
+
+    # Generating graph from point cloud and extracting shortest path
+    # information.
+
+    if verbose:
+        print(str(datetime.datetime.now()) + ' | >>> generating graph from \
+point cloud and extracting shortest path information')
+    G = array_to_graph(pcloud_w[['xx', 'yy', 'zz']], base_id, 3, 100, 0.05,
+                       0.02)
+
+    nodes_ids, D, path_list = extract_path_info(G, base_id,
+                                                return_path=True)
+    # Obtaining nodes coordinates from shortest path information.
+    nodes = pcloud_w.loc[nodes_ids]
+    # Converting list of shortest path distances to array.
+    D = np.asarray(D)
+
+    # Retracing path for nodes in G. This step aims to detect only major
+    # pathways in G. For a tree, these paths are expected to represent
+    # branches and trunk.
+    new_id = np.zeros(nodes.shape[0], dtype='int')
+    for key, values in path_list.iteritems():
+        if len(values) >= k_retrace:
+            new_id[key] = values[len(values) - k_retrace]
+        else:
+            new_id[key] = values[0]
+
+    # Getting unique indices after retracing path_list.
+    ids = np.unique(new_id)
+
+    # Generating array of all indices from 'arr' and all indices to process
+    # 'idx'.
+    idx_base = np.arange(pcloud_w.shape[0], dtype=int)
+    idx = np.arange(pcloud_w.shape[0], dtype=int)
+
+    # Initializing NearestNeighbors search and searching for all 'knn'
+    # neighboring points around each point in 'arr'.
+    if verbose:
+        print(str(datetime.datetime.now()) + ' | >>> initializing \
+NearestNeighbors search and searching for all knn neighboring points \
+around each point in arr')
+    nbrs = NearestNeighbors(n_neighbors=knn, metric='euclidean',
+                            leaf_size=15, n_jobs=-1).fit(pcloud_w[['xx', 'yy',
+                                                                   'zz']])
+    distances, indices = nbrs.kneighbors(pcloud_w[['xx', 'yy', 'zz']])
+    indices = indices.astype(int)
+
+    # Initializing variables for current ids being processed (current_idx)
+    # and all ids already processed (processed_idx).
+    current_idx = ids
+    processed_idx = ids
+
+    # Looping while there are still indices in current_idx to process.
+    if verbose:
+        print(str(datetime.datetime.now()) + ' | >>> looping while there \
+are still indices in current_idx to process')
+    while len(current_idx) > 0:
+
+        # Selecting NearestNeighbors indices and distances for current
+        # indices being processed.
+        nn = indices[current_idx]
+        dd = distances[current_idx]
+
+        # Masking out indices already contained in processed_idx.
+        mask1 = np.in1d(nn, processed_idx, invert=True).reshape(nn.shape)
+        # Masking neighboring points that are within threshold distance.
+        mask2 = dd < nbrs_threshold
+        # mask1 AND mask2. This will mask only indices that are part of
+        # the graph and within threshold distance.
+        mask = np.logical_and(mask1, mask2)
+
+        # Initializing temporary list of nearest neighbors. This list
+        # is later used to accumulate points that will be added to the
+        # processed points list.
+        nntemp = []
+
+        # Looping over current indices' set of nn points and selecting
+        # knn points that haven't been added/processed yet (mask1).
+        for i, (n, d) in enumerate(zip(nn, dd)):
+            nn_idx = n[mask[i]][1:]
+
+            # Checking if current neighbor has an accumulated distance
+            # shorter than central node (n[0]) minus some distance based
+            # on nbrs_threshold. This penalisation aims to restrict potential
+            # neighbors to those more likely to be along an actual path. This
+            # would remove points placed along the sides of a path.
+            for ni in nn_idx:
+                if D[ni] <= D[n[0]] - (nbrs_threshold / 3):
+                    nntemp.append(ni)
+
+        # Obtaining a unique array of points currently being processed.
+        current_idx = np.unique(nntemp)
+        # Updating array of processed indices with indices processed within
+        # current iteration (current_idx).
+        processed_idx = np.append(processed_idx, current_idx)
+        processed_idx = np.unique(processed_idx).astype(int)
+
+        # Generating list of remaining points to process.
+        idx = idx_base[np.in1d(idx_base, processed_idx, invert=True)]
+
+    # Just in case of not having detected all points in the desired paths,
+    # run one last iteration.
+
+    # Getting NearestNeighbors indices and distances for all indices
+    # that remain to be processed.
+    idx2 = indices[idx]
+    dist2 = distances[idx]
+
+    # Masking indices in idx2 that have already been processed. The
+    # idea is to connect remaining points to existing graph nodes.
+    mask1 = np.in1d(idx2, processed_idx).reshape(idx2.shape)
+    # Masking neighboring points that are within threshold distance.
+    mask2 = dist2 < nbrs_threshold
+    # mask1 AND mask2. This will mask only indices that are part of
+    # the graph and within threshold distance.
+    mask = np.logical_and(mask1, mask2)
+
+    # Getting unique array of indices that match the criteria from
+    # mask1 and mask2.
+    temp_idx = np.unique(np.where(mask)[0])
+    # Assigning remaining indices (idx) matched in temp_idx to
+    # current_idx.
+    n_idx = idx[temp_idx]
+
+    # Selecting NearestNeighbors indices and distances for current
+    # indices being processed.
+    nn = indices[n_idx]
+    dd = distances[n_idx]
+
+    # Masking points in nn that have already been processed.
+    # This is the opposite of the approach above, where points that are
+    # still not in the graph are desired. Now, to make sure the
+    # continuity of the graph is kept, join current remaining indices
+    # to indices already in G.
+    mask = np.in1d(nn, processed_idx, invert=True).reshape(nn.shape)
+
+    # Initializing temporary list of nearest neighbors. This list
+    # is later used to accumulate points that will be added to the
+    # processed points list.
+    nntemp = []
+
+    # Looping over current indices' set of nn points and selecting
+    # knn points that have already been added/processed (mask).
+    # Also, to ensure continuity over the next iteration, select another
+    # kpairs points from indices that haven't been processed (~mask).
+    if verbose:
+        print(str(datetime.datetime.now()) + ' | >>> looping over current \
+indices set of nn points and selecting knn points that have already been \
+added/processed (mask)')
+    for i, n in enumerate(nn):
+        nn_idx = n[mask[i]][1:]
+
+        # Checking if current neighbor has an accumulated distance
+        # shorter than central node (n[0]).
+        for ni in nn_idx:
+            if D[ni] <= D[n[0]] - (nbrs_threshold / 3):
+                nntemp.append(ni)
+
+        nn_idx = n[~mask[i]][1:]
+
+        # Checking if current neighbor has an accumulated distance
+        # shorter than central node (n[0]).
+        for ni in nn_idx:
+            if D[ni] <= D[n[0]] - (nbrs_threshold / 3):
+                nntemp.append(ni)
+
+    current_idx = np.unique(nntemp)
+
+    # Appending current_idx to processed_idx.
+    processed_idx = np.append(processed_idx, current_idx)
+    processed_idx = np.unique(processed_idx).astype(int)
+
+    # Generating final path mask and setting processed indices as True.
+    path_mask = np.zeros(pcloud_w.shape[0], dtype=bool)
+    path_mask[processed_idx] = True
+
+    # Identifying points inside stem voxels and setting them to True.
+    path_mask_all = np.zeros(pcloud_v.shape[0], dtype=bool)
+    path_mask_all[pcloud_v[
+        pcloud_v.I.isin(pcloud_w.loc[path_mask].I)].index] = True
+
+    return path_mask_all
+
+
+def get_base(pcloud, base_height):
+
+    """
+    Get the base of a point cloud based on a certain height from the bottom.
+
+    Parameters
+    ----------
+    pcloud : array
+        Three-dimensional point cloud of a single tree to perform the
+        wood-leaf separation. This should be an n-dimensional array (m x n)
+        containing a set of coordinates (n) over a set of points (m).
+    base_height : float
+        Height of the base slice to mask.
+
+    Returns
+    -------
+    mask : array
+        Base slice masked as True.
+
+    """
+
+    return pcloud[:, 2] <= base_height
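A usage sketch for the voxelized variant kept in future_code. Because the module sits outside the installed API and uses a relative import of ..utility.shortpath, the import path below is an assumption that only holds once the module is wired into the package; the input file is also hypothetical:

    import numpy as np
    from tlseparation.future_code.path_detection_voxel import (
        detect_main_pathways)

    # Hypothetical n_points x 3 cloud loaded from disk.
    point_cloud = np.loadtxt('tree.txt')

    # voxel=0.1 snaps the cloud to a 10 cm grid before building the graph;
    # the returned mask is expanded back to every original point.
    path_mask_all = detect_main_pathways(point_cloud, 80, 100, 0.15,
                                         voxel=0.1, verbose=False)
    assert path_mask_all.shape[0] == point_cloud.shape[0]
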
diff --git a/tlseparation/scripts/automated_separation.py b/tlseparation/scripts/automated_separation.py
index 095c67c..5b1982b 100644
--- a/tlseparation/scripts/automated_separation.py
+++ b/tlseparation/scripts/automated_separation.py
@@ -31,7 +31,7 @@
                                 detect_main_pathways, get_base, DefaultClass)
 from ..utility import (get_diff, remove_duplicates, radius_filter,
                        class_filter, cluster_filter, continuity_filter,
-                       detect_nn_dist)
+                       detect_nn_dist, voxelize_cloud)
 
 
 def large_tree_1(arr, class_file=[], cont_filt=True,
@@ -101,15 +101,27 @@ def large_tree_1(arr, class_file=[], cont_filt=True,
     # Masking points most likely to be part of the trunk and larger branches.
     if verbose:
         print(str(datetime.datetime.now()) + ' | masking points most likely \
-to be part of the trunk and larger branches')
+to be part of the trunk and larger branches')
     try:
-        trunk_mask = detect_main_pathways(arr, 80, 20, .15, voxel=.1, verbose=verbose)
-
-    try:
-        trunk_mask = detect_main_pathways(arr, 80, 100, nndist,
-                                          verbose=verbose)
-
-        trunk_ids = np.where(trunk_mask)[0]
+        # Voxelizing cloud:
+        vox = voxelize_cloud(arr, voxel_size=.1)
+        vox_coords = np.asarray(vox.keys())
+
+        # Using detect_main_pathways on voxel coordinates.
+        trunk_mask_voxel = detect_main_pathways(vox_coords, 80, 100, .15,
+                                                verbose=verbose)
+        # Obtaining indices of points in arr that are inside voxels masked
+        # as trunk (True) in trunk_mask_voxel.
+        trunk_ids = np.unique([j for i in vox_coords[trunk_mask_voxel] for
+                               j in vox[tuple(i)]])
+
+        # Setting up trunk mask, as its opposite represents points not
+        # detected as trunk.
+        trunk_mask = np.zeros(arr.shape[0], dtype=bool)
+        trunk_mask[trunk_ids] = True
+
+        # Obtaining indices of points that were not detected as part of the
+        # trunk.
         not_trunk_ids = np.where(~trunk_mask)[0].astype(int)
     except:
         trunk_ids = []
@@ -217,7 +229,7 @@ def large_tree_1(arr, class_file=[], cont_filt=True,
         if verbose:
             print(str(datetime.datetime.now()) + ' | applying \
 radius_filter on filtered twig point cloud')
-        twig_2_1_mask = radius_filter(arr[twig_2], 0.05, 10)
+        twig_2_1_mask = radius_filter(arr[twig_2], 0.2, 5)
         twig_2_1 = twig_2[twig_2_1_mask]
     except:
         twig_2_1 = twig_2
@@ -228,7 +240,7 @@ def large_tree_1(arr, class_file=[], cont_filt=True,
         if verbose:
             print(str(datetime.datetime.now()) + ' | applying \
 cluster_filter to remove isolated clusters of points in twig_2_mask')
-        twig_2_2_mask = cluster_filter(arr[twig_2_1], 0.1, 20, 3)
+        twig_2_2_mask = cluster_filter(arr[twig_2_1], 0.1, 10, 3)
         twig_2_2 = twig_2_1[twig_2_2_mask]
     except:
         twig_2_2 = twig_2_1
@@ -238,7 +250,7 @@ def large_tree_1(arr, class_file=[], cont_filt=True,
         if verbose:
             print(str(datetime.datetime.now()) + ' | applying \
 radius_filter to trunk_2 point cloud')
-        trunk_2_1_mask = radius_filter(arr[trunk_2], 0.05, 10)
+        trunk_2_1_mask = radius_filter(arr[trunk_2], 0.2, 5)
         trunk_2_1 = trunk_2[trunk_2_1_mask]
     except:
         trunk_2_1 = trunk_2
@@ -249,7 +261,7 @@ def large_tree_1(arr, class_file=[], cont_filt=True,
         if verbose:
             print(str(datetime.datetime.now()) + ' | applying \
 cluster_filter to remove isolated clusters of points in twig_2_mask')
-        trunk_2_2_mask = cluster_filter(arr[trunk_2_1], 0.1, 20, 3)
+        trunk_2_2_mask = cluster_filter(arr[trunk_2_1], 0.05, 10, 3)
         trunk_2_2 = trunk_2_1[trunk_2_2_mask]
     except:
         trunk_2_2 = trunk_2_1
@@ -289,7 +301,8 @@ def large_tree_1(arr, class_file=[], cont_filt=True,
         print(str(datetime.datetime.now()) + ' | applying continuity \
 filter in an attempt to close gaps in the wood point cloud')
     try:
-        wood_final, leaf_final = continuity_filter(wood, leaf, rad=nndist)
+        wood_final, leaf_final = continuity_filter(wood, leaf,
+                                                   rad=nndist*1.2)
     except:
         wood_final = wood
         leaf_final = leaf
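The list comprehension in large_tree_1 above is the step that carries the voxel-level mask back to point level. A self-contained sketch of that expansion, using a toy vox dictionary shaped like voxelize_cloud's output (all values here are made up):

    import numpy as np

    # vox maps each voxel coordinate to the indices of the points inside it;
    # vox_coords stacks the keys; trunk_mask_voxel flags trunk voxels.
    vox = {(0.0, 0.0, 0.0): [0, 3],
           (0.1, 0.0, 0.0): [1],
           (0.0, 0.1, 0.0): [2]}
    vox_coords = np.asarray(list(vox.keys()))
    trunk_mask_voxel = np.array([True, False, True])

    # Every point index stored in a 'True' voxel becomes a trunk point index.
    trunk_ids = np.unique([j for i in vox_coords[trunk_mask_voxel]
                           for j in vox[tuple(i)]])
    # trunk_ids -> array([0, 2, 3])

Note the list(vox.keys()) in the sketch: under Python 3 the dictionary view must be materialized before np.asarray, whereas the Python 2 code in the diff can pass vox.keys() directly.
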
diff --git a/tlseparation/utility/__init__.py b/tlseparation/utility/__init__.py
index dd1433b..5815bae 100644
--- a/tlseparation/utility/__init__.py
+++ b/tlseparation/utility/__init__.py
@@ -31,3 +31,4 @@
 from .data_utils import *
 from .filtering import *
 from .cloud_analysis import *
+from .voxels import *
diff --git a/tlseparation/utility/voxels.py b/tlseparation/utility/voxels.py
new file mode 100644
index 0000000..99443b8
--- /dev/null
+++ b/tlseparation/utility/voxels.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2017, Matheus Boni Vicari, TLSeparation Project
+# All rights reserved.
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+__author__ = "Matheus Boni Vicari"
+__copyright__ = "Copyright 2017, TLSeparation Project"
+__credits__ = ["Matheus Boni Vicari", "Phil Wilkes"]
+__license__ = "GPL3"
+__version__ = "1.2.1.5"
+__maintainer__ = "Matheus Boni Vicari"
+__email__ = "matheus.boni.vicari@gmail.com"
+__status__ = "Development"
+
+
+from collections import defaultdict
+
+
+def voxelize_cloud(arr, voxel_size):
+
+    """
+    Generates a dictionary of voxels containing their grid coordinates
+    and the indices of points belonging to each voxel.
+
+    Parameters
+    ----------
+    arr : array
+        Array of points/entries to voxelize.
+    voxel_size : float
+        Length of all voxels sides/edges.
+
+    Returns
+    -------
+    vox : defaultdict
+        Dictionary containing voxels. Keys are each voxel's grid coordinates
+        (point coordinates truncated to multiples of voxel_size) and values
+        are indices of points in arr inside each voxel.
+
+    """
+
+    # Truncating each coordinate to its voxel grid value. Points inside the
+    # same voxel share the same key.
+    voxels_ids = (arr / voxel_size).astype(int) * voxel_size
+    vox = defaultdict(list)
+
+    # Accumulating the index of every point under its voxel's key.
+    for i, v in enumerate(voxels_ids):
+        vox[tuple(v)].append(i)
+
+    return vox
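A short round-trip sketch of voxelize_cloud as defined above (the random cloud is a made-up input):

    import numpy as np
    from tlseparation.utility import voxelize_cloud

    # Hypothetical cloud: 1000 points inside a 2 m cube.
    arr = np.random.uniform(0, 2, size=(1000, 3))
    vox = voxelize_cloud(arr, voxel_size=0.1)

    # Keys are grid coordinates (truncated toward zero); values list the
    # indices of the points of arr that fall inside each voxel.
    coord, ids = next(iter(vox.items()))

    # Every point lands in exactly one voxel, so the index lists
    # partition arr.
    assert sum(len(v) for v in vox.values()) == arr.shape[0]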