From a7023349c1a94137b64093a51da22989a33d1eed Mon Sep 17 00:00:00 2001 From: Matheus Boni Vicari Date: Sat, 3 Jun 2017 23:03:23 +0100 Subject: [PATCH] First commit of an initial working version. --- .gitignore | 3 + CHANGES.txt | 1 + LICENSE => LICENSE.txt | 0 MANIFEST.in | 1 + README.rst | 28 ++++ contributors.txt | 2 + lidartf/runframework.py | 123 +++++++++++++++ lidartf/testres.py | 273 ++++++++++++++++++++++++++++++++++ lidartf/tests/__init__.py | 7 + lidartf/tests/wlseparation.py | 105 +++++++++++++ lidartf/utils/__init__.py | 7 + lidartf/utils/intersection.py | 114 ++++++++++++++ requirements.txt | 4 + setup.py | 20 +++ 14 files changed, 688 insertions(+) create mode 100644 CHANGES.txt rename LICENSE => LICENSE.txt (100%) create mode 100644 MANIFEST.in create mode 100644 README.rst create mode 100644 contributors.txt create mode 100644 lidartf/runframework.py create mode 100644 lidartf/testres.py create mode 100644 lidartf/tests/__init__.py create mode 100644 lidartf/tests/wlseparation.py create mode 100644 lidartf/utils/__init__.py create mode 100644 lidartf/utils/intersection.py create mode 100644 requirements.txt create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index 7bbc71c..30563bf 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,6 @@ ENV/ # mypy .mypy_cache/ + +# Compressed files +.rar diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 0000000..786ea42 --- /dev/null +++ b/CHANGES.txt @@ -0,0 +1 @@ +First commit to new repository. 
\ No newline at end of file diff --git a/LICENSE b/LICENSE.txt similarity index 100% rename from LICENSE rename to LICENSE.txt diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..649979b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include LICENSE.txt README.rst CHANGES.txt contributors.txt diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..65181e0 --- /dev/null +++ b/README.rst @@ -0,0 +1,28 @@ +======= +lidartf +======= + +The LiDAR Testing Framework (lidartf) aims to provide the tools necessary to assess methodologies/tools developed to process LiDAR data. + +This is still a work in progress, requiring some polishing to improve user-friendliness, as well as additional testing data. Also, new tests for different types of processing +will be added over time. + +The LiDAR Testing Framework is being developed as part of my PhD research, supervised by Dr. Mat Disney, in the Department of Geography at University College London (UCL). My research +is funded through Science Without Borders from the National Council of Technological and Scientific Development (10.13039/501100003593) – Brazil (Process number 233849/2014-9). + +For any questions or suggestions, feel free to contact me at one of the following e-mail addresses: matheus.boni.vicari@gmail.com or matheus.vicari.15@ucl.ac.uk + +------------ +Requirements +------------ +mayavi>=4.5.0 +numpy>=1.11.3 +pandas>=0.19.2 +setuptools>=36.0.1 + + +----------------- +Who do I talk to? 
+----------------- + +* Matheus Boni Vicari (matheus.boni.vicari@gmail.com or matheus.vicari.15@ucl.ac.uk) diff --git a/contributors.txt b/contributors.txt new file mode 100644 index 0000000..069aaa6 --- /dev/null +++ b/contributors.txt @@ -0,0 +1,2 @@ +Matheus Boni Vicari + (matheus.boni.vicari@gmail.com or matheus.vicari.15@ucl.ac.uk) \ No newline at end of file diff --git a/lidartf/runframework.py b/lidartf/runframework.py new file mode 100644 index 0000000..ad6e9dd --- /dev/null +++ b/lidartf/runframework.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +""" +Module to perform the framework testing. + +@author: Matheus Boni Vicari (2017). +""" + +import numpy as np +import imp +import os +import pandas as pd +import sys +from tests.wlseparation import test_separation + + +def run(fun, results_folder, plot_cloud, dataset, *args): + + """ + Function to run the testing framework. This is the main function in the + package and manages input, testing and output. + + Parameters + ---------- + fun: list of str + List containing module path and function name (in this order) to import + and test. + results_folder: str + Path of the directory to save the testing results. Path must finish + with a separator (e.g. / or \). + plot_cloud: bool + Option to plot or not the separated point clouds. + dataset: list + List of dataset paths to use as data for the testing. + args: list + List of arguments necessary to run the function to be tested. + These arguments should be inserted in the same order as required by + function to be tested. Even if using only single values for each + argument, it should be inserted inside a list. + + Returns + ------- + res: pandas.DataFrame + Set of results for the testing of the function. + params: list + List of parameters used to run the tested function. 
+ + Usage + ----- + >>> dataset = ['path/to/dataset.txt'] + >>> fun = ['path/to/module_file', 'function_name'] + >>> res, t = run(fun, 'results/', True, dataset, [10, 20, 40, 100],\ + [100, 200, 300]) + """ + + # Importing module 'm' and function 'f' to test. + m = import_(fun[0]) + f = getattr(m, fun[1]) + + # Initializing empty lists to store parameters and results of the test. + results = [] + params = [] + + # Looping over every data in dataset. + for i in dataset: + + # Importing current dataset. + data = np.loadtxt(i, delimiter=' ') + + # Extracting the dataset filename. + filename = os.path.basename(i).split('.')[0] + + # Running the separation test. + results, params = test_separation(data, f, filename, results_folder, + plot_cloud, *args) + + # Creating a pandas.DataFrame from the test results. + res = pd.DataFrame(results, columns=['accuracy', 'tp', 'fp', 'tn', 'fn', + 'F_wood', 'F_leaf', 'k', 'time']) + + # Saving the testing parameters to a text file. + np.savetxt(results_folder + 'params_' + filename + '.txt', params, + fmt='%1.2f') + + # Saving the testing results to a text file. + res.to_csv(results_folder + 'results_' + filename + '.txt', + float_format='%1.2f') + + return res, params + + +def import_(filename): + + """ + Function to import a Python module from a filename. + + Parameters + ---------- + filename: str + Path of the module to import without extension. + + """ + + # Splitting filename to extract path and name of the module. + path, name = os.path.split(filename) + # Extracting name and extension. + name, ext = os.path.splitext(name) + + # Appending modulo path to system path. + sys.path.append(path) + + # Finding module and importing it. 
+ file_, filename, data = imp.find_module(name, [path]) + mod = imp.load_module(name, file_, filename, data) + + return mod + + +if __name__ == "__main__": + + fun = [r'path_to_module', 'name_of_function_to_test'] + dataset = [r'data_to_use_in_test_1', + r'data_to_use_in_test_1'] + res, t = run(fun, 'path_to_results', True, dataset, [list_arguments_1], [list_of_arguments_2]) diff --git a/lidartf/testres.py b/lidartf/testres.py new file mode 100644 index 0000000..8d63e85 --- /dev/null +++ b/lidartf/testres.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- +""" +Module to perform the results assessment of the framework. + +@author: Matheus Boni Vicari (2017). +""" + +from __future__ import division +from utils.intersection import count_intersection + + +# Defining the summary function. +def summary(Cw, Cl, W, L): + + """ + Function to generate a summary of all result parameters calculated within + this module. + + Parameters + ---------- + R: tuple + Sets of 3D point coordinates from the points cloud output by the + tested function. + T: tuple + Sets of 3D point coordinates, referenced as truth points, from the + input test data. + F: tuple + Sets of 3D point coordinates, referenced as false points, from the + input test data. + + Returns + ------- + p_removed: int + Total amount of points not present in the result set of points. + This parameter might be helpful when assessing a classification or + filtering processing. + accuracy: float + Accuracy of the processing when observing the tests of true/false + positives and negatives. + tp: numpy.float64 + Number of true positives. + fp: numpy.float64 + Number of false positives. + tn: numpy.float64 + Number of true negatives. + fn: numpy.float64 + Number of false negatives. + Fscore: numpy.float64 + F-score calculated value. + k: numpy.float64 + Cohen's kappa. + + """ + + tp, fp, tn, fn = test_results(Cw, Cl, W, L) + + # Executing test_results. 
+# tp, fp, tn, fn = test_results(Cw, Cl, W, L) + + # Calculating the total amount of points processed. + total = tp + fp + tn + fn + + # Calculating the accuracy of the processing. This is done by the ratio + # of the total amount of points correctly processed (summation of true + # positives and true negatives) and the total amount of points processed. + accuracy = (tp + tn) / total + + # Executing the fscore and kappa functions. + F_wood = fscore(tp, fp, fn) + F_leaf = fscore(tn, fn, fp) + k = kappa(tp, fp, tn, fn) + + return accuracy, tp, fp, tn, fn, F_wood, F_leaf, k + + +# Defining the fscore function. +def fscore(tp, fp, fn): + + """ + This function calculates the F-score of a set of results from the testing + framework. + The calculations area based on Goutte and Gaussier (2005), Sokolova et al. + (2006) and Tao et al. (2015). + + Parameters + ---------- + R: tuple + Sets of 3D point coordinates from the points cloud output by the + tested function. + T: tuple + Sets of 3D point coordinates, referenced as truth points, from the + input test data. + F: tuple + Sets of 3D point coordinates, referenced as false points, from the + input test data. + + Returns + ------- + Fscore: numpy.float64 + F-score calculated value. + + References + ---------- + .. [1] Goutte, C., Gaussier, E., 2005. A probabilistic interpretation + of precision, recall and F-score, with implication for evaluation. + Lect. Notes Comput. Sci. 3408, 345–359. + + .. [2] Sokolova, M., Japkowicz, N., Szpakowicz, S., 2006. Beyond accuracy, + F-score and ROC: a family of discriminant measures for performance + evaluation. In: Sattar, A., Kang, B.-H. (Eds.), AI 2006: Advances in + Artificial Intelligence. Springer, Berlin, Heidelberg, pp. + 1015–1021. + + .. [3] Tao, S., Wu, F., Guo, Q., Wang, Y., Li, W., Xue, B., Hu, X., Li, P., + Tian, D., Li, C., Yao, H., Li, Y., Xu, G., Fang, J., 2015. + Segmenting tree crowns from terrestrial and mobile LiDAR data by + exploring ecological theories. 
ISPRS Journal of Photogrammetry and + Remote Sensing 110, 66–76. + + """ + + # Calculating recall (r) and precision (p). + r = tp / (tp + fn) + p = tp / (tp + fp) + + # Calculating the F-score. + Fscore = 2 * ((r * p) / (r + p)) + + return Fscore + + +# Defining the kappa function. +def kappa(tp, fp, tn, fn): + + """ + This function calculates kappa according to the original publication from + Jacob Cohen (1960). The only modification made for the use of Cohen's kappa + on the testing framework is the if-else control to avoid division by 0 when + the test data has no leaf/noise points. In these cases, the k variable is + set to a fill value (9999). + + Parameters + ---------- + tp: int or float + Number of true positives. + fp: int or float + Number of false positives. + tn: int or float + Number of true negatives. + fn: int or float + Number of false negatives. + + Returns + ------- + k: numpy.float64 + Absolute value of Cohen's kappa, or the fill value 9999 when the + probability of random agreement (pe) is not smaller than 1. + + References + ---------- + .. [1] Cohen, J. A Coefficient of Agreement for Nominal Scales. Educational + and Psychological Measurement, April 1960, 20: 37-46. + + """ + + # Executing the functions p_proportionate_agreement and + # p_random_agreement to obtain the po and pe, respectively. + po = p_proportionate_agreement(tp, fp, tn, fn) + pe = p_random_agreement(tp, fp, tn, fn) + + # Testing if pe is smaller than 1. If so, calculate k and, + # if not, assign a fill value to k (9999) and return it unchanged. + if pe < 1: + k = (po - pe) / (1 - pe) + else: + k = 9999 + return k + + return abs(k) + + +# Defining the p_proportionate_agreement function. 
+def p_proportionate_agreement(tp, fp, tn, fn): + + """ + The probability of proportionate agreement is the relative amount + of points in agreement with the "truth", which is the sum of true + positives and true negatives divided by the total number of points. + + Parameters + ---------- + tp: int or float + Number of true positives. + fp: int or float + Number of false positives. + tn: int or float + Number of true negatives. + fn: int or float + Number of false negatives. + + Returns + ------- + po: numpy.float64 + Probability of proportionate agreement. + + """ + + # Calculating the total amount of points. + total = tp + fp + tn + fn + + # Calculating the proportionate agreement. + po = (tp + tn) / total + return po + + +# Defining the p_random_agreement function. +def p_random_agreement(tp, fp, tn, fn): + + """ + The probability of random agreement is the probability of the "truth" + to be achieved randomly, independent of the method used to classify/ + identify each point as wood or leaf/noise. + + Parameters + ---------- + tp: int or float + Number of true positives. + fp: int or float + Number of false positives. + tn: int or float + Number of true negatives. + fn: int or float + Number of false negatives. + + Returns + ------- + pe: numpy.float64 + Probability of random agreement. + + """ + + # Calculating the total amount of points. + total = tp + fp + tn + fn + # Products of the marginal totals of each class, divided by the total. + m_original = ((tp + fn) * (tp + fp)) / total + m_classification = ((fp + tn) * (fn + tn)) / total + + # Calculating the probability of random agreement. + pe = (m_original + m_classification) / total + + return pe + + +# Defining the test_results function. 
+def test_results(Cw, Cl, W, L): + +# tw = count_intersection(Cw, W) +# fw = count_intersection(Cw, L) +# tl = count_intersection(Cl, L) +# fl = count_intersection(Cl, W) + + # OR + + tw = count_intersection(Cw, W) + tl = count_intersection(Cl, L) + fw = abs(Cw.shape[0] - tw) + fl = abs(Cl.shape[0] - tl) + + return tw, fw, tl, fl + + +if __name__ == "__main__": + pass diff --git a/lidartf/tests/__init__.py b/lidartf/tests/__init__.py new file mode 100644 index 0000000..a0efdc0 --- /dev/null +++ b/lidartf/tests/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Apr 06 15:50:31 2017 + +@author: mathe +""" + diff --git a/lidartf/tests/wlseparation.py b/lidartf/tests/wlseparation.py new file mode 100644 index 0000000..4643248 --- /dev/null +++ b/lidartf/tests/wlseparation.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +""" +Module to perform the wood-leaf separation testing. + +@author: Matheus Boni Vicari (2017). +""" +import numpy as np +import itertools +import testres as tr +import pandas as pd +import time +import mayavi.mlab as mlab + + +def test_separation(data, f, filename, results_folder, plot_cloud, *args): + + # Initializing empty lists to store parameters and results of the test. + results = [] + params = [] + + # Generating the reference wood and leaf datasets. + wood = remove_duplicates(data[data[:, 3] == 0]) + leaf = remove_duplicates(data[data[:, 3] == 1]) + + # Stacking references data to create the testing dataset. + test_data = np.vstack((wood, leaf)) + + # Iterating over the product of all input arguments. This will generate + # all possible combinations of arguments from 'args' to test. + for j in itertools.product(*args): + + # Starting the time counter. + start = time.time() + + # Trying to separate the testing dataset with the current + # arguments. + try: + # Run function 'f' with arguments j. + w_out, l_out, p = f(test_data, *j) + + # If set, plot and save separated point clouds. 
+ if plot_cloud: + mlab.figure(bgcolor=(1, 1, 1)) + mlab.points3d(w_out[:, 0], w_out[:, 1], w_out[:, 2], + color=(0.4, 0.2, 0), mode='point') + mlab.points3d(l_out[:, 0], l_out[:, 1], l_out[:, 2], + color=(0, 0.4, 0), mode='point') + mlab.savefig(results_folder + 'cloud_' + filename + '_' + + '_'.join(map(str, j)) + '.png', + size=[1920, 1080]) + mlab.close() + + # Testing the separated point clouds against the reference + # point clouds. + tempres = tr.summary(w_out, l_out, wood[:], leaf[:]) + # Joining processing time to the testing results. + tempres = tempres + (time.time() - start, ) + + # Printing current iteration results. + print('\n Current results for dataset %s and arguments %s:' % + (filename, j)) + print tempres + + # Appending the test results to results list. + results.append(tempres[:]) + + # Deleting separated point clouds and current results to avoid + # conflicts when + del(w_out, l_out, tempres) + + except: + # If not possible to separate the point cloud, fill current results + # as zeroes. + results.append((0, 0, 0, 0, 0, 0, 0, 0, 0)) + + # Append current parameters (arguments). + params.append(j) + + return results, params + + +def remove_duplicates(arr): + + """ + Function to remove duplicate rows from an array. + + Parameters + ---------- + arr: numpy.ndarray + N-dimensional array to uniquify rows. + + Returns + ------- + unique: numpy.ndarray + Array with unique rows. + + """ + + # Creating a pandas.DataFrame from the input array. + df = pd.DataFrame({'x': arr[:, 0], 'y': arr[:, 1], 'z': arr[:, 2]}) + + # Removing duplicate rows. 
+ unique = df.drop_duplicates(['x', 'y', 'z']) + + return np.asarray(unique) diff --git a/lidartf/utils/__init__.py b/lidartf/utils/__init__.py new file mode 100644 index 0000000..a0efdc0 --- /dev/null +++ b/lidartf/utils/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Apr 06 15:50:31 2017 + +@author: mathe +""" + diff --git a/lidartf/utils/intersection.py b/lidartf/utils/intersection.py new file mode 100644 index 0000000..84d25a9 --- /dev/null +++ b/lidartf/utils/intersection.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" +Module to run an intersection analysis between two point clouds. + +@author: Matheus Boni Vicari (2017). +""" + +import numpy as np +import pandas as pd + + +def get_diff(arr1, arr2): + + """ + Function to generate a difference point cloud (points not intersected) + between point clouds. + + Parameters + ---------- + arr1: numpy.ndarray + First point cloud to analyze. + + arr2: numpy.ndarray + Second point cloud to analyze. + + Returns + ------- + diff: numpy.ndarray + Difference point cloud. + + Examples + -------- + >>> arr1 = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]]) + >>> arr1 + array([[0, 0, 0], + [1, 1, 1], + [2, 2, 2]]) + >>> arr2 = np.array([[0, 0, 0], [1, 1, 1], [3, 3, 3]]) + >>> arr2 + array([[0, 0, 0], + [1, 1, 1], + [3, 3, 3]]) + >>> get_diff(arr1, arr2) + array([[2, 2, 2], + [3, 3, 3]]) + + """ + + # Making sure arr1 and arr2 have the same number of dimensions. + assert arr1.shape[1] == arr2.shape[1] + + # Stacking both arrays. + arr3 = np.vstack((arr1, arr2)) + + # Generating a pandas.DataFrame from the stacked array. + df = pd.DataFrame(arr3) + + # Removing all points (rows) that are not unique. + diff = df.drop_duplicates(keep=False) + + return np.asarray(diff) + + +def count_intersection(arr1, arr2): + + """ + Function to calculate the number of common points between two clouds. + + Parameters + ---------- + arr1: numpy.ndarray + First point cloud to analyze. 
+ + arr2: numpy.ndarray + Second point cloud to analyze. + + Returns + ------- + count: int + Number of common points. + + Examples + -------- + >>> arr1 = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]]) + >>> arr1 + array([[0, 0, 0], + [1, 1, 1], + [2, 2, 2]]) + >>> arr2 = np.array([[0, 0, 0], [1, 1, 1], [3, 3, 3]]) + >>> arr2 + array([[0, 0, 0], + [1, 1, 1], + [3, 3, 3]]) + >>> count_intersection(arr1, arr2) + 2 + + """ + + # Making sure arr1 and arr2 have the same number of dimensions. + assert arr1.shape[1] == arr2.shape[1] + + # Stacking both arrays. + arr3 = np.vstack((arr1, arr2)) + + # Generating a pandas.DataFrame from the stacked array. + df = pd.DataFrame(arr3) + + # Obtainin the duplicated points in the DataFrame. + diff = np.asarray(df.duplicated(keep=False)) + + # Calculating the number of intersected points. + intercount = np.sum(diff) / 2 + + return intercount.astype(np.int) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4633eb2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +mayavi>=4.5.0 +numpy>=1.11.3 +pandas>=0.19.2 +setuptools>=36.0.1 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e6b9731 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +""" +Setup file for the LiDAR Processing Testing Framework package. + +@author: Matheus Boni Vicari (matheus.boni.vicari@gmail.com) +""" + +from setuptools import setup + +setup( + name="LiDAR Processing Testing Framework", + version="0.0.1", + author='Matheus Boni Vicari', + author_email='matheus.boni.vicari@gmail.com', + install_requires=[ + "numpy", + "tinfiltering" + ], + # ... +)