From a7023349c1a94137b64093a51da22989a33d1eed Mon Sep 17 00:00:00 2001
From: Matheus Boni Vicari <matheus_boni_vicari@hotmail.com>
Date: Sat, 3 Jun 2017 23:03:23 +0100
Subject: [PATCH] First commit of an initial working version.

---
 .gitignore                    |   3 +
 CHANGES.txt                   |   1 +
 LICENSE => LICENSE.txt        |   0
 MANIFEST.in                   |   1 +
 README.rst                    |  28 ++++
 contributors.txt              |   2 +
 lidartf/runframework.py       | 123 +++++++++++++++
 lidartf/testres.py            | 273 ++++++++++++++++++++++++++++++++++
 lidartf/tests/__init__.py     |   7 +
 lidartf/tests/wlseparation.py | 105 +++++++++++++
 lidartf/utils/__init__.py     |   7 +
 lidartf/utils/intersection.py | 114 ++++++++++++++
 requirements.txt              |   4 +
 setup.py                      |  20 +++
 14 files changed, 688 insertions(+)
 create mode 100644 CHANGES.txt
 rename LICENSE => LICENSE.txt (100%)
 create mode 100644 MANIFEST.in
 create mode 100644 README.rst
 create mode 100644 contributors.txt
 create mode 100644 lidartf/runframework.py
 create mode 100644 lidartf/testres.py
 create mode 100644 lidartf/tests/__init__.py
 create mode 100644 lidartf/tests/wlseparation.py
 create mode 100644 lidartf/utils/__init__.py
 create mode 100644 lidartf/utils/intersection.py
 create mode 100644 requirements.txt
 create mode 100644 setup.py

diff --git a/.gitignore b/.gitignore
index 7bbc71c..30563bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,3 +99,6 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+# Compressed files
+.rar
diff --git a/CHANGES.txt b/CHANGES.txt
new file mode 100644
index 0000000..786ea42
--- /dev/null
+++ b/CHANGES.txt
@@ -0,0 +1 @@
+First commit to new repository.
\ No newline at end of file
diff --git a/LICENSE b/LICENSE.txt
similarity index 100%
rename from LICENSE
rename to LICENSE.txt
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..649979b
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include LICENSE.txt README.rst CHANGES.txt contributors.txt
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..65181e0
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,28 @@
+=======
+lidartf
+=======
+
+The LiDAR Testing Framework (lidartf) aims to provide the tools necessary to assess methodologies/tools developed to process LiDAR data.
+
+This is still a work in progress, requiring some polishing to improve user-friendliness and additional testing data. Also, new tests, for different types of processing,
+will be added over time.
+
+The LiDAR Testing Framework is being developed as part of my PhD research, supervised by Dr. Mat Disney, in the Department of Geography at University College London (UCL). My research 
+is funded through Science Without Borders from the National Council of Technological and Scientific Development (10.13039/501100003593) – Brazil (Process number 233849/2014-9). 
+
+Any questions or suggestions, feel free to contact me using one of the following e-mails: matheus.boni.vicari@gmail.com or matheus.vicari.15@ucl.ac.uk
+
+------------
+Requirements
+------------
+mayavi>=4.5.0
+numpy>=1.11.3
+pandas>=0.19.2
+setuptools>=36.0.1
+
+
+-----------------
+Who do I talk to?
+-----------------
+
+* Matheus Boni Vicari (matheus.boni.vicari@gmail.com or matheus.vicari.15@ucl.ac.uk)
diff --git a/contributors.txt b/contributors.txt
new file mode 100644
index 0000000..069aaa6
--- /dev/null
+++ b/contributors.txt
@@ -0,0 +1,2 @@
+Matheus Boni Vicari
+ (matheus.boni.vicari@gmail.com or matheus.vicari.15@ucl.ac.uk)
\ No newline at end of file
diff --git a/lidartf/runframework.py b/lidartf/runframework.py
new file mode 100644
index 0000000..ad6e9dd
--- /dev/null
+++ b/lidartf/runframework.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Module to perform the framework testing.
+
+@author: Matheus Boni Vicari (2017).
+"""
+
+import numpy as np
+import imp
+import os
+import pandas as pd
+import sys
+from tests.wlseparation import test_separation
+
+
+def run(fun, results_folder, plot_cloud, dataset, *args):
+
+    """
+    Function to run the testing framework. This is the main function in the
+    package and manages input, testing and output.
+
+    Parameters
+    ----------
+    fun: list of str
+        List containing module path and function name (in this order) to import
+        and test.
+    results_folder: str
+        Path of the directory to save the testing results. Path must finish
+        with a separator (e.g. / or \).
+    plot_cloud: bool
+        Option to plot or not the separated point clouds.
+    dataset: list
+        List of dataset paths to use as data for the testing.
+    args: list
+        List of arguments necessary to run the function to be tested.
+        These arguments should be inserted in the same order as required by
+        function to be tested. Even if using only single values for each
+        argument, it should be inserted inside a list.
+
+    Returns
+    -------
+    res: pandas.DataFrame
+        Set of results for the testing of the function.
+    params: list
+        List of parameters used to run the tested function.
+
+    Usage
+    -----
+    >>> dataset = ['path/to/dataset.txt']
+    >>> fun = ['path/to/module_file', 'function_name']
+    >>> res, t = run(fun, 'results/', True, dataset, [10, 20, 40, 100],\
+ [100, 200, 300])
+    """
+
+    # Importing module 'm' and function 'f' to test.
+    m = import_(fun[0])
+    f = getattr(m, fun[1])
+
+    # Initializing empty lists to store parameters and results of the test.
+    results = []
+    params = []
+
+    # Looping over every data in dataset.
+    for i in dataset:
+
+        # Importing current dataset.
+        data = np.loadtxt(i, delimiter=' ')
+
+        # Extracting the dataset filename.
+        filename = os.path.basename(i).split('.')[0]
+
+        # Running the separation test.
+        results, params = test_separation(data, f, filename, results_folder,
+                                          plot_cloud, *args)
+
+    # Creating a pandas.DataFrame from the test results.
+    res = pd.DataFrame(results, columns=['accuracy', 'tp', 'fp', 'tn', 'fn',
+                                         'F_wood', 'F_leaf', 'k', 'time'])
+
+    # Saving the testing parameters to a text file.
+    np.savetxt(results_folder + 'params_' + filename + '.txt', params,
+               fmt='%1.2f')
+
+    # Saving the testing results to a text file.
+    res.to_csv(results_folder + 'results_' + filename + '.txt',
+               float_format='%1.2f')
+
+    return res, params
+
+
+def import_(filename):
+
+    """
+    Function to import a Python module from a filename.
+
+    Parameters
+    ----------
+    filename: str
+        Path of the module to import without extension.
+
+    """
+
+    # Splitting filename to extract path and name of the module.
+    path, name = os.path.split(filename)
+    # Extracting name and extension.
+    name, ext = os.path.splitext(name)
+
+    # Appending module path to system path.
+    sys.path.append(path)
+
+    # Finding module and importing it.
+    file_, filename, data = imp.find_module(name, [path])
+    mod = imp.load_module(name, file_, filename, data)
+
+    return mod
+
+
+if __name__ == "__main__":
+
+    fun = [r'path_to_module', 'name_of_function_to_test']
+    dataset = [r'data_to_use_in_test_1',
+               r'data_to_use_in_test_1']
+    res, t = run(fun, 'path_to_results', True, dataset, [list_arguments_1], [list_of_arguments_2])
diff --git a/lidartf/testres.py b/lidartf/testres.py
new file mode 100644
index 0000000..8d63e85
--- /dev/null
+++ b/lidartf/testres.py
@@ -0,0 +1,273 @@
+# -*- coding: utf-8 -*-
+"""
+Module to perform the results assessment of the framework.
+
+@author: Matheus Boni Vicari (2017).
+"""
+
+from __future__ import division
+from utils.intersection import count_intersection
+
+
+# Defining the summary function.
+def summary(Cw, Cl, W, L):
+
+    """
+    Function to generate a summary of all result parameters calculated within
+    this module.
+
+    Parameters
+    ----------
+    Cw: numpy.ndarray
+            Set of 3D point coordinates classified as wood by the tested
+            function.
+    Cl: numpy.ndarray
+            Set of 3D point coordinates classified as leaf by the tested
+            function.
+    W, L: numpy.ndarray
+            Sets of 3D point coordinates of the reference (truth) wood and
+            leaf points, respectively, from the input test data.
+
+    Returns
+    -------
+    accuracy: float
+            Accuracy of the processing, calculated as the ratio between the
+            number of points correctly processed (true positives plus true
+            negatives) and the total number of points processed.
+    tp: int
+            Number of true positives.
+    fp: int
+            Number of false positives.
+    tn: int
+            Number of true negatives.
+    fn: int
+            Number of false negatives.
+    F_wood: float
+            F-score calculated with the wood points as the positive class.
+    F_leaf: float
+            F-score calculated with the leaf points as the positive class.
+    k: float
+            Cohen's kappa, as returned by the kappa function, which uses
+            9999 as a fill value.
+
+    """
+
+    tp, fp, tn, fn = test_results(Cw, Cl, W, L)
+
+    # Executing test_results.
+#    tp, fp, tn, fn = test_results(Cw, Cl, W, L)
+
+    # Calculating the total amount of points processed.
+    total = tp + fp + tn + fn
+
+    # Calculating the accuracy of the processing. This is done by the ratio
+    # of the total amount of points correctly processed (summation of true
+    # positives and true negatives) and the total amount of points processed.
+    accuracy = (tp + tn) / total
+
+    # Executing the fscore and kappa functions.
+    F_wood = fscore(tp, fp, fn)
+    F_leaf = fscore(tn, fn, fp)
+    k = kappa(tp, fp, tn, fn)
+
+    return accuracy, tp, fp, tn, fn, F_wood, F_leaf, k
+
+
+# Defining the fscore function.
+def fscore(tp, fp, fn):
+
+    """
+    This function calculates the F-score of a set of results from the testing
+    framework.
+    The calculations are based on Goutte and Gaussier (2005), Sokolova et al.
+    (2006) and Tao et al. (2015).
+
+    Parameters
+    ----------
+    tp: int or float
+            Number of true positives, i.e. points correctly assigned to the
+            class being scored.
+    fp: int or float
+            Number of false positives, i.e. points wrongly assigned to the
+            class being scored.
+    fn: int or float
+            Number of false negatives, i.e. points of the class being scored
+            that were assigned to the other class.
+
+    Returns
+    -------
+    Fscore: numpy.float64
+            F-score calculated value.
+
+    References
+    ----------
+    .. [1] Goutte, C., Gaussier, E., 2005. A probabilistic interpretation
+           of precision, recall and F-score, with implication for evaluation.
+           Lect. Notes Comput. Sci. 3408, 345–359.
+
+    .. [2] Sokolova, M., Japkowicz, N., Szpakowicz, S., 2006. Beyond accuracy,
+           F-score and ROC: a family of discriminant measures for performance
+           evaluation. In: Sattar, A., Kang, B.-H. (Eds.), AI 2006: Advances in
+           Artificial Intelligence. Springer, Berlin, Heidelberg, pp.
+           1015–1021.
+
+    .. [3] Tao, S., Wu, F., Guo, Q., Wang, Y., Li, W., Xue, B., Hu, X., Li, P.,
+           Tian, D., Li, C., Yao, H., Li, Y., Xu, G., Fang, J., 2015.
+           Segmenting tree crowns from terrestrial and mobile LiDAR data by
+           exploring ecological theories. ISPRS Journal of Photogrammetry and
+           Remote Sensing 110, 66–76.
+
+    """
+
+    # Calculating recall (r) and precision (p).
+    r = tp / (tp + fn)
+    p = tp / (tp + fp)
+
+    # Calculating the F-score.
+    Fscore = 2 * ((r * p) / (r + p))
+
+    return Fscore
+
+
+# Defining the kappa function.
+def kappa(tp, fp, tn, fn):
+
+    """
+    This function calculates kappa according to the original publication from
+    Jacob Cohen (1960). The only modification made for the use of Cohen's kappa
+    on the testing framework is the if-else control to avoid division by 0 when
+    the test data has no leaf/noise points. In these cases, the k variable is
+    set to a fill value (9999).
+
+    Parameters
+    ----------
+    tp: int or float
+            Number of true positives.
+    fp: int or float
+            Number of false positives.
+    tn: int or float
+            Number of true negatives.
+    fn: int or float
+            Number of false negatives. Together with tp, fp and tn, it forms
+            the confusion matrix from which kappa is calculated.
+
+    Returns
+    -------
+    k: numpy.float64
+            Absolute value of Cohen's kappa, or 9999 when pe is not below 1.
+
+    References
+    ----------
+    .. [1] Cohen, J. A Coefficient of Agreement for Nominal Scales. Educational
+           and Psychological Measurement, April 1960, 20: 37-46.
+
+    """
+
+    # Executing the functions p_proportionate_agreement and
+    # p_random_agreement to obtain the po and pe, respectively.
+    po = p_proportionate_agreement(tp, fp, tn, fn)
+    pe = p_random_agreement(tp, fp, tn, fn)
+
+    # Testing if pe is smaller than 1. If so, calculate k and,
+    # if not, assign a fill value to k (9999).
+    if pe < 1:
+        k = (po - pe) / (1 - pe)
+    else:
+        k = 9999
+        return k
+
+    return abs(k)
+
+
+# Defining the p_proportionate_agreement function.
+def p_proportionate_agreement(tp, fp, tn, fn):
+
+    """
+    The probability of proportionate agreement is the relative amount
+    of points in agreement with the "truth", which is the sum of true
+    positives and true negatives divided by the total number of points.
+
+    Parameters
+    ----------
+    tp: int or float
+            Number of true positives.
+    fp: int or float
+            Number of false positives.
+    tn: int or float
+            Number of true negatives.
+    fn: int or float
+            Number of false negatives.
+
+    Returns
+    -------
+    po: numpy.float64
+        probability of proportionate agreement.
+
+    """
+
+    # Calculating the total amount of points.
+    total = tp + fp + tn + fn
+
+    # Calculating the proportionate agreement.
+    po = (tp + tn) / total
+    return po
+
+
+# Defining the p_random_agreement function.
+def p_random_agreement(tp, fp, tn, fn):
+
+    """
+    The probability of random agreement is the probability of the "truth"
+    to be achieved randomly, independent of the method used to classify/
+    identify each point as wood or leaf/noise.
+
+    Parameters
+    ----------
+    tp: int or float
+            Number of true positives.
+    fp: int or float
+            Number of false positives.
+    tn: int or float
+            Number of true negatives.
+    fn: int or float
+            Number of false negatives.
+
+    Returns
+    -------
+    pe: numpy.float64
+            Probability of random agreement.
+
+    """
+
+    # Calculating the total amount of points.
+    total = tp + fp + tn + fn
+
+    m_original = ((tp + fn) * (tp + fp)) / total
+    m_classification = ((fp + tn) * (fn + tn)) / total
+
+    # Calculating the probability of random agreement.
+    pe = (m_original + m_classification) / total
+
+    return pe
+
+
+# Defining the test_results function.
+def test_results(Cw, Cl, W, L):
+
+#    tw = count_intersection(Cw, W)
+#    fw = count_intersection(Cw, L)
+#    tl = count_intersection(Cl, L)
+#    fl = count_intersection(Cl, W)
+
+    # OR
+
+    tw = count_intersection(Cw, W)
+    tl = count_intersection(Cl, L)
+    fw = abs(Cw.shape[0] - tw)
+    fl = abs(Cl.shape[0] - tl)
+
+    return tw, fw, tl, fl
+
+
+if __name__ == "__main__":
+    pass
diff --git a/lidartf/tests/__init__.py b/lidartf/tests/__init__.py
new file mode 100644
index 0000000..a0efdc0
--- /dev/null
+++ b/lidartf/tests/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Apr 06 15:50:31 2017
+
+@author: mathe
+"""
+
diff --git a/lidartf/tests/wlseparation.py b/lidartf/tests/wlseparation.py
new file mode 100644
index 0000000..4643248
--- /dev/null
+++ b/lidartf/tests/wlseparation.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+"""
+Module to perform the wood-leaf separation testing.
+
+@author: Matheus Boni Vicari (2017).
+"""
+import numpy as np
+import itertools
+import testres as tr
+import pandas as pd
+import time
+import mayavi.mlab as mlab
+
+
+def test_separation(data, f, filename, results_folder, plot_cloud, *args):
+
+    # Initializing empty lists to store parameters and results of the test.
+    results = []
+    params = []
+
+    # Generating the reference wood and leaf datasets.
+    wood = remove_duplicates(data[data[:, 3] == 0])
+    leaf = remove_duplicates(data[data[:, 3] == 1])
+
+    # Stacking references data to create the testing dataset.
+    test_data = np.vstack((wood, leaf))
+
+    # Iterating over the product of all input arguments. This will generate
+    # all possible combinations of arguments from 'args' to test.
+    for j in itertools.product(*args):
+
+        # Starting the time counter.
+        start = time.time()
+
+        # Trying to separate the testing dataset with the current
+        # arguments.
+        try:
+            # Run function 'f' with arguments j.
+            w_out, l_out, p = f(test_data, *j)
+
+            # If set, plot and save separated point clouds.
+            if plot_cloud:
+                mlab.figure(bgcolor=(1, 1, 1))
+                mlab.points3d(w_out[:, 0], w_out[:, 1], w_out[:, 2],
+                              color=(0.4, 0.2, 0), mode='point')
+                mlab.points3d(l_out[:, 0], l_out[:, 1], l_out[:, 2],
+                              color=(0, 0.4, 0), mode='point')
+                mlab.savefig(results_folder + 'cloud_' + filename + '_' +
+                             '_'.join(map(str, j)) + '.png',
+                             size=[1920, 1080])
+                mlab.close()
+
+            # Testing the separated point clouds against the reference
+            # point clouds.
+            tempres = tr.summary(w_out, l_out, wood[:], leaf[:])
+            # Joining processing time to the testing results.
+            tempres = tempres + (time.time() - start, )
+
+            # Printing current iteration results.
+            print('\n Current results for dataset %s and arguments %s:' %
+                  (filename, j))
+            print tempres
+
+            # Appending the test results to results list.
+            results.append(tempres[:])
+
+            # Deleting separated point clouds and current results to avoid
+            # conflicts with the next iteration.
+            del(w_out, l_out, tempres)
+
+        except:
+            # If not possible to separate the point cloud, fill current results
+            # as zeroes.
+            results.append((0, 0, 0, 0, 0, 0, 0, 0, 0))
+
+        # Append current parameters (arguments).
+        params.append(j)
+
+    return results, params
+
+
+def remove_duplicates(arr):
+
+    """
+    Function to remove duplicate rows from an array.
+
+    Parameters
+    ----------
+    arr: numpy.ndarray
+        N-dimensional array to uniquify rows.
+
+    Returns
+    -------
+    unique: numpy.ndarray
+        Array with unique rows.
+
+    """
+
+    # Creating a pandas.DataFrame from the input array.
+    df = pd.DataFrame({'x': arr[:, 0], 'y': arr[:, 1], 'z': arr[:, 2]})
+
+    # Removing duplicate rows.
+    unique = df.drop_duplicates(['x', 'y', 'z'])
+
+    return np.asarray(unique)
diff --git a/lidartf/utils/__init__.py b/lidartf/utils/__init__.py
new file mode 100644
index 0000000..a0efdc0
--- /dev/null
+++ b/lidartf/utils/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Apr 06 15:50:31 2017
+
+@author: mathe
+"""
+
diff --git a/lidartf/utils/intersection.py b/lidartf/utils/intersection.py
new file mode 100644
index 0000000..84d25a9
--- /dev/null
+++ b/lidartf/utils/intersection.py
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+"""
+Module to run an intersection analysis between two point clouds.
+
+@author: Matheus Boni Vicari (2017).
+"""
+
+import numpy as np
+import pandas as pd
+
+
+def get_diff(arr1, arr2):
+
+    """
+    Function to generate a difference point cloud (points not intersected)
+    between point clouds.
+
+    Parameters
+    ----------
+    arr1: numpy.ndarray
+        First point cloud to analyze.
+
+    arr2: numpy.ndarray
+        Second point cloud to analyze.
+
+    Returns
+    -------
+    diff: numpy.ndarray
+        Difference point cloud.
+
+    Examples
+    --------
+    >>> arr1 = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]])
+    >>> arr1
+    array([[0, 0, 0],
+           [1, 1, 1],
+           [2, 2, 2]])
+    >>> arr2 = np.array([[0, 0, 0], [1, 1, 1], [3, 3, 3]])
+    >>> arr2
+    array([[0, 0, 0],
+           [1, 1, 1],
+           [3, 3, 3]])
+    >>> get_diff(arr1, arr2)
+    array([[2, 2, 2],
+           [3, 3, 3]])
+
+    """
+
+    # Making sure arr1 and arr2 have the same number of dimensions.
+    assert arr1.shape[1] == arr2.shape[1]
+
+    # Stacking both arrays.
+    arr3 = np.vstack((arr1, arr2))
+
+    # Generating a pandas.DataFrame from the stacked array.
+    df = pd.DataFrame(arr3)
+
+    # Removing all points (rows) that are not unique.
+    diff = df.drop_duplicates(keep=False)
+
+    return np.asarray(diff)
+
+
+def count_intersection(arr1, arr2):
+
+    """
+    Function to calculate the number of common points between two clouds.
+
+    Parameters
+    ----------
+    arr1: numpy.ndarray
+        First point cloud to analyze.
+
+    arr2: numpy.ndarray
+        Second point cloud to analyze.
+
+    Returns
+    -------
+    count: int
+        Number of common points.
+
+    Examples
+    --------
+    >>> arr1 = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]])
+    >>> arr1
+    array([[0, 0, 0],
+           [1, 1, 1],
+           [2, 2, 2]])
+    >>> arr2 = np.array([[0, 0, 0], [1, 1, 1], [3, 3, 3]])
+    >>> arr2
+    array([[0, 0, 0],
+           [1, 1, 1],
+           [3, 3, 3]])
+    >>> count_intersection(arr1, arr2)
+    2
+
+    """
+
+    # Making sure arr1 and arr2 have the same number of dimensions.
+    assert arr1.shape[1] == arr2.shape[1]
+
+    # Stacking both arrays.
+    arr3 = np.vstack((arr1, arr2))
+
+    # Generating a pandas.DataFrame from the stacked array.
+    df = pd.DataFrame(arr3)
+
+    # Obtaining the duplicated points in the DataFrame.
+    diff = np.asarray(df.duplicated(keep=False))
+
+    # Calculating the number of intersected points.
+    intercount = np.sum(diff) / 2
+
+    return intercount.astype(np.int)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4633eb2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+mayavi>=4.5.0
+numpy>=1.11.3
+pandas>=0.19.2
+setuptools>=36.0.1
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e6b9731
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+"""
+Setup file for the LiDAR Processing Testing Framework package.
+
+@author: Matheus Boni Vicari (matheus.boni.vicari@gmail.com)
+"""
+
+from setuptools import setup
+
+setup(
+    name="LiDAR Processing Testing Framework",
+    version="0.0.1",
+    author='Matheus Boni Vicari',
+    author_email='matheus.boni.vicari@gmail.com',
+    install_requires=[
+        "numpy",
+        "tinfiltering"
+    ],
+    # ...
+)