From 3e887311c3a512f408fa8e6867017ef3f3788e92 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 11 Jun 2024 11:22:31 +0200 Subject: [PATCH 1/3] added MulticlosureInternalLoader --- .../src/validphys/closuretest/multiclosure.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py index 5b545569e2..8d64ac93b4 100644 --- a/validphys2/src/validphys/closuretest/multiclosure.py +++ b/validphys2/src/validphys/closuretest/multiclosure.py @@ -10,6 +10,7 @@ import numpy as np import scipy.linalg as la import scipy.special as special +import dataclasses from reportengine import collect from validphys.calcutils import calc_chi2 @@ -29,6 +30,30 @@ # stepsize in fits/replicas to use for finite size bootstraps SAMPLING_INTERVAL = 5 +@dataclasses.dataclass(frozen=True) +class MulticlosureInternalLoader: + """ + Parameters + ---------- + closures_th: list + list containing validphys.results.ThPredictionsResult objects + for each fit + + law_th: ThPredictionsResult object + underlying law theory predictions + + covmat: np.array + experimental t0 covariance matrix + + sqrt_covmat: np.array + cholesky decomposed covariance matrix + """ + + closures_th: list + law_th: ThPredictionsResult + covmat: np.array + sqrt_covmat: np.array + # TODO: deprecate this at some point @check_fits_underlying_law_match From c4965fdfe6ae70a4371940cdfd7ae5e707df9f52 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 11 Jun 2024 11:23:46 +0200 Subject: [PATCH 2/3] adapted code to MulticlosureInternalLoader --- .../src/validphys/closuretest/multiclosure.py | 69 +++++++++---------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py index 8d64ac93b4..b2ccd6eefb 100644 --- a/validphys2/src/validphys/closuretest/multiclosure.py +++ b/validphys2/src/validphys/closuretest/multiclosure.py @@ -7,6 +7,7 @@ ``multiclosure_output.py`` """ + import numpy as np import scipy.linalg as la import scipy.special as special @@ -30,6 +31,7 @@ # stepsize in fits/replicas to use for finite size bootstraps SAMPLING_INTERVAL = 5 + @dataclasses.dataclass(frozen=True) class MulticlosureInternalLoader: """ @@ -114,12 +116,11 @@ def internal_multiclosure_dataset_loader( sqrt_covmat = la.cholesky(t0_covmat_from_systematics, lower=True) # TODO: support covmat reg and theory covariance matrix - # possibly make this a named tuple - return ( - fits_dataset_predictions, - fits_underlying_predictions, - t0_covmat_from_systematics, - sqrt_covmat, + return MulticlosureInternalLoader( + closures_th=fits_dataset_predictions, + law_th=fits_underlying_predictions, + covmat=t0_covmat_from_systematics, + sqrt_covmat=sqrt_covmat, ) @@ -139,9 +140,7 @@ def internal_multiclosure_data_loader( @check_multifit_replicas def fits_dataset_bias_variance( - internal_multiclosure_dataset_loader, - _internal_max_reps=None, - _internal_min_reps=20, + internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20 ): """For a single dataset, calculate the bias and variance for each fit and return tuple (bias, variance, n_data), where bias and variance are @@ -157,7 +156,10 @@ def fits_dataset_bias_variance( ``_internal_max_reps``. """ - closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader + closures_th = internal_multiclosure_dataset_loader.closures_th + law_th = internal_multiclosure_dataset_loader.law_th + sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat + # The dimentions here are (fit, data point, replica) reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps @@ -184,9 +186,7 @@ def expected_dataset_bias_variance(fits_dataset_bias_variance): @check_multifit_replicas def fits_data_bias_variance( - internal_multiclosure_data_loader, - _internal_max_reps=None, - _internal_min_reps=20, + internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20 ): """Like `fits_dataset_bias_variance` but for all data""" return fits_dataset_bias_variance( @@ -237,7 +237,10 @@ def dataset_replica_and_central_diff(internal_multiclosure_dataset_loader, diago default behaviour. """ - closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader + closures_th = internal_multiclosure_dataset_loader.closures_th + law_th = internal_multiclosure_dataset_loader.law_th + covmat = internal_multiclosure_dataset_loader.covmat + replicas = np.asarray([th.error_members for th in closures_th]) centrals = np.mean(replicas, axis=-1) underlying = law_th.central_value @@ -334,11 +337,7 @@ def n_replica_samples(fits_pdf, _internal_max_reps=None, _internal_min_reps=20): _internal_max_reps. """ return list( - range( - _internal_min_reps, - _internal_max_reps + SAMPLING_INTERVAL, - SAMPLING_INTERVAL, - ) + range(_internal_min_reps, _internal_max_reps + SAMPLING_INTERVAL, SAMPLING_INTERVAL) ) @@ -354,13 +353,7 @@ def __init__(self, data): def _bootstrap_multiclosure_fits( - internal_multiclosure_dataset_loader, - rng, - n_fit_max, - n_fit, - n_rep_max, - n_rep, - use_repeats, + internal_multiclosure_dataset_loader, rng, n_fit_max, n_fit, n_rep_max, n_rep, use_repeats ): """Perform a single bootstrap resample of the multiclosure fits and return a proxy of the base internal object used by relevant estimator actions @@ -386,7 +379,12 @@ def _bootstrap_multiclosure_fits( np.random.choice """ - closure_th, *input_tuple = internal_multiclosure_dataset_loader + closure_th = internal_multiclosure_dataset_loader.closures_th + law_th = internal_multiclosure_dataset_loader.law_th + covmat = internal_multiclosure_dataset_loader.covmat + sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat + input_tuple = law_th, covmat, sqrtcov + fit_boot_index = rng.choice(n_fit_max, size=n_fit, replace=use_repeats) fit_boot_th = [closure_th[i] for i in fit_boot_index] boot_ths = [] @@ -799,9 +797,7 @@ def total_bootstrap_xi(experiments_bootstrap_xi): def dataset_fits_bias_replicas_variance_samples( - internal_multiclosure_dataset_loader, - _internal_max_reps=None, - _internal_min_reps=20, + internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20 ): """For a single dataset, calculate the samples of chi2-quantities which are used to calculate the bias and variance for each fit. The output of this @@ -825,7 +821,10 @@ def dataset_fits_bias_replicas_variance_samples( ``_internal_max_reps``. """ - closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader + closures_th = internal_multiclosure_dataset_loader.closures_th + law_th = internal_multiclosure_dataset_loader.law_th + sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat + # The dimentions here are (fit, data point, replica) reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps @@ -842,14 +841,10 @@ def dataset_fits_bias_replicas_variance_samples( def dataset_inputs_fits_bias_replicas_variance_samples( - internal_multiclosure_data_loader, - _internal_max_reps=None, - _internal_min_reps=20, + internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20 ): return dataset_fits_bias_replicas_variance_samples( - internal_multiclosure_data_loader, - _internal_max_reps=None, - _internal_min_reps=20, + internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20 ) From c2fb8debfe1e70ea30c45f6c3f5428e4271253bf Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Tue, 11 Jun 2024 12:05:36 +0200 Subject: [PATCH 3/3] added unit tests for internal datasets and data loader --- .../src/validphys/tests/test_closuretest.py | 120 ++++++++++++++---- 1 file changed, 92 insertions(+), 28 deletions(-) diff --git a/validphys2/src/validphys/tests/test_closuretest.py b/validphys2/src/validphys/tests/test_closuretest.py index fd89fb06ee..4fa009e6ce 100644 --- a/validphys2/src/validphys/tests/test_closuretest.py +++ b/validphys2/src/validphys/tests/test_closuretest.py @@ -1,16 +1,26 @@ """ test_closuretest.py -contains some unit tests for closure test estimators +contains some unit tests for closure test module """ import numpy as np +from unittest.mock import Mock +from validphys.api import API +from validphys.tests.conftest import SINGLE_DATASET, THEORYID, PDF from validphys.closuretest import bias_dataset, variance_dataset +from validphys.closuretest.multiclosure import ( + internal_multiclosure_dataset_loader, + internal_multiclosure_data_loader, +) +from validphys.results import ThPredictionsResult + class TestResult: """class for testing base level estimators which expect a results object""" + def __init__(self, central_value, rawdata=None): self.central_value = central_value self.rawdata = rawdata @@ -18,48 +28,102 @@ def __init__(self, central_value, rawdata=None): self.ndata = len(central_value) self.sqrtcovmat = np.identity(self.ndata) - def __len__(self,): + def __len__(self): return self.ndata + N_DATA = 5 N_REPLICAS = 10 -#TODO: make these fixtures? +# TODO: make these fixtures? # these are proxies for results tuples of data and theory -ones_results = 2*[TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))] -twos_results = 2*[TestResult(2*np.ones(N_DATA), 2*np.ones((N_DATA, N_REPLICAS)))] +ones_results = 2 * [TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))] +twos_results = 2 * [TestResult(2 * np.ones(N_DATA), 2 * np.ones((N_DATA, N_REPLICAS)))] + +replicas = np.arange(N_REPLICAS)[np.newaxis, :] * np.ones((N_DATA, 1)) +replicas_result = 2 * [TestResult(replicas.mean(axis=1), replicas)] + +DATASET_INPUT = {"dataset_input": SINGLE_DATASET, "theoryid": THEORYID, "use_cuts": "internal"} + +DATASET_INPUTS = {"dataset_inputs": [SINGLE_DATASET], "theoryid": THEORYID, "use_cuts": "internal"} + +MOCK_FIT = Mock() +MOCK_FIT.name = "Mock fit spec" +MOCK_FIT.path = "mock/fit/path" +MOCK_FIT.label = MOCK_FIT.name + +MOCK_FITS = [MOCK_FIT] + + +def test_internal_multiclosure_dataset_loader(): + """ + Test the internal_multiclosure_dataset_loader + """ + dataset = API.dataset(**DATASET_INPUT) + + t0_covmat_from_systematics = API.t0_covmat_from_systematics( + **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF} + ) + + fits_pdf = [API.pdf(pdf=PDF)] + multiclosure_underlyinglaw = API.pdf(pdf=PDF) + fits = MOCK_FITS + + loader = internal_multiclosure_dataset_loader( + dataset, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics + ) + + assert loader.closures_th is not None + assert type(loader.closures_th) == list + assert loader.law_th is not None + assert type(loader.law_th) == ThPredictionsResult + assert loader.covmat is not None + assert type(loader.covmat) == np.ndarray + assert loader.sqrt_covmat is not None + assert type(loader.sqrt_covmat) == np.ndarray + + +def test_internal_multiclosure_data_loader(): + """ + Test the internal_multiclosure_data_loader + """ + data = API.data(**DATASET_INPUTS) + + t0_covmat_from_systematics = API.t0_covmat_from_systematics( + **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF} + ) + + fits_pdf = [API.pdf(pdf=PDF)] + multiclosure_underlyinglaw = API.pdf(pdf=PDF) + fits = MOCK_FITS + + loader = internal_multiclosure_data_loader( + data, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics + ) + + assert loader.closures_th is not None + assert type(loader.closures_th) == list + assert loader.law_th is not None + assert type(loader.law_th) == ThPredictionsResult + assert loader.covmat is not None + assert type(loader.covmat) == np.ndarray + assert loader.sqrt_covmat is not None + assert type(loader.sqrt_covmat) == np.ndarray -replicas = np.arange(N_REPLICAS)[np.newaxis, :]*np.ones((N_DATA, 1)) -replicas_result = 2*[TestResult(replicas.mean(axis=1), replicas)] def test_bias_function(): bias_ones = bias_dataset( - ones_results, - [ones_results], # need list of length one to emulate collect - None, - None, + ones_results, [ones_results], None, None # need list of length one to emulate collect ) assert np.allclose(0, bias_ones.bias) - bias_one_two = bias_dataset( - ones_results, - [twos_results], - None, - None, - ) + bias_one_two = bias_dataset(ones_results, [twos_results], None, None) assert np.allclose(N_DATA, bias_one_two.bias) + def test_variance_function(): - vardata = variance_dataset( - ones_results, - None, - None, - ) + vardata = variance_dataset(ones_results, None, None) assert np.allclose(0, vardata.variance) - var_reps = variance_dataset( - replicas_result, - None, - None, - ) + var_reps = variance_dataset(replicas_result, None, None) # calc explicitly what variance should be - expected = np.sum(((np.arange(N_REPLICAS) - 4.5)**2)*N_DATA/N_REPLICAS) + expected = np.sum(((np.arange(N_REPLICAS) - 4.5) ** 2) * N_DATA / N_REPLICAS) assert np.allclose(expected, var_reps.variance)