-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
997fe86
commit 567ec45
Showing
4 changed files
with
299 additions
and
259 deletions.
There are no files selected for viewing
137 changes: 71 additions & 66 deletions
137
nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,99 +1,104 @@ | ||
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close | ||
from pathlib import Path | ||
from dataclasses import dataclass | ||
from os import PathLike | ||
from pathlib import Path | ||
import typing | ||
from typing import List | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from os import PathLike | ||
import yaml | ||
|
||
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close | ||
|
||
|
||
def mergetables() -> pd.DataFrame: | ||
|
||
table_paths = [] | ||
for i in range(1,8): | ||
table_paths.append(Path(f"./rawdata/Table{i}.csv")) | ||
table_paths = [] | ||
for i in range(1, 8): | ||
table_paths.append(Path(f"./rawdata/Table{i}.csv")) | ||
|
||
# List with the rapidity bins for tables 1 to 7. | ||
yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4] | ||
|
||
# List with the rapidity bins for tables 1 to 7. | ||
yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4] | ||
col_names = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"] | ||
col_names_all = col_names + ["y", "sqrts"] | ||
|
||
col_names = ["M2","dsig","statp","statm","normp","normm","sysp","sysm"] | ||
col_names_all = col_names + ["y", "sqrts"] | ||
combined_df = pd.DataFrame(columns=col_names_all) | ||
for i, path in enumerate(table_paths): | ||
df = pd.read_csv(path, header=11, names=col_names) | ||
df["y"] = yrap[i] | ||
df["sqrts"] = 38.8 | ||
df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()] | ||
combined_df = pd.concat([combined_df, df], ignore_index=True) | ||
|
||
combined_df = pd.DataFrame(columns=col_names_all) | ||
for i, path in enumerate(table_paths): | ||
df = pd.read_csv(path, header=11, names=col_names) | ||
df["y"]=yrap[i] | ||
df["sqrts"]=38.8 | ||
df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()] | ||
combined_df = pd.concat([combined_df,df],ignore_index=True) | ||
# In the table we have sqrt(tau) not M2; compute M2=tau*s | ||
combined_df["M2"] = (combined_df["M2"] * 38.8) ** 2 | ||
|
||
# In the table we have sqrt(tau) not M2; compute M2=tau*s | ||
combined_df["M2"] = (combined_df["M2"]*38.8)**2 | ||
return combined_df | ||
|
||
return combined_df | ||
|
||
def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike): | ||
dfN = pd.read_table(tableN) | ||
dfp = pd.read_table(tablep) | ||
return dfN, dfp | ||
dfN = pd.read_table(tableN) | ||
dfp = pd.read_table(tablep) | ||
return dfN, dfp | ||
|
||
|
||
@dataclass | ||
class E605_commondata(commondata): | ||
def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): | ||
def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): | ||
|
||
# Kinematic quantities. | ||
self.central_values = data["dsig"].astype(float).to_numpy() | ||
self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy() | ||
self.kinematic_quantities = ["y", "M2", "sqrts"] | ||
# Kinematic quantities. | ||
self.central_values = data["dsig"].astype(float).to_numpy() | ||
self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy() | ||
self.kinematic_quantities = ["y", "M2", "sqrts"] | ||
|
||
# Statistical uncertainties. | ||
self.statistical_uncertainties = data["statp"] | ||
# Statistical uncertainties. | ||
self.statistical_uncertainties = data["statp"] | ||
|
||
# the overall 10% statistical uncertainty is treated as | ||
# additive, while normalisation uncertainty is always treated | ||
# multiplicatively | ||
syst = pd.DataFrame(0.1 * self.central_values) | ||
# the overall 10% statistical uncertainty is treated as | ||
# additive, while normalisation uncertainty is always treated | ||
# multiplicatively | ||
syst = pd.DataFrame(0.1 * self.central_values) | ||
|
||
# Systematic uncertainties. | ||
syst["norm"] = (self.central_values | ||
*data["normp"].str.strip("%").astype(float)/100) | ||
# Systematic uncertainties. | ||
syst["norm"] = self.central_values * data["normp"].str.strip("%").astype(float) / 100 | ||
|
||
# self.systematic_uncertainties = np.dstack((stat,norm))[0] | ||
self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")] | ||
|
||
#self.systematic_uncertainties = np.dstack((stat,norm))[0] | ||
self.systypes = [("ADD","UNCORR"),("MULT", "CORR")] | ||
# Compute the point-to-point uncertainties | ||
nrep = 999 | ||
norm = np.sqrt(nrep) | ||
dfN, dfp = nuclear_uncert_dw( | ||
"rawdata/nuclear/output/tables/group_result_table.csv", | ||
"rawdata/proton_ite/output/tables/group_result_table.csv", | ||
) | ||
|
||
# Compute the point-to-point uncertainties | ||
nrep=999 | ||
norm=np.sqrt(nrep) | ||
dfN, dfp = nuclear_uncert_dw("rawdata/nuclear/output/tables/group_result_table.csv", | ||
"rawdata/proton_ite/output/tables/group_result_table.csv") | ||
for rep in range(1, nrep + 1): | ||
Delta = (dfN[f"rep_{rep:05d}"] - dfp["theory_central"]) / norm | ||
syst[f"NUCLEAR{rep:05d}"] = Delta | ||
self.systypes.append(("ADD", f"NUCLEAR{rep:05d}")) | ||
|
||
for rep in range(1,nrep+1): | ||
Delta = (dfN[f"rep_{rep:05d}"]-dfp["theory_central"])/norm | ||
syst[f"NUCLEAR{rep:05d}"]=Delta | ||
self.systypes.append(("ADD", f"NUCLEAR{rep:05d}")) | ||
self.systematic_uncertainties = syst.to_numpy() | ||
|
||
self.systematic_uncertainties = syst.to_numpy() | ||
self.process = process | ||
self.dataset_name = dataset_name | ||
|
||
self.process = process | ||
self.dataset_name = dataset_name | ||
|
||
def main(): | ||
data = mergetables() | ||
# First create the commondata variant without the nuclear uncertainties. | ||
DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0") | ||
DYE605.write_new_commondata(Path("data_reimplemented_PXSEC.yaml"), | ||
Path("kinematics_reimplemented_PXSEC.yaml"), | ||
Path("uncertainties_reimplemented_PXSEC.yaml")) | ||
if(covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented")): | ||
print("covmat is close") | ||
else: | ||
print("covmat is different.") | ||
|
||
if __name__ == "__main__": | ||
main() | ||
|
||
|
||
data = mergetables() | ||
# First create the commondata variant without the nuclear uncertainties. | ||
DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0") | ||
DYE605.write_new_commondata( | ||
Path("data_reimplemented_PXSEC.yaml"), | ||
Path("kinematics_reimplemented_PXSEC.yaml"), | ||
Path("uncertainties_reimplemented_PXSEC.yaml"), | ||
) | ||
if covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented"): | ||
print("covmat is close") | ||
else: | ||
print("covmat is different.") | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
120 changes: 69 additions & 51 deletions
120
nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,76 +1,94 @@ | ||
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close | ||
from pathlib import Path | ||
from dataclasses import dataclass | ||
from os import PathLike | ||
from pathlib import Path | ||
import typing | ||
from typing import List | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from os import PathLike | ||
import yaml | ||
|
||
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close | ||
|
||
|
||
def readdata() -> pd.DataFrame: | ||
col_names = ["xF","Mmin","Mmax","Mavg","xFavg","pt","dsig","stat","syst","kfact","rsig","rstat","rsyst"] | ||
table_path = Path(f"./rawdata/table.csv") | ||
df = pd.read_csv(table_path,names=col_names) | ||
return df | ||
col_names = [ | ||
"xF", | ||
"Mmin", | ||
"Mmax", | ||
"Mavg", | ||
"xFavg", | ||
"pt", | ||
"dsig", | ||
"stat", | ||
"syst", | ||
"kfact", | ||
"rsig", | ||
"rstat", | ||
"rsyst", | ||
] | ||
table_path = Path(f"./rawdata/table.csv") | ||
df = pd.read_csv(table_path, names=col_names) | ||
return df | ||
|
||
|
||
@dataclass | ||
class E866commondata(commondata): | ||
def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): | ||
def __init__(self, data: pd.DataFrame, dataset_name: str, process: str): | ||
|
||
# Definitions, compute Jacobian, get dsig/dy/dM | ||
M = (data["Mmax"]+data["Mmin"])/2 | ||
M2=M*M | ||
sqrts=M/M*38.8 | ||
s=sqrts**2 | ||
tau=M**2/s | ||
tau=tau.to_numpy() | ||
xF=data["xF"] | ||
y=np.arcsinh(xF/np.sqrt(tau)/2) | ||
jac=np.sqrt(xF**2+4*tau) | ||
dsigdydM = data["dsig"]*jac | ||
# Definitions, compute Jacobian, get dsig/dy/dM | ||
M = (data["Mmax"] + data["Mmin"]) / 2 | ||
M2 = M * M | ||
sqrts = M / M * 38.8 | ||
s = sqrts**2 | ||
tau = M**2 / s | ||
tau = tau.to_numpy() | ||
xF = data["xF"] | ||
y = np.arcsinh(xF / np.sqrt(tau) / 2) | ||
jac = np.sqrt(xF**2 + 4 * tau) | ||
dsigdydM = data["dsig"] * jac | ||
|
||
# Set the central values | ||
self.central_values = dsigdydM.astype(float).to_numpy() | ||
|
||
# Set the central values | ||
self.central_values = dsigdydM.astype(float).to_numpy() | ||
# Pick the the kinematic quantities | ||
kin = pd.concat([y, M2, sqrts], axis=1) | ||
kin = kin.set_axis(["y", "M2", "sqrts"], axis=1) | ||
self.kinematics = kin.astype(float).to_numpy() | ||
self.kinematic_quantities = ["y", "M2", "sqrts"] | ||
|
||
# Pick the the kinematic quantities | ||
kin=pd.concat([y,M2,sqrts],axis=1) | ||
kin=kin.set_axis(["y","M2","sqrts"],axis=1) | ||
self.kinematics = kin.astype(float).to_numpy() | ||
self.kinematic_quantities = ["y", "M2", "sqrts"] | ||
# Statistical uncertainties. | ||
self.statistical_uncertainties = data["stat"] * jac | ||
|
||
# Statistical uncertainties. | ||
self.statistical_uncertainties = data["stat"]*jac | ||
# Systematic uncertainty | ||
syst = data["syst"] * jac | ||
|
||
# Systematic uncertainty | ||
syst = data["syst"]*jac | ||
# Normalisation uncertainty of 6.5% from beam intensity calibration. | ||
norm = 6.5 / 100 | ||
norm = norm * self.central_values | ||
|
||
# Normalisation uncertainty of 6.5% from beam intensity calibration. | ||
norm = 6.5/100 | ||
norm = norm * self.central_values | ||
self.systematic_uncertainties = np.dstack((syst, norm))[0] | ||
self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")] | ||
|
||
self.systematic_uncertainties = np.dstack((syst,norm))[0] | ||
self.systypes = [("ADD", "UNCORR"),("MULT","CORR")] | ||
self.process = process | ||
self.dataset_name = dataset_name | ||
|
||
self.process = process | ||
self.dataset_name = dataset_name | ||
|
||
def main(): | ||
data = readdata() | ||
# First create the commondata variant without the nuclear uncertainties. | ||
DYE866 = E866commondata(data, "DYE866_Z0", "Z0") | ||
DYE866.write_new_commondata(Path("data_reimplemented_PXSEC.yaml"), | ||
Path("kinematics_reimplemented_PXSEC.yaml"), | ||
Path("uncertainties_reimplemented_PXSEC.yaml")) | ||
|
||
if(covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented")): | ||
print("covmat is close") | ||
else: | ||
print("covmat is different.") | ||
if __name__ == "__main__": | ||
main() | ||
|
||
data = readdata() | ||
# First create the commondata variant without the nuclear uncertainties. | ||
DYE866 = E866commondata(data, "DYE866_Z0", "Z0") | ||
DYE866.write_new_commondata( | ||
Path("data_reimplemented_PXSEC.yaml"), | ||
Path("kinematics_reimplemented_PXSEC.yaml"), | ||
Path("uncertainties_reimplemented_PXSEC.yaml"), | ||
) | ||
|
||
if covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented"): | ||
print("covmat is close") | ||
else: | ||
print("covmat is different.") | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
Oops, something went wrong.