Skip to content

Commit

Permalink
Run pre-commit hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
Radonirinaunimi committed Jan 10, 2025
1 parent 997fe86 commit 567ec45
Show file tree
Hide file tree
Showing 4 changed files with 299 additions and 259 deletions.
137 changes: 71 additions & 66 deletions nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py
Original file line number Diff line number Diff line change
@@ -1,99 +1,104 @@
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
from pathlib import Path
from dataclasses import dataclass
from os import PathLike
from pathlib import Path
import typing
from typing import List

import numpy as np
import pandas as pd
from os import PathLike
import yaml

from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close


def mergetables() -> pd.DataFrame:
    """Merge HEPData Tables 1-7 for E605 into a single DataFrame.

    Each table corresponds to one rapidity bin; a ``y`` column with the
    bin value and a ``sqrts`` column (fixed at 38.8 GeV) are appended to
    every row.  The first column of the raw tables is sqrt(tau), which is
    converted in place to the invariant mass squared M2 = tau * s.

    Returns
    -------
    pd.DataFrame
        Combined data with columns
        ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm",
         "y", "sqrts"].
    """
    table_paths = []
    for i in range(1, 8):
        table_paths.append(Path(f"./rawdata/Table{i}.csv"))

    # List with the rapidity bins for tables 1 to 7.
    yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]

    col_names = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"]
    col_names_all = col_names + ["y", "sqrts"]

    combined_df = pd.DataFrame(columns=col_names_all)
    for i, path in enumerate(table_paths):
        df = pd.read_csv(path, header=11, names=col_names)
        df["y"] = yrap[i]
        df["sqrts"] = 38.8
        # Drop non-numeric rows (headers/footers embedded in the CSV body).
        df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()]
        combined_df = pd.concat([combined_df, df], ignore_index=True)

    # In the table we have sqrt(tau) not M2; compute M2=tau*s
    combined_df["M2"] = (combined_df["M2"] * 38.8) ** 2

    return combined_df

def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike):
    """Load the nuclear and proton theory-prediction tables.

    Parameters
    ----------
    tableN:
        Path to the nuclear-PDF group result table.
    tablep:
        Path to the proton-PDF group result table.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        The nuclear and proton tables, in that order.
    """
    dfN = pd.read_table(tableN)
    dfp = pd.read_table(tablep)
    return dfN, dfp


@dataclass
class E605_commondata(commondata):
    """Commondata implementation for the E605 Drell-Yan measurement,
    including deweighted nuclear uncertainties built from PDF replicas."""

    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
        """Fill the commondata fields from the merged rawdata table.

        Parameters
        ----------
        data:
            Output of ``mergetables()``: one row per (y, M2) point.
        dataset_name:
            Name under which the commondata is written.
        process:
            Process label (e.g. "Z0").
        """
        # Kinematic quantities.
        self.central_values = data["dsig"].astype(float).to_numpy()
        self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
        self.kinematic_quantities = ["y", "M2", "sqrts"]

        # Statistical uncertainties.
        self.statistical_uncertainties = data["statp"]

        # The overall 10% statistical uncertainty is treated as
        # additive, while normalisation uncertainty is always treated
        # multiplicatively.
        syst = pd.DataFrame(0.1 * self.central_values)

        # Systematic uncertainties: normp is given as a percentage string
        # (e.g. "15%"), converted here to an absolute uncertainty.
        syst["norm"] = self.central_values * data["normp"].str.strip("%").astype(float) / 100

        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]

        # Compute the point-to-point nuclear uncertainties from the
        # spread of nrep nuclear-PDF replicas around the proton baseline.
        nrep = 999
        norm = np.sqrt(nrep)
        dfN, dfp = nuclear_uncert_dw(
            "rawdata/nuclear/output/tables/group_result_table.csv",
            "rawdata/proton_ite/output/tables/group_result_table.csv",
        )

        for rep in range(1, nrep + 1):
            Delta = (dfN[f"rep_{rep:05d}"] - dfp["theory_central"]) / norm
            syst[f"NUCLEAR{rep:05d}"] = Delta
            self.systypes.append(("ADD", f"NUCLEAR{rep:05d}"))

        self.systematic_uncertainties = syst.to_numpy()

        self.process = process
        self.dataset_name = dataset_name

def main():
    """Reimplement the E605 commondata and compare against the legacy covmat."""
    data = mergetables()
    # First create the commondata variant without the nuclear uncertainties.
    DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0")
    DYE605.write_new_commondata(
        Path("data_reimplemented_PXSEC.yaml"),
        Path("kinematics_reimplemented_PXSEC.yaml"),
        Path("uncertainties_reimplemented_PXSEC.yaml"),
    )
    if covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented"):
        print("covmat is close")
    else:
        print("covmat is different.")


if __name__ == "__main__":
    main()
120 changes: 69 additions & 51 deletions nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py
Original file line number Diff line number Diff line change
@@ -1,76 +1,94 @@
from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
from pathlib import Path
from dataclasses import dataclass
from os import PathLike
from pathlib import Path
import typing
from typing import List

import numpy as np
import pandas as pd
from os import PathLike
import yaml

from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close


def readdata() -> pd.DataFrame:
    """Read the E866 rawdata table into a DataFrame.

    Returns
    -------
    pd.DataFrame
        One row per (xF, M) bin with the cross section, its statistical
        and systematic uncertainties, and the ratio quantities.
    """
    col_names = [
        "xF",
        "Mmin",
        "Mmax",
        "Mavg",
        "xFavg",
        "pt",
        "dsig",
        "stat",
        "syst",
        "kfact",
        "rsig",
        "rstat",
        "rsyst",
    ]
    # Plain string: the original used an f-string with no placeholders.
    table_path = Path("./rawdata/table.csv")
    df = pd.read_csv(table_path, names=col_names)
    return df


@dataclass
class E866commondata(commondata):
    """Commondata implementation for the E866/NuSea Drell-Yan measurement."""

    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
        """Fill the commondata fields from the rawdata table.

        The raw table is differential in (xF, M); a Jacobian
        sqrt(xF^2 + 4*tau) converts it to dsig/dy/dM, and the rapidity is
        obtained as y = asinh(xF / (2*sqrt(tau))).

        Parameters
        ----------
        data:
            Output of ``readdata()``.
        dataset_name:
            Name under which the commondata is written.
        process:
            Process label (e.g. "Z0").
        """
        # Definitions, compute Jacobian, get dsig/dy/dM.
        M = (data["Mmax"] + data["Mmin"]) / 2
        M2 = M * M
        # sqrts as a Series of the same length as M, fixed at 38.8 GeV.
        sqrts = M / M * 38.8
        s = sqrts**2
        tau = M**2 / s
        tau = tau.to_numpy()
        xF = data["xF"]
        y = np.arcsinh(xF / np.sqrt(tau) / 2)
        jac = np.sqrt(xF**2 + 4 * tau)
        dsigdydM = data["dsig"] * jac

        # Set the central values.
        self.central_values = dsigdydM.astype(float).to_numpy()

        # Pick the kinematic quantities.
        kin = pd.concat([y, M2, sqrts], axis=1)
        kin = kin.set_axis(["y", "M2", "sqrts"], axis=1)
        self.kinematics = kin.astype(float).to_numpy()
        self.kinematic_quantities = ["y", "M2", "sqrts"]

        # Statistical uncertainties (converted with the same Jacobian).
        self.statistical_uncertainties = data["stat"] * jac

        # Systematic uncertainty.
        syst = data["syst"] * jac

        # Normalisation uncertainty of 6.5% from beam intensity calibration.
        norm = 6.5 / 100
        norm = norm * self.central_values

        self.systematic_uncertainties = np.dstack((syst, norm))[0]
        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]

        self.process = process
        self.dataset_name = dataset_name

def main():
    """Reimplement the E866 commondata and compare against the legacy covmat."""
    data = readdata()
    # First create the commondata variant without the nuclear uncertainties.
    DYE866 = E866commondata(data, "DYE866_Z0", "Z0")
    DYE866.write_new_commondata(
        Path("data_reimplemented_PXSEC.yaml"),
        Path("kinematics_reimplemented_PXSEC.yaml"),
        Path("uncertainties_reimplemented_PXSEC.yaml"),
    )

    if covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented"):
        print("covmat is close")
    else:
        print("covmat is different.")


if __name__ == "__main__":
    main()
Loading

0 comments on commit 567ec45

Please sign in to comment.