Run pre-commit hooks

NNPDF · Jan 10, 2025 · 567ec45 · 567ec45
1 parent 997fe86
commit 567ec45
Show file tree

Hide file tree

Showing 4 changed files with 299 additions and 259 deletions.
diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py
@@ -1,99 +1,104 @@
-from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
-from pathlib import Path
 from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
 import typing
 from typing import List
+
 import numpy as np
 import pandas as pd
-from os import PathLike
 import yaml
 
+from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
+
+
 def mergetables() -> pd.DataFrame:
 
-   table_paths = []
-   for i in range(1,8):
-      table_paths.append(Path(f"./rawdata/Table{i}.csv"))
+    table_paths = []
+    for i in range(1, 8):
+        table_paths.append(Path(f"./rawdata/Table{i}.csv"))
+
+    # List with the rapidity bins for tables 1 to 7.
+    yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]
 
-   # List with the rapidity bins for tables 1 to 7.
-   yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]  
+    col_names = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"]
+    col_names_all = col_names + ["y", "sqrts"]
 
-   col_names = ["M2","dsig","statp","statm","normp","normm","sysp","sysm"]
-   col_names_all = col_names + ["y", "sqrts"]
+    combined_df = pd.DataFrame(columns=col_names_all)
+    for i, path in enumerate(table_paths):
+        df = pd.read_csv(path, header=11, names=col_names)
+        df["y"] = yrap[i]
+        df["sqrts"] = 38.8
+        df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()]
+        combined_df = pd.concat([combined_df, df], ignore_index=True)
 
-   combined_df = pd.DataFrame(columns=col_names_all)
-   for i, path in enumerate(table_paths):
-      df = pd.read_csv(path, header=11, names=col_names)
-      df["y"]=yrap[i]
-      df["sqrts"]=38.8
-      df = df[pd.to_numeric(df['dsig'], errors='coerce').notnull()]
-      combined_df = pd.concat([combined_df,df],ignore_index=True)
+    # In the table we have sqrt(tau) not M2; compute M2=tau*s
+    combined_df["M2"] = (combined_df["M2"] * 38.8) ** 2
 
-   # In the table we have sqrt(tau) not M2; compute M2=tau*s
-   combined_df["M2"] = (combined_df["M2"]*38.8)**2
+    return combined_df
 
-   return combined_df
 
 def nuclear_uncert_dw(tableN: PathLike, tablep: PathLike):
-   dfN = pd.read_table(tableN)
-   dfp = pd.read_table(tablep)
-   return dfN, dfp
+    dfN = pd.read_table(tableN)
+    dfp = pd.read_table(tablep)
+    return dfN, dfp
+
 
 @dataclass
 class E605_commondata(commondata):
-   def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
+    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
 
-      # Kinematic quantities.
-      self.central_values = data["dsig"].astype(float).to_numpy()
-      self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
-      self.kinematic_quantities = ["y", "M2", "sqrts"]
+        # Kinematic quantities.
+        self.central_values = data["dsig"].astype(float).to_numpy()
+        self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
+        self.kinematic_quantities = ["y", "M2", "sqrts"]
 
-      # Statistical uncertainties.
-      self.statistical_uncertainties = data["statp"]
+        # Statistical uncertainties.
+        self.statistical_uncertainties = data["statp"]
 
-      # the overall 10% statistical uncertainty is treated as
-      # additive, while normalisation uncertainty is always treated
-      # multiplicatively
-      syst = pd.DataFrame(0.1 * self.central_values)
+        # the overall 10% statistical uncertainty is treated as
+        # additive, while normalisation uncertainty is always treated
+        # multiplicatively
+        syst = pd.DataFrame(0.1 * self.central_values)
 
-      # Systematic uncertainties.
-      syst["norm"] = (self.central_values
-               *data["normp"].str.strip("%").astype(float)/100)
+        # Systematic uncertainties.
+        syst["norm"] = self.central_values * data["normp"].str.strip("%").astype(float) / 100
 
+        # self.systematic_uncertainties = np.dstack((stat,norm))[0]
+        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]
 
-      #self.systematic_uncertainties = np.dstack((stat,norm))[0]
-      self.systypes = [("ADD","UNCORR"),("MULT", "CORR")]
+        # Compute the point-to-point uncertainties
+        nrep = 999
+        norm = np.sqrt(nrep)
+        dfN, dfp = nuclear_uncert_dw(
+            "rawdata/nuclear/output/tables/group_result_table.csv",
+            "rawdata/proton_ite/output/tables/group_result_table.csv",
+        )
 
-      # Compute the point-to-point uncertainties
-      nrep=999
-      norm=np.sqrt(nrep)
-      dfN, dfp = nuclear_uncert_dw("rawdata/nuclear/output/tables/group_result_table.csv",
-                                   "rawdata/proton_ite/output/tables/group_result_table.csv")
+        for rep in range(1, nrep + 1):
+            Delta = (dfN[f"rep_{rep:05d}"] - dfp["theory_central"]) / norm
+            syst[f"NUCLEAR{rep:05d}"] = Delta
+            self.systypes.append(("ADD", f"NUCLEAR{rep:05d}"))
 
-      for rep in range(1,nrep+1):
-         Delta = (dfN[f"rep_{rep:05d}"]-dfp["theory_central"])/norm
-         syst[f"NUCLEAR{rep:05d}"]=Delta
-         self.systypes.append(("ADD", f"NUCLEAR{rep:05d}"))
+        self.systematic_uncertainties = syst.to_numpy()
 
-      self.systematic_uncertainties = syst.to_numpy()
+        self.process = process
+        self.dataset_name = dataset_name
 
-      self.process = process
-      self.dataset_name = dataset_name
 
 def main():
-   data = mergetables()
-   # First create the commondata variant without the nuclear uncertainties.
-   DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0")
-   DYE605.write_new_commondata(Path("data_reimplemented_PXSEC.yaml"),
-                                Path("kinematics_reimplemented_PXSEC.yaml"),
-                                Path("uncertainties_reimplemented_PXSEC.yaml"))
-   if(covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented")):
-      print("covmat is close")
-   else:
-      print("covmat is different.")
-
-if __name__ == "__main__":
-   main()
-
-
+    data = mergetables()
+    # First create the commondata variant without the nuclear uncertainties.
+    DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0")
+    DYE605.write_new_commondata(
+        Path("data_reimplemented_PXSEC.yaml"),
+        Path("kinematics_reimplemented_PXSEC.yaml"),
+        Path("uncertainties_reimplemented_PXSEC.yaml"),
+    )
+    if covmat_is_close("DYE605_Z0_38P8GEV_DW_PXSEC", "legacy", "reimplemented"):
+        print("covmat is close")
+    else:
+        print("covmat is different.")
 
 
+if __name__ == "__main__":
+    main()
diff --git a/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE866_Z0_800GEV/filter.py
@@ -1,76 +1,94 @@
-from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
-from pathlib import Path
 from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
 import typing
 from typing import List
+
 import numpy as np
 import pandas as pd
-from os import PathLike
 import yaml
 
+from nnpdf_data.filter_utils.hera_utils import commondata, covmat_is_close
+
+
 def readdata() -> pd.DataFrame:
-   col_names = ["xF","Mmin","Mmax","Mavg","xFavg","pt","dsig","stat","syst","kfact","rsig","rstat","rsyst"]
-   table_path = Path(f"./rawdata/table.csv")
-   df = pd.read_csv(table_path,names=col_names)
-   return df
+    col_names = [
+        "xF",
+        "Mmin",
+        "Mmax",
+        "Mavg",
+        "xFavg",
+        "pt",
+        "dsig",
+        "stat",
+        "syst",
+        "kfact",
+        "rsig",
+        "rstat",
+        "rsyst",
+    ]
+    table_path = Path(f"./rawdata/table.csv")
+    df = pd.read_csv(table_path, names=col_names)
+    return df
+
 
 @dataclass
 class E866commondata(commondata):
-   def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
+    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
 
-      # Definitions, compute Jacobian, get dsig/dy/dM
-      M = (data["Mmax"]+data["Mmin"])/2
-      M2=M*M
-      sqrts=M/M*38.8 
-      s=sqrts**2
-      tau=M**2/s
-      tau=tau.to_numpy()
-      xF=data["xF"]
-      y=np.arcsinh(xF/np.sqrt(tau)/2)
-      jac=np.sqrt(xF**2+4*tau)
-      dsigdydM = data["dsig"]*jac
+        # Definitions, compute Jacobian, get dsig/dy/dM
+        M = (data["Mmax"] + data["Mmin"]) / 2
+        M2 = M * M
+        sqrts = M / M * 38.8
+        s = sqrts**2
+        tau = M**2 / s
+        tau = tau.to_numpy()
+        xF = data["xF"]
+        y = np.arcsinh(xF / np.sqrt(tau) / 2)
+        jac = np.sqrt(xF**2 + 4 * tau)
+        dsigdydM = data["dsig"] * jac
 
+        # Set the central values
+        self.central_values = dsigdydM.astype(float).to_numpy()
 
-      # Set the central values
-      self.central_values = dsigdydM.astype(float).to_numpy() 
+        # Pick the the kinematic quantities
+        kin = pd.concat([y, M2, sqrts], axis=1)
+        kin = kin.set_axis(["y", "M2", "sqrts"], axis=1)
+        self.kinematics = kin.astype(float).to_numpy()
+        self.kinematic_quantities = ["y", "M2", "sqrts"]
 
-      # Pick the the kinematic quantities
-      kin=pd.concat([y,M2,sqrts],axis=1)
-      kin=kin.set_axis(["y","M2","sqrts"],axis=1)
-      self.kinematics = kin.astype(float).to_numpy()
-      self.kinematic_quantities = ["y", "M2", "sqrts"]
+        # Statistical uncertainties.
+        self.statistical_uncertainties = data["stat"] * jac
 
-      # Statistical uncertainties.
-      self.statistical_uncertainties = data["stat"]*jac
+        # Systematic uncertainty
+        syst = data["syst"] * jac
 
-      # Systematic uncertainty
-      syst = data["syst"]*jac
+        # Normalisation uncertainty of 6.5% from beam intensity calibration.
+        norm = 6.5 / 100
+        norm = norm * self.central_values
 
-      # Normalisation uncertainty of 6.5% from beam intensity calibration.
-      norm = 6.5/100
-      norm = norm * self.central_values
+        self.systematic_uncertainties = np.dstack((syst, norm))[0]
+        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]
 
-      self.systematic_uncertainties = np.dstack((syst,norm))[0]
-      self.systypes = [("ADD", "UNCORR"),("MULT","CORR")]
+        self.process = process
+        self.dataset_name = dataset_name
 
-      self.process = process
-      self.dataset_name = dataset_name
 
 def main():
-   data = readdata()
-   # First create the commondata variant without the nuclear uncertainties.
-   DYE866 = E866commondata(data, "DYE866_Z0", "Z0")
-   DYE866.write_new_commondata(Path("data_reimplemented_PXSEC.yaml"),
-                                Path("kinematics_reimplemented_PXSEC.yaml"),
-                                Path("uncertainties_reimplemented_PXSEC.yaml"))
-
-   if(covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented")):
-      print("covmat is close")
-   else:
-      print("covmat is different.")
-if __name__ == "__main__":
-   main()
-
+    data = readdata()
+    # First create the commondata variant without the nuclear uncertainties.
+    DYE866 = E866commondata(data, "DYE866_Z0", "Z0")
+    DYE866.write_new_commondata(
+        Path("data_reimplemented_PXSEC.yaml"),
+        Path("kinematics_reimplemented_PXSEC.yaml"),
+        Path("uncertainties_reimplemented_PXSEC.yaml"),
+    )
 
+    if covmat_is_close("DYE866_Z0_800GEV_PXSEC", "legacy", "reimplemented"):
+        print("covmat is close")
+    else:
+        print("covmat is different.")
 
 
+if __name__ == "__main__":
+    main()