Skip to content

Commit

Permalink
Test BLR with Kfold in Parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
AuguB committed Dec 12, 2024
1 parent 78d3722 commit 0d8c8e0
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 22 deletions.
11 changes: 10 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,13 @@ tests/cli_test/*

docs/autoapi/*
docs/_build/*
docs
docs
tests/cli_test_parallel_kfold/temp/be_te_fcon1000.pkl
tests/cli_test_parallel_kfold/temp/be_tr_fcon1000.pkl
tests/cli_test_parallel_kfold/temp/fcon1000
tests/cli_test_parallel_kfold/temp/X_te_fcon1000.pkl
tests/cli_test_parallel_kfold/temp/X_tr_fcon1000.pkl
tests/cli_test_parallel_kfold/temp/X_var_te_fcon1000.pkl
tests/cli_test_parallel_kfold/temp/X_var_tr_fcon1000.pkl
tests/cli_test_parallel_kfold/temp/Y_te_fcon1000.pkl
tests/cli_test_parallel_kfold/temp/Y_tr_fcon1000.pkl
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
56 changes: 56 additions & 0 deletions tests/cli_test_parallel_kfold/inspect_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pickle
import numpy as np
import matplotlib.pyplot as plt

import glob
import os



def _load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the object has been read.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only point this
    # script at result files produced by your own jobs.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Directory that holds one ``batch_<n>`` sub-directory per processed batch.
PROCESSING_DIR = "/project/3022000.05/projects/stijdboe/temp/parallel_processing"

# Test covariates; column 1 is used as the x-axis of the prediction plots.
X_TEST_PATH = "/project/3022000.05/projects/stijdboe/Projects/PCNtoolkit/tests/cli_test_parallel_kfold/temp/X_te_fcon1000.pkl"

for batch in [1, 2]:
    # Look in the directory of the batch being inspected (the path used to be
    # fixed to batch_1, so every iteration re-read the same files).
    results_dir = os.path.join(PROCESSING_DIR, f"batch_{batch}")

    for func in ['fit', 'predict', 'estimate']:
        print(f"Plotting {func} results...")
        results = glob.glob(os.path.join(results_dir, f"*{func}.pkl"))
        for result in results:
            # Classify on the file name only, so that directory names can
            # never trigger a branch by accident.
            name = os.path.basename(result)
            if "Z" in name:
                # Sorted values against sorted standard-normal draws of the
                # same length (presumably a quick Q-Q style check).
                z = _load_pickle(result)
                n = np.random.randn(z.shape[0], 1)
                sorted_z = np.sort(z, axis=0)
                sorted_n = np.sort(n, axis=0)
                plt.plot(sorted_z, sorted_n, label=f"Z_{func}")
                # Include the batch in the file name so that later batches do
                # not overwrite the plot of earlier ones.
                plt.savefig(f"Z_{func}_batch{batch}.png")
                plt.close()
            elif "yhat" in name:
                x = _load_pickle(X_TEST_PATH).to_numpy()
                sortindex = np.argsort(x[:, 1])
                print(x[sortindex, 1])
                yhat = _load_pickle(result).to_numpy()
                # The matching "ys2" file sits next to the "yhat" file and
                # differs only in that part of the file name.
                s2_path = os.path.join(
                    os.path.dirname(result), name.replace("yhat", "ys2")
                )
                s2 = _load_pickle(s2_path).to_numpy()
                print(x.shape)
                print(yhat.shape)
                print(s2.shape)

                # One figure per column of yhat: the prediction plus a band of
                # +/- s2 around it, ordered by the covariate.
                for i in range(yhat.shape[1]):
                    plt.plot(x[sortindex, 1], yhat[sortindex, i], label=f"Yhat_{func}_{i}")
                    plt.plot(x[sortindex, 1], yhat[sortindex, i] - s2[sortindex, i], label=f"Yhat_{func}_{i} - s2")
                    plt.plot(x[sortindex, 1], yhat[sortindex, i] + s2[sortindex, i], label=f"Yhat_{func}_{i} + s2")
                    plt.savefig(f"Yhat_{func}_ft{i}_batch{batch}.png")
                    plt.close()
            elif "S2" in name:
                s2 = _load_pickle(result)
                print(f"{s2=}")
            elif "EXPV" in name:
                expv = _load_pickle(result)
                print(f"{expv=}")
            elif "MSLL" in name:
                msll = _load_pickle(result)
                print(f"{msll=}")
            elif "SMSE" in name:
                smse = _load_pickle(result)
                print(f"{smse=}")

24 changes: 18 additions & 6 deletions tests/cli_test_parallel_kfold/split_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def main():
# Standardize the covariates and responses
cov = StandardScaler().fit_transform(cov.to_numpy()[:,np.newaxis])
resp = StandardScaler().fit_transform(resp.to_numpy())

xmin = cov.min()
xmax = cov.max()


# Map the batch effects to integers
be_ids = np.unique(be, return_inverse=True)[1]
Expand All @@ -44,16 +48,24 @@ def main():
# Create the design matrices
mean_basis = 'linear'
var_basis = 'linear'
Phi_tr = create_design_matrix(cov[train_idx], basis=mean_basis, intercept=False, site_ids=be_ids[train_idx])
Phi_var_tr = create_design_matrix(cov[train_idx], basis=var_basis)
Phi_te = create_design_matrix(cov[test_idx], basis=mean_basis, intercept=False, site_ids=be_ids[test_idx])
Phi_var_te = create_design_matrix(cov[test_idx], basis=var_basis)
# Phi_tr = create_design_matrix(cov[train_idx], basis=mean_basis, intercept=False, site_ids=be_ids[train_idx])
# Phi_var_tr = create_design_matrix(cov[train_idx], basis=var_basis)
# Phi_te = create_design_matrix(cov[test_idx], basis=mean_basis, intercept=False, site_ids=be_ids[test_idx])
# Phi_var_te = create_design_matrix(cov[test_idx], basis=var_basis)

Phi_tr = create_design_matrix(cov[train_idx], basis=mean_basis, intercept=True, xmin=xmin, xmax=xmax)
# Phi_var_tr = create_design_matrix(cov[train_idx], basis=var_basis, xmin=xmin, xmax=xmax)
Phi_var_tr = cov[train_idx]
Phi_te = create_design_matrix(cov[test_idx], basis=mean_basis, intercept=True, xmin=xmin, xmax=xmax)
# Phi_var_te = create_design_matrix(cov[test_idx], basis=var_basis, xmin=xmin, xmax=xmax)
Phi_var_te = cov[test_idx]
print(f"{Phi_var_te.shape=}")

# Save everything
pd.to_pickle(pd.DataFrame(Phi_tr), os.path.join(args.output_dir, f'X_tr_{infile}.pkl'))
pd.to_pickle(pd.DataFrame(Phi_var_tr), os.path.join(args.output_dir, f'X_var_tr_{infile}.pkl'))
pd.to_pickle(Phi_var_tr, os.path.join(args.output_dir, f'X_var_tr_{infile}.pkl'))
pd.to_pickle(pd.DataFrame(Phi_te), os.path.join(args.output_dir, f'X_te_{infile}.pkl'))
pd.to_pickle(pd.DataFrame(Phi_var_te), os.path.join(args.output_dir, f'X_var_te_{infile}.pkl'))
pd.to_pickle(Phi_var_te, os.path.join(args.output_dir, f'X_var_te_{infile}.pkl'))
pd.to_pickle(pd.DataFrame(resp[train_idx]), os.path.join(args.output_dir, f'Y_tr_{infile}.pkl'))
pd.to_pickle(pd.DataFrame(resp[test_idx]), os.path.join(args.output_dir, f'Y_te_{infile}.pkl'))
pd.to_pickle(be[train_idx], os.path.join(args.output_dir, f'be_tr_{infile}.pkl'))
Expand Down
28 changes: 19 additions & 9 deletions tests/cli_test_parallel_kfold/submit_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,45 @@


def execute_nm_wrapper(*args):
    """Turn ``key=value`` tokens into keyword arguments and call ``execute_nm``.

    The tokens may be passed either as one list (``execute_nm_wrapper(argv)``)
    or as separate string arguments (``execute_nm_wrapper("func=fit", ...)``).

    Recognised keys are ``func``, ``covfile_path``, ``respfile_path``,
    ``varcovfile_path``, ``testcovfile_path``, ``testrespfile_path`` and
    ``testvarcovfile_path``; every key except ``func`` defaults to ``None``
    when absent. All other settings passed to ``execute_nm`` are fixed below.

    Raises:
        ValueError: if a token does not contain ``=``.
    """
    # Accept both call styles: a single sequence of tokens, or the tokens
    # themselves as positional arguments.
    tokens = args if (args and isinstance(args[0], str)) else args[0]

    # Split on the first '=' only, so values that contain '=' stay intact.
    args_dict = dict(token.split('=', 1) for token in tokens)

    func = args_dict.get('func')
    covfile_path = args_dict.get('covfile_path', None)
    respfile_path = args_dict.get('respfile_path', None)
    varcovfile_path = args_dict.get('varcovfile_path', None)
    testcovfile_path = args_dict.get('testcovfile_path', None)
    testrespfile_path = args_dict.get('testrespfile_path', None)
    testvarcovfile_path = args_dict.get('testvarcovfile_path', None)
    if func == "estimate":
        # NOTE(review): the test responses are deliberately dropped for
        # "estimate"; the reason is not visible in this file - confirm.
        testrespfile_path = None

    execute_nm(
        python_path='/home/preclineu/stijdboe/.conda/envs/pcntk_dev/bin/python',
        normative_path="/home/preclineu/stijdboe/.conda/envs/pcntk_dev/lib/python3.12/site-packages/pcntoolkit/normative.py",
        job_name='test_normative_parallel',
        processing_dir='/project/3022000.05/projects/stijdboe/temp/parallel_processing/',
        log_path='/project/3022000.05/projects/stijdboe/temp/parallel_processing/log/',
        varcovfile=varcovfile_path,
        testvarcovfile=testvarcovfile_path,
        func=func,
        covfile_path=covfile_path,
        respfile_path=respfile_path,
        testcovfile_path=testcovfile_path,
        testrespfile_path=testrespfile_path,
        batch_size=2,
        memory='4G',
        duration='00:02:00',
        job_id=1,
        cv_folds=5,
        alg='blr',
        warp='WarpSinArcsinh',
        optimizer='l-bfgs-b',
        # These flags are passed as strings rather than booleans, as in the
        # committed version of this call.
        warp_reparam='True',
        binary='True',
        cluster_spec='slurm',
        saveoutput='True',
        savemodel='True',
        # Suffix the outputs with the function name.
        outputsuffix=f"_{func}"
    )


Expand Down
12 changes: 6 additions & 6 deletions tests/cli_test_parallel_kfold/test_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ curl -o $tempdir/$data_name https://raw.githubusercontent.com/predictive-clinica
echo "Splitting the data into train and test covariates, responses and batch effects..."
python split_data.py --input_file $tempdir/$data_name --output_dir $tempdir

echo "Fitting the model..."
python submit_jobs.py func=fit covfile_path=$tempdir/X_tr_$data_name.pkl respfile_path=$tempdir/Y_tr_$data_name.pkl
# echo "Fitting the model..."
# python submit_jobs.py func=fit covfile_path=$tempdir/X_tr_$data_name.pkl respfile_path=$tempdir/Y_tr_$data_name.pkl

echo "Predicting the test set..."
python submit_jobs.py func=predict covfile_path=$tempdir/X_te_$data_name.pkl respfile_path=$tempdir/Y_te_$data_name.pkl
# echo "Predicting the test set..."
# python submit_jobs.py func=predict covfile_path=$tempdir/X_te_$data_name.pkl respfile_path=$tempdir/Y_te_$data_name.pkl

echo "Also doing estimate..."
python submit_jobs.py func=estimate covfile_path=$tempdir/X_tr_$data_name.pkl respfile_path=$tempdir/Y_tr_$data_name.pkl testcovfile_path=$tempdir/X_te_$data_name.pkl testrespfile_path=$tempdir/Y_te_$data_name.pkl
python submit_jobs.py func=estimate covfile_path=$tempdir/X_tr_$data_name.pkl respfile_path=$tempdir/Y_tr_$data_name.pkl testcovfile_path=$tempdir/X_te_$data_name.pkl testrespfile_path=$tempdir/Y_te_$data_name.pkl testvarcovfile_path=$tempdir/X_var_te_$data_name.pkl varcovfile_path=$tempdir/X_var_tr_$data_name.pkl

echo "Done!"
rm -R $tempdir
# rm -R $tempdir

0 comments on commit 0d8c8e0

Please sign in to comment.