Merge pull request #111 from amarquand/dev
Dev
amarquand authored Mar 6, 2023
2 parents 36b7ce9 + 4c44b02 commit 84e47ec
Showing 13 changed files with 227 additions and 45 deletions.
11 changes: 6 additions & 5 deletions CHANGES
@@ -5,7 +5,6 @@ version 0.16
Minor bug fix relative to release 0.15. Fixed a transpose operation that would cause normative.py to fail in certain cases

version 0.17:
Changes:
- New syntax for functions related to parallelization (python path is automatically generated)
- Support for slurm clusters has been added
- Updates to hierarchical Bayesian regression (different priors for intercepts and noise) for improved transfer
@@ -15,20 +14,17 @@ Changes:
- Updated sphinx documentation

version 0.18
Changes:
- addition of cross-validation functionality for HBR
- minor bug fixes for HBR

version 0.19
Changes:
- separate standardization of responses and covariates
- standardization is no longer on by default
- new standardization methods
- pymc3 version fixed
- minor bugs resolved (CV with HBR)

version 0.20
Changes:
- Major code refactoring
- Requirements updated for python 3.8.3
- Updates to documentation and integration with Read the Docs
@@ -54,7 +50,6 @@ Some minor updates and bug fixes:
- other minor bug fixes related to cross-validation (computation of error metrics), import problems and calibration statistics

version 0.23
Changes:
- SHASH functionality for HBR added
- Bug fix for normative model predict() function

@@ -72,3 +67,9 @@ version 0.26
- Added support for web portal
- Provided a wrapper for blr to use transfer() functionality
- Also streamlined tutorials (PCNtoolkit-demo), so that all tutorials run with this version

version 0.27
- Configured more sensible default options for HBR (random slope and intercept for mu and random intercept for sigma)
- Fixed a translation problem between the previous naming convention for HBR models (only Gaussian models) and the current naming (also SHASH models)
- Minor updates to fix synchronisation problems in PCNportal (related to the HBR updates above)
- Added configuration files for containerisation with Docker
39 changes: 39 additions & 0 deletions docker/Dockerfile
@@ -0,0 +1,39 @@
# Install software and clean up afterwards: the smaller the image, the shorter the download time
FROM continuumio/miniconda3:latest
RUN apt-get update
RUN apt-get install -y libhdf5-dev \
pkg-config \
gcc \
g++ \
zip

# Make sure we have the right version of numpy
RUN conda install numpy=1.21

# Install directly from GitHub (master branch)
#RUN pip install scikit-learn
#RUN pip install pcntoolkit==0.26

# This is an alternative method that pulls from the dev branch
RUN wget https://github.com/amarquand/PCNtoolkit/archive/dev.zip
RUN unzip dev.zip
RUN pip install scikit-learn
RUN cd PCNtoolkit-dev; pip install . ; cd ..

# Add command line links
RUN ln -s /opt/conda/lib/python3.10/site-packages/pcntoolkit /opt/ptk
RUN chmod 755 /opt/ptk/normative.py
RUN chmod 755 /opt/ptk/normative_parallel.py
RUN chmod 755 /opt/ptk/trendsurf.py
RUN echo "export PATH=${PATH}:/opt/ptk" >> ~/.bashrc

# clean up
RUN rm -rf PCNtoolkit-dev dev.zip
RUN conda clean -a
RUN pip cache purge
RUN apt-get clean

# execute entrypoint
COPY entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh
ENTRYPOINT [ "./entrypoint.sh" ]
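
Since the Dockerfile and entrypoint.sh both live in docker/, the image can be built with docker/ as the build context. A minimal sketch; the image tag is an assumption, not part of this commit:

```python
# Hypothetical build step: the tag "pcntoolkit:dev" is assumed.
# Run from the repository root; docker/ is the build context so that
# the COPY entrypoint.sh instruction above can find the script.
import subprocess

subprocess.run(["docker", "build", "-t", "pcntoolkit:dev", "docker/"],
               check=True)
```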
8 changes: 8 additions & 0 deletions docker/entrypoint.sh
@@ -0,0 +1,8 @@
#!/bin/bash

# take the pcntoolkit function from the first argument specified
func="$1"
shift

# run using all remaining arguments
/opt/ptk/${func} "$@"
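
With this entrypoint, the first argument after the image name selects the pcntoolkit script (it becomes $func) and everything else is forwarded to it via "$@". A hypothetical invocation — the image tag, mount path, and normative.py flags are assumptions; check the usage header of normative.py for the actual options:

```python
# Hypothetical sketch: run normative.py inside the container.
# "normative.py" becomes $func in entrypoint.sh; the remaining
# arguments are passed straight through to that script.
import subprocess

subprocess.run(["docker", "run", "--rm",
                "-v", "/data:/data",      # assumed host data mount
                "pcntoolkit:dev",         # assumed image tag (see build step)
                "normative.py",           # dispatched as $func
                "-c", "/data/cov.txt",    # assumed flag: covariates file
                "-a", "hbr",              # assumed flag: algorithm
                "/data/resp.txt"],        # responses file
               check=True)
```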
1 change: 0 additions & 1 deletion pcntoolkit/model/NPR.py
100644 → 100755
Expand Up @@ -77,4 +77,3 @@ def np_loss(y_hat, y_hat_84, y, z_all, z_context):
0.16 * F.binary_cross_entropy(torch.squeeze(y_hat_84[idx2,:]), torch.mean(y[idx2,:],dim=1), reduction="sum")
KLD = kl_div_gaussians(z_all[0], z_all[1], z_context[0], z_context[1])
return BCE + KLD + BCE84

11 changes: 8 additions & 3 deletions pcntoolkit/model/hbr.py
Expand Up @@ -173,8 +173,12 @@ def hbr(X, y, batch_effects, batch_effects_size, configs, trace=None):
pb = ParamBuilder(model, X, y, batch_effects, trace, configs)

if configs['likelihood'] == 'Normal':
mu = pb.make_param("mu").get_samples(pb)
sigma = pb.make_param("sigma").get_samples(pb)
mu = pb.make_param("mu", mu_slope_mu_params = (0.,10.),
sigma_slope_mu_params = (5.,),
mu_intercept_mu_params=(0.,10.),
sigma_intercept_mu_params = (5.,)).get_samples(pb)
sigma = pb.make_param("sigma", mu_sigma_params = (10., 5.),
sigma_sigma_params = (5.,)).get_samples(pb)
sigma_plus = pm.math.log(1+pm.math.exp(sigma))
y_like = pm.Normal('y_like',mu=mu, sigma=sigma_plus, observed=y)
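
In the hunk above, sigma is sampled on an unconstrained scale and passed through a softplus transform (sigma_plus = pm.math.log(1+pm.math.exp(sigma))) before entering the Normal likelihood, which guarantees a strictly positive noise scale:

$$\sigma_{+} = \log\!\left(1 + e^{\sigma}\right) > 0, \qquad \sigma_{+} \approx \sigma \ \text{for large } \sigma.$$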

@@ -530,7 +534,8 @@ def make_param(self, name, dim = (1,), **kwargs):
slope_parameterization = self.make_param(f'slope_{name}', dim=[self.feature_num], **kwargs)
intercept_parameterization = self.make_param(f'intercept_{name}', **kwargs)
return LinearParameterization(name=name, dim=dim,
slope_parameterization=slope_parameterization, intercept_parameterization=intercept_parameterization,
slope_parameterization=slope_parameterization,
intercept_parameterization=intercept_parameterization,
pb=self,
**kwargs)

18 changes: 12 additions & 6 deletions pcntoolkit/normative.py
@@ -1,4 +1,4 @@
#!/Users/andre/sfw/anaconda3/bin/python
#!/opt/conda/bin/python

# ------------------------------------------------------------------------------
# Usage:
@@ -1032,11 +1032,7 @@ def transfer(covfile, respfile, testcov=None, testresp=None, maskfile=None,
Yhat[:, i] = yhat.squeeze()
S2[:, i] = s2.squeeze()

# Creates a file for every job successfully completed (for tracking failed jobs).
if count_jobsdone==True:
done_path = os.path.join(log_path, str(job_id)+".jobsdone")
Path(done_path).touch()



if testresp is None:
save_results(respfile, Yhat, S2, maskvol, outputsuffix=outputsuffix)
@@ -1069,7 +1065,17 @@ def transfer(covfile, respfile, testcov=None, testresp=None, maskfile=None,
save_results(respfile, Yhat, S2, maskvol, Z=Z, results=results,
outputsuffix=outputsuffix)

# Creates a file for every job successfully completed (for tracking failed jobs).
if count_jobsdone==True:
done_path = os.path.join(log_path, str(job_id)+".jobsdone")
Path(done_path).touch()

return (Yhat, S2, Z)

# Creates a file for every job successfully completed (for tracking failed jobs).
if count_jobsdone==True:
done_path = os.path.join(log_path, str(job_id)+".jobsdone")
Path(done_path).touch()


def extend(covfile, respfile, maskfile=None, **kwargs):
112 changes: 104 additions & 8 deletions pcntoolkit/normative_model/norm_hbr.py
@@ -34,28 +34,115 @@


class NormHBR(NormBase):
""" Classical GPR-based normative modelling approach

""" HBR multi-batch normative modelling class. By default, this function
estimates a linear model with random intercept, random slope, and random
homoscedastic noise.
:param X: [N×P] array of clinical covariates
:param y: [N×1] array of neuroimaging measures
:param trbefile: the path to the batch-effects file for the training set.
The batch-effect array should be an [N×M] array, where M is the number of
batch-effect types. For example, when site and gender are modeled as batch
effects, M=2. Each column in the batch-effect array contains the batch ID
(starting from 0) for each sample. If not specified (default=None), all
samples are assumed to be from the same batch (i.e., the batch effect
is not modelled).
:param tsbefile: Similar to trbefile, for the test set.
:param model_type: Specifies the type of the model from 'linear', 'polynomial',
and 'bspline' (default is 'linear').
:param likelihood: specifies the type of likelihood among 'Normal', 'SHASHb', 'SHASHo',
and 'SHASHo2' (default is 'Normal').
:param linear_mu: Boolean (default='True') to decide whether the mean (mu) is
parametrized as a linear function (thus changing with covariates) or is fixed.
:param linear_sigma: Boolean (default='False') to decide whether the variance (sigma) is
parametrized as a linear function (heteroscedastic noise) or is fixed for
each batch (homoscedastic noise).
:param linear_epsilon: Boolean (default='False') to decide the parametrization
of epsilon for the SHASH likelihood, which controls its skewness.
If True, epsilon is parametrized as a linear function
(thus changing with covariates); otherwise it is fixed for each batch.
:param linear_delta: Boolean (default='False') to decide the parametrization
of delta for the SHASH likelihood, which controls its kurtosis.
If True, delta is parametrized as a linear function
(thus changing with covariates); otherwise it is fixed for each batch.
:param random_intercept_{parameter}: if parameters mu (default='True'),
sigma (default='False'), epsilon (default='False'), and delta (default='False')
are parametrized as a linear function, then this boolean decides
whether the intercept can vary across batches.
:param random_slope_{parameter}: if parameters mu (default='True'),
sigma (default='False'), epsilon (default='False'), and delta (default='False')
are parametrized as a linear function, then this boolean decides
whether the slope can vary across batches.
:param centered_intercept_{parameter}: if parameters mu (default='False'),
sigma (default='False'), epsilon (default='False'), and delta (default='False')
are parametrized as a linear function, then this boolean decides
whether the parameters of the intercept are estimated in a centered or
non-centered manner (default). While centered estimation runs faster,
it may cause problems for the sampler (the funnel of hell).
:param centered_slope_{parameter}: if parameters mu (default='False'),
sigma (default='False'), epsilon (default='False'), and delta (default='False')
are parametrized as a linear function, then this boolean decides
whether the parameters of the slope are estimated in a centered or
non-centered manner (default). While centered estimation runs faster,
it may cause problems for the sampler (the funnel of hell).
:param sampler: specifies the type of PyMC sampler (default is 'NUTS').
:param n_samples: The number of samples to draw (default is '1000'). Please
note that this parameter must be specified in string format ('1000',
not 1000).
:param n_tuning: String that specifies the number of iterations to adjust
the sampler's step sizes, scalings, or similar (default is '500').
:param n_chains: String that specifies the number of chains to sample. Default
is '1' for faster estimation, but note that sampling independent chains
is important for some convergence checks.
:param cores: String that specifies the number of chains to run in parallel
(default is '1').
:param init: Initialization method to use for auto-assigned NUTS samplers. The
default is 'jitter+adapt_diag', which starts with an identity mass matrix
and then adapts a diagonal based on the variance of the tuning samples,
while adding a uniform jitter in [-1, 1] to the starting point of each chain.
:param target_accept: String of a float in [0, 1] that regulates the
step size so as to approximate this acceptance rate. The default is '0.8',
but higher values like 0.9 or 0.95 often work better for problematic posteriors.
:param order: String that defines the order of the bspline or polynomial model.
The default is '3'.
:param nknots: String that defines the number of knots for the bspline model.
The default is '5'. Higher values increase model complexity, with a negative
effect on estimation speed.
:param nn_hidden_layers_num: String that specifies the number of hidden layers
in the neural network model. It can be either '1' or '2'. The default is '2'.
:param nn_hidden_neuron_num: String that specifies the number of neurons in
the hidden layers. The default is '2'.
Written by S. de Boer and S.M. Kia
"""

def __init__(self, **kwargs):

self.configs = dict()
self.configs['transferred'] = False
# inputs
self.configs['trbefile'] = kwargs.pop('trbefile', None)
self.configs['tsbefile'] = kwargs.pop('tsbefile', None)
# Model settings
self.configs['type'] = kwargs.pop('model_type', 'linear')
self.configs['skewed_likelihood'] = kwargs.pop('skewed_likelihood', 'False') == 'True'
self.configs['pred_type'] = kwargs.pop('pred_type', 'single')
self.configs['random_noise'] = kwargs.pop('random_noise', 'True') == 'True'
self.configs['likelihood'] = kwargs.pop('likelihood', 'Normal')
# sampler settings
self.configs['n_samples'] = int(kwargs.pop('n_samples', '1000'))
self.configs['n_tuning'] = int(kwargs.pop('n_tuning', '500'))
self.configs['n_chains'] = int(kwargs.pop('n_chains', '1'))
self.configs['likelihood'] = kwargs.pop('likelihood', 'Normal')
self.configs['sampler'] = kwargs.pop('sampler', 'NUTS')
self.configs['target_accept'] = float(kwargs.pop('target_accept', '0.8'))
self.configs['init'] = kwargs.pop('init', 'jitter+adapt_diag')
self.configs['cores'] = int(kwargs.pop('cores', '1'))
# model transfer setting
self.configs['freedom'] = int(kwargs.pop('freedom', '1'))
self.configs['transferred'] = False
# deprecated settings
self.configs['skewed_likelihood'] = kwargs.pop('skewed_likelihood', 'False') == 'True'
# misc
self.configs['pred_type'] = kwargs.pop('pred_type', 'single')

if self.configs['type'] == 'bspline':
self.configs['order'] = int(kwargs.pop('order', '3'))
@@ -95,16 +182,25 @@ def __init__(self, **kwargs):
if self.configs['linear_sigma']:
if 'random_noise' in kwargs.keys():
print("The keyword \'random_noise\' is deprecated. It is now automatically replaced with \'random_intercept_sigma\', because sigma is linear")
self.configs['random_intercept_sigma'] = kwargs.pop('random_noise','False') == 'True'
self.configs['random_intercept_sigma'] = kwargs.pop('random_noise','True') == 'True'
elif 'random_noise' in kwargs.keys():
print("The keyword \'random_noise\' is deprecated. It is now automatically replaced with \'random_sigma\', because sigma is fixed")
self.configs['random_sigma'] = kwargs.pop('random_noise','False') == 'True'
self.configs['random_sigma'] = kwargs.pop('random_noise','True') == 'True'
if 'random_slope' in kwargs.keys():
print("The keyword \'random_slope\' is deprecated. It is now automatically replaced with \'random_intercept_mu\'")
self.configs['random_intercept_mu'] = kwargs.pop('random_slope','False') == 'True'
self.configs['random_slope_mu'] = kwargs.pop('random_slope','True') == 'True'
##### End Deprecations


## Default parameters
self.configs['linear_mu'] = kwargs.pop('linear_mu','True') == 'True'
self.configs['random_mu'] = kwargs.pop('random_mu','True') == 'True'
self.configs['random_intercept_mu'] = kwargs.pop('random_intercept_mu','True') == 'True'
self.configs['random_slope_mu'] = kwargs.pop('random_slope_mu','True') == 'True'
self.configs['random_sigma'] = kwargs.pop('random_sigma','True') == 'True'
self.configs['centered_sigma'] = kwargs.pop('centered_sigma','True') == 'True'
## End default parameters

self.hbr = HBR(self.configs)

@property
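Taken together, the defaults introduced above mean that a plain HBR fit now estimates a random slope and intercept for mu and a random intercept for sigma, as stated in the 0.27 changelog. A minimal sketch of driving this through the top-level estimate() entry point — all file names are hypothetical placeholders, and note the string-valued sampler settings required by the docstring above:

```python
# Hypothetical sketch: fitting an HBR normative model with the new defaults.
# All file paths are placeholders for illustration only.
from pcntoolkit.normative import estimate

estimate("cov_train.txt",             # [N x P] training covariates
         "resp_train.txt",            # [N x 1] training responses
         testcov="cov_test.txt",
         testresp="resp_test.txt",
         trbefile="batch_train.txt",  # batch IDs per sample, starting at 0
         tsbefile="batch_test.txt",
         alg="hbr",
         likelihood="Normal",
         model_type="linear",
         n_samples="1000",            # strings, not ints (see docstring)
         n_tuning="500",
         n_chains="1")
```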
2 changes: 1 addition & 1 deletion pcntoolkit/normative_model/norm_np.py
100644 → 100755
@@ -226,4 +226,4 @@ def predict(self, Xs, X=None, Y=None, theta=None):
y_sigma_84 = y_sigma_84.cpu().numpy() * (self.scaler.data_max_ - self.scaler.data_min_)
sigma_al = y_hat - y_hat_84
return y_hat.squeeze(), (y_sigma**2 + sigma_al**2).squeeze() #, z_context[0].cpu().numpy(), z_context[1].cpu().numpy()

17 changes: 13 additions & 4 deletions pcntoolkit/normative_parallel.py
@@ -1,4 +1,4 @@
#!/.../anaconda/bin/python/
#!/opt/conda/bin/python

# -----------------------------------------------------------------------------
# Run parallel normative modelling.
@@ -88,8 +88,17 @@ def execute_nm(processing_dir,
:param testrespfile_path: Full path to a .txt file that contains all test features
:param log_path: Path for saving log files
:param binary: If True uses binary format for response file otherwise it is text
written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
:param interactive: If False (default), the user should manually
rerun the failed jobs or collect the results.
If 'auto', the job status is checked until all
jobs are completed, then the failed jobs are rerun
and the results are automatically collected.
'query' is similar to 'auto', except that it
asks for user verification, and is thus immune to
endless loops in case of bugs in the code.
written by (primarily) T Wolfers, (adapted) SM Kia
The documentation is adapted by S Rutherford.
'''

if normative_path is None:
@@ -899,7 +908,7 @@ def bashwrap_nm(processing_dir,

# add in optional arguments.
for k in kwargs:
job_call = [job_call[0] + ' ' + k + '=' + kwargs[k]]
job_call = [job_call[0] + ' ' + k + '=' + str(kwargs[k])]

# writes bash file into processing dir
with open(processing_dir+job_name, 'w') as bash_file:
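The one-line change above wraps kwargs[k] in str(), so numeric keyword values no longer break the assembly of the job call. A minimal illustration of the failure mode being fixed (values are hypothetical):

```python
# Before the fix, concatenating a non-string kwarg raised a TypeError.
job_call = ["python normative.py resp.txt"]
kwargs = {"cvfolds": 2}  # an int, as a caller might reasonably pass

for k in kwargs:
    # old: job_call[0] + ' ' + k + '=' + kwargs[k]   -> TypeError for ints
    job_call = [job_call[0] + " " + k + "=" + str(kwargs[k])]

print(job_call[0])  # -> python normative.py resp.txt cvfolds=2
```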
2 changes: 1 addition & 1 deletion pcntoolkit/trendsurf.py
@@ -1,4 +1,4 @@
#!/Users/andre/sfw/anaconda3/bin/python
#!/opt/conda/bin/python

# ------------------------------------------------------------------------------
# Usage: