Commit

Merge pull request #45 from amarquand/dev
Dev
amarquand authored Feb 8, 2021
2 parents 433f5a6 + 168a517 commit 83c2f5f
Showing 5 changed files with 115 additions and 63 deletions.
55 changes: 39 additions & 16 deletions pcntoolkit/bayesreg.py
@@ -46,10 +46,16 @@ class BLR:
Written by A. Marquand
"""

def __init__(self, hyp=None, X=None, y=None,
n_iter=100, tol=1e-3, verbose=False,
var_groups=None, warp=None):

def __init__(self, **kwargs):
# parse arguments
n_iter = kwargs.get('n_iter', 100)
tol = kwargs.get('tol', 1e-3)
verbose = kwargs.get('verbose', False)
var_groups = kwargs.get('var_groups', None)
warp = kwargs.get('warp', None)
warp_reparam = kwargs.get('warp_reparam', False)

# basic parameters
self.hyp = np.nan
self.nlZ = np.nan
self.tol = tol # not used at present
@@ -61,31 +67,28 @@ def __init__(self, hyp=None, X=None, y=None,
self.var_ids = sorted(list(self.var_ids))

# set up warped likelihood
if verbose:
print('warp:', warp, 'warp_reparam:', warp_reparam)
if warp is None:
self.warp = None
self.n_warp_param = 0
else:
self.warp = warp
self.n_warp_param = warp.get_n_params()
self.warp_reparam = warp_reparam

self.gamma = None

def _parse_hyps(self, hyp, X):

N = X.shape[0]

# hyperparameters
# noise precision
if self.var_groups is None:
beta = np.asarray([np.exp(hyp[0])]) # noise precision
self.Lambda_n = np.diag(np.ones(N)*beta)
self.Sigma_n = np.diag(np.ones(N)/beta)
beta = np.asarray([np.exp(hyp[0])])
else:
beta = np.exp(hyp[0:len(self.var_ids)])
beta_all = np.ones(N)
for v in range(len(self.var_ids)):
beta_all[self.var_groups == self.var_ids[v]] = beta[v]
self.Lambda_n = np.diag(beta_all)
self.Sigma_n = np.diag(1/beta_all)


# parameters for warping the likelihood function
n_lik_param = len(beta)
if self.warp is not None:
@@ -100,6 +103,22 @@ def _parse_hyps(self, hyp, X):
else:
alpha = np.exp(hyp[1:])

# reparameterise the warp (WarpSinArcsinh only)
if self.warp is not None and self.warp_reparam:
delta = np.exp(gamma[1])
beta = beta/(delta**2)

# Create precision matrix from noise precision
if self.var_groups is None:
self.Lambda_n = np.diag(np.ones(N)*beta)
self.Sigma_n = np.diag(np.ones(N)/beta)
else:
beta_all = np.ones(N)
for v in range(len(self.var_ids)):
beta_all[self.var_groups == self.var_ids[v]] = beta[v]
self.Lambda_n = np.diag(beta_all)
self.Sigma_n = np.diag(1/beta_all)

return beta, alpha, gamma

def post(self, hyp, X, y):
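Note on the reparameterisation above: for WarpSinArcsinh, the second warp parameter is treated as the log of the scale delta, and the noise precision is rescaled by 1/delta^2 so the two do not trade off during optimization. A minimal numpy sketch of the arithmetic, with a hypothetical hyperparameter vector (layout inferred from _parse_hyps: log noise precision, then warp parameters, then log alpha):

    import numpy as np

    hyp = np.array([0.0, 0.1, -0.2, 1.0])  # hypothetical: [log beta, gamma_0, gamma_1, log alpha]
    beta = np.exp(hyp[0])                  # noise precision
    gamma = hyp[1:3]                       # warp parameters
    alpha = np.exp(hyp[3:])                # weight precision(s)

    delta = np.exp(gamma[1])               # warp scale (assumes gamma[1] = log delta)
    beta = beta / delta**2                 # the warp_reparam rescaling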
@@ -298,8 +317,9 @@ def dloglik(self, hyp, X, y):
return dnlZ

# model estimation (optimization)
def estimate(self, hyp0, X, y, optimizer='cg'):
def estimate(self, hyp0, X, y, **kwargs):
""" Function to estimate the model """
optimizer = kwargs.get('optimizer','cg')

if optimizer.lower() == 'cg': # conjugate gradients
out = optimize.fmin_cg(self.loglik, hyp0, self.dloglik, (X, y),
@@ -309,6 +329,9 @@ def estimate(self, hyp0, X, y, optimizer='cg'):
elif optimizer.lower() == 'powell': # Powell's method
out = optimize.fmin_powell(self.loglik, hyp0, (X, y),
full_output=1)
elif optimizer.lower() == 'nelder-mead':
out = optimize.fmin(self.loglik, hyp0, (X, y),
full_output=1)
else:
raise ValueError("unknown optimizer")
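estimate() now routes its options through **kwargs, and scipy's Nelder-Mead simplex (optimize.fmin) joins conjugate gradients and Powell's method. A usage sketch of the new interface, with hypothetical data shapes and starting hyperparameters:

    import numpy as np
    from pcntoolkit.bayesreg import BLR

    X = np.random.randn(100, 3)
    y = X @ np.array([1.0, -0.5, 0.25]) + 0.1 * np.random.randn(100)

    b = BLR(n_iter=100, tol=1e-3, verbose=True)      # kwargs-only constructor;
                                                     # var_groups, warp, warp_reparam also accepted
    hyp0 = np.zeros(2)                               # [log beta, log alpha], no warp
    b.estimate(hyp0, X, y, optimizer='nelder-mead')  # or 'cg' (default), 'powell'
    print(b.nlZ)                                     # negative log marginal likelihood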

@@ -322,7 +345,7 @@ def predict(self, hyp, X, y, Xs, var_groups_test=None):
""" Function to make predictions from the model """

if X is None or y is None:
# set hyperparameters. we can use an array of zeros because
# set dummy hyperparameters
beta, alpha, gamma = self._parse_hyps(hyp, np.zeros((self.N, 1)))
else:

40 changes: 23 additions & 17 deletions pcntoolkit/hbr.py
@@ -22,24 +22,28 @@

def bspline_fit(X, order, nknots):

X = X.squeeze()
if len(X.shape) > 1:
raise ValueError('Bspline method only works for a single covariate.')

knots = np.linspace(X.min(), X.max(), nknots)
k = splinelab.augknt(knots, order)
bsp_basis = bspline.Bspline(k, order)
feature_num = X.shape[1]
bsp_basis = []
for i in range(feature_num):
knots = np.linspace(X[:,i].min(), X[:,i].max(), nknots)
k = splinelab.augknt(knots, order)
bsp_basis.append(bspline.Bspline(k, order))

return bsp_basis


def bspline_transform(X, bsp_basis):

X = X.squeeze()
if len(X.shape) > 1:
raise ValueError('Bspline method only works for a single covariate.')

X_transformed = np.array([bsp_basis(i) for i in X])
if type(bsp_basis)!=list:
temp = []
temp.append(bsp_basis)
bsp_basis = temp

feature_num = len(bsp_basis)
X_transformed = []
for f in range(feature_num):
X_transformed.append(np.array([bsp_basis[f](i) for i in X[:,f]]))
X_transformed = np.concatenate(X_transformed, axis=1)

return X_transformed
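bspline_fit and bspline_transform now build one basis per covariate column and concatenate the transformed features, where previously anything but a single covariate raised an error. A usage sketch with hypothetical data (assumes the bspline/splinelab package imported at the top of hbr.py):

    import numpy as np
    from pcntoolkit.hbr import bspline_fit, bspline_transform

    X = np.random.uniform(0.0, 1.0, size=(200, 2))  # two covariates, now supported
    basis = bspline_fit(X, order=3, nknots=5)       # list with one Bspline per column
    Phi = bspline_transform(X, basis)               # per-column expansions, concatenated
    print(Phi.shape)                                # (200, 2 * number of basis functions)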

@@ -270,10 +274,11 @@ def hbr(X, y, batch_effects, batch_effects_size, configs, trace=None):
if trace is not None: # Used for transferring the priors
log_sigma_noise = from_posterior('log_sigma_noise',
trace['log_sigma_noise'],
distribution=None, freedom=configs['freedom'])
distribution='normal', freedom=configs['freedom'])

else:
log_sigma_noise = pm.Uniform('log_sigma_noise', lower=-5, upper=5, shape=(batch_effects_size))
#log_sigma_noise = pm.Uniform('log_sigma_noise', lower=-5, upper=5, shape=(batch_effects_size))
log_sigma_noise = pm.Normal('log_sigma_noise', mu=0., sigma=2.5, shape=(batch_effects_size))
sigma_y = theano.tensor.zeros(y_shape)
for be in be_idx:
a = []
Expand All @@ -287,9 +292,10 @@ def hbr(X, y, batch_effects, batch_effects_size, configs, trace=None):
if trace is not None:
log_sigma_noise = from_posterior('log_sigma_noise',
trace['log_sigma_noise'],
distribution=None, freedom=configs['freedom'])
distribution='normal', freedom=configs['freedom'])
else:
log_sigma_noise = pm.Uniform('log_sigma_noise', lower=-5, upper=5)
#log_sigma_noise = pm.Uniform('log_sigma_noise', lower=-5, upper=5)
log_sigma_noise = pm.Normal('log_sigma_noise', mu=0., sigma=2.5)

sigma_y = theano.tensor.zeros(y_shape)
for be in be_idx:
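The uniform prior on log_sigma_noise is replaced with a weakly-informative normal whose two-sigma range roughly matches the old [-5, 5] bounds. A standalone PyMC3 sketch of the two alternatives (the model context is hypothetical, shapes omitted):

    import pymc3 as pm

    with pm.Model():
        # old prior, now commented out in the diff: hard bounds on the log scale
        # log_sigma_noise = pm.Uniform('log_sigma_noise', lower=-5, upper=5)

        # new prior: unbounded, but mass concentrated in about the same range
        log_sigma_noise = pm.Normal('log_sigma_noise', mu=0., sigma=2.5)
        sigma_noise = pm.math.exp(log_sigma_noise)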
@@ -825,7 +831,7 @@ def generate(self, X, batch_effects, samples):
X = np.repeat(X, samples)
if len(X.shape)==1:
X = np.expand_dims(X, axis=1)
batch_effects = np.repeat(batch_effects, samples)
batch_effects = np.repeat(batch_effects, samples, axis=0)
if len(batch_effects.shape)==1:
batch_effects = np.expand_dims(batch_effects, axis=1)
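The added axis=0 matters: without it numpy flattens a 2-D batch-effects array before repeating. A quick illustration:

    import numpy as np

    batch_effects = np.array([[0, 1], [1, 0]])        # hypothetical (n, 2) design
    print(np.repeat(batch_effects, 3).shape)          # (12,)  flattened -- the old behaviour
    print(np.repeat(batch_effects, 3, axis=0).shape)  # (6, 2) rows repeated, as intended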

39 changes: 27 additions & 12 deletions pcntoolkit/normative.py
@@ -344,8 +344,15 @@ def estimate(covfile, respfile, **kwargs):
else:
run_cv = True
# we are running under cross-validation
splits = KFold(n_splits=cvfolds)
splits = KFold(n_splits=cvfolds, shuffle=True)
testids = range(0, X.shape[0])
if alg=='hbr':
trbefile = kwargs.get('trbefile', None)
if trbefile is not None:
be = fileio.load(trbefile)
else:
print('Could not find batch-effects file! Initializing all as zeros!')
be = np.zeros([X.shape[0],1])

# find and remove bad variables from the response variables
# note: the covariates are assumed to have already been checked
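Folds are now shuffled (KFold(shuffle=True) without a fixed random_state gives different splits on each run), and for HBR the batch effects are loaded up front so each fold can be given its own training and test slice, as the next hunk shows. A call sketch with hypothetical file paths:

    from pcntoolkit.normative import estimate

    estimate('cov.txt', 'resp.txt', alg='hbr', cvfolds=5,
             trbefile='batch_effects_train.pkl')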
@@ -391,14 +398,20 @@ def estimate(covfile, respfile, **kwargs):
else:
Yz = Y
Xz = X

if (run_cv==True and alg=='hbr'):
fileio.save(be[tr,:], 'be_kfold_tr_tempfile.pkl')
fileio.save(be[te,:], 'be_kfold_ts_tempfile.pkl')
kwargs['trbefile'] = 'be_kfold_tr_tempfile.pkl'
kwargs['tsbefile'] = 'be_kfold_ts_tempfile.pkl'

# estimate the models for all subjects
for i in range(0, len(nz)):
print("Estimating model ", i+1, "of", len(nz))
nm = norm_init(Xz[tr, :], Yz[tr, nz[i]], alg=alg, **kwargs)
try:
nm = nm.estimate(Xz[tr, :], Yz[tr, nz[i]], **kwargs)

try:
nm = nm.estimate(Xz[tr, :], Yz[tr, nz[i]], **kwargs)
yhat, s2 = nm.predict(Xz[te, :], Xz[tr, :], Yz[tr, nz[i]], **kwargs)

if savemodel:
@@ -559,7 +572,7 @@ def fit(covfile, respfile, **kwargs):
return nm


def predict(covfile, respfile=None, maskfile=None, **kwargs):
def predict(covfile, respfile, maskfile=None, **kwargs):
'''
Make predictions on the basis of a pre-estimated normative model
If only the covariates are specified then only predicted mean and variance
@@ -578,7 +591,6 @@ def predict(covfile, respfile=None, maskfile=None, **kwargs):
:param model_path: Directory containing the normative model and metadata.
When using parallel prediction, do not pass the model path. It will be automatically
decided.
:param output_path: Directory to store the results
:param outputsuffix: Text string to add to the output filenames
:param batch_size: batch size (for use with normative_parallel)
:param job_id: batch id
@@ -594,7 +606,6 @@ def predict(covfile, respfile=None, maskfile=None, **kwargs):
model_path = kwargs.pop('model_path', 'Models')
job_id = kwargs.pop('job_id', None)
batch_size = kwargs.pop('batch_size', None)
output_path = kwargs.pop('output_path', '')
outputsuffix = kwargs.pop('outputsuffix', '_predict')
inputsuffix = kwargs.pop('inputsuffix', '_estimate')
alg = kwargs.pop('alg')
@@ -614,15 +625,16 @@ def predict(covfile, respfile=None, maskfile=None, **kwargs):
sY = meta_data['std_resp']
mX = meta_data['mean_cov']
sX = meta_data['std_cov']
meta_data = True
else:
print("No meta-data file is found!")
standardize = False
meta_data = False

if batch_size is not None:
batch_size = int(batch_size)
job_id = int(job_id) - 1

if (output_path != '') and (not os.path.isdir(output_path)):
os.mkdir(output_path)


# load data
print("Loading data ...")
@@ -682,12 +694,15 @@ def predict(covfile, respfile=None, maskfile=None, **kwargs):
Z = (Y - Yhat) / np.sqrt(S2)

print("Evaluating the model ...")
results = evaluate(Y, Yhat, S2=S2,
if meta_data:
results = evaluate(Y, Yhat, S2=S2, mY=mY[0], sY=sY[0])
else:
results = evaluate(Y, Yhat, S2=S2,
metrics = ['Rho', 'RMSE', 'SMSE', 'EXPV'])

print("Evaluations Writing outputs ...")
save_results(respfile, Yhat, S2, maskvol, Z=Z, outputsuffix=outputsuffix,
results=results, save_path=output_path)
results=results)

return (Yhat, S2, Z)
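With this change respfile is a required argument again, the output_path option is dropped in favour of the default save location, and evaluation uses the stored response mean and standard deviation when the meta-data file is found. A call sketch with hypothetical paths (alg must match the fitted model):

    from pcntoolkit.normative import predict

    yhat, s2, z = predict('cov_test.txt', 'resp_test.txt', alg='blr',
                          model_path='Models',
                          inputsuffix='_estimate', outputsuffix='_predict')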

@@ -848,7 +863,7 @@ def extend(covfile, respfile, maskfile=None, **kwargs):

outputsuffix = kwargs.pop('outputsuffix', '_extend')
inputsuffix = kwargs.pop('inputsuffix', '_estimate')
informative_prior = kwargs.pop('job_id', 'False') == 'True'
informative_prior = kwargs.pop('informative_prior', 'False') == 'True'
generation_factor = int(kwargs.pop('generation_factor', '10'))
job_id = kwargs.pop('job_id', None)
batch_size = kwargs.pop('batch_size', None)
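The fix above reads informative_prior from its own keyword; previously it mistakenly popped job_id. A call sketch showing only the options this hunk touches, with hypothetical paths (both flags are passed as strings, as in the source):

    from pcntoolkit.normative import extend

    extend('cov.txt', 'resp.txt',
           informative_prior='True',  # was incorrectly read from job_id
           generation_factor='20',    # parsed with int() internally
           inputsuffix='_estimate', outputsuffix='_extend')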
