
Commit

Merge pull request #131 from amarquand/dev_pymc_SHASH
Transfer functionality implemented
amarquand authored Jun 7, 2023
2 parents 3f2aaef + e86ce0c commit 0840688
Showing 2 changed files with 168 additions and 26 deletions.
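
The transfer functionality is exercised by the new test added below. As a quick orientation, here is a minimal usage sketch that mirrors tests/testHBR_transfer.py; the variable names, paths and keyword values are illustrative only:

# --- illustrative sketch, not part of the commit ---
from pcntoolkit.util.utils import simulate_data
from pcntoolkit.normative_model.norm_utils import norm_init

# toy data: 5 original batches and 2 new ("transfer") batches, as in the test
X_train, Y_train, grp_id_train, X_test, Y_test, grp_id_test, coef = \
    simulate_data('linear', 500, 1, 5, working_dir='/tmp/', plot=False)
X_train_t, Y_train_t, grp_id_train_t, X_test_t, Y_test_t, grp_id_test_t, _ = \
    simulate_data('linear', 100, n_features=1, n_grps=2, plot=False)

nm = norm_init(X_train, Y_train, alg='hbr', likelihood='Normal', model_type='linear')
nm.estimate(X_train, Y_train, trbefile='/tmp/trbefile.pkl')    # fit on the original batches
yhat, ys2 = nm.predict(X_test, tsbefile='/tmp/tsbefile.pkl')   # predict for the original batches
# adapt to the unseen batches: the fitted posterior is turned into an informative prior
nm.estimate_on_new_sites(X_train_t, Y_train_t, grp_id_train_t)
yhat_t, s2_t = nm.predict_on_new_sites(X_test_t, grp_id_test_t)
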
62 changes: 36 additions & 26 deletions pcntoolkit/model/hbr.py
@@ -81,12 +81,7 @@ def create_poly_basis(X, order):
return Phi


def from_posterior(param, samples, distribution=None, half=False, freedom=1):
if len(samples.shape) > 1:
shape = samples.shape[1:]
else:
shape = None

def from_posterior(param, samples, shape, distribution=None, half=False, freedom=1):
if distribution is None:
smin, smax = np.min(samples), np.max(samples)
width = smax - smin
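
The signature change above adds an explicit shape argument: instead of inferring the shape from the stored samples, make_dist (further down in this diff) now passes in a shape whose batch-effect dimensions match the new data, which is what makes transfer to unseen batches possible. As a rough sketch of the underlying "posterior becomes informative prior" idea, assuming a simple Normal fit whose standard deviation is inflated by the freedom factor (an illustration, not the toolkit's exact implementation):

# --- illustrative sketch, not part of the commit ---
import numpy as np
from scipy import stats

def normal_prior_from_samples(samples, freedom=1.0):
    # Fit a Normal to the flattened posterior samples and widen its standard
    # deviation by `freedom`, so the resulting prior is not overly confident.
    mu, sd = stats.norm.fit(np.ravel(samples))
    return mu, freedom * sd

mu, sd = normal_prior_from_samples(np.random.randn(4, 1000) + 2.0, freedom=5)
print(mu, sd)   # roughly 2.0 and 5 * 1.0
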
@@ -222,22 +217,22 @@ def get_sample_dims(var):
"mu_samples",
pb.make_param(
"mu",
mu_slope_mu_params=(0.0, 1.0),
sigma_slope_mu_params=(1.0,),
mu_intercept_mu_params=(0.0, 1.0),
sigma_intercept_mu_params=(1.0,),
mu_slope_mu_params=(0.0, 3.0),
sigma_slope_mu_params=(3.0,),
mu_intercept_mu_params=(0.0, 3.0),
sigma_intercept_mu_params=(3.0,),
).get_samples(pb),
dims=get_sample_dims('mu'),
)
sigma = pm.Deterministic(
"sigma_samples",
pb.make_param(
"sigma", mu_sigma_params=(0.0, 2.0), sigma_sigma_params=(5.0,)
"sigma", mu_sigma_params=(0.0, 2.0), sigma_sigma_params=(2.0,)
).get_samples(pb),
dims=get_sample_dims('sigma'),
)
sigma_plus = pm.Deterministic(
"sigma_plus_samples", pm.math.log(1 + pm.math.exp(sigma)), dims=get_sample_dims('sigma')
"sigma_plus_samples", pm.math.log(1 + pm.math.exp(sigma/3))*3, dims=get_sample_dims('sigma')
)
y_like = pm.Normal(
"y_like", mu, sigma=sigma_plus, observed=y, dims="datapoints"
@@ -586,24 +581,39 @@ def make_dist(self, dist, params, pb):
"""This creates a pymc distribution. If there is a idata, the distribution is fitted to the idata. If there isn't a idata, the prior is parameterized by the values in (params)"""
with pb.model as m:
if pb.idata is not None:
samples = az.extract(pb.idata, var_names=self.name).to_numpy()
if not self.has_random_effect:
samples = np.reshape(samples, (-1,))
# Get samples
samples = az.extract(pb.idata, var_names=self.name)
# Define mapping to new shape
def get_new_dim_size(tup):
oldsize, name = tup
if name.startswith('batch_effect_'):
ind = pb.batch_effect_dim_names.index(name)
return len(np.unique(pb.batch_effect_indices[ind].container.data))
return oldsize

new_shape = list(map(get_new_dim_size, zip(samples.shape,samples.dims)))
if len(new_shape) == 1:
new_shape = None
else:
new_shape = new_shape[:-1]
self.dist = from_posterior(
param=self.name,
samples=samples,
samples=samples.to_numpy(),
shape = new_shape,
distribution=dist,
freedom=pb.configs["freedom"],
)
dims = []
if self.has_random_effect:
dims = dims + pb.batch_effect_dim_names
if self.name.startswith("slope") or self.name.startswith("offset_slope"):
dims = dims + ["basis_functions"]
if dims == []:
self.dist = self.distmap[dist](self.name, *params)

else:
self.dist = self.distmap[dist](self.name, *params, dims=dims)
dims = []
if self.has_random_effect:
dims = dims + pb.batch_effect_dim_names
if self.name.startswith("slope") or self.name.startswith("offset_slope"):
dims = dims + ["basis_functions"]
if dims == []:
self.dist = self.distmap[dist](self.name, *params)
else:
self.dist = self.distmap[dist](self.name, *params, dims=dims)

def __getitem__(self, idx):
"""The idx here is the index of the batch-effect. If the prior does not model batch effects, this should return the same value for each index"""
@@ -687,7 +697,7 @@ class Parameterization:

def __init__(self, name):
self.name = name
print(name, type(self))
# print(name, type(self))

def get_samples(self, pb):
pass
@@ -805,7 +815,7 @@ def get_samples(self, pb):
intc = self.intercept_parameterization.get_samples(pb)
slope_samples = self.slope_parameterization.get_samples(pb)
if pb.configs[f"random_slope_{self.name}"]:
slope = pb.X * self.slope_parameterization.get_samples(pb)
slope = pb.X * slope_samples
slope = slope.sum(axis=-1)
else:
slope = pb.X @ self.slope_parameterization.get_samples(pb)
132 changes: 132 additions & 0 deletions tests/testHBR_transfer.py
@@ -0,0 +1,132 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 29 13:26:35 2019
@author: seykia
This script tests HBR models with transfer (estimation and prediction on new sites) using default configs on toy data.
"""

import os
import numpy as np
from pcntoolkit.normative_model.norm_utils import norm_init
from pcntoolkit.util.utils import simulate_data
import matplotlib.pyplot as plt
from pcntoolkit.normative import estimate
from warnings import filterwarnings
from pcntoolkit.util.utils import scaler
import xarray

filterwarnings('ignore')

np.random.seed(10)

########################### Experiment Settings ###############################


working_dir = '/home/stijn/temp/'  # Specify a working directory
# to save data and results.

simulation_method = 'linear'
n_features = 1 # The number of input features of X
n_grps = 5 # Number of batches in data
n_transfer_groups = 2 # number of batches in transfer data
n_samples = 500 # Number of samples in each group (use a list for different
# sample numbers across different batches)
n_transfer_samples = 100

model_types = ['linear'] # models to try

############################## Data Simulation ################################


X_train, Y_train, grp_id_train, X_test, Y_test, grp_id_test, coef = \
simulate_data(simulation_method, n_samples, n_features, n_grps,
working_dir=working_dir, plot=True)

X_train_transfer, Y_train_transfer, grp_id_train_transfer, X_test_transfer, \
Y_test_transfer, grp_id_test_transfer, coef = \
simulate_data(simulation_method, n_transfer_samples, n_features=n_features,
n_grps=n_transfer_groups, plot=True)

################################# Methods Tests ###############################


for model_type in model_types:
nm = norm_init(X_train, Y_train, alg='hbr', likelihood='Normal', model_type=model_type, n_chains=4, cores=4, n_samples=100, n_tuning=50, freedom=5, nknots=8, target_accept="0.99")

print("Now Estimating on original train data ==============================================")
nm.estimate(X_train, Y_train, trbefile=working_dir+'trbefile.pkl')
print("Now Predicting on original test data ==============================================")
yhat, ys2 = nm.predict(X_test, tsbefile=working_dir+'tsbefile.pkl')

for i in range(n_features):
sorted_idx = X_test[:,i].argsort(axis=0).squeeze()
temp_X = X_test[sorted_idx,i]
temp_Y = Y_test[sorted_idx,]
temp_be = grp_id_test[sorted_idx,:].squeeze()
temp_yhat = yhat[sorted_idx,]
temp_s2 = ys2[sorted_idx,]

plt.figure()
for j in range(n_grps):
plt.scatter(temp_X[temp_be==j,], temp_Y[temp_be==j,],
label='Group' + str(j))
plt.plot(temp_X[temp_be==j,], temp_yhat[temp_be==j,])
plt.fill_between(temp_X[temp_be==j,], temp_yhat[temp_be==j,] -
1.96 * np.sqrt(temp_s2[temp_be==j,]),
temp_yhat[temp_be==j,] +
1.96 * np.sqrt(temp_s2[temp_be==j,]),
color='gray', alpha=0.2)
plt.title('Model %s, Feature %d' %(model_type, i))
plt.legend()
plt.show()

print("Now Estimating on transfer train data ==============================================")
nm.estimate_on_new_sites(X_train_transfer, Y_train_transfer, grp_id_train_transfer)
print("Now Estimating on transfer test data ==============================================")
yhat, s2 = nm.predict_on_new_sites(X_test_transfer, grp_id_test_transfer)

for i in range(n_features):
sorted_idx = X_test_transfer[:,i].argsort(axis=0).squeeze()
temp_X = X_test_transfer[sorted_idx,i]
temp_Y = Y_test_transfer[sorted_idx,]
temp_be = grp_id_test_transfer[sorted_idx,:].squeeze()
temp_yhat = yhat[sorted_idx,]
temp_s2 = s2[sorted_idx,]

plt.figure()
for j in range(n_transfer_groups):
plt.scatter(temp_X[temp_be==j,], temp_Y[temp_be==j,],
label='Group' + str(j))
plt.plot(temp_X[temp_be==j,], temp_yhat[temp_be==j,])
plt.fill_between(temp_X[temp_be==j,], temp_yhat[temp_be==j,] -
1.96 * np.sqrt(temp_s2[temp_be==j,]),
temp_yhat[temp_be==j,] +
1.96 * np.sqrt(temp_s2[temp_be==j,]),
color='gray', alpha=0.2)
plt.title('Transfer model %s, Feature %d' %(model_type, i))
plt.legend()
plt.show()


############################## Normative Modelling Test #######################


model_type = model_types[0]

covfile = working_dir + 'X_train.pkl'
respfile = working_dir + 'Y_train.pkl'
testcov = working_dir + 'X_test.pkl'
testresp = working_dir + 'Y_test.pkl'
trbefile = working_dir + 'trbefile.pkl'
tsbefile = working_dir + 'tsbefile.pkl'

os.chdir(working_dir)

estimate(covfile, respfile, testcov=testcov, testresp=testresp, trbefile=trbefile,
tsbefile=tsbefile, alg='hbr', outputsuffix='_' + model_type,
inscaler='None', outscaler='None', model_type=model_type,
savemodel='True', saveoutput='True')


###############################################################################
