
Commit

Merge pull request #131 from amarquand/dev_pymc_SHASH
Transfer functionality implemented
amarquand authored Jun 7, 2023
2 parents 3f2aaef + e86ce0c commit 0840688
Showing 2 changed files with 168 additions and 26 deletions.
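
The transfer functionality is exercised by the new test added below. As a quick orientation, here is a minimal usage sketch that mirrors tests/testHBR_transfer.py; the variable names, paths and keyword values are illustrative only:

# --- illustrative sketch, not part of the commit ---
from pcntoolkit.util.utils import simulate_data
from pcntoolkit.normative_model.norm_utils import norm_init

# toy data: 5 original batches and 2 new ("transfer") batches, as in the test
X_train, Y_train, grp_id_train, X_test, Y_test, grp_id_test, coef = \
    simulate_data('linear', 500, 1, 5, working_dir='/tmp/', plot=False)
X_train_t, Y_train_t, grp_id_train_t, X_test_t, Y_test_t, grp_id_test_t, _ = \
    simulate_data('linear', 100, n_features=1, n_grps=2, plot=False)

nm = norm_init(X_train, Y_train, alg='hbr', likelihood='Normal', model_type='linear')
nm.estimate(X_train, Y_train, trbefile='/tmp/trbefile.pkl')    # fit on the original batches
yhat, ys2 = nm.predict(X_test, tsbefile='/tmp/tsbefile.pkl')   # predict for the original batches
# adapt to the unseen batches: the fitted posterior is turned into an informative prior
nm.estimate_on_new_sites(X_train_t, Y_train_t, grp_id_train_t)
yhat_t, s2_t = nm.predict_on_new_sites(X_test_t, grp_id_test_t)
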
62 changes: 36 additions & 26 deletions pcntoolkit/model/hbr.py
@@ -81,12 +81,7 @@ def create_poly_basis(X, order):
return Phi


def from_posterior(param, samples, distribution=None, half=False, freedom=1):
if len(samples.shape) > 1:
shape = samples.shape[1:]
else:
shape = None

def from_posterior(param, samples, shape, distribution=None, half=False, freedom=1):
if distribution is None:
smin, smax = np.min(samples), np.max(samples)
width = smax - smin
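
The signature change above adds an explicit shape argument: instead of inferring the shape from the stored samples, make_dist (further down in this diff) now passes in a shape whose batch-effect dimensions match the new data, which is what makes transfer to unseen batches possible. As a rough sketch of the underlying "posterior becomes informative prior" idea, assuming a simple Normal fit whose standard deviation is inflated by the freedom factor (an illustration, not the toolkit's exact implementation):

# --- illustrative sketch, not part of the commit ---
import numpy as np
from scipy import stats

def normal_prior_from_samples(samples, freedom=1.0):
    # Fit a Normal to the flattened posterior samples and widen its standard
    # deviation by `freedom`, so the resulting prior is not overly confident.
    mu, sd = stats.norm.fit(np.ravel(samples))
    return mu, freedom * sd

mu, sd = normal_prior_from_samples(np.random.randn(4, 1000) + 2.0, freedom=5)
print(mu, sd)   # roughly 2.0 and 5 * 1.0
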
@@ -222,22 +217,22 @@ def get_sample_dims(var):
"mu_samples",
pb.make_param(
"mu",
mu_slope_mu_params=(0.0, 1.0),
sigma_slope_mu_params=(1.0,),
mu_intercept_mu_params=(0.0, 1.0),
sigma_intercept_mu_params=(1.0,),
mu_slope_mu_params=(0.0, 3.0),
sigma_slope_mu_params=(3.0,),
mu_intercept_mu_params=(0.0, 3.0),
sigma_intercept_mu_params=(3.0,),
).get_samples(pb),
dims=get_sample_dims('mu'),
)
sigma = pm.Deterministic(
"sigma_samples",
pb.make_param(
"sigma", mu_sigma_params=(0.0, 2.0), sigma_sigma_params=(5.0,)
"sigma", mu_sigma_params=(0.0, 2.0), sigma_sigma_params=(2.0,)
).get_samples(pb),
dims=get_sample_dims('sigma'),
)
sigma_plus = pm.Deterministic(
"sigma_plus_samples", pm.math.log(1 + pm.math.exp(sigma)), dims=get_sample_dims('sigma')
"sigma_plus_samples", pm.math.log(1 + pm.math.exp(sigma/3))*3, dims=get_sample_dims('sigma')
)
y_like = pm.Normal(
"y_like", mu, sigma=sigma_plus, observed=y, dims="datapoints"
@@ -586,24 +581,39 @@ def make_dist(self, dist, params, pb):
"""This creates a pymc distribution. If there is a idata, the distribution is fitted to the idata. If there isn't a idata, the prior is parameterized by the values in (params)"""
with pb.model as m:
if pb.idata is not None:
samples = az.extract(pb.idata, var_names=self.name).to_numpy()
if not self.has_random_effect:
samples = np.reshape(samples, (-1,))
# Get samples
samples = az.extract(pb.idata, var_names=self.name)
# Define mapping to new shape
def get_new_dim_size(tup):
oldsize, name = tup
if name.startswith('batch_effect_'):
ind = pb.batch_effect_dim_names.index(name)
return len(np.unique(pb.batch_effect_indices[ind].container.data))
return oldsize

new_shape = list(map(get_new_dim_size, zip(samples.shape,samples.dims)))
if len(new_shape) == 1:
new_shape = None
else:
new_shape = new_shape[:-1]
self.dist = from_posterior(
param=self.name,
samples=samples,
samples=samples.to_numpy(),
shape = new_shape,
distribution=dist,
freedom=pb.configs["freedom"],
)
dims = []
if self.has_random_effect:
dims = dims + pb.batch_effect_dim_names
if self.name.startswith("slope") or self.name.startswith("offset_slope"):
dims = dims + ["basis_functions"]
if dims == []:
self.dist = self.distmap[dist](self.name, *params)

else:
self.dist = self.distmap[dist](self.name, *params, dims=dims)
dims = []
if self.has_random_effect:
dims = dims + pb.batch_effect_dim_names
if self.name.startswith("slope") or self.name.startswith("offset_slope"):
dims = dims + ["basis_functions"]
if dims == []:
self.dist = self.distmap[dist](self.name, *params)
else:
self.dist = self.distmap[dist](self.name, *params, dims=dims)

def __getitem__(self, idx):
"""The idx here is the index of the batch-effect. If the prior does not model batch effects, this should return the same value for each index"""
@@ -687,7 +697,7 @@ class Parameterization:

def __init__(self, name):
self.name = name
print(name, type(self))
# print(name, type(self))

def get_samples(self, pb):
pass
@@ -805,7 +815,7 @@ def get_samples(self, pb):
intc = self.intercept_parameterization.get_samples(pb)
slope_samples = self.slope_parameterization.get_samples(pb)
if pb.configs[f"random_slope_{self.name}"]:
slope = pb.X * self.slope_parameterization.get_samples(pb)
slope = pb.X * slope_samples
slope = slope.sum(axis=-1)
else:
slope = pb.X @ self.slope_parameterization.get_samples(pb)
132 changes: 132 additions & 0 deletions tests/testHBR_transfer.py
@@ -0,0 +1,132 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 29 13:26:35 2019
@author: seykia
This script tests HBR models with transfer (estimation and prediction on new sites) using default configs on toy data.
"""

import os
import numpy as np
from pcntoolkit.normative_model.norm_utils import norm_init
from pcntoolkit.util.utils import simulate_data
import matplotlib.pyplot as plt
from pcntoolkit.normative import estimate
from warnings import filterwarnings
from pcntoolkit.util.utils import scaler
import xarray

filterwarnings('ignore')

np.random.seed(10)

########################### Experiment Settings ###############################


working_dir = '/home/stijn/temp/'  # Specify a working directory
# to save data and results.

simulation_method = 'linear'
n_features = 1 # The number of input features of X
n_grps = 5 # Number of batches in data
n_transfer_groups = 2 # number of batches in transfer data
n_samples = 500 # Number of samples in each group (use a list for different
# sample numbers across different batches)
n_transfer_samples = 100

model_types = ['linear'] # models to try

############################## Data Simulation ################################


X_train, Y_train, grp_id_train, X_test, Y_test, grp_id_test, coef = \
simulate_data(simulation_method, n_samples, n_features, n_grps,
working_dir=working_dir, plot=True)

X_train_transfer, Y_train_transfer, grp_id_train_transfer, X_test_transfer, \
Y_test_transfer, grp_id_test_transfer, coef = \
simulate_data(simulation_method, n_transfer_samples, n_features=n_features,
n_grps=n_transfer_groups, plot=True)

################################# Methods Tests ###############################


for model_type in model_types:
nm = norm_init(X_train, Y_train, alg='hbr', likelihood='Normal', model_type=model_type, n_chains=4, cores=4, n_samples=100, n_tuning=50, freedom=5, nknots=8, target_accept="0.99")

print("Now Estimating on original train data ==============================================")
nm.estimate(X_train, Y_train, trbefile=working_dir+'trbefile.pkl')
print("Now Predicting on original test data ==============================================")
yhat, ys2 = nm.predict(X_test, tsbefile=working_dir+'tsbefile.pkl')

for i in range(n_features):
sorted_idx = X_test[:,i].argsort(axis=0).squeeze()
temp_X = X_test[sorted_idx,i]
temp_Y = Y_test[sorted_idx,]
temp_be = grp_id_test[sorted_idx,:].squeeze()
temp_yhat = yhat[sorted_idx,]
temp_s2 = ys2[sorted_idx,]

plt.figure()
for j in range(n_grps):
plt.scatter(temp_X[temp_be==j,], temp_Y[temp_be==j,],
label='Group' + str(j))
plt.plot(temp_X[temp_be==j,], temp_yhat[temp_be==j,])
plt.fill_between(temp_X[temp_be==j,], temp_yhat[temp_be==j,] -
1.96 * np.sqrt(temp_s2[temp_be==j,]),
temp_yhat[temp_be==j,] +
1.96 * np.sqrt(temp_s2[temp_be==j,]),
color='gray', alpha=0.2)
plt.title('Model %s, Feature %d' %(model_type, i))
plt.legend()
plt.show()

print("Now Estimating on transfer train data ==============================================")
nm.estimate_on_new_sites(X_train_transfer, Y_train_transfer, grp_id_train_transfer)
print("Now Estimating on transfer test data ==============================================")
yhat, s2 = nm.predict_on_new_sites(X_test_transfer, grp_id_test_transfer)

for i in range(n_features):
sorted_idx = X_test_transfer[:,i].argsort(axis=0).squeeze()
temp_X = X_test_transfer[sorted_idx,i]
temp_Y = Y_test_transfer[sorted_idx,]
temp_be = grp_id_test_transfer[sorted_idx,:].squeeze()
temp_yhat = yhat[sorted_idx,]
temp_s2 = s2[sorted_idx,]

plt.figure()
for j in range(n_transfer_groups):
plt.scatter(temp_X[temp_be==j,], temp_Y[temp_be==j,],
label='Group' + str(j))
plt.plot(temp_X[temp_be==j,], temp_yhat[temp_be==j,])
plt.fill_between(temp_X[temp_be==j,], temp_yhat[temp_be==j,] -
1.96 * np.sqrt(temp_s2[temp_be==j,]),
temp_yhat[temp_be==j,] +
1.96 * np.sqrt(temp_s2[temp_be==j,]),
color='gray', alpha=0.2)
plt.title('Transfer model %s, Feature %d' %(model_type, i))
plt.legend()
plt.show()


############################## Normative Modelling Test #######################


model_type = model_types[0]

covfile = working_dir + 'X_train.pkl'
respfile = working_dir + 'Y_train.pkl'
testcov = working_dir + 'X_test.pkl'
testresp = working_dir + 'Y_test.pkl'
trbefile = working_dir + 'trbefile.pkl'
tsbefile = working_dir + 'tsbefile.pkl'

os.chdir(working_dir)

estimate(covfile, respfile, testcov=testcov, testresp=testresp, trbefile=trbefile,
tsbefile=tsbefile, alg='hbr', outputsuffix='_' + model_type,
inscaler='None', outscaler='None', model_type=model_type,
savemodel='True', saveoutput='True')


###############################################################################
