fix merge conflicts

amarquand · Feb 23, 2022 · edff361 · edff361
2 parents b3bf8ba + 7e8d87e
commit edff361
Show file tree

Hide file tree

Showing 14 changed files with 700 additions and 135 deletions.
diff --git a/doc/build/html/_sources/pages/HBR_NormativeModel_FCONdata_Tutorial.rst.txt b/doc/build/html/_sources/pages/HBR_NormativeModel_FCONdata_Tutorial.rst.txt
@@ -23,7 +23,8 @@ Step 0: Install necessary libraries & grab data files
 
 .. code:: ipython3
 
-    ! pip install pcntoolkit==0.20
+    ! pip uninstall -y Theano-PyMC  # conflicts with Theano on some environments
+    ! pip install pcntoolkit==0.22
 
 For this tutorial we will use data from the `Functional Connectom
 Project FCON1000 <http://fcon_1000.projects.nitrc.org/>`__ to create a
@@ -169,49 +170,21 @@ then displayed.
         idxte = fcon_te['site'] == s
         print(i,s, sum(idx), sum(idxte))
     
-    # Uncomment the following lines if you want to keep a defined version of the sets
-    # fcon_tr.to_csv('/Users/andmar/data/sairut/data/fcon1000_tr.csv')
-    # fcon_te.to_csv('/Users/andmar/data/sairut/data/fcon1000_te.csv')
-    # icbm_tr.to_csv('/Users/andmar/data/sairut/data/fcon1000_icbm_tr.csv')
-    # icbm_te.to_csv('/Users/andmar/data/sairut/data/fcon1000_icbm_te.csv')
+    fcon_tr.to_csv(processing_dir + '/fcon1000_tr.csv')
+    fcon_te.to_csv(processing_dir + '/fcon1000_te.csv')
+    icbm_tr.to_csv(processing_dir + '/fcon1000_icbm_tr.csv')
+    icbm_te.to_csv(processing_dir + '/fcon1000_icbm_te.csv')
 
+Otherwise you can uncomment the following lines and just load these pre defined subsets:
 
-.. parsed-literal::
-
-    sample size check
-    0 AnnArbor_a 10 14
-    1 AnnArbor_b 11 21
-    2 Atlanta 11 17
-    3 Baltimore 10 13
-    4 Bangor 9 11
-    5 Beijing_Zang 108 90
-    6 Berlin_Margulies 13 13
-    7 Cambridge_Buckner 94 104
-    8 Cleveland 15 16
-    9 Leiden_2180 6 6
-    10 Leiden_2200 11 8
-    11 Milwaukee_b 29 17
-    12 Munchen 6 9
-    13 NewYork_a 48 35
-    14 NewYork_a_ADHD 17 8
-    15 Newark 13 6
-    16 Oulu 55 47
-    17 Oxford 9 13
-    18 PaloAlto 7 10
-    19 Pittsburgh 1 2
-    20 Queensland 6 13
-    21 SaintLouis 12 19
-
-
-Otherwise you can just load these pre defined subsets:
 
 .. code:: ipython3
 
     # Optional
-    fcon_tr = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_tr.csv')
-    fcon_te = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_te.csv')
-    icbm_tr = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_icbm_tr.csv')
-    icbm_te = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_icbm_te.csv')
+    #fcon_tr = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_tr.csv')
+    #fcon_te = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_te.csv')
+    #icbm_tr = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_icbm_tr.csv')
+    #icbm_te = pd.read_csv('https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/main/data/fcon1000_icbm_te.csv')
 
 Step 2: Configure HBR inputs: covariates, measures and batch effects
 --------------------------------------------------------------------

diff --git a/pcntoolkit/dataio/fileio.py b/pcntoolkit/dataio/fileio.py
@@ -377,7 +377,7 @@ def load(filename, mask=None, text=False, vol=True):
     if file_type(filename) == 'cifti':
         x = load_cifti(filename, vol=vol)
     elif file_type(filename) == 'nifti':
-        x = load_nifti(filename, mask)
+        x = load_nifti(filename, mask, vol=vol)
     elif text or file_type(filename) == 'text':
         x = load_ascii(filename)
     elif file_type(filename) == 'binary':

diff --git a/pcntoolkit/model/bayesreg.py b/pcntoolkit/model/bayesreg.py
@@ -387,8 +387,8 @@ def estimate(self, hyp0, X, y, **kwargs):
         Xv = kwargs.get('var_covariates', None)
 
         # options for l-bfgs-b
-        l = kwargs.get('l', 0.1)
-        epsilon = kwargs.get('epsilon', 0.1)
+        l = float(kwargs.get('l', 0.1))
+        epsilon = float(kwargs.get('epsilon', 0.1))
         norm = kwargs.get('norm', 'l2')
 
         if optimizer.lower() == 'cg':  # conjugate gradients

diff --git a/pcntoolkit/model/gp.py b/pcntoolkit/model/gp.py
@@ -12,7 +12,7 @@
 
 
 try:  # Run as a package if installed    
-    from pcntoolkit.utils import squared_dist
+    from pcntoolkit.util.utils import squared_dist
 except ImportError:
     pass
 

diff --git a/pcntoolkit/model/hbr.py b/pcntoolkit/model/hbr.py
@@ -17,7 +17,7 @@
 from scipy import stats
 import bspline
 from bspline import splinelab
-
+from pcntoolkit.util.utils import cartesian_product
 
 
 def bspline_fit(X, order, nknots):
@@ -133,10 +133,12 @@ def hbr(X, y, batch_effects, batch_effects_size, configs, trace=None):
         all_idx.append(np.int16(np.unique(batch_effects[:,i])))
     be_idx = list(product(*all_idx))
 
-    X = theano.shared(X)
-    y = theano.shared(y)
 
     with pm.Model() as model:
+
+        X = pm.Data('X', X)
+        y = pm.Data('y', y)
+
         # Priors
         if trace is not None: # Used for transferring the priors
             mu_prior_intercept = from_posterior('mu_prior_intercept', 
@@ -320,7 +322,8 @@ def hbr(X, y, batch_effects, batch_effects_size, configs, trace=None):
         else:
             alpha = 0
 
-        y_like = pm.SkewNormal('y_like', mu=y_hat, sigma=sigma_y, alpha=alpha, observed=y)
+        y_like = pm.SkewNormal('y_like', mu=y_hat, sigma=sigma_y, alpha=alpha, 
+                               observed=y)
 
     return model
 
@@ -336,8 +339,6 @@ def nn_hbr(X, y, batch_effects, batch_effects_size, configs, trace=None):
         all_idx.append(np.int16(np.unique(batch_effects[:,i])))
     be_idx = list(product(*all_idx))
 
-    X = theano.shared(X)
-    y = theano.shared(y)
 
     # Initialize random weights between each layer for the mu:
     init_1 = pm.floatX(np.random.randn(feature_num, n_hidden) * np.sqrt(1/feature_num))
@@ -361,6 +362,10 @@ def nn_hbr(X, y, batch_effects, batch_effects_size, configs, trace=None):
         std_init_2_noise = pm.floatX(np.random.rand(n_hidden, n_hidden))
 
     with pm.Model() as model:
+
+        X = pm.Data('X', X)
+        y = pm.Data('y', y)
+
         if trace is not None: # Used when estimating/predicting on a new site
             weights_in_1_grp = from_posterior('w_in_1_grp', trace['w_in_1_grp'], 
                                             distribution='normal', freedom=configs['freedom'])
@@ -650,6 +655,9 @@ def estimate(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                # Filling the theano shared variables with dummy zero variables to ensure the privacy.
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])}) 
+
         elif self.model_type == 'polynomial': 
             X = create_poly_basis(X, self.configs['order'])
             with hbr(X, y, batch_effects, self.batch_effects_size, 
@@ -660,6 +668,8 @@ def estimate(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'bspline': 
             self.bsp = bspline_fit(X, self.configs['order'], self.configs['nknots'])
             X = bspline_transform(X, self.bsp)
@@ -671,6 +681,8 @@ def estimate(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'nn': 
             with nn_hbr(X, y, batch_effects, self.batch_effects_size, 
                                self.configs):    
@@ -680,6 +692,7 @@ def estimate(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
 
         return self.trace
 
@@ -697,25 +710,29 @@ def predict(self, X, batch_effects, pred = 'single'):
             if self.model_type == 'linear': 
                 with hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
                     ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                    pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
             elif self.model_type == 'polynomial':
                 X = create_poly_basis(X, self.configs['order'])
                 with hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
                     ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                    pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
             elif self.model_type == 'bspline': 
                 X = bspline_transform(X, self.bsp)
                 with hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
                     ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                    pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
             elif self.model_type == 'nn': 
                 with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
                     ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                    pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
 
             pred_mean = ppc['y_like'].mean(axis=0)
             pred_var = ppc['y_like'].var(axis=0)
 
         return pred_mean, pred_var
 
 
-    def estimate_on_new_site(self, X, y, batch_effects):
+    def adapt(self, X, y, batch_effects):
         """ Function to adapt the model """
 
         if len(X.shape)==1:
@@ -739,6 +756,8 @@ def estimate_on_new_site(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'polynomial': 
             X = create_poly_basis(X, self.configs['order'])
             with hbr(X, y, batch_effects, self.batch_effects_size, 
@@ -749,6 +768,8 @@ def estimate_on_new_site(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         if self.model_type == 'bspline': 
             X = bspline_transform(X, self.bsp)
             with hbr(X, y, batch_effects, self.batch_effects_size, 
@@ -759,6 +780,8 @@ def estimate_on_new_site(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'nn': 
             with nn_hbr(X, y, batch_effects, self.batch_effects_size, 
                                self.configs, trace = self.trace):    
@@ -768,6 +791,7 @@ def estimate_on_new_site(self, X, y, batch_effects):
                                        target_accept=self.configs['target_accept'], 
                                        init=self.configs['init'], n_init=50000, 
                                        cores=self.configs['cores'])
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
 
         return self.trace
 
@@ -783,19 +807,34 @@ def predict_on_new_site(self, X, batch_effects):
         samples = self.configs['n_samples']
         y = np.zeros([X.shape[0],1])
         if self.model_type == 'linear': 
-            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, 
+                     trace = self.trace):
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True)
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'polynomial': 
             X = create_poly_basis(X, self.configs['order'])
-            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, 
+                     trace = self.trace):
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True) 
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'bspline': 
             X = bspline_transform(X, self.bsp)
-            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, 
+                     trace = self.trace):
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True) 
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'nn': 
-            with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+            with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs, 
+                        trace = self.trace):
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True)
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
 
         pred_mean = ppc['y_like'].mean(axis=0)
         pred_var = ppc['y_like'].var(axis=0)
@@ -814,18 +853,29 @@ def generate(self, X, batch_effects, samples):
         y = np.zeros([X.shape[0],1])
         if self.model_type == 'linear': 
             with hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True) 
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'polynomial':
             X = create_poly_basis(X, self.configs['order'])
             with hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True)
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'bspline': 
             X = bspline_transform(X, self.bsp)
             with hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True)
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'nn': 
             with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs):
-                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, progressbar=True) 
+                ppc = pm.sample_posterior_predictive(self.trace, samples=samples, 
+                                                     progressbar=True)
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
 
         generated_samples = np.reshape(ppc['y_like'].squeeze().T, [X.shape[0]*samples, 1])
         X = np.repeat(X, samples)
@@ -857,21 +907,47 @@ def sample_prior_predictive(self, X, batch_effects, samples, trace=None):
             with hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                      trace):
                 ppc = pm.sample_prior_predictive(samples=samples) 
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'polynomial':
             X = create_poly_basis(X, self.configs['order'])
             with hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                      trace):
                 ppc = pm.sample_prior_predictive(samples=samples) 
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'bspline': 
             self.bsp = bspline_fit(X, self.configs['order'], self.configs['nknots'])
             X = bspline_transform(X, self.bsp)
             with hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                      trace):
                 ppc = pm.sample_prior_predictive(samples=samples) 
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
+
         elif self.model_type == 'nn': 
             with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                         trace):
                 ppc = pm.sample_prior_predictive(samples=samples) 
+                pm.set_data({'X':np.zeros([10,2]), 'y':np.zeros([10,1])})
 
         return ppc
+
+
+    def create_dummy_inputs(self, covariate_ranges = [[0.1,0.9,0.01]]):
+
+        arrays = []
+        for i in range(len(covariate_ranges)):
+            arrays.append(np.arange(covariate_ranges[i][0],covariate_ranges[i][1],
+                                    covariate_ranges[i][2]))
+        X = cartesian_product(arrays)
+        X_dummy = np.concatenate([X for i in range(np.prod(self.batch_effects_size))])
+
+        arrays = []
+        for i in range(self.batch_effects_num):
+            arrays.append(np.arange(0, self.batch_effects_size[i]))
+        batch_effects = cartesian_product(arrays)
+
+        batch_effects_dummy = np.repeat(batch_effects, X.shape[0], axis=0)
+
+        return X_dummy, batch_effects_dummy