Merge branch 'main' of github.com:KastnerRG/fkeras into main
anmeza committed Mar 15, 2024
2 parents 19d1d46 + 08e360c commit da8a0a4
Showing 1 changed file with 70 additions and 20 deletions.
90 changes: 70 additions & 20 deletions fkeras/metrics/hessian.py
@@ -17,7 +17,7 @@ class HessianMetrics:
- The trace of the Hessian
"""

def __init__(self, model, loss_fn, x, y, batch_size=32, layer_precision_info=None):
"""
model: Keras model
loss_fn: loss function
@@ -34,6 +34,7 @@ def __init__(self, model, loss_fn, x, y, batch_size=32):
self.batched_x = tf.data.Dataset.from_tensor_slices(x).batch(batch_size)
self.batched_y = tf.data.Dataset.from_tensor_slices(y).batch(batch_size)
self.layer_indices = self.get_layers_with_trainable_params(model)
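# Assumed layout of layer_precision_info (inferred from its use in
# convert_param_ranking_to_msb_bit_ranking_mixed_precision below): a list
# of (num_params_in_layer, bit_width) tuples, ordered to match the
# flattened parameter ranking.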
self.layer_precision_info = layer_precision_info
np.random.seed(83158011)

def get_layers_with_trainable_params(self, model):
@@ -54,7 +55,7 @@ def get_supported_layer_indices(self):
if self.model.layers[i].__class__.__name__ in SUPPORTED_LAYERS:
supported_indices.append(running_idx)
running_idx += self.model.layers[i].trainable_variables.__len__()
return supported_indices

def hessian_vector_product(self, v):
"""
@@ -224,7 +225,6 @@ def normalize_vector_list(self, v_list):
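# s accumulates the sum of squared entries across v_list (computed in the
# collapsed lines above), so the whole list is scaled by its global L2
# norm; the 1e-6 guards against division by zero.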
s = s**0.5
return [v / (s + 1e-6) for v in v_list]

def top_k_eigenvalues_hack(self, k=1, max_iter=100, tolerance=1e-3):
"""
Compute the top k eigenvalues and eigenvectors of the Hessian using the
@@ -277,7 +277,6 @@ def top_k_eigenvalues_hack(self, k=1, max_iter=100, tolerance=1e-3):
break # Compute for encoder only
return eigenvalues, eigenvectors

def top_k_eigenvalues(self, k=1, max_iter=100, tolerance=1e-3, rank_BN=True):
"""
Compute the top k eigenvalues and eigenvectors of the Hessian using the
@@ -294,7 +293,7 @@ def top_k_eigenvalues(self, k=1, max_iter=100, tolerance=1e-3, rank_BN=True):

eigenvalues = []
eigenvectors = []

for _ in range(k):
eigenvalue = None
# Initialize the eigenvector
@@ -323,7 +322,7 @@ def top_k_eigenvalues(self, k=1, max_iter=100, tolerance=1e-3, rank_BN=True):
eigenvalue = tmp_eigenvalue
eigenvalues.append(eigenvalue)
eigenvectors.append(np.array(v, dtype=object))

if not rank_BN:
supported_indices = self.get_supported_layer_indices()

@@ -334,7 +333,7 @@ def top_k_eigenvalues(self, k=1, max_iter=100, tolerance=1e-3, rank_BN=True):
if j in supported_indices:
curr_evs.append(np.array(eigenvectors[i][j]))
sanitized_evs.append(np.array(curr_evs, dtype=object))

eigenvectors = sanitized_evs

return eigenvalues, eigenvectors
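# Note on the loop above: v is repeatedly mapped through the
# Hessian-vector product and renormalized, and tmp_eigenvalue is the
# Rayleigh quotient v^T H v; iteration stops once successive eigenvalue
# estimates agree within `tolerance`. Later eigenpairs are presumably
# obtained by deflating the eigenvectors found so far (standard power
# iteration; the middle of the loop is collapsed in this view).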
@@ -355,7 +354,7 @@ def do_sum_hessian_rank(self, params, eigenvectors, eigenvalues, k, iter_by=2):
combined_eigenvector = []
for j in range(0, len(eigenvectors[i]), iter_by):
# Go every 2 to ignore biases
# TODO: Exception is used due to the inconsistent
# usage of numpy arrays and tensorflow tensors.
# To be fixed in the future by only using np arrays.
try:
@@ -442,7 +441,44 @@ def sort_bits_MSB_to_LSB(self, param_bit_order_ranking, num_bits):

return np.array(bit_rank)
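# sort_bits_MSB_to_LSB (body collapsed above) regroups a per-parameter bit
# ranking so that every parameter's most significant bit precedes any less
# significant bit; it is the uniform-precision counterpart of the
# per-position lists built in the mixed-precision method below.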

###############################
def convert_param_ranking_to_msb_bit_ranking_mixed_precision(self, param_ranking):
layer_info_head = 0
num_of_params_seen = 0
last_bit_idx = -1
param_to_bit_indices_dict = dict()
# Build a dictionary mapping param_index -> [(global_bit_index, bit_position), ...]
for wi in range(len(param_ranking)):
bit_indices_associated_with_param = []
bit_width = self.layer_precision_info[layer_info_head][1]
for wbi in range(bit_width):
last_bit_idx += 1
bit_indices_associated_with_param.append((last_bit_idx, wbi))

param_to_bit_indices_dict[wi] = bit_indices_associated_with_param

num_of_params_seen += 1
if num_of_params_seen == self.layer_precision_info[layer_info_head][0]:
layer_info_head += 1
num_of_params_seen = 0

# Add the bit indices into the appropriate list based on
# the parameter ranking
max_bit_width = max(self.layer_precision_info, key=lambda x: x[1])[1]
sorted_msb_lsb_lists = [[] for _ in range(max_bit_width)]
for param_idx in param_ranking:
for bit_idx, wbi in param_to_bit_indices_dict[param_idx]:
sorted_msb_lsb_lists[wbi].append(bit_idx)

merged_msb_to_lsb_list = []
for l in sorted_msb_lsb_lists:
merged_msb_to_lsb_list.extend(l)

return merged_msb_to_lsb_list
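# Worked example (hypothetical numbers): with
# layer_precision_info = [(2, 3), (1, 2)] and param_ranking = [2, 0, 1],
# params 0 and 1 each own three bits (global indices 0-2 and 3-5) and
# param 2 owns two bits (indices 6-7). Grouping MSB-first in ranking
# order gives [6, 0, 3] (MSBs), then [7, 1, 4], then [2, 5], so the
# merged result is [6, 0, 3, 7, 1, 4, 2, 5].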

def convert_param_ranking_to_msb_bit_ranking_single_precision(
self, param_ranking, num_bits
):
# Convert param ranking to bit ranking
bit_level_rank = []

@@ -456,6 +492,18 @@ def convert_param_ranking_to_msb_bit_ranking(self, param_ranking, num_bits):
bit_level_rank = self.sort_bits_MSB_to_LSB(bit_level_rank, num_bits)
return bit_level_rank

def convert_param_ranking_to_msb_bit_ranking(self, param_ranking, num_bits):
if self.layer_precision_info is None:
return self.convert_param_ranking_to_msb_bit_ranking_single_precision(
param_ranking, num_bits
)
else:
return self.convert_param_ranking_to_msb_bit_ranking_mixed_precision(
param_ranking
)
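# Dispatch note: num_bits is only honored on the uniform-precision path;
# when layer_precision_info is provided, the per-layer bit widths drive
# the ranking and the num_bits argument is ignored.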

###############################

def hessian_ranking_hack(self, eigenvectors, eigenvalues=None, k=1, strategy="sum"):
"""
Given list of eigenvectors and eigenvalues, compute the sensitivity.
@@ -487,8 +535,10 @@ def hessian_ranking_hack(self, eigenvectors, eigenvalues=None, k=1, strategy="sum"):
param_scores = eigenvector_rank[param_ranking]
print(f"parameter_ranking: {param_ranking[:15]}")
return param_ranking, param_scores

def hessian_ranking_general(
self, eigenvectors, eigenvalues=None, k=1, strategy="sum", iter_by=1
):
"""
Given list of eigenvectors and eigenvalues, compute the sensitivity.
Use Hessian to rank parameters based on sensitivity to bit flips with
@@ -525,7 +575,7 @@ def hessian_ranking_general(self, eigenvectors, eigenvalues=None, k=1, strategy="sum", iter_by=1):
)
param_ranking = np.flip(np.argsort(np.abs(eigenvector_rank)))
param_scores = eigenvector_rank[param_ranking]

return param_ranking, param_scores
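# Sensitivity sketch: each parameter's score aggregates its entries
# across the top-k eigenvectors (combined according to `strategy`, with
# optional eigenvalue weighting), and parameters are ranked by
# descending |score|.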

def gradient_ranking_hack_OLD_CODE(self):
@@ -562,12 +612,12 @@ def gradient_ranking_hack_OLD_CODE(self):
]
break # Compute for encoder only
return grad_ranking, grad_dict

def gradient_ranking_hack(self, super_layer_idx=0):
"""
Rank parameters based on gradient magnitude per layer
"""
# TODO: Consider implementing gradient ranking in batches?
# Get all parameters of the model and flatten and concat into one list
params = []
for sl_i in self.layer_indices:
@@ -577,7 +627,7 @@ def gradient_ranking_hack(self, super_layer_idx=0):
self.model.layers[sl_i].layers[l_i].trainable_variables[0]
)
break # Compute for encoder only

# Calculate grads over all params
with tf.GradientTape() as inner_tape:
loss = self.loss_fn(self.model(self.x), self.y)
@@ -596,14 +646,14 @@ def gradient_ranking_hack(self, super_layer_idx=0):
param_ranking = np.flip(np.argsort(np.abs(grads)))
param_rank_score = grads[param_ranking]
# print(f"grad parameter_ranking: {param_ranking[:10]}")

return param_ranking, param_rank_score
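# Gradient-ranking sketch: one full-batch forward pass yields the loss,
# gradients are flattened and concatenated across the selected layers,
# and parameters are ranked by descending |gradient|.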

def gradient_ranking(self):
"""
Rank parameters based on gradient magnitude per layer
"""
# TODO: Consider implementing gradient ranking in batches?
# Get all parameters of the model and flatten and concat into one list
params = [
v
@@ -613,7 +663,7 @@ def gradient_ranking(self):

# Get indices of parameters in supported layers
supported_indices = self.get_supported_layer_indices()

# Calculate grads over all params
with tf.GradientTape() as inner_tape:
loss = self.loss_fn(self.model(self.x), self.y)
@@ -639,5 +689,5 @@ def gradient_ranking(self):
param_ranking = np.flip(np.argsort(np.abs(grads)))
param_rank_score = grads[param_ranking]
# print(f"grad parameter_ranking: {param_ranking[:10]}")
return param_ranking, param_rank_score

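For orientation, a minimal usage sketch of the API this commit touches. The model, data, and layer_precision_info values below are hypothetical placeholders, and the (num_params_in_layer, bit_width) tuple layout is inferred from the mixed-precision code above rather than documented in the diff:

import tensorflow as tf
from fkeras.metrics.hessian import HessianMetrics

# Hypothetical toy model and data; any differentiable Keras model with
# trainable layers works in principle.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu", input_shape=(8,)),
    tf.keras.layers.Dense(1),
])
loss_fn = tf.keras.losses.MeanSquaredError()
x = tf.random.normal((64, 8))
y = tf.random.normal((64, 1))

hm = HessianMetrics(
    model, loss_fn, x, y, batch_size=32,
    # Assumed format: one (num_params_in_layer, bit_width) tuple per layer;
    # the counts must sum to the length of the parameter ranking.
    layer_precision_info=[(36, 6), (5, 4)],
)

eigenvalues, eigenvectors = hm.top_k_eigenvalues(k=1, max_iter=100, tolerance=1e-3)
param_ranking, param_scores = hm.hessian_ranking_general(
    eigenvectors, eigenvalues=eigenvalues, k=1, strategy="sum"
)
# With layer_precision_info set, num_bits is ignored (mixed-precision path).
bit_ranking = hm.convert_param_ranking_to_msb_bit_ranking(param_ranking, num_bits=6)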