manbeast3b
/

perfbench0test1

Model card Files Files and versions

manbeast3b committed on Dec 3, 2024

Commit

0b842dd

·

verified ·

1 Parent(s): 3fc2f94

Update src/pipeline.py

Files changed (1) hide show

src/pipeline.py +24 -1

src/pipeline.py CHANGED Viewed

@@ -71,9 +71,32 @@ class W8A16LinearLayer(nn.Module):
             output = output + self.bias
         return output
 def replace_linear_with_target_and_quantize(module, target_class, module_name_to_exclude):
     # with open("/root/.cache/huggingface/hub/output_layers.txt", "a") as f:
-    for name, child in module.named_children():
         if isinstance(child, nn.Linear) and (  'add_k_proj' in name or 'add_v_proj' in name or 'add_q_proj' in name ): #and not any([x == name for x in module_name_to_exclude]): 'linear' in name or
             old_bias = child.bias
             old_weight = child.weight

             output = output + self.bias
         return output
+# def replace_linear_with_target_and_quantize(module, target_class, module_name_to_exclude):
+#     # with open("/root/.cache/huggingface/hub/output_layers.txt", "a") as f:
+#     for name, child in module.named_children():
+#         if isinstance(child, nn.Linear) and (  'add_k_proj' in name or 'add_v_proj' in name or 'add_q_proj' in name ): #and not any([x == name for x in module_name_to_exclude]): 'linear' in name or
+#             old_bias = child.bias
+#             old_weight = child.weight
+#             new_module = target_class(child.in_features, child.out_features, old_bias is not None, child.weight.dtype)
+#             new_module.quantize(old_weight)
+#             delattr(module, name)
+#             setattr(module, name, new_module)
+#             if old_bias is not None:
+#               getattr(module, name).bias = old_bias
+#             # # Print the replaced layer name and calculate the change in size
+#             # old_size = old_weight.numel() * old_weight.element_size()
+#             # new_size = new_module.int8_weights.numel() * new_module.int8_weights.element_size()
+#             # f.write(f"Replaced layer: {name}" + f" Size reduction: {old_size} bytes -> {new_size} bytes ({(old_size - new_size) / old_size * 100:.2f}% reduction)")
+#         else:
+#             # Recursively call the function for nested modules
+#             replace_linear_with_target_and_quantize(child, target_class, module_name_to_exclude)
 def replace_linear_with_target_and_quantize(module, target_class, module_name_to_exclude):
     # with open("/root/.cache/huggingface/hub/output_layers.txt", "a") as f:
+    for name in list(module._modules.keys()):
+        child = module._modules[name]
         if isinstance(child, nn.Linear) and (  'add_k_proj' in name or 'add_v_proj' in name or 'add_q_proj' in name ): #and not any([x == name for x in module_name_to_exclude]): 'linear' in name or
             old_bias = child.bias
             old_weight = child.weight