manbeast3b
/

perfbench0test1

Model card · Files and versions

manbeast3b committed on Dec 3, 2024

Commit

6005ab8

·

verified ·

1 Parent(s): afb21bf

Update src/pipeline.py

Files changed (1) hide show

src/pipeline.py +3 -2

src/pipeline.py CHANGED Viewed

@@ -50,7 +50,7 @@ class W8A16LinearLayer(nn.Module):
 def replace_linear_with_target_and_quantize(module, target_class, module_name_to_exclude):
     # with open("/root/.cache/huggingface/hub/output_layers.txt", "a") as f:
     for name, child in module.named_children():
-        if isinstance(child, nn.Linear) and ( 'to_q' in name or 'to_k' in name or 'to_v' in name ): #and not any([x == name for x in module_name_to_exclude]): 'linear' in name or
             old_bias = child.bias
             old_weight = child.weight
             new_module = target_class(child.in_features, child.out_features, old_bias is not None, child.weight.dtype)
@@ -98,7 +98,8 @@ def load_pipeline() -> Pipeline:
     pipeline.text_encoder.to(memory_format=torch.channels_last)
     pipeline.transformer.to(memory_format=torch.channels_last)
     replace_linear_with_target_and_quantize(pipeline.transformer, W8A16LinearLayer, [])
-    # exit()
     pipeline.vae.to(memory_format=torch.channels_last)
     pipeline.vae = torch.compile(pipeline.vae)

 def replace_linear_with_target_and_quantize(module, target_class, module_name_to_exclude):
     # with open("/root/.cache/huggingface/hub/output_layers.txt", "a") as f:
     for name, child in module.named_children():
+        if isinstance(child, nn.Linear) and (  'add_k_proj' in name or 'add_v_proj' in name or 'add_q_proj' in name ): #and not any([x == name for x in module_name_to_exclude]): 'linear' in name or
             old_bias = child.bias
             old_weight = child.weight
             new_module = target_class(child.in_features, child.out_features, old_bias is not None, child.weight.dtype)
     pipeline.text_encoder.to(memory_format=torch.channels_last)
     pipeline.transformer.to(memory_format=torch.channels_last)
     replace_linear_with_target_and_quantize(pipeline.transformer, W8A16LinearLayer, [])
+    pipeline.transformer.save_pretrained("/root/.cache/huggingface/hub/transformer-flux")
+    exit()
     pipeline.vae.to(memory_format=torch.channels_last)
     pipeline.vae = torch.compile(pipeline.vae)