manbeast3b
/

perfbench0test1

Model card Files and versions

manbeast3b committed on Dec 3, 2024

Commit

6d3ee5b

·

verified ·

1 Parent(s): 0b842dd

Update src/pipeline.py

Files changed (1) hide show

src/pipeline.py +4 -1

src/pipeline.py CHANGED Viewed

@@ -147,13 +147,16 @@ def load_pipeline() -> Pipeline:
     torch.cuda.set_per_process_memory_fraction(0.99)
     pipeline.text_encoder.to(memory_format=torch.channels_last)
     pipeline.transformer.to(memory_format=torch.channels_last)
-    replace_linear_with_target_and_quantize(pipeline.transformer, W8A16LinearLayer, [])
     # pipeline.transformer.save_pretrained("manbeast3b/transfomer-flux-schnell-int8-new", push_to_hub=True, token="")
     # pipeline.transformer.save_pretrained("/root/.cache/huggingface/hub/transformer-flux")
     # exit()
     pipeline.vae.to(memory_format=torch.channels_last)
     pipeline.vae = torch.compile(pipeline.vae)
     pipeline._exclude_from_cpu_offload = ["vae"]
     pipeline.enable_sequential_cpu_offload()

     torch.cuda.set_per_process_memory_fraction(0.99)
     pipeline.text_encoder.to(memory_format=torch.channels_last)
     pipeline.transformer.to(memory_format=torch.channels_last)
+    # replace_linear_with_target_and_quantize(pipeline.transformer, W8A16LinearLayer, [])
     # pipeline.transformer.save_pretrained("manbeast3b/transfomer-flux-schnell-int8-new", push_to_hub=True, token="")
     # pipeline.transformer.save_pretrained("/root/.cache/huggingface/hub/transformer-flux")
     # exit()
     pipeline.vae.to(memory_format=torch.channels_last)
     pipeline.vae = torch.compile(pipeline.vae)
+    torch.save(pipeline.vae, '/root/.cache/huggingface/hub/compiled_vae.pth')
+    exit()
+    pipeline.vae = torch.load('/root/.cache/huggingface/hub/compiled_vae.pth')
     pipeline._exclude_from_cpu_offload = ["vae"]
     pipeline.enable_sequential_cpu_offload()