Update src/pipeline.py
Browse files
- src/pipeline.py +4 -1
src/pipeline.py
CHANGED
|
@@ -147,13 +147,16 @@ def load_pipeline() -> Pipeline:
|
|
| 147 |
torch.cuda.set_per_process_memory_fraction(0.99)
|
| 148 |
pipeline.text_encoder.to(memory_format=torch.channels_last)
|
| 149 |
pipeline.transformer.to(memory_format=torch.channels_last)
|
| 150 |
-
replace_linear_with_target_and_quantize(pipeline.transformer, W8A16LinearLayer, [])
|
| 151 |
# pipeline.transformer.save_pretrained("manbeast3b/transfomer-flux-schnell-int8-new", push_to_hub=True, token="")
|
| 152 |
# pipeline.transformer.save_pretrained("/root/.cache/huggingface/hub/transformer-flux")
|
| 153 |
# exit()
|
| 154 |
|
| 155 |
pipeline.vae.to(memory_format=torch.channels_last)
|
| 156 |
pipeline.vae = torch.compile(pipeline.vae)
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
pipeline._exclude_from_cpu_offload = ["vae"]
|
| 159 |
pipeline.enable_sequential_cpu_offload()
|
|
|
|
| 147 |
torch.cuda.set_per_process_memory_fraction(0.99)
|
| 148 |
pipeline.text_encoder.to(memory_format=torch.channels_last)
|
| 149 |
pipeline.transformer.to(memory_format=torch.channels_last)
|
| 150 |
+
# replace_linear_with_target_and_quantize(pipeline.transformer, W8A16LinearLayer, [])
|
| 151 |
# pipeline.transformer.save_pretrained("manbeast3b/transfomer-flux-schnell-int8-new", push_to_hub=True, token="")
|
| 152 |
# pipeline.transformer.save_pretrained("/root/.cache/huggingface/hub/transformer-flux")
|
| 153 |
# exit()
|
| 154 |
|
| 155 |
pipeline.vae.to(memory_format=torch.channels_last)
|
| 156 |
pipeline.vae = torch.compile(pipeline.vae)
|
| 157 |
+
torch.save(pipeline.vae, '/root/.cache/huggingface/hub/compiled_vae.pth')
|
| 158 |
+
exit()
|
| 159 |
+
pipeline.vae = torch.load('/root/.cache/huggingface/hub/compiled_vae.pth')
|
| 160 |
|
| 161 |
pipeline._exclude_from_cpu_offload = ["vae"]
|
| 162 |
pipeline.enable_sequential_cpu_offload()
|