jokerbit
/

flux-qa-7

jokerbit committed on Jan 22, 2025

Commit

3606bd6

verified ·

1 Parent(s): 95e723e

Upload src/pipeline.py with huggingface_hub

Files changed (1) hide show

src/pipeline.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import gc
 import os
 from typing import TypeAlias
@@ -10,7 +10,9 @@ from pipelines.models import TextToImageRequest
 from torch import Generator
 from torchao.quantization import quantize_, int8_weight_only
 from transformers import T5EncoderModel, CLIPTextModel, logging
 Pipeline: TypeAlias = FluxPipeline
 torch.backends.cudnn.benchmark = True
@@ -52,7 +54,7 @@ def load_pipeline() -> Pipeline:
     pipeline.transformer.to(memory_format=torch.channels_last)
     pipeline.vae.to(memory_format=torch.channels_last)
     quantize_(pipeline.vae, int8_weight_only())
-    pipeline.vae = torch.compile(pipeline.vae, mode="reduce-overhead", fullgraph=True)
     pipeline.to("cuda")
     for _ in range(2):

+# import torch_tensorrt
 import os
 from typing import TypeAlias
 from torch import Generator
 from torchao.quantization import quantize_, int8_weight_only
 from transformers import T5EncoderModel, CLIPTextModel, logging
+from functools import partial
+my_overhead_compile = partial(torch.compile, mode="reduce-overhead", fullgraph=True)
 Pipeline: TypeAlias = FluxPipeline
 torch.backends.cudnn.benchmark = True
     pipeline.transformer.to(memory_format=torch.channels_last)
     pipeline.vae.to(memory_format=torch.channels_last)
     quantize_(pipeline.vae, int8_weight_only())
+    pipeline.vae = my_overhead_compile(pipeline.vae)
     pipeline.to("cuda")
     for _ in range(2):