manbeast3b
/

perfbench0test1

Model card Files Files and versions

manbeast3b committed on Dec 3, 2024

Commit

4f22740

·

verified ·

1 Parent(s): 43018bb

Update src/pipeline.py

Files changed (1) hide show

src/pipeline.py +2 -2

src/pipeline.py CHANGED Viewed

@@ -12,7 +12,7 @@ from pipelines.models import TextToImageRequest
 from torch import Generator
 import time
 from diffusers import FluxTransformer2DModel, DiffusionPipeline
-from torchao.quantization import quantize_, PerRow, float8_dynamic_activation_float8_weight
 import os
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:False,garbage_collection_threshold:0.01"
 Pipeline = None
@@ -39,7 +39,7 @@ def load_pipeline() -> Pipeline:
         text_encoder_2 = text_encoder_2,
         torch_dtype=dtype,
         )
-    quantize_(pipeline.transformer, float8_dynamic_activation_float8_weight(granularity=PerRow()))
     torch.backends.cudnn.benchmark = True
     torch.backends.cuda.matmul.allow_tf32 = True
     torch.cuda.set_per_process_memory_fraction(0.99)

 from torch import Generator
 import time
 from diffusers import FluxTransformer2DModel, DiffusionPipeline
+from torchao.quantization import quantize_,  float8_dynamic_activation_float8_weight #PerRow,
 import os
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:False,garbage_collection_threshold:0.01"
 Pipeline = None
         text_encoder_2 = text_encoder_2,
         torch_dtype=dtype,
         )
+    quantize_(pipeline.transformer, float8_dynamic_activation_float8_weight())
     torch.backends.cudnn.benchmark = True
     torch.backends.cuda.matmul.allow_tf32 = True
     torch.cuda.set_per_process_memory_fraction(0.99)