NoMoreCopyrightOrg
/

flux-test2

English

Model card Files Files and versions

xet

Community

John6666 committed on Mar 9, 2025

Commit

9378ccb

verified ·

1 Parent(s): cc5f48e

Upload 2 files

Browse files

Files changed (2) hide show

handler.py +8 -8
requirements.txt +1 -1

handler.py CHANGED Viewed

@@ -9,7 +9,7 @@ import time
 from PIL import Image
 from huggingface_hub import hf_hub_download
 import torch
-from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int4_weight, float8_dynamic_activation_float8_weight, int8_dynamic_activation_int8_weight
 from torchao.quantization.quant_api import PerRow
 from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, TorchAoConfig
 from transformers import T5EncoderModel
@@ -30,7 +30,7 @@ IS_LVRAM = False
 IS_COMPILE = True
 IS_WARM = True
 IS_QUANT = True
-IS_AUTOQ = True
 IS_CC90 = True if torch.cuda.get_device_capability() >= (9, 0) else False
 IS_CC89 = True if torch.cuda.get_device_capability() >= (8, 9) else False
@@ -42,7 +42,7 @@ if IS_COMPILE:
     import torch._dynamo
     torch._dynamo.config.suppress_errors = False
     #torch._dynamo.config.suppress_errors = True
-    #torch._inductor.config.disable_progress = False
     #torch._inductor.config.conv_1x1_as_mm = True
     #torch._inductor.config.coordinate_descent_tuning = True
     #torch._inductor.config.coordinate_descent_check_all_directions = True
@@ -217,14 +217,14 @@ def load_pipeline_fast(repo_id: str, dtype: torch.dtype) -> Any:
     pipe.transformer.to(memory_format=torch.channels_last)
     pipe.vae.to(memory_format=torch.channels_last)
     if IS_QUANT and not IS_AUTOQ:
-        quantize_(pipe.text_encoder, int8_dynamic_activation_int8_weight())
-        quantize_(pipe.text_encoder_2, int8_dynamic_activation_int8_weight())
         if IS_CC90: quantize_(pipe.transformer, float8_dynamic_activation_float8_weight(granularity=PerRow()), device="cuda")
         elif IS_CC89: quantize_(pipe.transformer, float8_dynamic_activation_float8_weight(), device="cuda")
-        else: quantize_(pipe.vae, int8_dynamic_activation_int4_weight())
         if IS_CC90: quantize_(pipe.vae, float8_dynamic_activation_float8_weight(granularity=PerRow()), device="cuda")
         elif IS_CC89: quantize_(pipe.vae, float8_dynamic_activation_float8_weight(), device="cuda")
-        else: quantize_(pipe.vae, int8_dynamic_activation_int8_weight())
     return pipe
 class EndpointHandler:
@@ -261,7 +261,7 @@ class EndpointHandler:
             end = time.time()
             print(f'Compiled in {end - start:.3f} sec.')
-    def __call__(self, data: Dict[str, Any]) -> Image.Image:
         logger.info(f"Received incoming request with {data=}")
         if "inputs" in data and isinstance(data["inputs"], str):

 from PIL import Image
 from huggingface_hub import hf_hub_download
 import torch
+from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int4_weight, float8_dynamic_activation_float8_weight, int8_dynamic_activation_int8_weight, int8_weight_only
 from torchao.quantization.quant_api import PerRow
 from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, TorchAoConfig
 from transformers import T5EncoderModel
 IS_COMPILE = True
 IS_WARM = True
 IS_QUANT = True
+IS_AUTOQ = False
 IS_CC90 = True if torch.cuda.get_device_capability() >= (9, 0) else False
 IS_CC89 = True if torch.cuda.get_device_capability() >= (8, 9) else False
     import torch._dynamo
     torch._dynamo.config.suppress_errors = False
     #torch._dynamo.config.suppress_errors = True
+    torch._inductor.config.disable_progress = False
     #torch._inductor.config.conv_1x1_as_mm = True
     #torch._inductor.config.coordinate_descent_tuning = True
     #torch._inductor.config.coordinate_descent_check_all_directions = True
     pipe.transformer.to(memory_format=torch.channels_last)
     pipe.vae.to(memory_format=torch.channels_last)
     if IS_QUANT and not IS_AUTOQ:
+        quantize_(pipe.text_encoder, int8_weight_only())
+        quantize_(pipe.text_encoder_2, int8_weight_only())
         if IS_CC90: quantize_(pipe.transformer, float8_dynamic_activation_float8_weight(granularity=PerRow()), device="cuda")
         elif IS_CC89: quantize_(pipe.transformer, float8_dynamic_activation_float8_weight(), device="cuda")
+        else: quantize_(pipe.vae, int8_weight_only())
         if IS_CC90: quantize_(pipe.vae, float8_dynamic_activation_float8_weight(granularity=PerRow()), device="cuda")
         elif IS_CC89: quantize_(pipe.vae, float8_dynamic_activation_float8_weight(), device="cuda")
+        else: quantize_(pipe.vae, int8_weight_only())
     return pipe
 class EndpointHandler:
             end = time.time()
             print(f'Compiled in {end - start:.3f} sec.')
+    def __call__(self, data: Dict[str, Any]) -> str:
         logger.info(f"Received incoming request with {data=}")
         if "inputs" in data and isinstance(data["inputs"], str):

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
---extra-index-url https://download.pytorch.org/whl/cu126
 torch==2.6.0
 torchvision
 torchaudio

+--extra-index-url https://download.pytorch.org/whl/cu121
 torch==2.6.0
 torchvision
 torchaudio