NoMoreCopyrightOrg
/

flux-test2

Model card · Files and versions

John6666 committed on Mar 6, 2025

Commit

e54933c

·

verified ·

1 Parent(s): 650114b

Upload handler.py

Files changed (1) hide show

handler.py +5 -7

handler.py CHANGED Viewed

@@ -9,8 +9,12 @@ import torch
 from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int8_weight, int8_dynamic_activation_int4_weight, float8_dynamic_activation_float8_weight
 from torchao.quantization.quant_api import PerRow
 from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, TorchAoConfig
-IS_NEW_GPU = True
 IS_COMPILE = True
 IS_TURBO = False
 IS_4BIT = False
@@ -19,16 +23,10 @@ IS_4BIT = False
 # This setting optimizes performance on NVIDIA GPUs with Ampere architecture (e.g., A100, RTX 30 series) or newer.
 if IS_NEW_GPU: torch.set_float32_matmul_precision("high")
-import subprocess
-subprocess.run("nvcc -V", shell=True)
-subprocess.run("pip list", shell=True)
 if IS_COMPILE:
     import torch._dynamo
     torch._dynamo.config.suppress_errors = True
-from huggingface_inference_toolkit.logging import logger
 def load_pipeline_stable(repo_id: str, dtype: torch.dtype) -> Any:
     quantization_config = TorchAoConfig("int4dq" if IS_4BIT else "int8dq")
     vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)

 from torchao.quantization import quantize_, autoquant, int8_dynamic_activation_int8_weight, int8_dynamic_activation_int4_weight, float8_dynamic_activation_float8_weight
 from torchao.quantization.quant_api import PerRow
 from diffusers import FluxPipeline, FluxTransformer2DModel, AutoencoderKL, TorchAoConfig
+from huggingface_inference_toolkit.logging import logger
+import subprocess
+subprocess.run("pip list", shell=True)
+IS_NEW_GPU = False
 IS_COMPILE = True
 IS_TURBO = False
 IS_4BIT = False
 # This setting optimizes performance on NVIDIA GPUs with Ampere architecture (e.g., A100, RTX 30 series) or newer.
 if IS_NEW_GPU: torch.set_float32_matmul_precision("high")
 if IS_COMPILE:
     import torch._dynamo
     torch._dynamo.config.suppress_errors = True
 def load_pipeline_stable(repo_id: str, dtype: torch.dtype) -> Any:
     quantization_config = TorchAoConfig("int4dq" if IS_4BIT else "int8dq")
     vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)