Upload handler.py
Browse files — handler.py (+4 −3)
handler.py
CHANGED
|
@@ -14,10 +14,8 @@ if IS_COMPILE:
|
|
| 14 |
#from huggingface_inference_toolkit.logging import logger
|
| 15 |
|
| 16 |
def compile_pipeline(pipe) -> Any:
    """Fuse QKV projections and torch.compile the transformer and VAE of *pipe*.

    Both submodules are converted to channels_last memory format and wrapped
    with the inductor backend; the pipeline is mutated in place and returned.

    NOTE(review): assumes ``pipe.transformer`` and ``pipe.vae`` expose
    ``fuse_qkv_projections()`` (diffusers modules) — confirm against callers.
    """
    for attr in ("transformer", "vae"):
        module = getattr(pipe, attr)
        module.fuse_qkv_projections()
        module.to(memory_format=torch.channels_last)
        compiled = torch.compile(
            module,
            mode="reduce-overhead",
            fullgraph=False,
            dynamic=False,
            backend="inductor",
        )
        setattr(pipe, attr, compiled)
    return pipe
|
|
@@ -27,10 +25,12 @@ class EndpointHandler:
|
|
| 27 |
repo_id = "camenduru/FLUX.1-dev-diffusers"
|
| 28 |
#repo_id = "NoMoreCopyright/FLUX.1-dev-test"
|
| 29 |
dtype = torch.bfloat16
|
| 30 |
-
quantization_config = TorchAoConfig("int8dq")
|
| 31 |
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
|
| 32 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
| 33 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
|
|
|
|
|
|
| 34 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
| 35 |
self.pipeline.to("cuda")
|
| 36 |
|
|
@@ -68,3 +68,4 @@ class EndpointHandler:
|
|
| 68 |
output_type="pil",
|
| 69 |
).images[0]
|
| 70 |
|
|
|
|
|
|
| 14 |
#from huggingface_inference_toolkit.logging import logger
|
| 15 |
|
| 16 |
def compile_pipeline(pipe) -> Any:
|
|
|
|
| 17 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 18 |
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
|
|
|
| 19 |
pipe.vae.to(memory_format=torch.channels_last)
|
| 20 |
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
| 21 |
return pipe
|
|
|
|
| 25 |
repo_id = "camenduru/FLUX.1-dev-diffusers"
|
| 26 |
#repo_id = "NoMoreCopyright/FLUX.1-dev-test"
|
| 27 |
dtype = torch.bfloat16
|
| 28 |
+
quantization_config = TorchAoConfig("int8dq")
|
| 29 |
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
|
| 30 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
| 31 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
| 32 |
+
self.pipeline.transformer.fuse_qkv_projections()
|
| 33 |
+
self.pipeline.vae.fuse_qkv_projections()
|
| 34 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
| 35 |
self.pipeline.to("cuda")
|
| 36 |
|
|
|
|
| 68 |
output_type="pil",
|
| 69 |
).images[0]
|
| 70 |
|
| 71 |
+
|