Upload handler.py

handler.py CHANGED  (+4 -6)
@@ -216,7 +216,6 @@ def load_pipeline_fast(repo_id: str, dtype: torch.dtype) -> Any:
     pipe.vae.fuse_qkv_projections()
     pipe.transformer.to(memory_format=torch.channels_last)
     pipe.vae.to(memory_format=torch.channels_last)
-    apply_cache_on_pipe(pipe, residual_diff_threshold=0.12)
     if IS_QUANT and not IS_AUTOQ:
         quantize_(pipe.text_encoder, int8_dynamic_activation_int8_weight())
         quantize_(pipe.text_encoder_2, int8_dynamic_activation_int8_weight())
@@ -240,12 +239,11 @@ class EndpointHandler:
         elif IS_COMPILE: self.pipeline = load_pipeline_fast(repo_id, dtype)
         elif IS_LVRAM and IS_CC89: self.pipeline = load_pipeline_lowvram(repo_id, dtype)
         else: self.pipeline = load_pipeline_stable(repo_id, dtype)
-
+        self.pipeline.enable_vae_slicing()
+        self.pipeline.enable_vae_tiling()
         self.pipeline.to("cuda")
-        if
-
-            self.pipeline.enable_vae_tiling()
-        else:
+        if IS_PARA: apply_cache_on_pipe(self.pipeline, residual_diff_threshold=0.12)
+        if IS_COMPILE:
             print("Compiling pipeline...")
             self.pipeline.transformer = torch.compile(self.pipeline.transformer, mode="max-autotune-no-cudagraphs")
             self.pipeline.vae = torch.compile(self.pipeline.vae, mode="max-autotune-no-cudagraphs")
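Net effect of this commit, as a minimal sketch: VAE slicing and tiling are now enabled on every load path before the move to CUDA, the first-block cache is opt-in via IS_PARA instead of being hard-wired into load_pipeline_fast, and torch.compile runs only on the IS_COMPILE path. The helper name finalize_pipeline, the env-var flag parsing, and the apply_cache_on_pipe import path below are assumptions for illustration only, not taken from handler.py.

# Sketch of the post-change setup order (hypothetical helper; flag parsing and
# the para-attn import path are assumptions, not copied from handler.py).
import os
import torch
from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe  # assumed import path

IS_PARA = os.getenv("IS_PARA", "0") == "1"        # assumption: flags come from env vars
IS_COMPILE = os.getenv("IS_COMPILE", "0") == "1"

def finalize_pipeline(pipeline):
    # VAE memory savers now run unconditionally, before moving the pipeline to CUDA.
    pipeline.enable_vae_slicing()
    pipeline.enable_vae_tiling()
    pipeline.to("cuda")
    # First-block caching is gated on IS_PARA rather than always applied in load_pipeline_fast.
    if IS_PARA:
        apply_cache_on_pipe(pipeline, residual_diff_threshold=0.12)
    # Compilation happens only on the IS_COMPILE path.
    if IS_COMPILE:
        print("Compiling pipeline...")
        pipeline.transformer = torch.compile(pipeline.transformer, mode="max-autotune-no-cudagraphs")
        pipeline.vae = torch.compile(pipeline.vae, mode="max-autotune-no-cudagraphs")
    return pipeline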