Retry AoTI and attention backend; show full errors
Browse files
app.py
CHANGED
|
@@ -216,6 +216,11 @@ def set_attention_backend_safe(transformer, backend: str) -> str:
(before — lines 216-221; indentation approximate, lost in page extraction)

                break
        if attention_backend_error is None and last_exc is not None:
            attention_backend_error = str(last_exc)
        return name
    except Exception as exc:  # noqa: BLE001
        last_exc = exc
|
@@ -538,26 +543,27 @@ def warmup_model(pipeline: ZImagePipeline, resolutions: List[str]) -> None:
(before — lines 538-563; lines marked "…" were truncated by the page extraction and could not be recovered; indentation approximate)


def init_app() -> None:
-    global aoti_loaded, aoti_error
    try:
        ensure_models_loaded()
        if ENABLE_COMPILE and pipe is not None:
            ensure_on_gpu()
        if ENABLE_AOTI and not aoti_loaded and pipe is not None and getattr(pipe, "transformer", None) is not None:
-            …
            aoti_loaded = False
-            aoti_error = …
-            print("AoTI …
-            …
-            …
-            …
-            …
-            …
-            aoti_error = None
-            print(f"AoTI loaded: {AOTI_REPO} (variant={AOTI_VARIANT})")
-        except Exception as exc:  # noqa: BLE001
-            aoti_error = str(exc)
-            print(f"AoTI load failed (continuing without AoTI): {exc}")
        if ENABLE_WARMUP and pipe is not None:
            ensure_on_gpu()
            try:
|
@@ -670,6 +676,14 @@ Attention: `{attention_status}` | AoTI: `{aoti_status}` | torch.compile: `{compi…
(before — lines 670-675; the hunk-header context line is truncated in the extraction; indentation approximate)

        </div>"""
        )

        with gr.Row():
            with gr.Column(scale=1):
|
|
|
|
(after — lines 216-226; indentation approximate, lost in page extraction)

                 break
         if attention_backend_error is None and last_exc is not None:
             attention_backend_error = str(last_exc)
+        if attention_backend_error:
+            print(
+                f"Requested attention backend {backend!r} failed; using {name!r} instead. "
+                f"Reason: {attention_backend_error}"
+            )
         return name
     except Exception as exc:  # noqa: BLE001
         last_exc = exc
|
|
|
|
(after — lines 543-569; indentation approximate, lost in page extraction)


 def init_app() -> None:
+    global aoti_loaded, aoti_error, applied_attention_backend
     try:
         ensure_models_loaded()
         if ENABLE_COMPILE and pipe is not None:
             ensure_on_gpu()
         if ENABLE_AOTI and not aoti_loaded and pipe is not None and getattr(pipe, "transformer", None) is not None:
+            try:
+                pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
+                spaces.aoti_blocks_load(pipe.transformer.layers, AOTI_REPO, variant=AOTI_VARIANT)
+                aoti_loaded = True
+                aoti_error = None
+                print(f"AoTI loaded: {AOTI_REPO} (variant={AOTI_VARIANT})")
+            except Exception as exc:  # noqa: BLE001
                 aoti_loaded = False
+                aoti_error = str(exc)
+                print(f"AoTI load failed (continuing without AoTI): {exc}")
+        try:
+            applied_attention_backend = set_attention_backend_safe(pipe.transformer, ATTENTION_BACKEND)
+            print(f"Attention backend (post-AoTI): {applied_attention_backend}")
+        except Exception as exc:  # noqa: BLE001
+            print(f"Attention backend update failed (continuing): {exc}")
         if ENABLE_WARMUP and pipe is not None:
             ensure_on_gpu()
             try:
|
|
|
|
(after — lines 676-689; indentation approximate, lost in page extraction)

         </div>"""
         )
+        if attention_backend_error or aoti_error:
+            with gr.Accordion("Runtime details (debug)", open=False):
+                details: List[str] = []
+                if attention_backend_error:
+                    details.append(f"**Attention backend error**\n```\n{attention_backend_error}\n```")
+                if aoti_error:
+                    details.append(f"**AoTI error**\n```\n{aoti_error}\n```")
+                gr.Markdown("\n\n".join(details))

         with gr.Row():
             with gr.Column(scale=1):
|