Disable AoTI when LoRA is loaded (avoids a runtime crash)
Browse files
README.md
CHANGED
|
@@ -40,6 +40,7 @@ Gradio Space using the official Z-Image pipeline (`Tongyi-MAI/Z-Image-Turbo`) wi
|
|
| 40 |
- `ENABLE_AOTI` (default `true`): Try to load ZeroGPU AoTI blocks via `spaces.aoti_blocks_load` for faster inference.
|
| 41 |
- `AOTI_REPO` (default `zerogpu-aoti/Z-Image`): AoTI blocks repo.
|
| 42 |
- `AOTI_VARIANT` (default `fa3`): AoTI variant.
|
|
|
|
| 43 |
|
| 44 |
## Run locally
|
| 45 |
```bash
|
|
|
|
| 40 |
- `ENABLE_AOTI` (default `true`): Try to load ZeroGPU AoTI blocks via `spaces.aoti_blocks_load` for faster inference.
|
| 41 |
- `AOTI_REPO` (default `zerogpu-aoti/Z-Image`): AoTI blocks repo.
|
| 42 |
- `AOTI_VARIANT` (default `fa3`): AoTI variant.
|
| 43 |
+
- `AOTI_ALLOW_LORA` (default `false`): Allow AoTI to load even if LoRA adapters are loaded (may crash; AoTI blocks generally don’t support LoRA).
|
| 44 |
|
| 45 |
## Run locally
|
| 46 |
```bash
|
app.py
CHANGED
|
@@ -24,6 +24,7 @@ OFFLOAD_TO_CPU_AFTER_RUN = os.environ.get("OFFLOAD_TO_CPU_AFTER_RUN", "false").l
|
|
| 24 |
ENABLE_AOTI = os.environ.get("ENABLE_AOTI", "true").lower() == "true"
|
| 25 |
AOTI_REPO = os.environ.get("AOTI_REPO", "zerogpu-aoti/Z-Image")
|
| 26 |
AOTI_VARIANT = os.environ.get("AOTI_VARIANT", "fa3")
|
|
|
|
| 27 |
DEFAULT_CFG = float(os.environ.get("DEFAULT_CFG", "0.0"))
|
| 28 |
|
| 29 |
|
|
@@ -549,21 +550,26 @@ def init_app() -> None:
|
|
| 549 |
if ENABLE_COMPILE and pipe is not None:
|
| 550 |
ensure_on_gpu()
|
| 551 |
if ENABLE_AOTI and not aoti_loaded and pipe is not None and getattr(pipe, "transformer", None) is not None:
|
| 552 |
-
|
| 553 |
-
pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
|
| 554 |
-
spaces.aoti_blocks_load(pipe.transformer.layers, AOTI_REPO, variant=AOTI_VARIANT)
|
| 555 |
-
aoti_loaded = True
|
| 556 |
-
aoti_error = None
|
| 557 |
-
print(f"AoTI loaded: {AOTI_REPO} (variant={AOTI_VARIANT})")
|
| 558 |
-
except Exception as exc: # noqa: BLE001
|
| 559 |
aoti_loaded = False
|
| 560 |
-
aoti_error = str(exc)
|
| 561 |
-
print(
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 567 |
if ENABLE_WARMUP and pipe is not None:
|
| 568 |
ensure_on_gpu()
|
| 569 |
try:
|
|
@@ -651,7 +657,13 @@ with gr.Blocks(title="Z-Image + LoRA") as demo:
|
|
| 651 |
if aoti_loaded:
|
| 652 |
aoti_status = "loaded"
|
| 653 |
elif aoti_error:
|
| 654 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
aoti_status = f"{label} ({summarize_error(aoti_error)})"
|
| 656 |
else:
|
| 657 |
aoti_status = "not loaded"
|
|
|
|
| 24 |
ENABLE_AOTI = os.environ.get("ENABLE_AOTI", "true").lower() == "true"
|
| 25 |
AOTI_REPO = os.environ.get("AOTI_REPO", "zerogpu-aoti/Z-Image")
|
| 26 |
AOTI_VARIANT = os.environ.get("AOTI_VARIANT", "fa3")
|
| 27 |
+
AOTI_ALLOW_LORA = os.environ.get("AOTI_ALLOW_LORA", "false").lower() == "true"
|
| 28 |
DEFAULT_CFG = float(os.environ.get("DEFAULT_CFG", "0.0"))
|
| 29 |
|
| 30 |
|
|
|
|
| 550 |
if ENABLE_COMPILE and pipe is not None:
|
| 551 |
ensure_on_gpu()
|
| 552 |
if ENABLE_AOTI and not aoti_loaded and pipe is not None and getattr(pipe, "transformer", None) is not None:
|
| 553 |
+
if lora_loaded and not AOTI_ALLOW_LORA:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
aoti_loaded = False
|
| 555 |
+
aoti_error = "disabled: AoTI blocks are incompatible with LoRA adapters"
|
| 556 |
+
print("AoTI disabled: LoRA adapters are loaded (AoTI blocks are incompatible with LoRA).")
|
| 557 |
+
else:
|
| 558 |
+
try:
|
| 559 |
+
pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
|
| 560 |
+
spaces.aoti_blocks_load(pipe.transformer.layers, AOTI_REPO, variant=AOTI_VARIANT)
|
| 561 |
+
aoti_loaded = True
|
| 562 |
+
aoti_error = None
|
| 563 |
+
print(f"AoTI loaded: {AOTI_REPO} (variant={AOTI_VARIANT})")
|
| 564 |
+
except Exception as exc: # noqa: BLE001
|
| 565 |
+
aoti_loaded = False
|
| 566 |
+
aoti_error = str(exc)
|
| 567 |
+
print(f"AoTI load failed (continuing without AoTI): {exc}")
|
| 568 |
+
try:
|
| 569 |
+
applied_attention_backend = set_attention_backend_safe(pipe.transformer, ATTENTION_BACKEND)
|
| 570 |
+
print(f"Attention backend (post-AoTI): {applied_attention_backend}")
|
| 571 |
+
except Exception as exc: # noqa: BLE001
|
| 572 |
+
print(f"Attention backend update failed (continuing): {exc}")
|
| 573 |
if ENABLE_WARMUP and pipe is not None:
|
| 574 |
ensure_on_gpu()
|
| 575 |
try:
|
|
|
|
| 657 |
if aoti_loaded:
|
| 658 |
aoti_status = "loaded"
|
| 659 |
elif aoti_error:
|
| 660 |
+
lower = aoti_error.lower()
|
| 661 |
+
if "disabled" in lower:
|
| 662 |
+
label = "disabled"
|
| 663 |
+
elif "kernels" in lower:
|
| 664 |
+
label = "unavailable"
|
| 665 |
+
else:
|
| 666 |
+
label = "failed"
|
| 667 |
aoti_status = f"{label} ({summarize_error(aoti_error)})"
|
| 668 |
else:
|
| 669 |
aoti_status = "not loaded"
|