Husr committed on
Commit
e963edc
·
1 Parent(s): 46983e8

Align defaults with official example (keep Advanced controls)

Browse files
Files changed (2) hide show
  1. README.md +2 -1
  2. app.py +96 -37
README.md CHANGED
@@ -54,7 +54,8 @@ Place the LoRA file under `lora/` first (or set `LORA_PATH`); otherwise the app
54
  - Prompt
55
  - Resolution category + explicit WxH selection
56
  - Seed (with random toggle)
57
- - Steps, CFG, scheduler + shift (and extra scheduler params), max sequence length
 
58
  - LoRA toggle + strength (enabled only if the file is found)
59
 
60
  ## Git LFS note
 
54
  - Prompt
55
  - Resolution category + explicit WxH selection
56
  - Seed (with random toggle)
57
+ - Steps + Time Shift
58
+ - Advanced: CFG, scheduler + extra scheduler params, max sequence length
59
  - LoRA toggle + strength (enabled only if the file is found)
60
 
61
  ## Git LFS note
app.py CHANGED
@@ -150,10 +150,12 @@ EXAMPLE_PROMPTS = [
150
  pipe: ZImagePipeline | None = None
151
  lora_loaded: bool = False
152
  lora_error: str | None = None
 
153
  pipe_lock = threading.Lock()
154
  pipe_on_gpu: bool = False
155
  aoti_loaded: bool = False
156
  applied_attention_backend: str | None = None
 
157
  aoti_error: str | None = None
158
  transformer_compiled: bool = False
159
  transformer_compile_attempted: bool = False
@@ -167,7 +169,6 @@ try:
167
  except Exception:
168
  pass
169
 
170
-
171
  def module_available(module_name: str) -> bool:
172
  try:
173
  return importlib.util.find_spec(module_name) is not None
@@ -175,6 +176,13 @@ def module_available(module_name: str) -> bool:
175
  return False
176
 
177
 
 
 
 
 
 
 
 
178
  def parse_resolution(resolution: str) -> Tuple[int, int]:
179
  match = re.search(r"(\d+)\s*[×x]\s*(\d+)", resolution)
180
  if match:
@@ -183,6 +191,7 @@ def parse_resolution(resolution: str) -> Tuple[int, int]:
183
 
184
 
185
  def set_attention_backend_safe(transformer, backend: str) -> str:
 
186
  candidates: List[str] = []
187
  if backend:
188
  candidates.append(backend)
@@ -192,41 +201,76 @@ def set_attention_backend_safe(transformer, backend: str) -> str:
192
  candidates.append(f"_{backend}")
193
  candidates.extend(["flash", "xformers", "native"])
194
 
 
 
195
  last_exc: Exception | None = None
196
  for name in candidates:
197
  if not name:
198
  continue
199
  try:
200
  transformer.set_attention_backend(name)
 
 
 
 
 
 
 
201
  return name
202
  except Exception as exc: # noqa: BLE001
203
  last_exc = exc
 
204
  continue
205
 
206
  raise RuntimeError(f"Failed to set attention backend (tried {candidates}): {last_exc}")
207
 
208
 
209
  def attach_lora(pipeline: ZImagePipeline) -> Tuple[bool, str | None]:
 
210
  if not LORA_PATH or not os.path.isfile(LORA_PATH):
211
  return False, "LoRA file not found"
212
  if not module_available("peft"):
213
  return False, "PEFT backend is required for LoRA. Install `peft` and restart."
 
 
 
 
 
 
 
 
214
  try:
215
  folder, weight_name = os.path.split(LORA_PATH)
216
  folder = folder or "."
217
- pipeline.load_lora_weights(folder, weight_name=weight_name)
218
- set_lora_scale(pipeline, 1.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  return True, None
220
  except Exception as exc: # noqa: BLE001
 
221
  return False, f"Failed to load LoRA: {exc}"
222
 
223
 
224
  def set_lora_scale(pipeline: ZImagePipeline, scale: float) -> None:
225
  weight = max(float(scale), 0.0)
 
226
  try:
227
- pipeline.set_adapters(["default"], adapter_weights=[weight])
228
  except TypeError:
229
- pipeline.set_adapters(["default"], weights=[weight])
230
 
231
 
232
  def load_models() -> Tuple[ZImagePipeline, bool, str | None]:
@@ -426,9 +470,9 @@ def generate_image(
426
  steps: int,
427
  shift: float,
428
  guidance_scale: float,
429
- max_sequence_length: int,
430
  use_lora: bool,
431
  lora_scale: float,
 
432
  scheduler_name: str,
433
  num_train_timesteps: int,
434
  use_dynamic_shifting: bool,
@@ -439,17 +483,17 @@ def generate_image(
439
  generator = torch.Generator("cuda").manual_seed(seed)
440
  set_scheduler(
441
  pipeline,
442
- scheduler_name,
443
- num_train_timesteps=num_train_timesteps,
444
- shift=shift,
445
- use_dynamic_shifting=use_dynamic_shifting,
446
- base_shift=base_shift,
447
- max_shift=max_shift,
448
  )
449
 
450
  if lora_loaded:
451
  if use_lora:
452
- set_lora_scale(pipeline, lora_scale)
453
  else:
454
  set_lora_scale(pipeline, 0.0)
455
 
@@ -458,10 +502,10 @@ def generate_image(
458
  prompt=prompt,
459
  height=height,
460
  width=width,
461
- guidance_scale=guidance_scale,
462
- num_inference_steps=steps,
463
  generator=generator,
464
- max_sequence_length=max_sequence_length,
465
  ).images[0]
466
  return image, seed
467
 
@@ -479,9 +523,9 @@ def warmup_model(pipeline: ZImagePipeline, resolutions: List[str]) -> None:
479
  steps=9,
480
  shift=3.0,
481
  guidance_scale=0.0,
482
- max_sequence_length=512,
483
  use_lora=False,
484
  lora_scale=0.0,
 
485
  scheduler_name="FlowMatch Euler",
486
  num_train_timesteps=1000,
487
  use_dynamic_shifting=False,
@@ -500,15 +544,20 @@ def init_app() -> None:
500
  if ENABLE_COMPILE and pipe is not None:
501
  ensure_on_gpu()
502
  if ENABLE_AOTI and not aoti_loaded and pipe is not None and getattr(pipe, "transformer", None) is not None:
503
- try:
504
- pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
505
- spaces.aoti_blocks_load(pipe.transformer.layers, AOTI_REPO, variant=AOTI_VARIANT)
506
- aoti_loaded = True
507
- aoti_error = None
508
- print(f"AoTI loaded: {AOTI_REPO} (variant={AOTI_VARIANT})")
509
- except Exception as exc: # noqa: BLE001
510
- aoti_error = str(exc)
511
- print(f"AoTI load failed (continuing without AoTI): {exc}")
 
 
 
 
 
512
  if ENABLE_WARMUP and pipe is not None:
513
  ensure_on_gpu()
514
  try:
@@ -551,15 +600,15 @@ def generate(
551
  try:
552
  image = generate_image(
553
  pipeline=pipe,
554
- prompt=prompt,
555
- resolution=resolution.split(" ")[0] if " " in resolution else resolution,
556
  seed=new_seed,
557
  steps=int(steps) + 1,
558
  shift=float(shift),
559
  guidance_scale=float(cfg),
560
- max_sequence_length=int(max_sequence_length),
561
  use_lora=use_lora,
562
  lora_scale=float(lora_scale),
 
563
  scheduler_name=str(scheduler_name),
564
  num_train_timesteps=int(num_train_timesteps),
565
  use_dynamic_shifting=bool(use_dynamic_shifting),
@@ -582,14 +631,24 @@ with gr.Blocks(title="Z-Image + LoRA") as demo:
582
  pipe_status = "loaded (GPU)" if pipe and pipe_on_gpu else "loaded (CPU)" if pipe else "not loaded"
583
  lora_file_status = "found" if os.path.isfile(LORA_PATH) else "missing"
584
  if lora_loaded:
585
- lora_status = f"LoRA: loaded ({LORA_PATH})"
 
586
  elif lora_error:
587
  lora_status = f"LoRA: not loaded ({lora_error})"
588
  else:
589
  lora_status = f"LoRA file: {LORA_PATH} ({lora_file_status})"
590
 
591
  attention_status = applied_attention_backend or "unknown"
592
- aoti_status = "loaded" if aoti_loaded else f"failed ({aoti_error})" if aoti_error else "not loaded"
 
 
 
 
 
 
 
 
 
593
  if not ENABLE_COMPILE:
594
  compile_status = "off"
595
  elif transformer_compiled:
@@ -629,10 +688,12 @@ Attention: `{attention_status}` | AoTI: `{aoti_status}` | torch.compile: `{compi
629
  seed = gr.Number(label="Seed", value=42, precision=0)
630
  random_seed = gr.Checkbox(label="Random Seed", value=True)
631
 
 
 
 
 
632
  with gr.Accordion("KSampler / Advanced", open=False):
633
- with gr.Row():
634
- steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=8, step=1)
635
- cfg = gr.Slider(label="CFG", minimum=0.0, maximum=10.0, value=DEFAULT_CFG, step=0.1)
636
 
637
  with gr.Row():
638
  scheduler_name = gr.Dropdown(
@@ -649,15 +710,13 @@ Attention: `{attention_status}` | AoTI: `{aoti_status}` | torch.compile: `{compi
649
  )
650
 
651
  with gr.Row():
652
- shift = gr.Slider(label="Time Shift", minimum=1.0, maximum=10.0, value=3.0, step=0.1)
653
  use_dynamic_shifting = gr.Checkbox(label="use_dynamic_shifting", value=False)
 
654
 
655
  with gr.Row():
656
  base_shift = gr.Slider(label="base_shift", minimum=0.0, maximum=10.0, value=0.5, step=0.1)
657
  max_shift = gr.Slider(label="max_shift", minimum=0.0, maximum=10.0, value=3.0, step=0.1)
658
 
659
- max_seq = gr.Slider(label="Max Sequence Length", minimum=256, maximum=1024, value=512, step=16)
660
-
661
  with gr.Row():
662
  lora_controls_enabled = bool(lora_loaded)
663
  use_lora = gr.Checkbox(label="Use LoRA", value=lora_controls_enabled, interactive=lora_controls_enabled)
 
150
  pipe: ZImagePipeline | None = None
151
  lora_loaded: bool = False
152
  lora_error: str | None = None
153
+ lora_adapter_name: str | None = None
154
  pipe_lock = threading.Lock()
155
  pipe_on_gpu: bool = False
156
  aoti_loaded: bool = False
157
  applied_attention_backend: str | None = None
158
+ attention_backend_error: str | None = None
159
  aoti_error: str | None = None
160
  transformer_compiled: bool = False
161
  transformer_compile_attempted: bool = False
 
169
  except Exception:
170
  pass
171
 
 
172
  def module_available(module_name: str) -> bool:
173
  try:
174
  return importlib.util.find_spec(module_name) is not None
 
176
  return False
177
 
178
 
179
+ def summarize_error(message: str, *, max_len: int = 120) -> str:
180
+ one_line = " ".join(str(message).split())
181
+ if len(one_line) <= max_len:
182
+ return one_line
183
+ return one_line[: max_len - 1] + "…"
184
+
185
+
186
  def parse_resolution(resolution: str) -> Tuple[int, int]:
187
  match = re.search(r"(\d+)\s*[×x]\s*(\d+)", resolution)
188
  if match:
 
191
 
192
 
193
  def set_attention_backend_safe(transformer, backend: str) -> str:
194
+ global attention_backend_error
195
  candidates: List[str] = []
196
  if backend:
197
  candidates.append(backend)
 
201
  candidates.append(f"_{backend}")
202
  candidates.extend(["flash", "xformers", "native"])
203
 
204
+ attention_backend_error = None
205
+ errors: dict[str, Exception] = {}
206
  last_exc: Exception | None = None
207
  for name in candidates:
208
  if not name:
209
  continue
210
  try:
211
  transformer.set_attention_backend(name)
212
+ if backend and name != backend:
213
+ for key in (backend, backend.lstrip("_"), f"_{backend}"):
214
+ if key in errors:
215
+ attention_backend_error = str(errors[key])
216
+ break
217
+ if attention_backend_error is None and last_exc is not None:
218
+ attention_backend_error = str(last_exc)
219
  return name
220
  except Exception as exc: # noqa: BLE001
221
  last_exc = exc
222
+ errors[name] = exc
223
  continue
224
 
225
  raise RuntimeError(f"Failed to set attention backend (tried {candidates}): {last_exc}")
226
 
227
 
228
  def attach_lora(pipeline: ZImagePipeline) -> Tuple[bool, str | None]:
229
+ global lora_adapter_name
230
  if not LORA_PATH or not os.path.isfile(LORA_PATH):
231
  return False, "LoRA file not found"
232
  if not module_available("peft"):
233
  return False, "PEFT backend is required for LoRA. Install `peft` and restart."
234
+
235
+ def extract_present_adapter_names(exc: Exception) -> List[str]:
236
+ msg = str(exc)
237
+ match = re.search(r"present adapters:\s*(\{[^}]*\})", msg)
238
+ if not match:
239
+ return []
240
+ return re.findall(r"'([^']+)'", match.group(1))
241
+
242
  try:
243
  folder, weight_name = os.path.split(LORA_PATH)
244
  folder = folder or "."
245
+ preferred_adapter = os.environ.get("LORA_ADAPTER_NAME", "default")
246
+ lora_adapter_name = preferred_adapter
247
+ try:
248
+ pipeline.load_lora_weights(folder, weight_name=weight_name, adapter_name=preferred_adapter)
249
+ except TypeError:
250
+ pipeline.load_lora_weights(folder, weight_name=weight_name)
251
+
252
+ try:
253
+ set_lora_scale(pipeline, 1.0)
254
+ except Exception as exc: # noqa: BLE001
255
+ adapter_names = extract_present_adapter_names(exc)
256
+ if adapter_names:
257
+ lora_adapter_name = adapter_names[0]
258
+ set_lora_scale(pipeline, 1.0)
259
+ else:
260
+ raise
261
  return True, None
262
  except Exception as exc: # noqa: BLE001
263
+ lora_adapter_name = None
264
  return False, f"Failed to load LoRA: {exc}"
265
 
266
 
267
  def set_lora_scale(pipeline: ZImagePipeline, scale: float) -> None:
268
  weight = max(float(scale), 0.0)
269
+ adapter = lora_adapter_name or "default"
270
  try:
271
+ pipeline.set_adapters([adapter], adapter_weights=[weight])
272
  except TypeError:
273
+ pipeline.set_adapters([adapter], weights=[weight])
274
 
275
 
276
  def load_models() -> Tuple[ZImagePipeline, bool, str | None]:
 
470
  steps: int,
471
  shift: float,
472
  guidance_scale: float,
 
473
  use_lora: bool,
474
  lora_scale: float,
475
+ max_sequence_length: int,
476
  scheduler_name: str,
477
  num_train_timesteps: int,
478
  use_dynamic_shifting: bool,
 
483
  generator = torch.Generator("cuda").manual_seed(seed)
484
  set_scheduler(
485
  pipeline,
486
+ str(scheduler_name),
487
+ num_train_timesteps=int(num_train_timesteps),
488
+ shift=float(shift),
489
+ use_dynamic_shifting=bool(use_dynamic_shifting),
490
+ base_shift=float(base_shift),
491
+ max_shift=float(max_shift),
492
  )
493
 
494
  if lora_loaded:
495
  if use_lora:
496
+ set_lora_scale(pipeline, float(lora_scale))
497
  else:
498
  set_lora_scale(pipeline, 0.0)
499
 
 
502
  prompt=prompt,
503
  height=height,
504
  width=width,
505
+ guidance_scale=float(guidance_scale),
506
+ num_inference_steps=int(steps),
507
  generator=generator,
508
+ max_sequence_length=int(max_sequence_length),
509
  ).images[0]
510
  return image, seed
511
 
 
523
  steps=9,
524
  shift=3.0,
525
  guidance_scale=0.0,
 
526
  use_lora=False,
527
  lora_scale=0.0,
528
+ max_sequence_length=512,
529
  scheduler_name="FlowMatch Euler",
530
  num_train_timesteps=1000,
531
  use_dynamic_shifting=False,
 
544
  if ENABLE_COMPILE and pipe is not None:
545
  ensure_on_gpu()
546
  if ENABLE_AOTI and not aoti_loaded and pipe is not None and getattr(pipe, "transformer", None) is not None:
547
+ if not module_available("kernels"):
548
+ aoti_loaded = False
549
+ aoti_error = "kernels module not available"
550
+ print("AoTI unavailable (kernels module not available).")
551
+ else:
552
+ try:
553
+ pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
554
+ spaces.aoti_blocks_load(pipe.transformer.layers, AOTI_REPO, variant=AOTI_VARIANT)
555
+ aoti_loaded = True
556
+ aoti_error = None
557
+ print(f"AoTI loaded: {AOTI_REPO} (variant={AOTI_VARIANT})")
558
+ except Exception as exc: # noqa: BLE001
559
+ aoti_error = str(exc)
560
+ print(f"AoTI load failed (continuing without AoTI): {exc}")
561
  if ENABLE_WARMUP and pipe is not None:
562
  ensure_on_gpu()
563
  try:
 
600
  try:
601
  image = generate_image(
602
  pipeline=pipe,
603
+ prompt=str(prompt),
604
+ resolution=str(resolution),
605
  seed=new_seed,
606
  steps=int(steps) + 1,
607
  shift=float(shift),
608
  guidance_scale=float(cfg),
 
609
  use_lora=use_lora,
610
  lora_scale=float(lora_scale),
611
+ max_sequence_length=int(max_sequence_length),
612
  scheduler_name=str(scheduler_name),
613
  num_train_timesteps=int(num_train_timesteps),
614
  use_dynamic_shifting=bool(use_dynamic_shifting),
 
631
  pipe_status = "loaded (GPU)" if pipe and pipe_on_gpu else "loaded (CPU)" if pipe else "not loaded"
632
  lora_file_status = "found" if os.path.isfile(LORA_PATH) else "missing"
633
  if lora_loaded:
634
+ adapter = lora_adapter_name or "default"
635
+ lora_status = f"LoRA: loaded ({LORA_PATH}, adapter={adapter})"
636
  elif lora_error:
637
  lora_status = f"LoRA: not loaded ({lora_error})"
638
  else:
639
  lora_status = f"LoRA file: {LORA_PATH} ({lora_file_status})"
640
 
641
  attention_status = applied_attention_backend or "unknown"
642
+ if attention_backend_error and ATTENTION_BACKEND and attention_status != ATTENTION_BACKEND:
643
+ attention_status = f"{attention_status} ({ATTENTION_BACKEND} unavailable: {summarize_error(attention_backend_error)})"
644
+
645
+ if aoti_loaded:
646
+ aoti_status = "loaded"
647
+ elif aoti_error:
648
+ label = "unavailable" if "kernels" in aoti_error.lower() else "failed"
649
+ aoti_status = f"{label} ({summarize_error(aoti_error)})"
650
+ else:
651
+ aoti_status = "not loaded"
652
  if not ENABLE_COMPILE:
653
  compile_status = "off"
654
  elif transformer_compiled:
 
688
  seed = gr.Number(label="Seed", value=42, precision=0)
689
  random_seed = gr.Checkbox(label="Random Seed", value=True)
690
 
691
+ with gr.Row():
692
+ steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=8, step=1)
693
+ shift = gr.Slider(label="Time Shift", minimum=1.0, maximum=10.0, value=3.0, step=0.1)
694
+
695
  with gr.Accordion("KSampler / Advanced", open=False):
696
+ cfg = gr.Slider(label="CFG", minimum=0.0, maximum=10.0, value=DEFAULT_CFG, step=0.1)
 
 
697
 
698
  with gr.Row():
699
  scheduler_name = gr.Dropdown(
 
710
  )
711
 
712
  with gr.Row():
 
713
  use_dynamic_shifting = gr.Checkbox(label="use_dynamic_shifting", value=False)
714
+ max_seq = gr.Slider(label="Max Sequence Length", minimum=256, maximum=1024, value=512, step=16)
715
 
716
  with gr.Row():
717
  base_shift = gr.Slider(label="base_shift", minimum=0.0, maximum=10.0, value=0.5, step=0.1)
718
  max_shift = gr.Slider(label="max_shift", minimum=0.0, maximum=10.0, value=3.0, step=0.1)
719
 
 
 
720
  with gr.Row():
721
  lora_controls_enabled = bool(lora_loaded)
722
  use_lora = gr.Checkbox(label="Use LoRA", value=lora_controls_enabled, interactive=lora_controls_enabled)