dee-Z-Image-Turbo

Sleeping

App Files Files Community

telcom committed on Dec 23, 2025

Commit

56ac11d

verified ·

1 Parent(s): a5ff267

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -60

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import gc
 import random
 import warnings
 import logging
 # ---- Spaces GPU decorator (must be imported early) ----------
 try:
@@ -23,14 +24,22 @@ from PIL import Image
 import torch
 from huggingface_hub import login
-# ---- Diffusers imports (robust for source installs) ---------
-try:
-    from diffusers import ZImagePipeline, ZImageImg2ImgPipeline
-except Exception:
-    from diffusers.pipelines.z_image.pipeline_z_image import ZImagePipeline
-    from diffusers.pipelines.z_image.pipeline_z_image_img2img import ZImageImg2ImgPipeline
-from diffusers import FlowMatchEulerDiscreteScheduler
 # ============================================================
 # Config
@@ -72,7 +81,7 @@ if not cuda_available:
     fallback_msg = "GPU unavailable. Running in CPU fallback mode (slow)."
 # ============================================================
-# Load pipelines (txt2img + img2img share weights)
 # ============================================================
 pipe_txt2img = None
@@ -80,60 +89,63 @@ pipe_img2img = None
 model_loaded = False
 load_error = None
-try:
-    fp_kwargs = {
-        "torch_dtype": dtype,
-        "use_safetensors": True,
-    }
-    if HF_TOKEN:
-        fp_kwargs["token"] = HF_TOKEN
-    # Default scheduler (you can change shift per-run)
-    default_scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0)
-    pipe_txt2img = ZImagePipeline.from_pretrained(MODEL_PATH, scheduler=default_scheduler, **fp_kwargs).to(device)
-    # Optional attention backend
     try:
-        if hasattr(pipe_txt2img, "transformer") and hasattr(pipe_txt2img.transformer, "set_attention_backend"):
-            pipe_txt2img.transformer.set_attention_backend(ATTENTION_BACKEND)
     except Exception:
         pass
-    # Optional compile
-    if ENABLE_COMPILE and device.type == "cuda":
-        try:
-            pipe_txt2img.transformer = torch.compile(
-                pipe_txt2img.transformer,
                 mode="max-autotune-no-cudagraphs",
                 fullgraph=False,
             )
-        except Exception:
-            pass
-    try:
-        pipe_txt2img.set_progress_bar_config(disable=True)
     except Exception:
         pass
-    # Build img2img pipeline reusing the exact same modules
-    pipe_img2img = ZImageImg2ImgPipeline(
-        scheduler=pipe_txt2img.scheduler,
-        vae=pipe_txt2img.vae,
-        text_encoder=pipe_txt2img.text_encoder,
-        tokenizer=pipe_txt2img.tokenizer,
-        transformer=pipe_txt2img.transformer,
-    ).to(device)
     try:
-        pipe_img2img.set_progress_bar_config(disable=True)
-    except Exception:
-        pass
-    model_loaded = True
-except Exception as e:
-    load_error = repr(e)
     model_loaded = False
 # ============================================================
@@ -153,6 +165,20 @@ def prep_init_image(img: Image.Image, width: int, height: int) -> Image.Image:
         img = img.resize((width, height), Image.LANCZOS)
     return img
 # ============================================================
 # Inference
 # ============================================================
@@ -202,13 +228,16 @@ def _infer_impl(
     init_image = prep_init_image(init_image, width, height)
-    # Update scheduler shift per run
-    scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=float(shift))
-    pipe_txt2img.scheduler = scheduler
-    pipe_img2img.scheduler = scheduler
     try:
-        common_kwargs = dict(
             prompt=prompt,
             height=height,
             width=width,
@@ -217,21 +246,28 @@ def _infer_impl(
             generator=generator,
             max_sequence_length=msl,
         )
         if neg is not None:
-            common_kwargs["negative_prompt"] = neg
         with torch.inference_mode():
             if device.type == "cuda":
                 with torch.autocast("cuda", dtype=dtype):
                     if init_image is not None:
-                        out = pipe_img2img(image=init_image, strength=st, **common_kwargs)
                     else:
-                        out = pipe_txt2img(**common_kwargs)
             else:
                 if init_image is not None:
-                    out = pipe_img2img(image=init_image, strength=st, **common_kwargs)
                 else:
-                    out = pipe_txt2img(**common_kwargs)
         img = out.images[0]
         return img, status
@@ -253,7 +289,7 @@ else:
         return _infer_impl(*args, **kwargs)
 # ============================================================
-# UI
 # ============================================================
 CSS = """

 import random
 import warnings
 import logging
+import inspect
 # ---- Spaces GPU decorator (must be imported early) ----------
 try:
 import torch
 from huggingface_hub import login
+# ============================================================
+# Try importing Z-Image pipelines (requires diffusers>=0.36.0)
+# ============================================================
+ZIMAGE_AVAILABLE = True
+ZIMAGE_IMPORT_ERROR = None
+try:
+    from diffusers import (
+        ZImagePipeline,
+        ZImageImg2ImgPipeline,
+        FlowMatchEulerDiscreteScheduler,
+    )
+except Exception as e:
+    ZIMAGE_AVAILABLE = False
+    ZIMAGE_IMPORT_ERROR = repr(e)
 # ============================================================
 # Config
     fallback_msg = "GPU unavailable. Running in CPU fallback mode (slow)."
 # ============================================================
+# Load pipelines
 # ============================================================
 pipe_txt2img = None
 model_loaded = False
 load_error = None
+def _set_attention_backend_best_effort(p):
     try:
+        if hasattr(p, "transformer") and hasattr(p.transformer, "set_attention_backend"):
+            p.transformer.set_attention_backend(ATTENTION_BACKEND)
     except Exception:
         pass
+def _compile_best_effort(p):
+    if not (ENABLE_COMPILE and device.type == "cuda"):
+        return
+    try:
+        if hasattr(p, "transformer"):
+            p.transformer = torch.compile(
+                p.transformer,
                 mode="max-autotune-no-cudagraphs",
                 fullgraph=False,
             )
     except Exception:
         pass
+if ZIMAGE_AVAILABLE:
     try:
+        fp_kwargs = {
+            "torch_dtype": dtype,
+            "use_safetensors": True,
+        }
+        if HF_TOKEN:
+            fp_kwargs["token"] = HF_TOKEN
+        pipe_txt2img = ZImagePipeline.from_pretrained(MODEL_PATH, **fp_kwargs).to(device)
+        _set_attention_backend_best_effort(pipe_txt2img)
+        _compile_best_effort(pipe_txt2img)
+        try:
+            pipe_txt2img.set_progress_bar_config(disable=True)
+        except Exception:
+            pass
+        # Share weights/components with img2img pipeline
+        pipe_img2img = ZImageImg2ImgPipeline(**pipe_txt2img.components).to(device)
+        _set_attention_backend_best_effort(pipe_img2img)
+        try:
+            pipe_img2img.set_progress_bar_config(disable=True)
+        except Exception:
+            pass
+        model_loaded = True
+    except Exception as e:
+        load_error = repr(e)
+        model_loaded = False
+else:
+    load_error = (
+        "Z-Image pipelines not available in your diffusers install.\n\n"
+        f"Import error:\n{ZIMAGE_IMPORT_ERROR}\n\n"
+        "Fix: set requirements.txt to diffusers==0.36.0 (or install Diffusers from source)."
+    )
     model_loaded = False
 # ============================================================
         img = img.resize((width, height), Image.LANCZOS)
     return img
+def _call_pipeline(pipe, kwargs: dict):
+    """
+    Robust call: only pass kwargs the pipeline actually accepts.
+    This avoids crashes if a particular build does not support negative_prompt, etc.
+    """
+    try:
+        sig = inspect.signature(pipe.__call__)
+        allowed = set(sig.parameters.keys())
+        filtered = {k: v for k, v in kwargs.items() if k in allowed and v is not None}
+        return pipe(**filtered)
+    except Exception:
+        # Fallback: try raw kwargs (some pipelines use **kwargs internally)
+        return pipe(**{k: v for k, v in kwargs.items() if v is not None})
 # ============================================================
 # Inference
 # ============================================================
     init_image = prep_init_image(init_image, width, height)
+    # Update scheduler (shift) per run
+    try:
+        scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=float(shift))
+        pipe_txt2img.scheduler = scheduler
+        pipe_img2img.scheduler = scheduler
+    except Exception:
+        pass
     try:
+        base_kwargs = dict(
             prompt=prompt,
             height=height,
             width=width,
             generator=generator,
             max_sequence_length=msl,
         )
+        # only passed if supported by the pipeline
         if neg is not None:
+            base_kwargs["negative_prompt"] = neg
         with torch.inference_mode():
             if device.type == "cuda":
                 with torch.autocast("cuda", dtype=dtype):
                     if init_image is not None:
+                        out = _call_pipeline(
+                            pipe_img2img,
+                            {**base_kwargs, "image": init_image, "strength": st},
+                        )
                     else:
+                        out = _call_pipeline(pipe_txt2img, base_kwargs)
             else:
                 if init_image is not None:
+                    out = _call_pipeline(
+                        pipe_img2img,
+                        {**base_kwargs, "image": init_image, "strength": st},
+                    )
                 else:
+                    out = _call_pipeline(pipe_txt2img, base_kwargs)
         img = out.images[0]
         return img, status
         return _infer_impl(*args, **kwargs)
 # ============================================================
+# UI (simple black style like your SDXL example)
 # ============================================================
 CSS = """