yingzhac-research committed on
Commit
9fc4b86
·
1 Parent(s): f89593c

Lazy-load pipeline in ZeroGPU worker

Browse files
Files changed (1) hide show
  1. app.py +24 -11
app.py CHANGED
@@ -2,25 +2,34 @@ import torch
2
  import spaces
3
  import gradio as gr
4
  from diffusers import DiffusionPipeline
 
5
 
6
  MAX_SEED = 2**32 - 1
7
 
8
- # Load the pipeline once at startup
9
- print("Loading Z-Image-Turbo pipeline...")
10
- pipe = DiffusionPipeline.from_pretrained(
11
- "Tongyi-MAI/Z-Image-Turbo",
12
- torch_dtype=torch.bfloat16,
13
- low_cpu_mem_usage=False,
14
- )
15
- pipe.to("cuda")
 
 
 
 
 
 
 
 
 
 
16
 
17
  '# ======== AoTI compilation + FA3 ======== (disabled on HF to avoid outdated AOTI/FA3 package errors)'
18
  # pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
19
  # spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")
20
 
21
- print("Pipeline loaded!")
22
-
23
- @spaces.GPU
24
  def generate_image(
25
  prompt,
26
  negative_prompt,
@@ -33,6 +42,10 @@ def generate_image(
33
  progress=gr.Progress(track_tqdm=True),
34
  ):
35
  """Generate 4 images with seeds: seed, 2x, 3x, 4x (mod MAX_SEED)."""
 
 
 
 
36
  if randomize_seed:
37
  seed = torch.randint(0, MAX_SEED, (1,)).item()
38
 
 
2
  import spaces
3
  import gradio as gr
4
  from diffusers import DiffusionPipeline
5
+ from threading import Lock
6
 
7
  MAX_SEED = 2**32 - 1
8
 
9
pipe = None        # lazily-initialized DiffusionPipeline singleton (loaded on first use)
pipe_lock = Lock()  # guards first-time initialization across concurrent requests

def get_pipe():
    """Return the shared Z-Image-Turbo pipeline, loading it on first use.

    Uses double-checked locking: a lock-free fast path once the pipeline
    exists, and a re-check under the lock so concurrent first callers do
    not both trigger the (expensive) model load. Loading lazily here means
    it happens inside the ZeroGPU worker, where CUDA is available.

    Returns:
        The module-level ``DiffusionPipeline`` instance, moved to "cuda".
    """
    global pipe
    # Fast path: already initialized; skip the lock entirely.
    if pipe is not None:
        return pipe
    with pipe_lock:
        # Re-check under the lock: another thread may have won the race.
        if pipe is None:
            # Load the pipeline lazily inside the ZeroGPU worker
            print("Loading Z-Image-Turbo pipeline...")
            pipe = DiffusionPipeline.from_pretrained(
                "Tongyi-MAI/Z-Image-Turbo",
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=False,
            ).to("cuda")
            print("Pipeline loaded!")
    return pipe
27
 
28
  '# ======== AoTI compilation + FA3 ======== (disabled on HF to avoid outdated AOTI/FA3 package errors)'
29
  # pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
30
  # spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")
31
 
32
+ @spaces.GPU(duration=120)
 
 
33
  def generate_image(
34
  prompt,
35
  negative_prompt,
 
42
  progress=gr.Progress(track_tqdm=True),
43
  ):
44
  """Generate 4 images with seeds: seed, 2x, 3x, 4x (mod MAX_SEED)."""
45
+ if not torch.cuda.is_available():
46
+ raise RuntimeError("CUDA is not available inside the ZeroGPU worker.")
47
+
48
+ pipe = get_pipe()
49
  if randomize_seed:
50
  seed = torch.randint(0, MAX_SEED, (1,)).item()
51