Spaces:

TSXu
/

UniCalli_Dev

Running on Zero

TSXu committed on Jan 30

Commit

aa36e12

1 Parent(s): 8c4267f

Load compiled graph in each GPU session

ZeroGPU runs each @spaces.GPU call in a separate session, so the
compiled graph needs to be loaded at the start of each inference.

- run_generation now loads the compiled graph from the Hub at the start of each call
- Compilation only happens once (when no cached graph exists)
- Each inference downloads and applies the cached .pt2 file

Files changed (1) hide show

app.py +23 -13

app.py CHANGED Viewed

@@ -415,12 +415,23 @@ def compile_model_first_time():
     return None
-@spaces.GPU(duration=120)  # 2 minutes for normal generation
 def run_generation(text, font, author, num_steps, start_seed, num_images):
     """
     Run generation with the AOT-compiled model.
     """
-    gen = init_generator()  # Returns the already-compiled generator
     results = []
     seeds_used = []
@@ -466,19 +477,18 @@ def interactive_session(
     # Determine author
     author = author_dropdown if author_dropdown != "None (Synthetic / 合成风格)" else None
-    # Step 1: Load compiled graph (cached) or compile (first time)
     if not _is_optimized:
-        if _check_compiled_graph_exists():
-            yield "⏳ 加载已缓存的编译模型...", []
-        else:
             yield "⏳ 首次运行，编译优化模型（约5-10分钟，仅此一次）...", []
-        progress(0.1, desc="加载/编译中...")
-        compile_model_first_time()
-        yield "✅ 模型加载完成！", []
-    # Step 2: Run generation (2 min)
-    yield f"🎨 开始生成 {num_images} 张图片...", []
-    progress(0.5, desc="生成中...")
     results, seeds_used = run_generation(
         text, font, author, num_steps, start_seed, num_images

     return None
+@spaces.GPU(duration=180)  # 3 minutes for generation (includes loading compiled graph)
 def run_generation(text, font, author, num_steps, start_seed, num_images):
     """
     Run generation with the AOT-compiled model.
+    Each GPU session loads the cached compiled graph from Hub.
     """
+    gen = init_generator()
+    model = gen.model
+    # Load compiled graph from Hub (fast, ~30s download + load)
+    # This is needed because each @spaces.GPU call is a new session
+    if _check_compiled_graph_exists():
+        logger.info("Loading cached compiled graph for this session...")
+        _load_compiled_graph(model)
+        logger.info("✓ Compiled graph loaded!")
+    else:
+        logger.warning("No compiled graph found on Hub - running unoptimized")
     results = []
     seeds_used = []
     # Determine author
     author = author_dropdown if author_dropdown != "None (Synthetic / 合成风格)" else None
+    # Step 1: First-time compile (only if no cached graph exists)
     if not _is_optimized:
+        if not _check_compiled_graph_exists():
             yield "⏳ 首次运行，编译优化模型（约5-10分钟，仅此一次）...", []
+            progress(0.1, desc="编译中...")
+            compile_model_first_time()
+            yield "✅ 编译完成并已上传缓存！", []
+        _is_optimized = True  # Mark as done (cached graph exists)
+    # Step 2: Run generation (includes loading compiled graph + inference)
+    yield f"🎨 加载编译模型并生成 {num_images} 张图片...", []
+    progress(0.3, desc="加载编译模型...")
     results, seeds_used = run_generation(
         text, font, author, num_steps, start_seed, num_images