Spaces:
Running on Zero
Split model loading and generation for better progress visibility
Browse files- Added load_and_optimize_model() for step 1 (model loading + FP8)
- Separated run_generation() for step 2 (actual inference)
- Added warning about first-run compilation and retry hint
- Updated progress messages to show current step
app.py
CHANGED
|
@@ -213,23 +213,37 @@ def parse_font_style(font_style: str) -> str:
|
|
| 213 |
return None
|
| 214 |
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
def _get_generation_duration(text, font, author, num_steps, start_seed, num_images):
|
| 217 |
-
"""Calculate dynamic GPU duration:
|
| 218 |
-
return
|
| 219 |
|
| 220 |
|
| 221 |
@spaces.GPU(duration=_get_generation_duration)
|
| 222 |
def run_generation(text, font, author, num_steps, start_seed, num_images):
|
| 223 |
"""
|
| 224 |
-
|
| 225 |
-
|
| 226 |
"""
|
| 227 |
gen = init_generator()
|
| 228 |
|
| 229 |
-
#
|
| 230 |
-
logger.info("Applying FP8 quantization to model...")
|
| 231 |
quantize_(gen.model, Float8DynamicActivationFloat8WeightConfig())
|
| 232 |
-
logger.info("✓ FP8 quantization applied! Running with FA3 + FP8")
|
| 233 |
|
| 234 |
results = []
|
| 235 |
seeds_used = []
|
|
@@ -255,7 +269,8 @@ def interactive_session(
|
|
| 255 |
progress=gr.Progress()
|
| 256 |
):
|
| 257 |
"""
|
| 258 |
-
Interactive session with FA3 +
|
|
|
|
| 259 |
"""
|
| 260 |
# Validate text
|
| 261 |
if len(text) < 1:
|
|
@@ -271,8 +286,14 @@ def interactive_session(
|
|
| 271 |
# Determine author
|
| 272 |
author = author_dropdown if author_dropdown != "None (Synthetic / 合成风格)" else None
|
| 273 |
|
| 274 |
-
#
|
| 275 |
-
yield
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
progress(0.3, desc="生成中...")
|
| 277 |
|
| 278 |
results, seeds_used = run_generation(
|
|
@@ -368,7 +389,15 @@ with gr.Blocks(title="UniCalli - Chinese Calligraphy Generator / 中国书法生
|
|
| 368 |
with gr.Column(scale=1):
|
| 369 |
# Output section
|
| 370 |
gr.Markdown("### 🖼️ 生成结果 / Generated Results")
|
| 371 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
output_gallery = gr.Gallery(
|
| 374 |
label="生成结果 / Generated Results",
|
|
|
|
| 213 |
return None
|
| 214 |
|
| 215 |
|
| 216 |
+
@spaces.GPU(duration=60)
|
| 217 |
+
def load_and_optimize_model():
|
| 218 |
+
"""
|
| 219 |
+
Step 1: Load model and apply FP8 quantization.
|
| 220 |
+
This triggers torch compilation on first run (may take 1-2 minutes).
|
| 221 |
+
"""
|
| 222 |
+
gen = init_generator()
|
| 223 |
+
|
| 224 |
+
# Apply FP8 quantization (works with FA3)
|
| 225 |
+
logger.info("Applying FP8 quantization to model...")
|
| 226 |
+
quantize_(gen.model, Float8DynamicActivationFloat8WeightConfig())
|
| 227 |
+
logger.info("✓ FP8 quantization applied! Running with FA3 + FP8")
|
| 228 |
+
|
| 229 |
+
return "ready"
|
| 230 |
+
|
| 231 |
+
|
| 232 |
def _get_generation_duration(text, font, author, num_steps, start_seed, num_images):
|
| 233 |
+
"""Calculate dynamic GPU duration: 2s per step per image"""
|
| 234 |
+
return 20 + int(2 * num_steps * num_images)
|
| 235 |
|
| 236 |
|
| 237 |
@spaces.GPU(duration=_get_generation_duration)
|
| 238 |
def run_generation(text, font, author, num_steps, start_seed, num_images):
|
| 239 |
"""
|
| 240 |
+
Step 2: Run actual generation.
|
| 241 |
+
Model should already be loaded from step 1.
|
| 242 |
"""
|
| 243 |
gen = init_generator()
|
| 244 |
|
| 245 |
+
# Re-apply FP8 quantization (ZeroGPU sessions are isolated)
|
|
|
|
| 246 |
quantize_(gen.model, Float8DynamicActivationFloat8WeightConfig())
|
|
|
|
| 247 |
|
| 248 |
results = []
|
| 249 |
seeds_used = []
|
|
|
|
| 269 |
progress=gr.Progress()
|
| 270 |
):
|
| 271 |
"""
|
| 272 |
+
Interactive session with FA3 + FP8.
|
| 273 |
+
Split into load + generate steps for better progress visibility.
|
| 274 |
"""
|
| 275 |
# Validate text
|
| 276 |
if len(text) < 1:
|
|
|
|
| 286 |
# Determine author
|
| 287 |
author = author_dropdown if author_dropdown != "None (Synthetic / 合成风格)" else None
|
| 288 |
|
| 289 |
+
# Step 1: Load and optimize model
|
| 290 |
+
yield "⏳ 加载模型中... (首次可能需要1-2分钟编译) / Loading model... (first run may take 1-2 min to compile)", []
|
| 291 |
+
progress(0.1, desc="加载模型...")
|
| 292 |
+
|
| 293 |
+
load_and_optimize_model()
|
| 294 |
+
|
| 295 |
+
# Step 2: Run generation
|
| 296 |
+
yield f"🎨 生成 {num_images} 张图片中... / Generating {num_images} images...", []
|
| 297 |
progress(0.3, desc="生成中...")
|
| 298 |
|
| 299 |
results, seeds_used = run_generation(
|
|
|
|
| 389 |
with gr.Column(scale=1):
|
| 390 |
# Output section
|
| 391 |
gr.Markdown("### 🖼️ 生成结果 / Generated Results")
|
| 392 |
+
gr.Markdown("""
|
| 393 |
+
⚠️ **首次生成说明 / First Run Note:**
|
| 394 |
+
- 第一次生成会触发 PyTorch 编译,可能需要 1-2 分钟
|
| 395 |
+
- 如果遇到错误,请**再点一次生成按钮**即可正常运行
|
| 396 |
+
- First generation triggers PyTorch compilation (~1-2 min)
|
| 397 |
+
- If you see an error, just **click generate again** and it will work
|
| 398 |
+
|
| 399 |
+
*点击图片可放大查看 / Click image to enlarge*
|
| 400 |
+
""")
|
| 401 |
|
| 402 |
output_gallery = gr.Gallery(
|
| 403 |
label="生成结果 / Generated Results",
|