Spaces:
Running on Zero
Running on Zero
Tianshuo-Xu committed on
Commit ·
39d3dc3
1
Parent(s): 5a8be65
improve gradio progress stages and percentages
Browse files
app.py
CHANGED
|
@@ -254,11 +254,13 @@ def _get_generation_duration(text, font, author, num_steps, start_seed, num_imag
|
|
| 254 |
|
| 255 |
|
| 256 |
@spaces.GPU(duration=_get_generation_duration)
|
| 257 |
-
def run_generation(text, font, author, num_steps, start_seed, num_images):
|
| 258 |
"""
|
| 259 |
Load model, apply FP8 quantization, and generate images.
|
| 260 |
All in one GPU session to avoid redundant loading.
|
| 261 |
"""
|
|
|
|
|
|
|
| 262 |
# Enable CUDA optimizations inside the worker
|
| 263 |
try:
|
| 264 |
torch.backends.cuda.matmul.allow_tf32 = True
|
|
@@ -271,16 +273,22 @@ def run_generation(text, font, author, num_steps, start_seed, num_images):
|
|
| 271 |
pass
|
| 272 |
|
| 273 |
# Step 1: Load model
|
|
|
|
| 274 |
global generator
|
| 275 |
if generator is None:
|
| 276 |
logger.info("Initializing generator lazily inside GPU worker...")
|
|
|
|
| 277 |
generator = init_generator()
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
logger.info("Using initialized generator in ZeroGPU worker.")
|
| 280 |
gen = generator
|
| 281 |
# ZeroGPU automatically maps these to the acquired GPU during execution.
|
| 282 |
# We must also correctly update internal Python attributes so runtime-generated latents go to GPU.
|
| 283 |
target_device = torch.device("cuda")
|
|
|
|
| 284 |
gen.device = target_device
|
| 285 |
if hasattr(gen, "sampler") and gen.sampler is not None:
|
| 286 |
gen.sampler.device = target_device
|
|
@@ -289,6 +297,7 @@ def run_generation(text, font, author, num_steps, start_seed, num_images):
|
|
| 289 |
gen.clip.to(target_device)
|
| 290 |
gen.t5.to(target_device)
|
| 291 |
gen.vae.to(target_device)
|
|
|
|
| 292 |
|
| 293 |
# Step 2: Since we reverted to bf16 load to avoid PyTorch native dtype mix issues, skip wrapping
|
| 294 |
logger.info("Model weights decompressed to bfloat16 upon load. Skipping dynamic quantization to ensure stability.")
|
|
@@ -298,6 +307,8 @@ def run_generation(text, font, author, num_steps, start_seed, num_images):
|
|
| 298 |
results = []
|
| 299 |
seeds_used = []
|
| 300 |
for i in range(num_images):
|
|
|
|
|
|
|
| 301 |
current_seed = start_seed + i
|
| 302 |
result_img, cond_img = gen.generate(
|
| 303 |
text=text, font_style=font, author=author,
|
|
@@ -308,6 +319,7 @@ def run_generation(text, font, author, num_steps, start_seed, num_images):
|
|
| 308 |
seeds_used.append(current_seed)
|
| 309 |
logger.info(f" Generated image {i+1}/{num_images}")
|
| 310 |
|
|
|
|
| 311 |
return results, seeds_used
|
| 312 |
|
| 313 |
|
|
@@ -338,14 +350,19 @@ def interactive_session(
|
|
| 338 |
author = author_dropdown if author_dropdown != "None (Synthetic / 合成风格)" else None
|
| 339 |
|
| 340 |
# Run generation (includes model loading + FP8 quantization + generation)
|
| 341 |
-
yield "⏳
|
| 342 |
-
progress(0.
|
|
|
|
|
|
|
| 343 |
|
| 344 |
# Hardcode num_steps to 4 for DMD distillation
|
| 345 |
num_steps = 4
|
| 346 |
|
|
|
|
|
|
|
|
|
|
| 347 |
results, seeds_used = run_generation(
|
| 348 |
-
text, font, author, num_steps, start_seed, num_images
|
| 349 |
)
|
| 350 |
|
| 351 |
progress(1.0, desc="完成!")
|
|
|
|
| 254 |
|
| 255 |
|
| 256 |
@spaces.GPU(duration=_get_generation_duration)
|
| 257 |
+
def run_generation(text, font, author, num_steps, start_seed, num_images, progress=gr.Progress()):
|
| 258 |
"""
|
| 259 |
Load model, apply FP8 quantization, and generate images.
|
| 260 |
All in one GPU session to avoid redundant loading.
|
| 261 |
"""
|
| 262 |
+
progress(0.25, desc="准备 GPU 环境 / Preparing GPU runtime...")
|
| 263 |
+
|
| 264 |
# Enable CUDA optimizations inside the worker
|
| 265 |
try:
|
| 266 |
torch.backends.cuda.matmul.allow_tf32 = True
|
|
|
|
| 273 |
pass
|
| 274 |
|
| 275 |
# Step 1: Load model
|
| 276 |
+
progress(0.35, desc="检查模型状态 / Checking model state...")
|
| 277 |
global generator
|
| 278 |
if generator is None:
|
| 279 |
logger.info("Initializing generator lazily inside GPU worker...")
|
| 280 |
+
progress(0.45, desc="首次初始化模型 / First-time model initialization...")
|
| 281 |
generator = init_generator()
|
| 282 |
+
progress(0.65, desc="模型初始化完成 / Model initialization complete")
|
| 283 |
+
else:
|
| 284 |
+
progress(0.55, desc="复用已初始化模型 / Reusing initialized model")
|
| 285 |
|
| 286 |
logger.info("Using initialized generator in ZeroGPU worker.")
|
| 287 |
gen = generator
|
| 288 |
# ZeroGPU automatically maps these to the acquired GPU during execution.
|
| 289 |
# We must also correctly update internal Python attributes so runtime-generated latents go to GPU.
|
| 290 |
target_device = torch.device("cuda")
|
| 291 |
+
progress(0.72, desc="迁移模型到 GPU / Moving model to GPU...")
|
| 292 |
gen.device = target_device
|
| 293 |
if hasattr(gen, "sampler") and gen.sampler is not None:
|
| 294 |
gen.sampler.device = target_device
|
|
|
|
| 297 |
gen.clip.to(target_device)
|
| 298 |
gen.t5.to(target_device)
|
| 299 |
gen.vae.to(target_device)
|
| 300 |
+
progress(0.82, desc="模型就绪,开始生成 / Model ready, starting generation...")
|
| 301 |
|
| 302 |
# Step 2: Since we reverted to bf16 load to avoid PyTorch native dtype mix issues, skip wrapping
|
| 303 |
logger.info("Model weights decompressed to bfloat16 upon load. Skipping dynamic quantization to ensure stability.")
|
|
|
|
| 307 |
results = []
|
| 308 |
seeds_used = []
|
| 309 |
for i in range(num_images):
|
| 310 |
+
loop_progress = 0.82 + ((i + 1) / max(num_images, 1)) * 0.16
|
| 311 |
+
progress(loop_progress, desc=f"生成第 {i+1}/{num_images} 张 / Generating {i+1}/{num_images}")
|
| 312 |
current_seed = start_seed + i
|
| 313 |
result_img, cond_img = gen.generate(
|
| 314 |
text=text, font_style=font, author=author,
|
|
|
|
| 319 |
seeds_used.append(current_seed)
|
| 320 |
logger.info(f" Generated image {i+1}/{num_images}")
|
| 321 |
|
| 322 |
+
progress(1.0, desc="生成完成 / Generation complete")
|
| 323 |
return results, seeds_used
|
| 324 |
|
| 325 |
|
|
|
|
| 350 |
author = author_dropdown if author_dropdown != "None (Synthetic / 合成风格)" else None
|
| 351 |
|
| 352 |
# Run generation (includes model loading + FP8 quantization + generation)
|
| 353 |
+
yield "⏳ 队列中:准备任务... / Queued: preparing task...", []
|
| 354 |
+
progress(0.05, desc="校验输入参数 / Validating inputs...")
|
| 355 |
+
yield "⏳ 输入已通过校验,等待 GPU 分配... / Inputs validated, waiting for GPU allocation...", []
|
| 356 |
+
progress(0.15, desc="等待 GPU 资源 / Waiting for GPU allocation...")
|
| 357 |
|
| 358 |
# Hardcode num_steps to 4 for DMD distillation
|
| 359 |
num_steps = 4
|
| 360 |
|
| 361 |
+
yield "⏳ 已分配 GPU,正在初始化与生成... / GPU allocated, initializing and generating...", []
|
| 362 |
+
progress(0.22, desc="进入生成阶段 / Entering generation stage...")
|
| 363 |
+
|
| 364 |
results, seeds_used = run_generation(
|
| 365 |
+
text, font, author, num_steps, start_seed, num_images, progress
|
| 366 |
)
|
| 367 |
|
| 368 |
progress(1.0, desc="完成!")
|