Spaces:

Skywork
/

Unipic3

Running on Zero

App Files Files Community

OrlandoHugBot committed on Jan 27

Commit

96e3744

verified ·

1 Parent(s): 3fe273f

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -2

app.py CHANGED Viewed

@@ -144,6 +144,51 @@ pipe.to('cuda')
 print("✅ Models loaded successfully!")
 # ============================================================
 # GPU 推理函数（只包含实际的推理逻辑）
 # ============================================================
@@ -164,10 +209,23 @@ def generate_image(
     print(f"   Prompt: {prompt[:50]}...")
     print(f"   Steps: {num_steps}, CFG: {true_cfg_scale}, Seed: {seed}")
-    # 关键修复：在真实 GPU 环境中确保所有张量（包括 buffer）都在 GPU 上
-    # ZeroGPU 可能没有正确移动 register_buffer 注册的张量
     pipe.to('cuda')
     # 调试信息：检查模型设备
     print(f"   [DEBUG] text_encoder device: {next(pipe.text_encoder.parameters()).device}")
     print(f"   [DEBUG] transformer device: {next(pipe.transformer.parameters()).device}")

 print("✅ Models loaded successfully!")
def fix_rope_buffers(model, device='cuda'):
    """Move RoPE tensors and any stray buffers of *model* onto *device*.

    The original author added this as a workaround: in a ZeroGPU
    environment, tensors registered through ``register_buffer`` may be left
    behind when the pipeline is moved to the GPU. Every sub-module of
    *model* is visited and the following are relocated when they are not
    already on the target device type:

    - ``inv_freq``: the rotary position embedding frequency tensor,
    - ``cos_cached`` / ``sin_cached``: cos/sin tables that some RoPE
      implementations cache,
    - every buffer registered directly on the sub-module.

    Args:
        model: A ``torch.nn.Module`` whose sub-modules are inspected.
        device: Target device (string or ``torch.device``). Defaults to
            ``'cuda'``, which is what existing callers rely on.

    Returns:
        The number of tensors that were moved.
    """
    # Local import keeps the helper self-contained regardless of how the
    # surrounding module imports torch.
    import torch

    # Compare by device *type* (e.g. "cuda"), not by index, exactly as the
    # original checks did.
    target_type = torch.device(device).type
    rope_attrs = ('inv_freq', 'cos_cached', 'sin_cached')

    def _needs_move(value):
        # Only real tensors can be moved; ignore None and any non-tensor
        # attribute that merely happens to share one of the RoPE names.
        return isinstance(value, torch.Tensor) and value.device.type != target_type

    fixed_count = 0
    for name, module in model.named_modules():
        # RoPE tensors may be registered buffers or plain attributes, so
        # they are looked up by name rather than through named_buffers().
        for attr in rope_attrs:
            value = getattr(module, attr, None)
            if not _needs_move(value):
                continue
            if isinstance(value, torch.nn.Parameter):
                # nn.Module refuses to rebind a Parameter name to a plain
                # Tensor, so move the underlying storage instead.
                value.data = value.data.to(device)
            else:
                setattr(module, attr, value.to(device))
            fixed_count += 1
            print(f"   [FIX] Moved {name}.{attr} to {device}")

        # Generic pass: every remaining buffer owned by this sub-module.
        # Snapshot the iterator first because the loop reassigns buffers.
        for buf_name, buf in list(module.named_buffers(recurse=False)):
            if _needs_move(buf):
                setattr(module, buf_name, buf.to(device))
                fixed_count += 1
                print(f"   [FIX] Moved {name}.{buf_name} to {device}")
    return fixed_count
 # ============================================================
 # GPU 推理函数（只包含实际的推理逻辑）
 # ============================================================
     print(f"   Prompt: {prompt[:50]}...")
     print(f"   Steps: {num_steps}, CFG: {true_cfg_scale}, Seed: {seed}")
+   # Step 1: 移动 pipeline 到 CUDA
     pipe.to('cuda')
+    # Step 2: 关键修复 - 手动修复 RoPE buffer
+    # ZeroGPU 可能没有正确移动 register_buffer 注册的张量
+    print("   [DEBUG] Fixing RoPE buffers...")
+    fixed = 0
+    fixed += fix_rope_buffers(pipe.text_encoder)
+    fixed += fix_rope_buffers(pipe.transformer)
+    fixed += fix_rope_buffers(pipe.vae)
+    print(f"   [DEBUG] Fixed {fixed} buffer(s)")
+    # 调试信息：检查模型设备
+    print(f"   [DEBUG] text_encoder device: {next(pipe.text_encoder.parameters()).device}")
+    print(f"   [DEBUG] transformer device: {next(pipe.transformer.parameters()).device}")
+    print(f"   [DEBUG] vae device: {next(pipe.vae.parameters()).device}")
     # 调试信息：检查模型设备
     print(f"   [DEBUG] text_encoder device: {next(pipe.text_encoder.parameters()).device}")
     print(f"   [DEBUG] transformer device: {next(pipe.transformer.parameters()).device}")