Spaces:

CocoBro
/

MMEdit

Sleeping

App Files Community

CocoBro committed on Dec 22, 2025

Commit

77f1338

1 Parent(s): 47b5ec4

fix load gpu

Browse files

Files changed (1) hide show

app.py +95 -210

app.py CHANGED Viewed

@@ -172,24 +172,6 @@ def patch_paths_in_exp_config(exp_cfg: Dict[str, Any], repo_root: Path, qwen_roo
 # Scheduler（与你 exp_cfg.model.noise_scheduler_name 对齐）
 # 带 fallback：避免 404
 # ---------------------------------------------------------
-def build_scheduler(exp_cfg: Dict[str, Any]):
-    import diffusers.schedulers as noise_schedulers
-    name = exp_cfg["model"].get("noise_scheduler_name", "stabilityai/stable-diffusion-2-1")
-    try:
-        scheduler = noise_schedulers.DDIMScheduler.from_pretrained(name, subfolder="scheduler", token=HF_TOKEN)
-        return scheduler
-    except Exception as e:
-        logger.warning(f"DDIMScheduler.from_pretrained failed for '{name}', fallback. err={e}")
-        return noise_schedulers.DDIMScheduler(
-            num_train_timesteps=1000,
-            beta_start=0.00085,
-            beta_end=0.012,
-            beta_schedule="scaled_linear",
-            clip_sample=False,
-            set_alpha_to_one=False,
-            steps_offset=1,
-        )
 def amp_autocast(device):
@@ -205,157 +187,88 @@ def amp_autocast(device):
     return torch.autocast("cuda", dtype=dtype, enabled=True)
-# ---------------------------------------------------------
-# 冷启动：load+cache pipeline（缓存 CPU 上的 model）
-# ---------------------------------------------------------
-# def load_pipeline_cpu() -> Tuple[object, object, int]:
-#     # 延迟导入（避免启动阶段触发 CUDA 初始化）
-#     import torch
-#     import hydra
-#     from omegaconf import OmegaConf
-#     from safetensors.torch import load_file
-#     # 你的项目依赖也延迟导入
-#     from models.common import LoadPretrainedBase
-#     from utils.config import register_omegaconf_resolvers
-#     register_omegaconf_resolvers()
-#     cache_key = f"{MMEDIT_REPO_ID}@{MMEDIT_REVISION}::{QWEN_REPO_ID}@{QWEN_REVISION}"
-#     if cache_key in _PIPELINE_CACHE:
-#         return _PIPELINE_CACHE[cache_key]
-#     repo_root, qwen_root = resolve_model_dirs()
-#     assert_repo_layout(repo_root)
-#     logger.info(f"repo_root = {repo_root}")
-#     logger.info(f"qwen_root = {qwen_root}")
-#     exp_cfg = OmegaConf.load(repo_root / "config.yaml")
-#     exp_cfg = OmegaConf.to_container(exp_cfg, resolve=True)
-#     patch_paths_in_exp_config(exp_cfg, repo_root, qwen_root)
-#     logger.info(f"patched pretrained_ckpt = {exp_cfg['model']['autoencoder'].get('pretrained_ckpt')}")
-#     logger.info(f"patched qwen model_path = {exp_cfg['model']['content_encoder']['text_encoder'].get('model_path')}")
-#     model: LoadPretrainedBase = hydra.utils.instantiate(exp_cfg["model"], _convert_="all")
-#     ckpt_path = repo_root / "model.safetensors"
-#     sd = load_file(str(ckpt_path))
-#     model.load_pretrained(sd)
-#     logger.info(f"Model loaded from safetensors: {ckpt_path}")
-#     # ZeroGPU：缓存 CPU 版
-#     model = model.to(torch.device("cpu")).eval()
-#     scheduler = build_scheduler(exp_cfg)
-#     target_sr = int(exp_cfg.get("sample_rate", 24000))
-#     _PIPELINE_CACHE[cache_key] = (model, scheduler, target_sr)
-#     logger.info("CPU pipeline loaded and cached.")
-#     return model, scheduler, target_sr
-def load_pipeline_cpu():
-    # 延迟导入
     import torch
     import hydra
     from omegaconf import OmegaConf
     from safetensors.torch import load_file
-    # 尝试导入项目模块
     try:
         from utils.config import register_omegaconf_resolvers
         register_omegaconf_resolvers()
     except: pass
-    cache_key = f"{MMEDIT_REPO_ID}@{MMEDIT_REVISION}::{QWEN_REPO_ID}@{QWEN_REVISION}"
-    if cache_key in _PIPELINE_CACHE: return _PIPELINE_CACHE[cache_key]
-    repo_root, qwen_root = resolve_model_dirs()
-    # 加载 Config
-    exp_cfg = OmegaConf.to_container(OmegaConf.load(repo_root / "config.yaml"), resolve=True)
-    # 路径修复
-    vae_ckpt = exp_cfg["model"]["autoencoder"].get("pretrained_ckpt", "")
-    if vae_ckpt:
-        potential_paths = [repo_root / "vae" / Path(vae_ckpt).name, repo_root / Path(vae_ckpt).name]
-        for p in potential_paths:
-            if p.exists():
-                exp_cfg["model"]["autoencoder"]["pretrained_ckpt"] = str(p)
-                break
-    exp_cfg["model"]["content_encoder"]["text_encoder"]["model_path"] = str(qwen_root)
-    logger.info("Instantiating model...")
-    model = hydra.utils.instantiate(exp_cfg["model"], _convert_="all")
-    # 加载权重并立即释放 state_dict 内存
-    ckpt_path = str(repo_root / "model.safetensors")
-    logger.info(f"Loading state_dict from {ckpt_path}...")
-    sd = load_file(ckpt_path)
-    logger.info(f"Model loaded from safetensors: {ckpt_path}")
-    model.load_pretrained(sd)
-    del sd  # <--- 关键：立即删除 state_dict 释放 20GB+ 内存
-    gc.collect() # <--- 关键：强制回收
-    # 确保在 CPU
-    model = model.to("cpu").eval()
-    # Scheduler
-    import diffusers.schedulers as noise_schedulers
-    try:
-        scheduler = noise_schedulers.DDIMScheduler.from_pretrained(
-            exp_cfg["model"].get("noise_scheduler_name", "stabilityai/stable-diffusion-2-1"),
-            subfolder="scheduler", token=HF_TOKEN
-        )
-    except:
-        scheduler = noise_schedulers.DDIMScheduler(num_train_timesteps=1000)
-    target_sr = int(exp_cfg.get("sample_rate", 24000))
-    _PIPELINE_CACHE[cache_key] = (model, scheduler, target_sr)
-    return model, scheduler, target_sr
-# ---------------------------------------------------------
-# 推理：audio + caption -> edited audio
-# ZeroGPU：必须用 @spaces.GPU
-# ---------------------------------------------------------
-# ---------------------------------------------------------
-@spaces.GPU
-def run_edit(audio_file, caption, num_steps, guidance_scale, guidance_rescale, seed):
-    import torch
     if not audio_file: return None, "Please upload audio."
-    if not caption: return None, "Please input caption."
-    # 局部变量初始化，防 finally 报错
-    model_cpu = None
-    model_on_gpu = None
     try:
-        # --- 1. 将加载过程放入 try 块保护 ---
-        logger.info("Loading pipeline (CPU)...")
-        model_cpu, scheduler, target_sr = load_pipeline_cpu()
-        # --- 2. 准备 GPU 环境 ---
-        device = torch.device("cuda")
-        dtype = torch.float16
-        if not torch.cuda.is_available():
-            raise RuntimeError("ZeroGPU assigned but CUDA not found!")
-        # --- 3. 搬运 (CPU -> GPU) ---
         gc.collect()
-        torch.cuda.empty_cache()
-        logger.info("Moving model to GPU...")
-        # 原位操作，finally 必须移回
-        model_on_gpu = model_cpu.to(device, dtype=dtype)
-        # --- 4. 数据处理 ---
         torch.manual_seed(int(seed))
         np.random.seed(int(seed))
-        wav = load_and_process_audio(audio_file, target_sr).to(device, dtype=dtype)
         batch = {
             "audio_id": [Path(audio_file).stem],
@@ -368,13 +281,14 @@ def run_edit(audio_file, caption, num_steps, guidance_scale, guidance_rescale, s
             "mask_time_aligned_content": False
         }
-        # --- 5. 推理 ---
-        logger.info("Running inference...")
         t0 = time.time()
-        with torch.no_grad(), torch.autocast("cuda", dtype=dtype):
-            out = model_on_gpu.inference(scheduler=scheduler, **batch)
-        # --- 6. 保存 ---
         out_audio = out[0, 0].detach().float().cpu().numpy()
         out_path = OUTPUT_DIR / f"{Path(audio_file).stem}_edited.wav"
         sf.write(str(out_path), out_audio, samplerate=target_sr)
@@ -382,80 +296,51 @@ def run_edit(audio_file, caption, num_steps, guidance_scale, guidance_rescale, s
         return str(out_path), f"Success | {time.time()-t0:.2f}s"
     except Exception as e:
-        # 🔥 现在你可以看到真正的报错了！
-        err_msg = traceback.format_exc()
-        logger.error(f"❌ ERROR:\n{err_msg}")
-        return None, f"Error: {str(e)}\n(Check Logs for Traceback)"
     finally:
-        # --- 7. 还原现场 ---
-        logger.info("Restoring CPU state...")
-        try:
-            if model_cpu is not None:
-                model_cpu.to("cpu")
-        except Exception as e:
-            logger.error(f"Restore failed: {e}")
-        if model_on_gpu is not None: del model_on_gpu
         torch.cuda.empty_cache()
         gc.collect()
-# ---------------------------------------------------------
 # UI
-# ---------------------------------------------------------
 def build_demo():
-    with gr.Blocks(title="MMEdit (ZeroGPU)") as demo:
-        gr.Markdown("# MMEdit ZeroGPU（audio + caption → edited audio）")
         with gr.Row():
             with gr.Column():
-                audio_in = gr.Audio(label="Input Audio", type="filepath")
-                caption = gr.Textbox(label="Caption (Edit Instruction)", lines=3)
-                # 注意：Space 不建议推大 wav；你可以换成更小的 demo wav
                 gr.Examples(
-                    label="example inputs",
-                    examples=[
-                        ["./Ym8O802VvJes.wav", "Mix in dog barking around the middle."],
-                    ],
                     inputs=[audio_in, caption],
-                    cache_examples=False,
                 )
-                with gr.Row():
-                    num_steps = gr.Slider(1, 100, value=50, step=1, label="num_steps")
-                    guidance_scale = gr.Slider(1.0, 12.0, value=5.0, step=0.5, label="guidance_scale")
                 with gr.Row():
-                    guidance_rescale = gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="guidance_rescale")
-                    seed = gr.Number(value=42, precision=0, label="seed")
-                run_btn = gr.Button("Run Editing", variant="primary")
             with gr.Column():
-                audio_out = gr.Audio(label="Edited Audio", type="filepath")
                 status = gr.Textbox(label="Status")
-        run_btn.click(
-            fn=run_edit,
-            inputs=[audio_in, caption, num_steps, guidance_scale, guidance_rescale, seed],
-            outputs=[audio_out, status],
-        )
-        gr.Markdown(
-            "## 注意事项\n"
-            "1) ZeroGPU 首次点击会分配 GPU，可能稍慢。\n"
-            "2) 如果首次报 cuda 不可用，通常重试一次即可。\n"
-        )
     return demo
 if __name__ == "__main__":
     demo = build_demo()
-    port = int(os.environ.get("PORT", "7860"))
     demo.queue().launch(
-        server_name="0.0.0.0",
-        server_port=port,
-        share=False,
-        ssr_mode=False,
-    )

 # Scheduler（与你 exp_cfg.model.noise_scheduler_name 对齐）
 # 带 fallback：避免 404
 # ---------------------------------------------------------
 def amp_autocast(device):
     return torch.autocast("cuda", dtype=dtype, enabled=True)
+# -----------------------------
+# ZeroGPU 核心任务
+# -----------------------------
+# 学长说的就是这里：所有费资源的操作（加载+推理）都要放在这里面
+@spaces.GPU(duration=150)
+def run_edit(audio_file, caption, num_steps, guidance_scale, guidance_rescale, seed):
+    # 延迟导入，防止全局污染
     import torch
     import hydra
     from omegaconf import OmegaConf
     from safetensors.torch import load_file
+    import diffusers.schedulers as noise_schedulers
+    # 尝试导入项目配置
     try:
         from utils.config import register_omegaconf_resolvers
         register_omegaconf_resolvers()
     except: pass
     if not audio_file: return None, "Please upload audio."
+    # 局部变量，用于 finally 清理
+    model = None
     try:
+        # ==========================================
+        # 1. 就在这里加载模型！利用 ZeroGPU 的大内存
+        # ==========================================
+        logger.info("🚀 Starting ZeroGPU Task...")
+        # 路径准备
+        repo_root, qwen_root = resolve_model_dirs()
+        exp_cfg = OmegaConf.to_container(OmegaConf.load(repo_root / "config.yaml"), resolve=True)
+        # 路径修复逻辑
+        vae_ckpt = exp_cfg["model"]["autoencoder"].get("pretrained_ckpt", "")
+        if vae_ckpt:
+            p1 = repo_root / "vae" / Path(vae_ckpt).name
+            p2 = repo_root / Path(vae_ckpt).name
+            if p1.exists(): exp_cfg["model"]["autoencoder"]["pretrained_ckpt"] = str(p1)
+            elif p2.exists(): exp_cfg["model"]["autoencoder"]["pretrained_ckpt"] = str(p2)
+        exp_cfg["model"]["content_encoder"]["text_encoder"]["model_path"] = str(qwen_root)
+        # 实例化模型 (此时消耗大量 CPU 内存，但 ZeroGPU 环境扛得住)
+        logger.info("Instantiating model (Hydra)...")
+        model = hydra.utils.instantiate(exp_cfg["model"], _convert_="all")
+        # 加载权重
+        ckpt_path = str(repo_root / "model.safetensors")
+        logger.info(f"Loading weights from {ckpt_path}...")
+        sd = load_file(ckpt_path)
+        model.load_pretrained(sd)
+        del sd # 立即释放
         gc.collect()
+        # ==========================================
+        # 2. 立即转到 GPU (FP16)
+        # ==========================================
+        device = torch.device("cuda")
+        logger.info("Moving model to CUDA (FP16)...")
+        # 这一步将模型送入显卡
+        model = model.to(device, dtype=torch.float16).eval()
+        # Scheduler
+        try:
+            scheduler = noise_schedulers.DDIMScheduler.from_pretrained(
+                exp_cfg["model"].get("noise_scheduler_name", ""),
+                subfolder="scheduler", token=HF_TOKEN
+            )
+        except:
+            scheduler = noise_schedulers.DDIMScheduler(num_train_timesteps=1000)
+        # ==========================================
+        # 3. 开始推理
+        # ==========================================
+        target_sr = int(exp_cfg.get("sample_rate", 24000))
         torch.manual_seed(int(seed))
         np.random.seed(int(seed))
+        wav = load_and_process_audio(audio_file, target_sr).to(device, dtype=torch.float16)
         batch = {
             "audio_id": [Path(audio_file).stem],
             "mask_time_aligned_content": False
         }
+        logger.info("Inference running...")
         t0 = time.time()
+        with torch.no_grad(), torch.autocast("cuda", dtype=torch.float16):
+            out = model.inference(scheduler=scheduler, **batch)
+        # ==========================================
+        # 4. 保存结果
+        # ==========================================
         out_audio = out[0, 0].detach().float().cpu().numpy()
         out_path = OUTPUT_DIR / f"{Path(audio_file).stem}_edited.wav"
         sf.write(str(out_path), out_audio, samplerate=target_sr)
         return str(out_path), f"Success | {time.time()-t0:.2f}s"
     except Exception as e:
+        err = traceback.format_exc()
+        logger.error(f"❌ ERROR:\n{err}")
+        return None, f"Runtime Error: {e}"
     finally:
+        # 强制清理，防止下一次任务显存不够
+        logger.info("Cleaning up...")
+        if model is not None: del model
         torch.cuda.empty_cache()
         gc.collect()
+# -----------------------------
 # UI
+# -----------------------------
 def build_demo():
+    with gr.Blocks(title="MMEdit") as demo:
+        gr.Markdown("# MMEdit ZeroGPU (Direct Load)")
         with gr.Row():
             with gr.Column():
+                audio_in = gr.Audio(label="Input", type="filepath")
+                caption = gr.Textbox(label="Instruction", lines=3)
                 gr.Examples(
+                    label="Examples",
+                    examples=[["./Ym8O802VvJes.wav", "Mix in dog barking around the middle."]],
                     inputs=[audio_in, caption],
                 )
                 with gr.Row():
+                    num_steps = gr.Slider(10, 100, 50, step=1, label="Steps")
+                    guidance_scale = gr.Slider(1.0, 12.0, 5.0, step=0.5, label="Guidance")
+                    guidance_rescale = gr.Slider(0.0, 1.0, 0.5, step=0.05, label="Rescale")
+                    seed = gr.Number(42, label="Seed")
+                run_btn = gr.Button("Run", variant="primary")
             with gr.Column():
+                out = gr.Audio(label="Output")
                 status = gr.Textbox(label="Status")
+        run_btn.click(run_edit, [audio_in, caption, num_steps, guidance_scale, guidance_rescale, seed], [out, status])
     return demo
 if __name__ == "__main__":
     demo = build_demo()
+    # 必须 ssr_mode=False
     demo.queue().launch(
+        server_name="0.0.0.0",
+        server_port=int(os.environ.get("PORT", 7860)),
+        ssr_mode=False
+    )