Spaces:

CocoBro
/

MMEdit

Sleeping

App Files Community

CocoBro committed on Dec 22, 2025

Commit

f3f0643

1 Parent(s): 3d58ae6

test

Browse files

Files changed (1) hide show

app.py +69 -59

app.py CHANGED Viewed

@@ -270,76 +270,86 @@ def run_edit(
 ) -> Tuple[Optional[str], str]:
     import torch
     if audio_file is None or not Path(audio_file).exists():
         return None, "Error: please upload an audio file."
     caption = (caption or "").strip()
     if not caption:
         return None, "Error: caption is empty."
-    # 1) 取 CPU 缓存
     model_cpu, scheduler, target_sr = load_pipeline_cpu()
     try:
         if not torch.cuda.is_available():
-            return None, "Error: ZeroGPU did not allocate CUDA. Please retry or restart Space."
     except Exception as e:
-        logger.exception("run_edit failed")
-        return None, f"Error: {type(e).__name__}: {e}"
-    # 2) ZeroGPU 进入 GPU 区域后，cuda 才会 available
-    if not torch.cuda.is_available():
-        return None, "Error: ZeroGPU did not allocate CUDA. Please retry or check Space hardware."
-    device = torch.device("cuda")
-    logger.info(f"[GPU] torch.cuda.is_available={torch.cuda.is_available()}, device={device}")
-    # 3) 把模型搬到 GPU（临时）
-    model = model_cpu.to(device).eval()
-    # seed
-    seed = int(seed)
-    torch.manual_seed(seed)
-    np.random.seed(seed)
-    # audio preprocess
-    wav = load_and_process_audio(audio_file, target_sr=target_sr).to(device)
-    batch = {
-        "audio_id": [Path(audio_file).stem],
-        "content": [{"audio": wav, "caption": caption}],
-        "task": ["audio_editing"],
-    }
-    kwargs = {
-        "num_steps": int(num_steps),
-        "guidance_scale": float(guidance_scale),
-        "guidance_rescale": float(guidance_rescale),
-        "use_gt_duration": False,
-        "mask_time_aligned_content": False,
-    }
-    kwargs.update(batch)
-    t0 = time.time()
-    with torch.no_grad():
-        with amp_autocast(device):
-            out = model.inference(scheduler=scheduler, **kwargs)
-    dt = time.time() - t0
-    out_audio = out[0, 0].detach().float().cpu().numpy()
-    out_path = OUTPUT_DIR / f"{Path(audio_file).stem}_edited.wav"
-    sf.write(str(out_path), out_audio, samplerate=target_sr)
-    # 4) 推完立刻把模型搬回 CPU（避免缓存残留 cuda tensor）
-    model_cpu = model.to("cpu")
-    del model
-    torch.cuda.empty_cache()
-    cache_key = f"{MMEDIT_REPO_ID}@{MMEDIT_REVISION}::{QWEN_REPO_ID}@{QWEN_REVISION}"
-    _PIPELINE_CACHE[cache_key] = (model_cpu, scheduler, target_sr)
-    return str(out_path), f"OK | saved={out_path.name} | time={dt:.2f}s | sr={target_sr} | seed={seed}"
 # ---------------------------------------------------------
@@ -358,7 +368,7 @@ def build_demo():
                 gr.Examples(
                     label="example inputs",
                     examples=[
-                        ["./Ym8O802VvJes.wav", "Mix in dog barking in the middle."],
                     ],
                     inputs=[audio_in, caption],
                     cache_examples=False,

 ) -> Tuple[Optional[str], str]:
     import torch
+    # 1. 基础检查
     if audio_file is None or not Path(audio_file).exists():
         return None, "Error: please upload an audio file."
     caption = (caption or "").strip()
     if not caption:
         return None, "Error: caption is empty."
+    # 2. 获取缓存模型
+    # 注意：此时 model_cpu 在 CPU 上
     model_cpu, scheduler, target_sr = load_pipeline_cpu()
+    # 使用 try-finally 确保无论是否出错，最后都把模型搬回 CPU
+    # 使用 try-except 确保捕获所有推理错误，打印日志
     try:
+        # --- 检查 GPU ---
         if not torch.cuda.is_available():
+            return None, "Error: ZeroGPU did not allocate CUDA."
+        device = torch.device("cuda")
+        logger.info(f"[GPU] Assigned device: {device}")
+        # --- 关键修改：模型上 GPU ---
+        # model_cpu.to(device) 是原位操作！会修改全局缓存！
+        # 所以必须在 finally 里搬回去，或者在这里使用深拷贝（深拷贝太慢，建议搬回去）
+        model = model_cpu.to(device).eval()
+        # --- 数据预处理 ---
+        seed = int(seed)
+        torch.manual_seed(seed)
+        np.random.seed(seed)
+        # 加载音频并转到 GPU
+        wav = load_and_process_audio(audio_file, target_sr=target_sr).to(device)
+        batch = {
+            "audio_id": [Path(audio_file).stem],
+            "content": [{"audio": wav, "caption": caption}],
+            "task": ["audio_editing"],
+        }
+        kwargs = {
+            "num_steps": int(num_steps),
+            "guidance_scale": float(guidance_scale),
+            "guidance_rescale": float(guidance_rescale),
+            "use_gt_duration": False,
+            "mask_time_aligned_content": False,
+        }
+        kwargs.update(batch)
+        # --- 推理 ---
+        t0 = time.time()
+        with torch.no_grad():
+            with amp_autocast(device):
+                # 这里的报错现在能被捕获了
+                out = model.inference(scheduler=scheduler, **kwargs)
+        dt = time.time() - t0
+        # --- 后处理 ---
+        out_audio = out[0, 0].detach().float().cpu().numpy()
+        out_path = OUTPUT_DIR / f"{Path(audio_file).stem}_edited.wav"
+        sf.write(str(out_path), out_audio, samplerate=target_sr)
+        return str(out_path), f"OK | time={dt:.2f}s | seed={seed}"
     except Exception as e:
+        # 这里会打印完整的堆栈信息，让你看到真正的报错原因
+        logger.exception("Error during inference")
+        return None, f"Runtime Error: {str(e)}"
+    finally:
+        # --- 关键修改：清理现场 ---
+        # 无论 try 里面是否成功，这里都会执行
+        # 必须把模型搬回 CPU，否则全局缓存 _PIPELINE_CACHE 将指向损坏的 CUDA 地址
+        if 'model_cpu' in locals() and model_cpu is not None:
+            logger.info("Moving model back to CPU to preserve cache integrity...")
+            model_cpu.to("cpu")
+        # 强制清理显存
+        torch.cuda.empty_cache()
 # ---------------------------------------------------------
                 gr.Examples(
                     label="example inputs",
                     examples=[
+                        ["./Ym8O802VvJes.wav", "Mix in dog barking around the middle."],
                     ],
                     inputs=[audio_in, caption],
                     cache_examples=False,