CocoBro committed on
Commit
f3f0643
·
1 Parent(s): 3d58ae6
Files changed (1) hide show
  1. app.py +69 -59
app.py CHANGED
@@ -270,76 +270,86 @@ def run_edit(
270
  ) -> Tuple[Optional[str], str]:
271
  import torch
272
 
 
273
  if audio_file is None or not Path(audio_file).exists():
274
  return None, "Error: please upload an audio file."
275
-
276
  caption = (caption or "").strip()
277
  if not caption:
278
  return None, "Error: caption is empty."
279
 
280
- # 1) 取 CPU 缓存
 
281
  model_cpu, scheduler, target_sr = load_pipeline_cpu()
282
 
 
 
283
  try:
 
284
  if not torch.cuda.is_available():
285
- return None, "Error: ZeroGPU did not allocate CUDA. Please retry or restart Space."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  except Exception as e:
288
- logger.exception("run_edit failed")
289
- return None, f"Error: {type(e).__name__}: {e}"
290
-
291
- # 2) ZeroGPU 进入 GPU 区域后,cuda 才会 available
292
- if not torch.cuda.is_available():
293
- return None, "Error: ZeroGPU did not allocate CUDA. Please retry or check Space hardware."
294
-
295
- device = torch.device("cuda")
296
- logger.info(f"[GPU] torch.cuda.is_available={torch.cuda.is_available()}, device={device}")
297
-
298
- # 3) 把模型搬到 GPU(临时)
299
- model = model_cpu.to(device).eval()
300
-
301
- # seed
302
- seed = int(seed)
303
- torch.manual_seed(seed)
304
- np.random.seed(seed)
305
-
306
- # audio preprocess
307
- wav = load_and_process_audio(audio_file, target_sr=target_sr).to(device)
308
-
309
- batch = {
310
- "audio_id": [Path(audio_file).stem],
311
- "content": [{"audio": wav, "caption": caption}],
312
- "task": ["audio_editing"],
313
- }
314
-
315
- kwargs = {
316
- "num_steps": int(num_steps),
317
- "guidance_scale": float(guidance_scale),
318
- "guidance_rescale": float(guidance_rescale),
319
- "use_gt_duration": False,
320
- "mask_time_aligned_content": False,
321
- }
322
- kwargs.update(batch)
323
-
324
- t0 = time.time()
325
- with torch.no_grad():
326
- with amp_autocast(device):
327
- out = model.inference(scheduler=scheduler, **kwargs)
328
- dt = time.time() - t0
329
-
330
- out_audio = out[0, 0].detach().float().cpu().numpy()
331
- out_path = OUTPUT_DIR / f"{Path(audio_file).stem}_edited.wav"
332
- sf.write(str(out_path), out_audio, samplerate=target_sr)
333
-
334
- # 4) 推完立刻把模型搬回 CPU(避免缓存残留 cuda tensor)
335
- model_cpu = model.to("cpu")
336
- del model
337
- torch.cuda.empty_cache()
338
-
339
- cache_key = f"{MMEDIT_REPO_ID}@{MMEDIT_REVISION}::{QWEN_REPO_ID}@{QWEN_REVISION}"
340
- _PIPELINE_CACHE[cache_key] = (model_cpu, scheduler, target_sr)
341
-
342
- return str(out_path), f"OK | saved={out_path.name} | time={dt:.2f}s | sr={target_sr} | seed={seed}"
343
 
344
 
345
  # ---------------------------------------------------------
@@ -358,7 +368,7 @@ def build_demo():
358
  gr.Examples(
359
  label="example inputs",
360
  examples=[
361
- ["./Ym8O802VvJes.wav", "Mix in dog barking in the middle."],
362
  ],
363
  inputs=[audio_in, caption],
364
  cache_examples=False,
 
270
  ) -> Tuple[Optional[str], str]:
271
  import torch
272
 
273
+ # 1. 基础检查
274
  if audio_file is None or not Path(audio_file).exists():
275
  return None, "Error: please upload an audio file."
276
+
277
  caption = (caption or "").strip()
278
  if not caption:
279
  return None, "Error: caption is empty."
280
 
281
+ # 2. 获取缓存模型
282
+ # 注意:此时 model_cpu 在 CPU 上
283
  model_cpu, scheduler, target_sr = load_pipeline_cpu()
284
 
285
+ # 使用 try-finally 确保无论是否出错,最后都把模型搬回 CPU
286
+ # 使用 try-except 确保捕获所有推理错误,打印日志
287
  try:
288
+ # --- 检查 GPU ---
289
  if not torch.cuda.is_available():
290
+ return None, "Error: ZeroGPU did not allocate CUDA."
291
+
292
+ device = torch.device("cuda")
293
+ logger.info(f"[GPU] Assigned device: {device}")
294
+
295
+ # --- 关键修改:模型上 GPU ---
296
+ # model_cpu.to(device) 是原位操作!会修改全局缓存!
297
+ # 所以必须在 finally 里搬回去,或者在这里使用深拷贝(深拷贝太慢,建议搬回去)
298
+ model = model_cpu.to(device).eval()
299
+
300
+ # --- 数据预处理 ---
301
+ seed = int(seed)
302
+ torch.manual_seed(seed)
303
+ np.random.seed(seed)
304
+
305
+ # 加载音频并转到 GPU
306
+ wav = load_and_process_audio(audio_file, target_sr=target_sr).to(device)
307
+
308
+ batch = {
309
+ "audio_id": [Path(audio_file).stem],
310
+ "content": [{"audio": wav, "caption": caption}],
311
+ "task": ["audio_editing"],
312
+ }
313
+
314
+ kwargs = {
315
+ "num_steps": int(num_steps),
316
+ "guidance_scale": float(guidance_scale),
317
+ "guidance_rescale": float(guidance_rescale),
318
+ "use_gt_duration": False,
319
+ "mask_time_aligned_content": False,
320
+ }
321
+ kwargs.update(batch)
322
+
323
+ # --- 推理 ---
324
+ t0 = time.time()
325
+ with torch.no_grad():
326
+ with amp_autocast(device):
327
+ # 这里的报错现在能被捕获了
328
+ out = model.inference(scheduler=scheduler, **kwargs)
329
+ dt = time.time() - t0
330
+
331
+ # --- 后处理 ---
332
+ out_audio = out[0, 0].detach().float().cpu().numpy()
333
+ out_path = OUTPUT_DIR / f"{Path(audio_file).stem}_edited.wav"
334
+ sf.write(str(out_path), out_audio, samplerate=target_sr)
335
+
336
+ return str(out_path), f"OK | time={dt:.2f}s | seed={seed}"
337
 
338
  except Exception as e:
339
+ # 这里会打印完整的堆栈信息,让你看到真正的报错原因
340
+ logger.exception("Error during inference")
341
+ return None, f"Runtime Error: {str(e)}"
342
+
343
+ finally:
344
+ # --- 关键修改:清理现场 ---
345
+ # 无论 try 里面是否成功,这里都会执行
346
+ # 必须把模型搬回 CPU,否则全局缓存 _PIPELINE_CACHE 将指向损坏的 CUDA 地址
347
+ if 'model_cpu' in locals() and model_cpu is not None:
348
+ logger.info("Moving model back to CPU to preserve cache integrity...")
349
+ model_cpu.to("cpu")
350
+
351
+ # 强制清理显存
352
+ torch.cuda.empty_cache()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
 
355
  # ---------------------------------------------------------
 
368
  gr.Examples(
369
  label="example inputs",
370
  examples=[
371
+ ["./Ym8O802VvJes.wav", "Mix in dog barking around the middle."],
372
  ],
373
  inputs=[audio_in, caption],
374
  cache_examples=False,