Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

App Files Files Community

BoxOfColors committed 1 day ago

Commit

00ee17c

1 Parent(s): 9f79da8

Add xregen error logging: try/except in _xregen_dispatch, logging in xregen_mmaudio._run and _mmaudio_gpu_infer to surface GPU worker crashes

Browse files

Files changed (1) hide show

app.py +13 -1

app.py CHANGED Viewed

@@ -1045,6 +1045,7 @@ def _mmaudio_gpu_infer(video_file, prompt, negative_prompt, seed_val,
     GPU window (ffmpeg is CPU-safe here).  This avoids passing pre-extracted
     tmp files that don't exist in the GPU worker's process.
     """
     _ensure_syspath("MMAudio")
     from mmaudio.eval_utils        import generate, load_video
     from mmaudio.model.flow_matching   import FlowMatching
@@ -1777,12 +1778,21 @@ def _xregen_dispatch(state_json: str, seg_idx: int, slot_id: str, infer_fn):
         First:  (gr.update(), gr.update(value=pending_html))  — shown while GPU runs
         Second: (gr.update(value=video_path), gr.update(value=waveform_html))
     """
     meta         = json.loads(state_json)
     pending_html = _build_regen_pending_html(meta["segments"], seg_idx, slot_id, "")
     yield gr.update(), gr.update(value=pending_html)
-    new_wav_raw, src_sr, clip_start_s = infer_fn()
     video_path, waveform_html = _xregen_splice(new_wav_raw, src_sr, meta, seg_idx, slot_id, clip_start_s)
     yield gr.update(value=video_path), gr.update(value=waveform_html)
@@ -1835,12 +1845,14 @@ def xregen_mmaudio(seg_idx, state_json, slot_id,
         clip_start, clip_end, clip_dur = _xregen_clip_window(meta, seg_idx, MMAUDIO_WINDOW)
         source_video = _resolve_silent_video(meta)
         sub_segs = _build_segments(clip_dur, MMAUDIO_WINDOW, float(crossfade_s))
         # Pass clip_start_s/clip_dur_s so the GPU fn extracts the clip internally —
         # pre-extracted tmp files are invisible to the ZeroGPU worker process.
         results = _mmaudio_gpu_infer(source_video, prompt, negative_prompt, seed_val,
                                      cfg_strength, num_steps, crossfade_s, crossfade_db, 1,
                                      source_video, json.dumps(sub_segs),
                                      clip_start, clip_dur)
         seg_wavs, sr = results[0]
         wav = _stitch_wavs(seg_wavs, float(crossfade_s), float(crossfade_db),
                            clip_dur, sr, sub_segs)

     GPU window (ffmpeg is CPU-safe here).  This avoids passing pre-extracted
     tmp files that don't exist in the GPU worker's process.
     """
+    print(f"[_mmaudio_gpu_infer] START video={video_file!r} silent={silent_video!r} clip_start={clip_start_s} clip_dur={clip_dur_s} num_samples={num_samples}")
     _ensure_syspath("MMAudio")
     from mmaudio.eval_utils        import generate, load_video
     from mmaudio.model.flow_matching   import FlowMatching
         First:  (gr.update(), gr.update(value=pending_html))  — shown while GPU runs
         Second: (gr.update(value=video_path), gr.update(value=waveform_html))
     """
+    import traceback as _tb
     meta         = json.loads(state_json)
     pending_html = _build_regen_pending_html(meta["segments"], seg_idx, slot_id, "")
     yield gr.update(), gr.update(value=pending_html)
+    print(f"[_xregen_dispatch] slot={slot_id} seg={seg_idx} calling infer_fn={infer_fn}")
+    try:
+        new_wav_raw, src_sr, clip_start_s = infer_fn()
+        print(f"[_xregen_dispatch] infer_fn returned wav shape={getattr(new_wav_raw,'shape',None)} sr={src_sr} clip_start={clip_start_s}")
+    except Exception as _e:
+        print(f"[_xregen_dispatch] EXCEPTION in infer_fn: {_e}")
+        _tb.print_exc()
+        raise
     video_path, waveform_html = _xregen_splice(new_wav_raw, src_sr, meta, seg_idx, slot_id, clip_start_s)
+    print(f"[_xregen_dispatch] splice done video_path={video_path!r}")
     yield gr.update(value=video_path), gr.update(value=waveform_html)
         clip_start, clip_end, clip_dur = _xregen_clip_window(meta, seg_idx, MMAUDIO_WINDOW)
         source_video = _resolve_silent_video(meta)
         sub_segs = _build_segments(clip_dur, MMAUDIO_WINDOW, float(crossfade_s))
+        print(f"[xregen_mmaudio._run] clip_start={clip_start} clip_dur={clip_dur} source_video={source_video!r} sub_segs={sub_segs}")
         # Pass clip_start_s/clip_dur_s so the GPU fn extracts the clip internally —
         # pre-extracted tmp files are invisible to the ZeroGPU worker process.
         results = _mmaudio_gpu_infer(source_video, prompt, negative_prompt, seed_val,
                                      cfg_strength, num_steps, crossfade_s, crossfade_db, 1,
                                      source_video, json.dumps(sub_segs),
                                      clip_start, clip_dur)
+        print(f"[xregen_mmaudio._run] gpu_infer returned {len(results)} results")
         seg_wavs, sr = results[0]
         wav = _stitch_wavs(seg_wavs, float(crossfade_s), float(crossfade_db),
                            clip_dur, sr, sub_segs)