Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

BoxOfColors Claude Opus 4.6 committed 3 days ago

Commit

c06e566

1 Parent(s): 13cc4e6

refactor: consolidate duplicated code via MODEL_CONFIGS registry

- Add MODEL_CONFIGS dict as single source of truth for per-model constants
(window_s, sr, secs_per_step, load_overhead, tab_prefix, regen_fn, label)
- Replace 6 nearly-identical duration estimators with 2 generic functions:
_estimate_gpu_duration() and _estimate_regen_duration()
- Replace 3 duplicated regen button factory loops (~35 lines each, 40%
duplication) with single _register_regen_handlers() function
- Fix import redundancy: consolidate threading/shutil imports at top of file,
remove duplicate `import threading` at line 310 and inline `import shutil`

Net effect: ~170 lines of duplicated boilerplate eliminated, all model-specific
behavior now parameterized through the registry. Future model additions only need
a new MODEL_CONFIGS entry + model-specific GPU/regen functions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show

app.py +171 -167

app.py CHANGED Viewed

@@ -11,12 +11,13 @@ Supported models
 import os
 import sys
 import json
 import tempfile
 import random
 import threading
 from pathlib import Path
-import time
 import torch
 import numpy as np
 import torchaudio
@@ -118,7 +119,6 @@ _TEMP_DIRS_MAX  = 10       # keep at most this many; older ones get cleaned up
 def _register_tmp_dir(tmp_dir: str) -> str:
     """Register a temp dir so it can be cleaned up when newer ones replace it."""
-    import shutil
     _TEMP_DIRS.append(tmp_dir)
     while len(_TEMP_DIRS) > _TEMP_DIRS_MAX:
         old = _TEMP_DIRS.pop(0)
@@ -305,9 +305,73 @@ HUNYUAN_SECS_PER_STEP  = 0.35  # measured 0.328s/step on H200 (8.3s video, 1 seg
 HUNYUAN_LOAD_OVERHEAD  = 55    # ~55s to load the 10GB XXL model weights into GPU
 GPU_DURATION_CAP       = 300   # hard cap per call — never reserve more than this
 _TARO_CACHE_MAXLEN = 16   # evict oldest entries beyond this limit
 _TARO_INFERENCE_CACHE: dict = {}   # keyed by (video_file, seed, cfg, steps, mode, crossfade_s)
-import threading
 _TARO_CACHE_LOCK = threading.Lock()
@@ -320,16 +384,9 @@ def _taro_calc_max_samples(total_dur_s: float, num_steps: int, crossfade_s: floa
 def _taro_duration(video_file, seed_val, cfg_scale, num_steps, mode,
                    crossfade_s, crossfade_db, num_samples):
-    """Pre-GPU callable — must match _run_taro's input order exactly."""
-    try:
-        total_s = get_video_duration(video_file)
-        n_segs  = len(_build_segments(total_s, TARO_MODEL_DUR, float(crossfade_s)))
-    except Exception:
-        n_segs  = 1
-    secs = int(num_samples) * n_segs * int(num_steps) * TARO_SECS_PER_STEP + TARO_LOAD_OVERHEAD
-    result = min(GPU_DURATION_CAP, max(60, int(secs)))
-    print(f"[duration] TARO: {int(num_samples)}samp × {n_segs}seg × {int(num_steps)}steps → {secs:.0f}s → capped {result}s")
-    return result
 def _taro_infer_segment(
@@ -558,16 +615,9 @@ MMAUDIO_WINDOW = 8.0   # seconds — MMAudio's fixed generation window
 def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
                       cfg_strength, num_steps, crossfade_s, crossfade_db, num_samples):
-    """Pre-GPU callable — must match _run_mmaudio's input order exactly."""
-    try:
-        total_s = get_video_duration(video_file)
-        n_segs  = len(_build_segments(total_s, MMAUDIO_WINDOW, float(crossfade_s)))
-    except Exception:
-        n_segs  = 1
-    secs = int(num_samples) * n_segs * int(num_steps) * MMAUDIO_SECS_PER_STEP + MMAUDIO_LOAD_OVERHEAD
-    result = min(GPU_DURATION_CAP, max(60, int(secs)))
-    print(f"[duration] MMAudio: {int(num_samples)}samp × {n_segs}seg × {int(num_steps)}steps → {secs:.0f}s → capped {result}s")
-    return result
 @spaces.GPU(duration=_mmaudio_duration)
@@ -735,16 +785,9 @@ HUNYUAN_MAX_DUR = 15.0   # seconds
 def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,
                       guidance_scale, num_steps, model_size, crossfade_s, crossfade_db, num_samples):
-    """Pre-GPU callable — must match _run_hunyuan's input order exactly."""
-    try:
-        total_s = get_video_duration(video_file)
-        n_segs  = len(_build_segments(total_s, HUNYUAN_MAX_DUR, float(crossfade_s)))
-    except Exception:
-        n_segs  = 1
-    secs = int(num_samples) * n_segs * int(num_steps) * HUNYUAN_SECS_PER_STEP + HUNYUAN_LOAD_OVERHEAD
-    result = min(GPU_DURATION_CAP, max(60, int(secs)))
-    print(f"[duration] HunyuanFoley: {int(num_samples)}samp × {n_segs}seg × {int(num_steps)}steps → {secs:.0f}s → capped {result}s")
-    return result
 @spaces.GPU(duration=_hunyuan_duration)
@@ -993,10 +1036,7 @@ def _splice_and_save(new_wav, seg_idx, meta, slot_id):
 def _taro_regen_duration(video_file, seg_idx, seg_meta_json,
                          seed_val, cfg_scale, num_steps, mode,
                          crossfade_s, crossfade_db, slot_id=None):
-    secs   = int(num_steps) * TARO_SECS_PER_STEP + TARO_LOAD_OVERHEAD
-    result = min(GPU_DURATION_CAP, max(60, int(secs)))
-    print(f"[duration] TARO regen: 1 seg × {int(num_steps)} steps → {secs:.0f}s → capped {result}s")
-    return result
 @spaces.GPU(duration=_taro_regen_duration)
@@ -1067,10 +1107,7 @@ def regen_taro_segment(video_file, seg_idx, seg_meta_json,
 def _mmaudio_regen_duration(video_file, seg_idx, seg_meta_json,
                              prompt, negative_prompt, seed_val,
                              cfg_strength, num_steps, crossfade_s, crossfade_db, slot_id=None):
-    secs   = int(num_steps) * MMAUDIO_SECS_PER_STEP + MMAUDIO_LOAD_OVERHEAD
-    result = min(GPU_DURATION_CAP, max(60, int(secs)))
-    print(f"[duration] MMAudio regen: 1 seg × {int(num_steps)} steps → {secs:.0f}s → capped {result}s")
-    return result
 @spaces.GPU(duration=_mmaudio_regen_duration)
@@ -1169,10 +1206,7 @@ def _hunyuan_regen_duration(video_file, seg_idx, seg_meta_json,
                              prompt, negative_prompt, seed_val,
                              guidance_scale, num_steps, model_size,
                              crossfade_s, crossfade_db, slot_id=None):
-    secs   = int(num_steps) * HUNYUAN_SECS_PER_STEP + HUNYUAN_LOAD_OVERHEAD
-    result = min(GPU_DURATION_CAP, max(60, int(secs)))
-    print(f"[duration] HunyuanFoley regen: 1 seg × {int(num_steps)} steps → {secs:.0f}s → capped {result}s")
-    return result
 @spaces.GPU(duration=_hunyuan_regen_duration)
@@ -1268,10 +1302,82 @@ def regen_hunyuan_segment(video_file, seg_idx, seg_meta_json,
     return video_path, audio_path, json.dumps(updated_meta), waveform_html
 # ================================================================== #
 #                        SHARED UI HELPERS                            #
 # ================================================================== #
 def _pad_outputs(outputs: list) -> list:
     """Flatten (video, audio, seg_meta) triples and pad to MAX_SLOTS * 3 with None.
@@ -2073,52 +2179,14 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 outputs=taro_slot_grps,
             ))
-            # Per-slot regen handlers for TARO.
-            # JS calls /gradio_api/queue/join directly with fn_index + data array:
-            #   data = [seg_idx, state_json, video_path_or_null, seed, cfg, steps, mode, cf_dur, cf_db]
-            # fn_index is discovered at runtime from gradio_config.dependencies by api_name.
-            # The handlers are registered via a dummy gr.Button click so Gradio assigns them
-            # a stable fn_index and api_name.
-            taro_regen_btns = []
-            for _i in range(MAX_SLOTS):
-                _slot_id = f"taro_{_i}"
-                _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
-                taro_regen_btns.append(_btn)
-                print(f"[startup] registering regen handler for slot {_slot_id}")
-                def _make_taro_regen(_si, _sid):
-                    def _do(seg_idx, state_json, video, seed, cfg, steps, mode, cf_dur, cf_db):
-                        print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
-                        if not state_json:
-                            print(f"[regen TARO] early-exit: state_json empty")
-                            yield gr.update(), gr.update(); return
-                        lock = _get_slot_lock(_sid)
-                        with lock:
-                            print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
-                            state        = json.loads(state_json)
-                            pending_html = _build_regen_pending_html(
-                                state["segments"], int(seg_idx), _sid, ""
-                            )
-                            yield gr.update(), gr.update(value=pending_html)
-                            print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — calling regen_taro_segment")
-                            try:
-                                vid, aud, new_meta_json, html = regen_taro_segment(
-                                    video, int(seg_idx), state_json,
-                                    seed, cfg, steps, mode, cf_dur, cf_db, _sid,
-                                )
-                                print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
-                            except Exception as _e:
-                                print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
-                                raise
-                            yield gr.update(value=vid), gr.update(value=html)
-                    return _do
-                _btn.click(
-                    fn=_make_taro_regen(_i, _slot_id),
-                    inputs=[taro_regen_seg, taro_regen_state,
-                            taro_video, taro_seed, taro_cfg, taro_steps,
-                            taro_mode, taro_cf_dur, taro_cf_db],
-                    outputs=[taro_slot_vids[_i], taro_slot_waves[_i]],
-                    api_name=f"regen_taro_{_i}",
-                )
         # ---------------------------------------------------------- #
         # Tab 2 — MMAudio                                             #
@@ -2167,44 +2235,12 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 outputs=mma_slot_grps,
             ))
-            mma_regen_btns = []
-            for _i in range(MAX_SLOTS):
-                _slot_id = f"mma_{_i}"
-                _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
-                mma_regen_btns.append(_btn)
-                def _make_mma_regen(_si, _sid):
-                    def _do(seg_idx, state_json, video, prompt, neg, seed, cfg, steps, cf_dur, cf_db):
-                        print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
-                        if not state_json:
-                            print(f"[regen MMA] early-exit: state_json empty")
-                            yield gr.update(), gr.update(); return
-                        lock = _get_slot_lock(_sid)
-                        with lock:
-                            state        = json.loads(state_json)
-                            pending_html = _build_regen_pending_html(
-                                state["segments"], int(seg_idx), _sid, ""
-                            )
-                            yield gr.update(), gr.update(value=pending_html)
-                            print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — calling regen_mmaudio_segment")
-                            try:
-                                vid, aud, new_meta_json, html = regen_mmaudio_segment(
-                                    video, int(seg_idx), state_json,
-                                    prompt, neg, seed, cfg, steps, cf_dur, cf_db, _sid,
-                                )
-                                print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
-                            except Exception as _e:
-                                print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
-                                raise
-                            yield gr.update(value=vid), gr.update(value=html)
-                    return _do
-                _btn.click(
-                    fn=_make_mma_regen(_i, _slot_id),
-                    inputs=[mma_regen_seg, mma_regen_state,
-                            mma_video, mma_prompt, mma_neg, mma_seed,
-                            mma_cfg, mma_steps, mma_cf_dur, mma_cf_db],
-                    outputs=[mma_slot_vids[_i], mma_slot_waves[_i]],
-                    api_name=f"regen_mma_{_i}",
-                )
         # ---------------------------------------------------------- #
         # Tab 3 — HunyuanVideoFoley                                   #
@@ -2254,44 +2290,12 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 outputs=hf_slot_grps,
             ))
-            hf_regen_btns = []
-            for _i in range(MAX_SLOTS):
-                _slot_id = f"hf_{_i}"
-                _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
-                hf_regen_btns.append(_btn)
-                def _make_hf_regen(_si, _sid):
-                    def _do(seg_idx, state_json, video, prompt, neg, seed, guidance, steps, size, cf_dur, cf_db):
-                        print(f"[regen HF] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json) if state_json else 0}")
-                        if not state_json:
-                            print(f"[regen HF] early-exit: state_json empty")
-                            yield gr.update(), gr.update(); return
-                        lock = _get_slot_lock(_sid)
-                        with lock:
-                            state        = json.loads(state_json)
-                            pending_html = _build_regen_pending_html(
-                                state["segments"], int(seg_idx), _sid, ""
-                            )
-                            yield gr.update(), gr.update(value=pending_html)
-                            print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — calling regen_hunyuan_segment")
-                            try:
-                                vid, aud, new_meta_json, html = regen_hunyuan_segment(
-                                    video, int(seg_idx), state_json,
-                                    prompt, neg, seed, guidance, steps, size, cf_dur, cf_db, _sid,
-                                )
-                                print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
-                            except Exception as _e:
-                                print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
-                                raise
-                            yield gr.update(value=vid), gr.update(value=html)
-                    return _do
-                _btn.click(
-                    fn=_make_hf_regen(_i, _slot_id),
-                    inputs=[hf_regen_seg, hf_regen_state,
-                            hf_video, hf_prompt, hf_neg, hf_seed,
-                            hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db],
-                    outputs=[hf_slot_vids[_i], hf_slot_waves[_i]],
-                    api_name=f"regen_hf_{_i}",
-                )
     # ---- Cross-tab video sync ----
     _sync = lambda v: (gr.update(value=v), gr.update(value=v))

 import os
 import sys
 import json
+import shutil
 import tempfile
 import random
 import threading
+import time
 from pathlib import Path
 import torch
 import numpy as np
 import torchaudio
 def _register_tmp_dir(tmp_dir: str) -> str:
     """Register a temp dir so it can be cleaned up when newer ones replace it."""
     _TEMP_DIRS.append(tmp_dir)
     while len(_TEMP_DIRS) > _TEMP_DIRS_MAX:
         old = _TEMP_DIRS.pop(0)
 HUNYUAN_LOAD_OVERHEAD  = 55    # ~55s to load the 10GB XXL model weights into GPU
 GPU_DURATION_CAP       = 300   # hard cap per call — never reserve more than this
+# ------------------------------------------------------------------ #
+# Model configuration registry — single source of truth for per-model #
+# constants used by duration estimation, segmentation, and UI.        #
+# ------------------------------------------------------------------ #
+# NOTE: this dict literal is evaluated at import time, so it may only read
+# names that are already bound at this point in the module.  MMAUDIO_WINDOW
+# and HUNYUAN_MAX_DUR are assigned further down the file (next to their
+# model code); reading them here would raise NameError on import, so their
+# values are spelled out below and must be kept in sync with those
+# definitions.
+# NOTE(review): TARO_MODEL_DUR / TARO_SR are assumed to be defined above
+# this point — confirm.
+MODEL_CONFIGS = {
+    "taro": {
+        "window_s":       TARO_MODEL_DUR,       # 8.192 s
+        "sr":             TARO_SR,               # 16000
+        "secs_per_step":  TARO_SECS_PER_STEP,   # 0.05
+        "load_overhead":  TARO_LOAD_OVERHEAD,    # 15
+        "tab_prefix":     "taro",
+        "regen_fn":       None,   # set after function definitions (avoids forward-ref)
+        "label":          "TARO",
+    },
+    "mmaudio": {
+        "window_s":       8.0,                   # == MMAUDIO_WINDOW (assigned later in the file)
+        "sr":             44100,
+        "secs_per_step":  MMAUDIO_SECS_PER_STEP, # 0.25
+        "load_overhead":  MMAUDIO_LOAD_OVERHEAD,  # 15
+        "tab_prefix":     "mma",
+        "regen_fn":       None,   # set after function definitions
+        "label":          "MMAudio",
+    },
+    "hunyuan": {
+        "window_s":       15.0,                   # == HUNYUAN_MAX_DUR (assigned later in the file)
+        "sr":             48000,
+        "secs_per_step":  HUNYUAN_SECS_PER_STEP,  # 0.35
+        "load_overhead":  HUNYUAN_LOAD_OVERHEAD,   # 55
+        "tab_prefix":     "hf",
+        "regen_fn":       None,   # set after function definitions
+        "label":          "HunyuanFoley",
+    },
+}
+def _estimate_gpu_duration(model_key: str, num_samples: int, num_steps: int,
+                           total_dur_s: float = None, crossfade_s: float = 0,
+                           video_file: str = None) -> int:
+    """Generic GPU duration estimator used by all models.
+    Computes: num_samples × n_segs × num_steps × secs_per_step + load_overhead
+    Clamped to [60, GPU_DURATION_CAP].
+    Args:
+        model_key:   key into MODEL_CONFIGS.
+        num_samples: number of samples requested; coerced with int().
+        num_steps:   sampling steps per segment; coerced with int().
+        total_dur_s: video length in seconds; when None it is obtained from
+                     get_video_duration(video_file).
+        crossfade_s: crossfade length handed to _build_segments() as a float.
+        video_file:  video path, only consulted when total_dur_s is None.
+    Returns:
+        Whole seconds of GPU time to reserve.
+    Raises:
+        KeyError: if model_key is not present in MODEL_CONFIGS.
+    """
+    cfg = MODEL_CONFIGS[model_key]
+    try:
+        if total_dur_s is None:
+            total_dur_s = get_video_duration(video_file)
+        n_segs = len(_build_segments(total_dur_s, cfg["window_s"], float(crossfade_s)))
+    except Exception as exc:
+        # Best-effort by design: an unreadable video or bad crossfade value
+        # must not block the request, so assume one segment — but report why
+        # instead of hiding the failure.
+        print(f"[duration] {cfg['label']}: could not count segments ({exc!r}); assuming 1 segment")
+        n_segs = 1
+    secs   = int(num_samples) * n_segs * int(num_steps) * cfg["secs_per_step"] + cfg["load_overhead"]
+    # Floor at 60 s first, then apply the hard cap.
+    result = min(GPU_DURATION_CAP, max(60, int(secs)))
+    print(f"[duration] {cfg['label']}: {int(num_samples)}samp × {n_segs}seg × "
+          f"{int(num_steps)}steps → {secs:.0f}s → capped {result}s")
+    return result
+def _estimate_regen_duration(model_key: str, num_steps: int) -> int:
+    """Estimate GPU seconds needed to regenerate a single segment.
+    Uses num_steps × secs_per_step + load_overhead from MODEL_CONFIGS[model_key];
+    the value is raised to at least 60 and then limited to GPU_DURATION_CAP.
+    """
+    cfg = MODEL_CONFIGS[model_key]
+    per_step = cfg["secs_per_step"]
+    overhead = cfg["load_overhead"]
+    secs = int(num_steps) * per_step + overhead
+    floored = max(60, int(secs))
+    result = min(GPU_DURATION_CAP, floored)
+    print(f"[duration] {cfg['label']} regen: 1 seg × {int(num_steps)} steps → {secs:.0f}s → capped {result}s")
+    return result
 _TARO_CACHE_MAXLEN = 16   # evict oldest entries beyond this limit
 _TARO_INFERENCE_CACHE: dict = {}   # keyed by (video_file, seed, cfg, steps, mode, crossfade_s)
 _TARO_CACHE_LOCK = threading.Lock()
 def _taro_duration(video_file, seed_val, cfg_scale, num_steps, mode,
                    crossfade_s, crossfade_db, num_samples):
+    """Pre-GPU callable for TARO — keep the parameter order identical to _taro_gpu_infer's inputs.
+    Only video_file, num_steps, crossfade_s and num_samples feed the estimate.
+    """
+    return _estimate_gpu_duration(
+        model_key="taro",
+        num_samples=int(num_samples),
+        num_steps=int(num_steps),
+        crossfade_s=crossfade_s,
+        video_file=video_file,
+    )
 def _taro_infer_segment(
 def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
                       cfg_strength, num_steps, crossfade_s, crossfade_db, num_samples):
+    """Pre-GPU callable for MMAudio — keep the parameter order identical to _mmaudio_gpu_infer's inputs.
+    Only video_file, num_steps, crossfade_s and num_samples feed the estimate.
+    """
+    return _estimate_gpu_duration(
+        model_key="mmaudio",
+        num_samples=int(num_samples),
+        num_steps=int(num_steps),
+        crossfade_s=crossfade_s,
+        video_file=video_file,
+    )
 @spaces.GPU(duration=_mmaudio_duration)
 def _hunyuan_duration(video_file, prompt, negative_prompt, seed_val,
                       guidance_scale, num_steps, model_size, crossfade_s, crossfade_db, num_samples):
+    """Pre-GPU callable for HunyuanFoley — keep the parameter order identical to _hunyuan_gpu_infer's inputs.
+    Only video_file, num_steps, crossfade_s and num_samples feed the estimate.
+    """
+    return _estimate_gpu_duration(
+        model_key="hunyuan",
+        num_samples=int(num_samples),
+        num_steps=int(num_steps),
+        crossfade_s=crossfade_s,
+        video_file=video_file,
+    )
 @spaces.GPU(duration=_hunyuan_duration)
 def _taro_regen_duration(video_file, seg_idx, seg_meta_json,
                          seed_val, cfg_scale, num_steps, mode,
                          crossfade_s, crossfade_db, slot_id=None):
+    """Duration callable handed to @spaces.GPU for TARO single-segment regen.
+    Only num_steps affects the estimate; the other parameters are accepted and ignored.
+    """
+    steps = int(num_steps)
+    return _estimate_regen_duration(model_key="taro", num_steps=steps)
 @spaces.GPU(duration=_taro_regen_duration)
 def _mmaudio_regen_duration(video_file, seg_idx, seg_meta_json,
                              prompt, negative_prompt, seed_val,
                              cfg_strength, num_steps, crossfade_s, crossfade_db, slot_id=None):
+    """Duration callable handed to @spaces.GPU for MMAudio single-segment regen.
+    Only num_steps affects the estimate; the other parameters are accepted and ignored.
+    """
+    steps = int(num_steps)
+    return _estimate_regen_duration(model_key="mmaudio", num_steps=steps)
 @spaces.GPU(duration=_mmaudio_regen_duration)
                              prompt, negative_prompt, seed_val,
                              guidance_scale, num_steps, model_size,
                              crossfade_s, crossfade_db, slot_id=None):
+    return _estimate_regen_duration("hunyuan", int(num_steps))
 @spaces.GPU(duration=_hunyuan_regen_duration)
     return video_path, audio_path, json.dumps(updated_meta), waveform_html
+# Wire up regen_fn references now that the functions are defined
+# (each registry entry was created with "regen_fn": None as a placeholder;
+# _register_regen_handlers reads cfg["regen_fn"], so this must run before it).
+MODEL_CONFIGS["taro"]["regen_fn"]    = regen_taro_segment
+MODEL_CONFIGS["mmaudio"]["regen_fn"] = regen_mmaudio_segment
+MODEL_CONFIGS["hunyuan"]["regen_fn"] = regen_hunyuan_segment
 # ================================================================== #
 #                        SHARED UI HELPERS                            #
 # ================================================================== #
+def _register_regen_handlers(tab_prefix, model_key, regen_seg_tb, regen_state_tb,
+                              input_components, slot_vids, slot_waves):
+    """Register per-slot regen button handlers for a model tab.
+    This replaces the three nearly-identical for-loops that previously existed
+    for TARO, MMAudio, and HunyuanFoley tabs.
+    For each of the MAX_SLOTS slots a hidden gr.Button is created and its click
+    event is bound to a generator handler that first pushes a "pending"
+    waveform HTML, then the regenerated video and waveform HTML.
+    Args:
+        tab_prefix:       e.g. "taro", "mma", "hf"
+        model_key:        e.g. "taro", "mmaudio", "hunyuan"
+        regen_seg_tb:     gr.Textbox for seg_idx (render=False)
+        regen_state_tb:   gr.Textbox for state_json (render=False)
+        input_components: list of Gradio input components (video, seed, etc.)
+                          — order must match regen_fn signature after (seg_idx, state_json, video)
+        slot_vids:        list of gr.Video components per slot
+        slot_waves:       list of gr.HTML components per slot
+    Returns:
+        list of hidden gr.Buttons (one per slot)
+    Raises:
+        KeyError: if model_key is not present in MODEL_CONFIGS.
+    """
+    cfg       = MODEL_CONFIGS[model_key]
+    _regen_fn = cfg["regen_fn"]
+    _label    = cfg["label"]
+    def _make_regen(_sid):
+        # Factory so every handler closes over its own slot id.  _label and
+        # _regen_fn are identical for all slots and are read from the
+        # enclosing scope, so the factory is defined once, outside the loop.
+        def _do(seg_idx, state_json, *args):
+            print(f"[regen {_label}] slot={_sid} seg_idx={seg_idx} "
+                  f"state_json_len={len(state_json) if state_json else 0}")
+            if not state_json:
+                print(f"[regen {_label}] early-exit: state_json empty")
+                yield gr.update(), gr.update()
+                return
+            lock = _get_slot_lock(_sid)
+            # The slot lock is held for the whole regen, including while the
+            # pending state is shown.
+            with lock:
+                state        = json.loads(state_json)
+                pending_html = _build_regen_pending_html(
+                    state["segments"], int(seg_idx), _sid, ""
+                )
+                # First update: leave the video untouched, show the pending waveform.
+                yield gr.update(), gr.update(value=pending_html)
+                print(f"[regen {_label}] slot={_sid} seg_idx={seg_idx} — calling regen")
+                try:
+                    # args[0] = video, args[1:] = model-specific params
+                    vid, _aud, _new_meta_json, html = _regen_fn(
+                        args[0], int(seg_idx), state_json, *args[1:], _sid,
+                    )
+                    print(f"[regen {_label}] slot={_sid} seg_idx={seg_idx} — done, vid={vid!r}")
+                except Exception as _e:
+                    # Log for the server console, then re-raise so the caller sees the failure.
+                    print(f"[regen {_label}] slot={_sid} seg_idx={seg_idx} — ERROR: {_e}")
+                    raise
+                yield gr.update(value=vid), gr.update(value=html)
+        return _do
+    btns = []
+    for _i in range(MAX_SLOTS):
+        _slot_id = f"{tab_prefix}_{_i}"
+        _btn = gr.Button(visible=False, elem_id=f"regen_btn_{_slot_id}")
+        btns.append(_btn)
+        print(f"[startup] registering regen handler for slot {_slot_id}")
+        _btn.click(
+            fn=_make_regen(_slot_id),
+            inputs=[regen_seg_tb, regen_state_tb] + input_components,
+            outputs=[slot_vids[_i], slot_waves[_i]],
+            api_name=f"regen_{tab_prefix}_{_i}",
+        )
+    return btns
 def _pad_outputs(outputs: list) -> list:
     """Flatten (video, audio, seg_meta) triples and pad to MAX_SLOTS * 3 with None.
                 outputs=taro_slot_grps,
             ))
+            # Per-slot regen handlers — JS calls /gradio_api/queue/join with
+            # fn_index (by api_name) + data=[seg_idx, state_json, video, ...params].
+            taro_regen_btns = _register_regen_handlers(
+                "taro", "taro", taro_regen_seg, taro_regen_state,
+                [taro_video, taro_seed, taro_cfg, taro_steps,
+                 taro_mode, taro_cf_dur, taro_cf_db],
+                taro_slot_vids, taro_slot_waves,
+            )
         # ---------------------------------------------------------- #
         # Tab 2 — MMAudio                                             #
                 outputs=mma_slot_grps,
             ))
+            mma_regen_btns = _register_regen_handlers(
+                "mma", "mmaudio", mma_regen_seg, mma_regen_state,
+                [mma_video, mma_prompt, mma_neg, mma_seed,
+                 mma_cfg, mma_steps, mma_cf_dur, mma_cf_db],
+                mma_slot_vids, mma_slot_waves,
+            )
         # ---------------------------------------------------------- #
         # Tab 3 — HunyuanVideoFoley                                   #
                 outputs=hf_slot_grps,
             ))
+            hf_regen_btns = _register_regen_handlers(
+                "hf", "hunyuan", hf_regen_seg, hf_regen_state,
+                [hf_video, hf_prompt, hf_neg, hf_seed,
+                 hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db],
+                hf_slot_vids, hf_slot_waves,
+            )
     # ---- Cross-tab video sync ----
     _sync = lambda v: (gr.update(value=v), gr.update(value=v))