Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

App Files Files Community

BoxOfColors committed 6 days ago

Commit

c45a944

1 Parent(s): d2864d0

Fix fireRegen: use correct prototype for textarea native setter

Browse files

Files changed (1) hide show

app.py +87 -69

app.py CHANGED Viewed

@@ -1397,11 +1397,16 @@ def _make_output_slots(tab_prefix: str) -> tuple:
     """Build MAX_SLOTS output groups for one tab.
     Each slot has: video, waveform HTML, hidden regen trigger textbox,
-    hidden JSON state textbox (replaces gr.State to fix Gradio 5 SSR
-    'Too many arguments' caused by gr.State not counting in endpoint outputs).
-    Returns (grps, vids, waveforms, regen_triggers, seg_states).
     """
-    grps, vids, waveforms, regen_triggers, seg_states = [], [], [], [], []
     for i in range(MAX_SLOTS):
         with gr.Group(visible=(i == 0)) as g:
             slot_id = f"{tab_prefix}_{i}"
@@ -1409,11 +1414,7 @@ def _make_output_slots(tab_prefix: str) -> tuple:
             waveforms.append(gr.HTML(
                 value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>",
             ))
-            # Regen trigger: a Textbox that is CSS-hidden (NOT visible=False).
-            # Gradio 5 SSR omits visible=False components from the DOM entirely,
-            # so getElementById() returns null and JS can never fire the event.
-            # By keeping it visible=True but hiding with CSS (elem_classes),
-            # the input element exists in the DOM and JS can write to it.
             regen_triggers.append(gr.Textbox(
                 value="",
                 elem_id=f"regen_trigger_{slot_id}",
@@ -1421,19 +1422,23 @@ def _make_output_slots(tab_prefix: str) -> tuple:
                 label="",
                 show_label=False,
             ))
-            # State textbox: CSS-hidden, has elem_id so JS can READ current state
-            # and embed it in the trigger value (avoids having this component in
-            # both inputs AND outputs of the same .change() handler, which causes
-            # Gradio 5 SSR "Too many arguments" validation errors).
             seg_states.append(gr.Textbox(
                 value="",
-                elem_id=f"seg_state_{slot_id}",
                 elem_classes=["wf-hidden-input"],
                 label="",
                 show_label=False,
             ))
         grps.append(g)
-    return grps, vids, waveforms, regen_triggers, seg_states
 def _unpack_outputs(flat: list, n: int, tab_prefix: str) -> list:
@@ -1555,33 +1560,27 @@ _GLOBAL_JS = """
     const input = el.querySelector('input, textarea');
     if (!input) { console.warn('[fireRegen] no input inside regen_trigger:', slot_id); return; }
-    // Read current seg state JSON from the state textbox so we can embed it
-    // in the trigger value. This avoids having seg_state in BOTH inputs AND
-    // outputs of the .change() handler, which causes Gradio 5 SSR to reject
-    // the call with "Too many arguments provided for the endpoint".
-    const stEl = document.getElementById('seg_state_' + slot_id);
-    const stInput = stEl ? stEl.querySelector('input, textarea') : null;
-    const stateJson = stInput ? stInput.value : '';
-    if (!stateJson) {
-      console.warn('[fireRegen] seg_state is empty for slot', slot_id, '— skipping regen');
-      return;
-    }
-    // Use native setter to bypass React's controlled-input tracking.
-    // We do NOT clear to '' first — that would fire a spurious .change() event
-    // which (as a generator function) causes an SSE stream error that blocks
-    // the real call. Instead we use a timestamp suffix to ensure uniqueness
-    // so repeat clicks on the same segment always look like a new value.
-    const desc = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value')
-              || Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value');
     function setNative(val) {
       if (desc && desc.set) desc.set.call(input, val);
       else input.value = val;
       input.dispatchEvent(new Event('input',  {bubbles: true}));
       input.dispatchEvent(new Event('change', {bubbles: true}));
     }
-    // Encode: "slot_id|seg_idx|ts|{stateJSON}"  (ts = timestamp for uniqueness)
-    const triggerVal = slot_id + '|' + idx + '|' + Date.now() + '|' + stateJson;
     setNative(triggerVal);
     console.log('[fireRegen] fired trigger for', slot_id, 'seg', idx);
@@ -1644,7 +1643,8 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 with gr.Column():
                     (taro_slot_grps, taro_slot_vids,
                      taro_slot_waves, taro_slot_rtrigs,
-                     taro_slot_states) = _make_output_slots("taro")
             for trigger in [taro_video, taro_steps, taro_cf_dur]:
                 trigger.change(
@@ -1681,24 +1681,30 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 outputs=taro_slot_grps,
             ))
             # Per-slot regen trigger wiring for TARO
             for _i, _rtrig in enumerate(taro_slot_rtrigs):
                 _slot_id = f"taro_{_i}"
                 print(f"[startup] registering regen handler for slot {_slot_id}")
                 def _make_taro_regen(_si, _sid):
-                    def _do(trigger_val, video, seed, cfg, steps, mode, cf_dur, cf_db):
-                        print(f"[regen TARO] trigger_val_len={len(trigger_val) if trigger_val else 0} video={video!r}")
                         if not trigger_val:
                             print(f"[regen TARO] early-exit: trigger_val empty")
                             yield gr.update(), gr.update(), gr.update(); return
-                        # Trigger format: "slot_id|seg_idx|timestamp|{stateJSON}"
-                        parts = trigger_val.split("|", 3)
-                        if len(parts) != 4 or parts[0] != _sid:
-                            print(f"[regen TARO] early-exit: parts={parts[:2]} expected slot={_sid!r}")
                             yield gr.update(), gr.update(), gr.update(); return
-                        seg_idx    = int(parts[1])
-                        state_json = parts[3]
-                        print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json)}")
                         lock = _get_slot_lock(_sid)
                         with lock:
                             print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
@@ -1723,7 +1729,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 _rtrig.change(
                     fn=_make_taro_regen(_i, _slot_id),
                     inputs=[_rtrig, taro_video, taro_seed, taro_cfg, taro_steps,
-                            taro_mode, taro_cf_dur, taro_cf_db],
                     outputs=[taro_slot_vids[_i], taro_slot_waves[_i], taro_slot_states[_i]],
                 )
@@ -1747,7 +1753,8 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 with gr.Column():
                     (mma_slot_grps, mma_slot_vids,
                      mma_slot_waves, mma_slot_rtrigs,
-                     mma_slot_states) = _make_output_slots("mma")
             mma_samples.change(
                 fn=_update_slot_visibility,
@@ -1775,22 +1782,27 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 outputs=mma_slot_grps,
             ))
             for _i, _rtrig in enumerate(mma_slot_rtrigs):
                 _slot_id = f"mma_{_i}"
                 def _make_mma_regen(_si, _sid):
-                    def _do(trigger_val, video, prompt, neg, seed, cfg, steps, cf_dur, cf_db):
-                        print(f"[regen MMA] trigger_val_len={len(trigger_val) if trigger_val else 0} video={video!r}")
                         if not trigger_val:
                             print(f"[regen MMA] early-exit: trigger_val empty")
                             yield gr.update(), gr.update(), gr.update(); return
-                        # Trigger format: "slot_id|seg_idx|timestamp|{stateJSON}"
-                        parts = trigger_val.split("|", 3)
-                        if len(parts) != 4 or parts[0] != _sid:
-                            print(f"[regen MMA] early-exit: parts={parts[:2]} expected slot={_sid!r}")
                             yield gr.update(), gr.update(), gr.update(); return
-                        seg_idx    = int(parts[1])
-                        state_json = parts[3]
-                        print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json)}")
                         lock = _get_slot_lock(_sid)
                         with lock:
                             print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
@@ -1815,7 +1827,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 _rtrig.change(
                     fn=_make_mma_regen(_i, _slot_id),
                     inputs=[_rtrig, mma_video, mma_prompt, mma_neg, mma_seed,
-                            mma_cfg, mma_steps, mma_cf_dur, mma_cf_db],
                     outputs=[mma_slot_vids[_i], mma_slot_waves[_i], mma_slot_states[_i]],
                 )
@@ -1840,7 +1852,8 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 with gr.Column():
                     (hf_slot_grps, hf_slot_vids,
                      hf_slot_waves, hf_slot_rtrigs,
-                     hf_slot_states) = _make_output_slots("hf")
             hf_samples.change(
                 fn=_update_slot_visibility,
@@ -1868,22 +1881,27 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 outputs=hf_slot_grps,
             ))
             for _i, _rtrig in enumerate(hf_slot_rtrigs):
                 _slot_id = f"hf_{_i}"
                 def _make_hf_regen(_si, _sid):
-                    def _do(trigger_val, video, prompt, neg, seed, guidance, steps, size, cf_dur, cf_db):
-                        print(f"[regen HF] trigger_val_len={len(trigger_val) if trigger_val else 0} video={video!r}")
                         if not trigger_val:
                             print(f"[regen HF] early-exit: trigger_val empty")
                             yield gr.update(), gr.update(), gr.update(); return
-                        # Trigger format: "slot_id|seg_idx|timestamp|{stateJSON}"
-                        parts = trigger_val.split("|", 3)
-                        if len(parts) != 4 or parts[0] != _sid:
-                            print(f"[regen HF] early-exit: parts={parts[:2]} expected slot={_sid!r}")
                             yield gr.update(), gr.update(), gr.update(); return
-                        seg_idx    = int(parts[1])
-                        state_json = parts[3]
-                        print(f"[regen HF] slot={_sid} seg_idx={seg_idx} state_json_len={len(state_json)}")
                         lock = _get_slot_lock(_sid)
                         with lock:
                             print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
@@ -1908,7 +1926,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 _rtrig.change(
                     fn=_make_hf_regen(_i, _slot_id),
                     inputs=[_rtrig, hf_video, hf_prompt, hf_neg, hf_seed,
-                            hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db],
                     outputs=[hf_slot_vids[_i], hf_slot_waves[_i], hf_slot_states[_i]],
                 )

     """Build MAX_SLOTS output groups for one tab.
     Each slot has: video, waveform HTML, hidden regen trigger textbox,
+    and TWO state textboxes:
+      - seg_states      (write): written by main gen + regen; also an output
+      - seg_state_reads (read):  mirrors seg_states via .change() relay;
+                                 used as input-only for regen handlers so that
+                                 no component ever appears in BOTH inputs AND
+                                 outputs of the same event (which causes Gradio
+                                 5 "Too many arguments" even with SSR disabled).
+    Returns (grps, vids, waveforms, regen_triggers, seg_states, seg_state_reads).
     """
+    grps, vids, waveforms, regen_triggers, seg_states, seg_state_reads = [], [], [], [], [], []
     for i in range(MAX_SLOTS):
         with gr.Group(visible=(i == 0)) as g:
             slot_id = f"{tab_prefix}_{i}"
             waveforms.append(gr.HTML(
                 value="<p style='color:#888;font-size:12px'>Generate audio to see waveform.</p>",
             ))
+            # Regen trigger: CSS-hidden so JS can find and write to it.
             regen_triggers.append(gr.Textbox(
                 value="",
                 elem_id=f"regen_trigger_{slot_id}",
                 label="",
                 show_label=False,
             ))
+            # Write-only state: updated by main gen and regen outputs.
             seg_states.append(gr.Textbox(
                 value="",
+                elem_classes=["wf-hidden-input"],
+                label="",
+                show_label=False,
+            ))
+            # Read-only mirror: fed into regen handler inputs only.
+            # Stays in sync via a .change() relay wired after slot creation.
+            seg_state_reads.append(gr.Textbox(
+                value="",
                 elem_classes=["wf-hidden-input"],
                 label="",
                 show_label=False,
             ))
         grps.append(g)
+    return grps, vids, waveforms, regen_triggers, seg_states, seg_state_reads
 def _unpack_outputs(flat: list, n: int, tab_prefix: str) -> list:
     const input = el.querySelector('input, textarea');
     if (!input) { console.warn('[fireRegen] no input inside regen_trigger:', slot_id); return; }
+    // Use native setter to bypass Svelte's controlled-input tracking.
+    // Timestamp suffix ensures repeat clicks on the same segment always
+    // produce a new value so Svelte's change detection always fires.
+    // State JSON is passed via a separate Gradio input (seg_state_read),
+    // not embedded in the trigger string — Gradio's own state is reliable,
+    // whereas reading the DOM input.value returns '' for Svelte-controlled inputs.
+    // IMPORTANT: Gradio 5 renders Textbox as <textarea>, NOT <input>.
+    // Must use HTMLTextAreaElement.prototype setter — using HTMLInputElement.prototype
+    // on a textarea causes "TypeError: Illegal invocation" and silently aborts.
     function setNative(val) {
+      const proto = input.tagName === 'TEXTAREA'
+        ? HTMLTextAreaElement.prototype
+        : HTMLInputElement.prototype;
+      const desc = Object.getOwnPropertyDescriptor(proto, 'value');
       if (desc && desc.set) desc.set.call(input, val);
       else input.value = val;
       input.dispatchEvent(new Event('input',  {bubbles: true}));
       input.dispatchEvent(new Event('change', {bubbles: true}));
     }
+    // Encode: "slot_id|seg_idx|timestamp"
+    const triggerVal = slot_id + '|' + idx + '|' + Date.now();
     setNative(triggerVal);
     console.log('[fireRegen] fired trigger for', slot_id, 'seg', idx);
                 with gr.Column():
                     (taro_slot_grps, taro_slot_vids,
                      taro_slot_waves, taro_slot_rtrigs,
+                     taro_slot_states,
+                     taro_slot_state_reads) = _make_output_slots("taro")
             for trigger in [taro_video, taro_steps, taro_cf_dur]:
                 trigger.change(
                 outputs=taro_slot_grps,
             ))
+            # Relay: keep seg_state_reads in sync with seg_states (write→read mirror)
+            for _st, _str in zip(taro_slot_states, taro_slot_state_reads):
+                _st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
             # Per-slot regen trigger wiring for TARO
             for _i, _rtrig in enumerate(taro_slot_rtrigs):
                 _slot_id = f"taro_{_i}"
                 print(f"[startup] registering regen handler for slot {_slot_id}")
                 def _make_taro_regen(_si, _sid):
+                    def _do(trigger_val, video, seed, cfg, steps, mode, cf_dur, cf_db, state_json):
+                        print(f"[regen TARO] trigger_val={trigger_val!r} state_json_len={len(state_json) if state_json else 0}")
                         if not trigger_val:
                             print(f"[regen TARO] early-exit: trigger_val empty")
                             yield gr.update(), gr.update(), gr.update(); return
+                        if not state_json:
+                            print(f"[regen TARO] early-exit: state_json empty")
+                            yield gr.update(), gr.update(), gr.update(); return
+                        # Trigger format: "slot_id|seg_idx|timestamp"
+                        parts = trigger_val.split("|", 2)
+                        if len(parts) < 2 or parts[0] != _sid:
+                            print(f"[regen TARO] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
                             yield gr.update(), gr.update(), gr.update(); return
+                        seg_idx = int(parts[1])
+                        print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — acquiring lock")
                         lock = _get_slot_lock(_sid)
                         with lock:
                             print(f"[regen TARO] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
                 _rtrig.change(
                     fn=_make_taro_regen(_i, _slot_id),
                     inputs=[_rtrig, taro_video, taro_seed, taro_cfg, taro_steps,
+                            taro_mode, taro_cf_dur, taro_cf_db, taro_slot_state_reads[_i]],
                     outputs=[taro_slot_vids[_i], taro_slot_waves[_i], taro_slot_states[_i]],
                 )
                 with gr.Column():
                     (mma_slot_grps, mma_slot_vids,
                      mma_slot_waves, mma_slot_rtrigs,
+                     mma_slot_states,
+                     mma_slot_state_reads) = _make_output_slots("mma")
             mma_samples.change(
                 fn=_update_slot_visibility,
                 outputs=mma_slot_grps,
             ))
+            # Relay: keep mma_slot_state_reads in sync with mma_slot_states
+            for _st, _str in zip(mma_slot_states, mma_slot_state_reads):
+                _st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
             for _i, _rtrig in enumerate(mma_slot_rtrigs):
                 _slot_id = f"mma_{_i}"
                 def _make_mma_regen(_si, _sid):
+                    def _do(trigger_val, video, prompt, neg, seed, cfg, steps, cf_dur, cf_db, state_json):
+                        print(f"[regen MMA] trigger_val={trigger_val!r} state_json_len={len(state_json) if state_json else 0}")
                         if not trigger_val:
                             print(f"[regen MMA] early-exit: trigger_val empty")
                             yield gr.update(), gr.update(), gr.update(); return
+                        if not state_json:
+                            print(f"[regen MMA] early-exit: state_json empty")
                             yield gr.update(), gr.update(), gr.update(); return
+                        parts = trigger_val.split("|", 2)
+                        if len(parts) < 2 or parts[0] != _sid:
+                            print(f"[regen MMA] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
+                            yield gr.update(), gr.update(), gr.update(); return
+                        seg_idx = int(parts[1])
+                        print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — acquiring lock")
                         lock = _get_slot_lock(_sid)
                         with lock:
                             print(f"[regen MMA] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
                 _rtrig.change(
                     fn=_make_mma_regen(_i, _slot_id),
                     inputs=[_rtrig, mma_video, mma_prompt, mma_neg, mma_seed,
+                            mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_slot_state_reads[_i]],
                     outputs=[mma_slot_vids[_i], mma_slot_waves[_i], mma_slot_states[_i]],
                 )
                 with gr.Column():
                     (hf_slot_grps, hf_slot_vids,
                      hf_slot_waves, hf_slot_rtrigs,
+                     hf_slot_states,
+                     hf_slot_state_reads) = _make_output_slots("hf")
             hf_samples.change(
                 fn=_update_slot_visibility,
                 outputs=hf_slot_grps,
             ))
+            # Relay: keep hf_slot_state_reads in sync with hf_slot_states
+            for _st, _str in zip(hf_slot_states, hf_slot_state_reads):
+                _st.change(fn=lambda v: v, inputs=[_st], outputs=[_str])
             for _i, _rtrig in enumerate(hf_slot_rtrigs):
                 _slot_id = f"hf_{_i}"
                 def _make_hf_regen(_si, _sid):
+                    def _do(trigger_val, video, prompt, neg, seed, guidance, steps, size, cf_dur, cf_db, state_json):
+                        print(f"[regen HF] trigger_val={trigger_val!r} state_json_len={len(state_json) if state_json else 0}")
                         if not trigger_val:
                             print(f"[regen HF] early-exit: trigger_val empty")
                             yield gr.update(), gr.update(), gr.update(); return
+                        if not state_json:
+                            print(f"[regen HF] early-exit: state_json empty")
+                            yield gr.update(), gr.update(), gr.update(); return
+                        parts = trigger_val.split("|", 2)
+                        if len(parts) < 2 or parts[0] != _sid:
+                            print(f"[regen HF] early-exit: parts[0]={parts[0]!r} expected={_sid!r}")
                             yield gr.update(), gr.update(), gr.update(); return
+                        seg_idx = int(parts[1])
+                        print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — acquiring lock")
                         lock = _get_slot_lock(_sid)
                         with lock:
                             print(f"[regen HF] slot={_sid} seg_idx={seg_idx} — lock acquired, showing spinner")
                 _rtrig.change(
                     fn=_make_hf_regen(_i, _slot_id),
                     inputs=[_rtrig, hf_video, hf_prompt, hf_neg, hf_seed,
+                            hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_slot_state_reads[_i]],
                     outputs=[hf_slot_vids[_i], hf_slot_waves[_i], hf_slot_states[_i]],
                 )