Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

App Files Files Community

BoxOfColors committed 5 days ago

Commit

e0917de

1 Parent(s): 6d105d2

Sync video upload across all three model tabs

Browse files

Files changed (1) hide show

app.py +16 -12

app.py CHANGED Viewed

@@ -776,13 +776,6 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
         "| **HunyuanFoley** | Cinematic foley requiring high fidelity and explicit creative direction — dramatic SFX, layered environmental design, or any scene where you have a clear written description of the desired sound palette. | Quick one-shot clips where you don't want to write a prompt, or raw impact sounds where timing precision matters more than richness. |"
     )
-    # Shared video upload — persists across all model tabs
-    shared_video = gr.Video(label="Input Video", interactive=True)
-    shared_video_state = gr.State(value=None)
-    # Keep state in sync whenever the shared upload changes
-    shared_video.change(fn=lambda v: v, inputs=[shared_video], outputs=[shared_video_state])
     with gr.Tabs():
         # ---------------------------------------------------------- #
@@ -791,6 +784,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
         with gr.Tab("TARO"):
             with gr.Row():
                 with gr.Column():
                     taro_seed    = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
                     taro_cfg     = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5)
                     taro_steps   = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
@@ -803,10 +797,10 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
                 with gr.Column():
                     taro_slot_grps, taro_slot_vids, taro_slot_auds = _make_output_slots()
-            for trigger in [shared_video, taro_steps, taro_cf_dur]:
                 trigger.change(
                     fn=_on_video_upload_taro,
-                    inputs=[shared_video_state, taro_steps, taro_cf_dur],
                     outputs=[taro_samples],
                 )
             taro_samples.change(
@@ -820,7 +814,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
             taro_btn.click(
                 fn=_run_taro,
-                inputs=[shared_video_state, taro_seed, taro_cfg, taro_steps, taro_mode,
                         taro_cf_dur, taro_cf_db, taro_samples],
                 outputs=taro_slot_grps + taro_slot_vids + taro_slot_auds,
             )
@@ -831,6 +825,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
         with gr.Tab("MMAudio"):
             with gr.Row():
                 with gr.Column():
                     mma_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel")
                     mma_neg      = gr.Textbox(label="Negative Prompt", placeholder="music, speech")
                     mma_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
@@ -855,7 +850,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
             mma_btn.click(
                 fn=_run_mmaudio,
-                inputs=[shared_video_state, mma_prompt, mma_neg, mma_seed,
                         mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
                 outputs=mma_slot_grps + mma_slot_vids + mma_slot_auds,
             )
@@ -866,6 +861,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
         with gr.Tab("HunyuanFoley"):
             with gr.Row():
                 with gr.Column():
                     hf_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof")
                     hf_neg      = gr.Textbox(label="Negative Prompt", value="noisy, harsh")
                     hf_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
@@ -891,9 +887,17 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
             hf_btn.click(
                 fn=_run_hunyuan,
-                inputs=[shared_video_state, hf_prompt, hf_neg, hf_seed,
                         hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
                 outputs=hf_slot_grps + hf_slot_vids + hf_slot_auds,
             )
 demo.queue(max_size=10).launch()

         "| **HunyuanFoley** | Cinematic foley requiring high fidelity and explicit creative direction — dramatic SFX, layered environmental design, or any scene where you have a clear written description of the desired sound palette. | Quick one-shot clips where you don't want to write a prompt, or raw impact sounds where timing precision matters more than richness. |"
     )
     with gr.Tabs():
         # ---------------------------------------------------------- #
         with gr.Tab("TARO"):
             with gr.Row():
                 with gr.Column():
+                    taro_video   = gr.Video(label="Input Video")
                     taro_seed    = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
                     taro_cfg     = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5)
                     taro_steps   = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
                 with gr.Column():
                     taro_slot_grps, taro_slot_vids, taro_slot_auds = _make_output_slots()
+            for trigger in [taro_video, taro_steps, taro_cf_dur]:
                 trigger.change(
                     fn=_on_video_upload_taro,
+                    inputs=[taro_video, taro_steps, taro_cf_dur],
                     outputs=[taro_samples],
                 )
             taro_samples.change(
             taro_btn.click(
                 fn=_run_taro,
+                inputs=[taro_video, taro_seed, taro_cfg, taro_steps, taro_mode,
                         taro_cf_dur, taro_cf_db, taro_samples],
                 outputs=taro_slot_grps + taro_slot_vids + taro_slot_auds,
             )
         with gr.Tab("MMAudio"):
             with gr.Row():
                 with gr.Column():
+                    mma_video    = gr.Video(label="Input Video")
                     mma_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel")
                     mma_neg      = gr.Textbox(label="Negative Prompt", placeholder="music, speech")
                     mma_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
             mma_btn.click(
                 fn=_run_mmaudio,
+                inputs=[mma_video, mma_prompt, mma_neg, mma_seed,
                         mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
                 outputs=mma_slot_grps + mma_slot_vids + mma_slot_auds,
             )
         with gr.Tab("HunyuanFoley"):
             with gr.Row():
                 with gr.Column():
+                    hf_video    = gr.Video(label="Input Video")
                     hf_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof")
                     hf_neg      = gr.Textbox(label="Negative Prompt", value="noisy, harsh")
                     hf_seed     = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
             hf_btn.click(
                 fn=_run_hunyuan,
+                inputs=[hf_video, hf_prompt, hf_neg, hf_seed,
                         hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
                 outputs=hf_slot_grps + hf_slot_vids + hf_slot_auds,
             )
+    # ---- Cross-tab video sync ----
+    # When any tab's video changes, push the value to the other two tabs.
+    # Clearing (value=None) also propagates so the X button clears all.
+    _sync = lambda v: (gr.update(value=v), gr.update(value=v))
+    taro_video.change(fn=_sync, inputs=[taro_video], outputs=[mma_video, hf_video])
+    mma_video.change(fn=_sync,  inputs=[mma_video],  outputs=[taro_video, hf_video])
+    hf_video.change(fn=_sync,   inputs=[hf_video],   outputs=[taro_video, mma_video])
 demo.queue(max_size=10).launch()