Spaces:
Running on Zero
Commit ·
6d105d2
1
Parent(s): b664ef1
Add shared video upload above tabs — persists across model switches
Browse files

Replaces the three separate per-tab gr.Video components with a single
shared gr.Video above the tabs backed by a gr.State. All three model
tabs now read from the shared state, so switching tabs no longer resets
the uploaded video. User must explicitly clear it with the X button.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -776,6 +776,13 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 776 |
"| **HunyuanFoley** | Cinematic foley requiring high fidelity and explicit creative direction — dramatic SFX, layered environmental design, or any scene where you have a clear written description of the desired sound palette. | Quick one-shot clips where you don't want to write a prompt, or raw impact sounds where timing precision matters more than richness. |"
|
| 777 |
)
|
| 778 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 779 |
with gr.Tabs():
|
| 780 |
|
| 781 |
# ---------------------------------------------------------- #
|
|
@@ -784,7 +791,6 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 784 |
with gr.Tab("TARO"):
|
| 785 |
with gr.Row():
|
| 786 |
with gr.Column():
|
| 787 |
-
taro_video = gr.Video(label="Input Video")
|
| 788 |
taro_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 789 |
taro_cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5)
|
| 790 |
taro_steps = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
|
|
@@ -797,10 +803,10 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 797 |
with gr.Column():
|
| 798 |
taro_slot_grps, taro_slot_vids, taro_slot_auds = _make_output_slots()
|
| 799 |
|
| 800 |
-
for trigger in [
|
| 801 |
trigger.change(
|
| 802 |
fn=_on_video_upload_taro,
|
| 803 |
-
inputs=[
|
| 804 |
outputs=[taro_samples],
|
| 805 |
)
|
| 806 |
taro_samples.change(
|
|
@@ -814,7 +820,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 814 |
|
| 815 |
taro_btn.click(
|
| 816 |
fn=_run_taro,
|
| 817 |
-
inputs=[
|
| 818 |
taro_cf_dur, taro_cf_db, taro_samples],
|
| 819 |
outputs=taro_slot_grps + taro_slot_vids + taro_slot_auds,
|
| 820 |
)
|
|
@@ -825,7 +831,6 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 825 |
with gr.Tab("MMAudio"):
|
| 826 |
with gr.Row():
|
| 827 |
with gr.Column():
|
| 828 |
-
mma_video = gr.Video(label="Input Video")
|
| 829 |
mma_prompt = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel")
|
| 830 |
mma_neg = gr.Textbox(label="Negative Prompt", placeholder="music, speech")
|
| 831 |
mma_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
|
@@ -850,7 +855,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 850 |
|
| 851 |
mma_btn.click(
|
| 852 |
fn=_run_mmaudio,
|
| 853 |
-
inputs=[
|
| 854 |
mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
|
| 855 |
outputs=mma_slot_grps + mma_slot_vids + mma_slot_auds,
|
| 856 |
)
|
|
@@ -861,7 +866,6 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 861 |
with gr.Tab("HunyuanFoley"):
|
| 862 |
with gr.Row():
|
| 863 |
with gr.Column():
|
| 864 |
-
hf_video = gr.Video(label="Input Video")
|
| 865 |
hf_prompt = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof")
|
| 866 |
hf_neg = gr.Textbox(label="Negative Prompt", value="noisy, harsh")
|
| 867 |
hf_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
|
@@ -887,7 +891,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 887 |
|
| 888 |
hf_btn.click(
|
| 889 |
fn=_run_hunyuan,
|
| 890 |
-
inputs=[
|
| 891 |
hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
|
| 892 |
outputs=hf_slot_grps + hf_slot_vids + hf_slot_auds,
|
| 893 |
)
|
|
|
|
| 776 |
"| **HunyuanFoley** | Cinematic foley requiring high fidelity and explicit creative direction — dramatic SFX, layered environmental design, or any scene where you have a clear written description of the desired sound palette. | Quick one-shot clips where you don't want to write a prompt, or raw impact sounds where timing precision matters more than richness. |"
|
| 777 |
)
|
| 778 |
|
| 779 |
+
# Shared video upload — persists across all model tabs
|
| 780 |
+
shared_video = gr.Video(label="Input Video", interactive=True)
|
| 781 |
+
shared_video_state = gr.State(value=None)
|
| 782 |
+
|
| 783 |
+
# Keep state in sync whenever the shared upload changes
|
| 784 |
+
shared_video.change(fn=lambda v: v, inputs=[shared_video], outputs=[shared_video_state])
|
| 785 |
+
|
| 786 |
with gr.Tabs():
|
| 787 |
|
| 788 |
# ---------------------------------------------------------- #
|
|
|
|
| 791 |
with gr.Tab("TARO"):
|
| 792 |
with gr.Row():
|
| 793 |
with gr.Column():
|
|
|
|
| 794 |
taro_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 795 |
taro_cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=7.5, step=0.5)
|
| 796 |
taro_steps = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1)
|
|
|
|
| 803 |
with gr.Column():
|
| 804 |
taro_slot_grps, taro_slot_vids, taro_slot_auds = _make_output_slots()
|
| 805 |
|
| 806 |
+
for trigger in [shared_video, taro_steps, taro_cf_dur]:
|
| 807 |
trigger.change(
|
| 808 |
fn=_on_video_upload_taro,
|
| 809 |
+
inputs=[shared_video_state, taro_steps, taro_cf_dur],
|
| 810 |
outputs=[taro_samples],
|
| 811 |
)
|
| 812 |
taro_samples.change(
|
|
|
|
| 820 |
|
| 821 |
taro_btn.click(
|
| 822 |
fn=_run_taro,
|
| 823 |
+
inputs=[shared_video_state, taro_seed, taro_cfg, taro_steps, taro_mode,
|
| 824 |
taro_cf_dur, taro_cf_db, taro_samples],
|
| 825 |
outputs=taro_slot_grps + taro_slot_vids + taro_slot_auds,
|
| 826 |
)
|
|
|
|
| 831 |
with gr.Tab("MMAudio"):
|
| 832 |
with gr.Row():
|
| 833 |
with gr.Column():
|
|
|
|
| 834 |
mma_prompt = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel")
|
| 835 |
mma_neg = gr.Textbox(label="Negative Prompt", placeholder="music, speech")
|
| 836 |
mma_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
|
|
|
| 855 |
|
| 856 |
mma_btn.click(
|
| 857 |
fn=_run_mmaudio,
|
| 858 |
+
inputs=[shared_video_state, mma_prompt, mma_neg, mma_seed,
|
| 859 |
mma_cfg, mma_steps, mma_cf_dur, mma_cf_db, mma_samples],
|
| 860 |
outputs=mma_slot_grps + mma_slot_vids + mma_slot_auds,
|
| 861 |
)
|
|
|
|
| 866 |
with gr.Tab("HunyuanFoley"):
|
| 867 |
with gr.Row():
|
| 868 |
with gr.Column():
|
|
|
|
| 869 |
hf_prompt = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof")
|
| 870 |
hf_neg = gr.Textbox(label="Negative Prompt", value="noisy, harsh")
|
| 871 |
hf_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
|
|
|
| 891 |
|
| 892 |
hf_btn.click(
|
| 893 |
fn=_run_hunyuan,
|
| 894 |
+
inputs=[shared_video_state, hf_prompt, hf_neg, hf_seed,
|
| 895 |
hf_guidance, hf_steps, hf_size, hf_cf_dur, hf_cf_db, hf_samples],
|
| 896 |
outputs=hf_slot_grps + hf_slot_vids + hf_slot_auds,
|
| 897 |
)
|