Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

App Files Community

BoxOfColors committed 7 days ago

Commit

db26e7f

1 Parent(s): ba77b8a

Fix: replace custom transcode with gr.Video format=mp4; remove upload handler

Browse files

Files changed (1) hide show

app.py +3 -40

app.py CHANGED Viewed

@@ -156,36 +156,6 @@ def strip_audio_from_video(video_path: str, output_path: str) -> None:
         overwrite_output=True, quiet=True
     )
-def _transcode_for_browser(video_path: str) -> str:
-    """Re-encode *video_path* to H.264 baseline + AAC so all browsers can preview it.
-    Returns the path to the transcoded file (written alongside the original).
-    Falls back to returning the original path if transcoding fails.
-    """
-    if video_path is None:
-        return video_path
-    try:
-        # Probe to check if audio stream exists
-        probe = ffmpeg.probe(video_path)
-        has_audio = any(s["codec_type"] == "audio" for s in probe.get("streams", []))
-        out_path = video_path.rsplit(".", 1)[0] + "_browser.mp4"
-        stream = ffmpeg.input(video_path)
-        output_kwargs = dict(
-            vcodec="libx264", preset="fast", crf=18,
-            profile="baseline", level="3.0",
-            pix_fmt="yuv420p",
-            movflags="+faststart",
-        )
-        if has_audio:
-            output_kwargs["acodec"] = "aac"
-            output_kwargs["audio_bitrate"] = "128k"
-        else:
-            output_kwargs["an"] = None  # no audio track
-        stream.output(out_path, **output_kwargs).run(overwrite_output=True, quiet=True)
-        return out_path
-    except Exception as e:
-        print(f"[transcode_for_browser] failed, using original: {e}")
-        return video_path
 # ------------------------------------------------------------------ #
 # Temp directory registry — tracks dirs for cleanup on new generation #
@@ -2582,7 +2552,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
         with gr.Tab("TARO"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    taro_video   = gr.Video(label="Input Video")
                     taro_seed    = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="taro_seed")
                     taro_cfg     = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=8.0, step=0.5, elem_id="taro_cfg")
                     taro_steps   = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1, elem_id="taro_steps")
@@ -2647,7 +2617,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
         with gr.Tab("MMAudio"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    mma_video    = gr.Video(label="Input Video")
                     mma_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel", elem_id="mma_prompt")
                     mma_neg      = gr.Textbox(label="Negative Prompt", value="music", placeholder="music, speech", elem_id="mma_neg")
                     mma_seed     = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="mma_seed")
@@ -2701,7 +2671,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
         with gr.Tab("HunyuanFoley"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    hf_video    = gr.Video(label="Input Video")
                     hf_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof", elem_id="hf_prompt")
                     hf_neg      = gr.Textbox(label="Negative Prompt", value="noisy, harsh", elem_id="hf_neg")
                     hf_seed     = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="hf_seed")
@@ -2750,13 +2720,6 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
                 hf_slot_vids, hf_slot_waves,
             )
-    # ---- Browser-safe transcode on upload ----
-    # Re-encodes uploaded video to H.264 baseline so all browsers can preview it.
-    # Each tab transcodes independently; cross-tab sync propagates the result.
-    taro_video.upload(fn=_transcode_for_browser, inputs=[taro_video], outputs=[taro_video])
-    mma_video.upload(fn=_transcode_for_browser,  inputs=[mma_video],  outputs=[mma_video])
-    hf_video.upload(fn=_transcode_for_browser,   inputs=[hf_video],   outputs=[hf_video])
     # ---- Cross-tab video sync ----
     _sync = lambda v: (gr.update(value=v), gr.update(value=v))
     taro_video.change(fn=_sync, inputs=[taro_video], outputs=[mma_video, hf_video])

         overwrite_output=True, quiet=True
     )
 # ------------------------------------------------------------------ #
 # Temp directory registry — tracks dirs for cleanup on new generation #
         with gr.Tab("TARO"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    taro_video   = gr.Video(label="Input Video", format="mp4")
                     taro_seed    = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="taro_seed")
                     taro_cfg     = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=8.0, step=0.5, elem_id="taro_cfg")
                     taro_steps   = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1, elem_id="taro_steps")
         with gr.Tab("MMAudio"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    mma_video    = gr.Video(label="Input Video", format="mp4")
                     mma_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. footsteps on gravel", elem_id="mma_prompt")
                     mma_neg      = gr.Textbox(label="Negative Prompt", value="music", placeholder="music, speech", elem_id="mma_neg")
                     mma_seed     = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="mma_seed")
         with gr.Tab("HunyuanFoley"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    hf_video    = gr.Video(label="Input Video", format="mp4")
                     hf_prompt   = gr.Textbox(label="Prompt", placeholder="e.g. rain hitting a metal roof", elem_id="hf_prompt")
                     hf_neg      = gr.Textbox(label="Negative Prompt", value="noisy, harsh", elem_id="hf_neg")
                     hf_seed     = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="hf_seed")
                 hf_slot_vids, hf_slot_waves,
             )
     # ---- Cross-tab video sync ----
     _sync = lambda v: (gr.update(value=v), gr.update(value=v))
     taro_video.change(fn=_sync, inputs=[taro_video], outputs=[mma_video, hf_video])