Spaces:
Running on Zero
Commit ·
537e4ca
1
Parent(s): 07afd9c
Standardise crossfade defaults to 2s/3dB across all three models
Browse files
MMAudio crossfade_s was 1.0 in the function signature, the duration callable, and
the UI slider default — align it to 2.0 to match TARO and HunyuanFoley.
All model inference defaults (CFG, steps, mode) already matched the paper
recommendations: TARO 8.0/25/SDE, MMAudio 4.5/25, HunyuanFoley 4.5/50.
app.py
CHANGED
|
@@ -392,7 +392,7 @@ MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
|
|
| 392 |
|
| 393 |
def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
|
| 394 |
cfg_strength, num_steps, num_samples,
|
| 395 |
-
crossfade_s=1.0, crossfade_db=3.0):
|
| 396 |
"""Pre-GPU callable: returns the GPU seconds to reserve for this MMAudio run."""
|
| 397 |
try:
|
| 398 |
total_s = get_video_duration(video_file)
|
|
@@ -406,7 +406,7 @@ def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
|
|
| 406 |
@spaces.GPU(duration=_mmaudio_duration)
|
| 407 |
def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
|
| 408 |
cfg_strength, num_steps, num_samples,
|
| 409 |
-
crossfade_s=1.0, crossfade_db=3.0):
|
| 410 |
"""MMAudio: flow-matching video-to-audio, 44.1 kHz, 8 s sliding window."""
|
| 411 |
import sys as _sys, os as _os
|
| 412 |
_mmaudio_dir = _os.path.join(_os.path.dirname(_os.path.abspath(__file__)), "MMAudio")
|
|
@@ -797,7 +797,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
|
|
| 797 |
mma_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 798 |
mma_cfg = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5)
|
| 799 |
mma_steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1)
|
| 800 |
-
mma_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=1, step=0.1)
|
| 801 |
mma_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3")
|
| 802 |
mma_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
|
| 803 |
mma_btn = gr.Button("Generate", variant="primary")
|
|
|
|
| 392 |
|
| 393 |
def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
|
| 394 |
cfg_strength, num_steps, num_samples,
|
| 395 |
+
crossfade_s=2.0, crossfade_db=3.0):
|
| 396 |
"""Pre-GPU callable: returns the GPU seconds to reserve for this MMAudio run."""
|
| 397 |
try:
|
| 398 |
total_s = get_video_duration(video_file)
|
|
|
|
| 406 |
@spaces.GPU(duration=_mmaudio_duration)
|
| 407 |
def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
|
| 408 |
cfg_strength, num_steps, num_samples,
|
| 409 |
+
crossfade_s=2.0, crossfade_db=3.0):
|
| 410 |
"""MMAudio: flow-matching video-to-audio, 44.1 kHz, 8 s sliding window."""
|
| 411 |
import sys as _sys, os as _os
|
| 412 |
_mmaudio_dir = _os.path.join(_os.path.dirname(_os.path.abspath(__file__)), "MMAudio")
|
|
|
|
| 797 |
mma_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
|
| 798 |
mma_cfg = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5)
|
| 799 |
mma_steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1)
|
| 800 |
+
mma_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
|
| 801 |
mma_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3")
|
| 802 |
mma_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
|
| 803 |
mma_btn = gr.Button("Generate", variant="primary")
|