BoxOfColors committed on
Commit
537e4ca
·
1 Parent(s): 07afd9c

Standardise crossfade defaults to 2s/3dB across all three models

Browse files

MMAudio's crossfade_s defaulted to 1.0 in the generate_mmaudio signature, the
_mmaudio_duration pre-GPU callable, and the UI slider — align all three to 2.0
to match TARO and HunyuanFoley.
All other model inference defaults (CFG, steps, mode) already match the paper
recommendations and are unchanged: TARO 8.0/25/SDE, MMAudio 4.5/25,
HunyuanFoley 4.5/50.

Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -392,7 +392,7 @@ MMAUDIO_WINDOW = 8.0 # seconds — MMAudio's fixed generation window
392
 
393
  def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
394
  cfg_strength, num_steps, num_samples,
395
- crossfade_s=1.0, crossfade_db=3.0):
396
  """Pre-GPU callable: returns the GPU seconds to reserve for this MMAudio run."""
397
  try:
398
  total_s = get_video_duration(video_file)
@@ -406,7 +406,7 @@ def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
406
  @spaces.GPU(duration=_mmaudio_duration)
407
  def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
408
  cfg_strength, num_steps, num_samples,
409
- crossfade_s=1.0, crossfade_db=3.0):
410
  """MMAudio: flow-matching video-to-audio, 44.1 kHz, 8 s sliding window."""
411
  import sys as _sys, os as _os
412
  _mmaudio_dir = _os.path.join(_os.path.dirname(_os.path.abspath(__file__)), "MMAudio")
@@ -797,7 +797,7 @@ with gr.Blocks(title="Generate Audio for Video") as demo:
797
  mma_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
798
  mma_cfg = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5)
799
  mma_steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1)
800
- mma_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=1, step=0.1)
801
  mma_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3")
802
  mma_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
803
  mma_btn = gr.Button("Generate", variant="primary")
 
392
 
393
  def _mmaudio_duration(video_file, prompt, negative_prompt, seed_val,
394
  cfg_strength, num_steps, num_samples,
395
+ crossfade_s=2.0, crossfade_db=3.0):
396
  """Pre-GPU callable: returns the GPU seconds to reserve for this MMAudio run."""
397
  try:
398
  total_s = get_video_duration(video_file)
 
406
  @spaces.GPU(duration=_mmaudio_duration)
407
  def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
408
  cfg_strength, num_steps, num_samples,
409
+ crossfade_s=2.0, crossfade_db=3.0):
410
  """MMAudio: flow-matching video-to-audio, 44.1 kHz, 8 s sliding window."""
411
  import sys as _sys, os as _os
412
  _mmaudio_dir = _os.path.join(_os.path.dirname(_os.path.abspath(__file__)), "MMAudio")
 
797
  mma_seed = gr.Number(label="Seed (-1 = random)", value=get_random_seed(), precision=0)
798
  mma_cfg = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5)
799
  mma_steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1)
800
+ mma_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1)
801
  mma_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3")
802
  mma_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
803
  mma_btn = gr.Button("Generate", variant="primary")