BoxOfColors committed on
Commit
5cf7a39
·
1 Parent(s): 51979c2

fix: cap crossfade slider max to 4s and clamp in _build_segments

Browse files

The crossfade duration slider maximum was 8s — equal to MMAudio's window (8.0s)
and nearly equal to TARO's (8.192s). Since step_s = window_s - crossfade_s, that
would produce step_s <= 0 for MMAudio and a near-zero step (0.192s) for TARO,
yielding degenerate segments. With cross-model regeneration the crossfade must
be safe for the smallest window. Cap the sliders at 4s and add a safety clamp in
_build_segments (crossfade <= half the window) to guard against stale metadata.

Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -279,6 +279,8 @@ def mux_video_audio(silent_video: str, audio_path: str, output_path: str) -> Non
279
  def _build_segments(total_dur_s: float, window_s: float, crossfade_s: float) -> list[tuple[float, float]]:
280
  """Return list of (start, end) pairs covering *total_dur_s* with a sliding
281
  window of *window_s* and *crossfade_s* overlap between consecutive segments."""
 
 
282
  if total_dur_s <= window_s:
283
  return [(0.0, total_dur_s)]
284
  step_s = window_s - crossfade_s
@@ -2399,7 +2401,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
2399
  taro_cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=8.0, step=0.5, elem_id="taro_cfg")
2400
  taro_steps = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1, elem_id="taro_steps")
2401
  taro_mode = gr.Radio(label="Sampling Mode", choices=["sde", "ode"], value="sde", elem_id="taro_mode")
2402
- taro_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1, elem_id="taro_cf_dur")
2403
  taro_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="taro_cf_db")
2404
  taro_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
2405
  taro_btn = gr.Button("Generate", variant="primary")
@@ -2465,7 +2467,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
2465
  mma_seed = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="mma_seed")
2466
  mma_cfg = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5, elem_id="mma_cfg")
2467
  mma_steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1, elem_id="mma_steps")
2468
- mma_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1, elem_id="mma_cf_dur")
2469
  mma_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="mma_cf_db")
2470
  mma_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
2471
  mma_btn = gr.Button("Generate", variant="primary")
@@ -2520,7 +2522,7 @@ with gr.Blocks(title="Generate Audio for Video", css=_SLOT_CSS, js=_GLOBAL_JS) a
2520
  hf_guidance = gr.Slider(label="Guidance Scale", minimum=1, maximum=10, value=4.5, step=0.5, elem_id="hf_guidance")
2521
  hf_steps = gr.Slider(label="Steps", minimum=10, maximum=100, value=50, step=5, elem_id="hf_steps")
2522
  hf_size = gr.Radio(label="Model Size", choices=["xl", "xxl"], value="xxl", elem_id="hf_size")
2523
- hf_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=8, value=2, step=0.1, elem_id="hf_cf_dur")
2524
  hf_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="hf_cf_db")
2525
  hf_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
2526
  hf_btn = gr.Button("Generate", variant="primary")
 
279
  def _build_segments(total_dur_s: float, window_s: float, crossfade_s: float) -> list[tuple[float, float]]:
280
  """Return list of (start, end) pairs covering *total_dur_s* with a sliding
281
  window of *window_s* and *crossfade_s* overlap between consecutive segments."""
282
+ # Safety: clamp crossfade to <= half the window so step_s stays positive
283
+ crossfade_s = min(crossfade_s, window_s * 0.5)
284
  if total_dur_s <= window_s:
285
  return [(0.0, total_dur_s)]
286
  step_s = window_s - crossfade_s
 
2401
  taro_cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=15, value=8.0, step=0.5, elem_id="taro_cfg")
2402
  taro_steps = gr.Slider(label="Sampling Steps", minimum=10, maximum=50, value=25, step=1, elem_id="taro_steps")
2403
  taro_mode = gr.Radio(label="Sampling Mode", choices=["sde", "ode"], value="sde", elem_id="taro_mode")
2404
+ taro_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=4, value=2, step=0.1, elem_id="taro_cf_dur")
2405
  taro_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="taro_cf_db")
2406
  taro_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
2407
  taro_btn = gr.Button("Generate", variant="primary")
 
2467
  mma_seed = gr.Number(label="Seed (-1 = random)", value=-1, precision=0, elem_id="mma_seed")
2468
  mma_cfg = gr.Slider(label="CFG Strength", minimum=1, maximum=10, value=4.5, step=0.5, elem_id="mma_cfg")
2469
  mma_steps = gr.Slider(label="Steps", minimum=10, maximum=50, value=25, step=1, elem_id="mma_steps")
2470
+ mma_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=4, value=2, step=0.1, elem_id="mma_cf_dur")
2471
  mma_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="mma_cf_db")
2472
  mma_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
2473
  mma_btn = gr.Button("Generate", variant="primary")
 
2522
  hf_guidance = gr.Slider(label="Guidance Scale", minimum=1, maximum=10, value=4.5, step=0.5, elem_id="hf_guidance")
2523
  hf_steps = gr.Slider(label="Steps", minimum=10, maximum=100, value=50, step=5, elem_id="hf_steps")
2524
  hf_size = gr.Radio(label="Model Size", choices=["xl", "xxl"], value="xxl", elem_id="hf_size")
2525
+ hf_cf_dur = gr.Slider(label="Crossfade Duration (s)", minimum=0, maximum=4, value=2, step=0.1, elem_id="hf_cf_dur")
2526
  hf_cf_db = gr.Textbox(label="Crossfade Boost (dB)", value="3", elem_id="hf_cf_db")
2527
  hf_samples = gr.Slider(label="Generations", minimum=1, maximum=MAX_SLOTS, value=1, step=1)
2528
  hf_btn = gr.Button("Generate", variant="primary")