BoxOfColors committed on
Commit
1c5fa8d
·
1 Parent(s): a5f92a7

Fix crossfade: use equal-power fade envelopes in all 3 models

Browse files

All three models had flat summation (both segments at full volume
simultaneously) during the overlap region, causing loud bumps at
segment joints. Replace with equal-power crossfade using cos/sin
fade-out/fade-in envelopes, which maintains constant perceived
loudness through the transition. Applies to TARO, MMAudio, and
HunyuanFoley.

Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -217,8 +217,12 @@ def _crossfade_join(wav_a: np.ndarray, wav_b: np.ndarray,
217
  cf = min(cf, len(wav_a), len(wav_b))
218
  if cf <= 0:
219
  return np.concatenate([wav_a, wav_b])
220
- gain = 10 ** (db_boost / 20.0)
221
- overlap = wav_a[-cf:] * gain + wav_b[:cf] * gain
 
 
 
 
222
  return np.concatenate([wav_a[:-cf], overlap, wav_b[cf:]])
223
 
224
 
@@ -475,14 +479,17 @@ def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
475
  wav = wav[:, :seg_samples]
476
  seg_audios.append(wav)
477
 
478
- # Crossfade-stitch all segments
479
  def _cf_join(a, b, cf_s):
480
  cf = int(round(cf_s * sr))
481
  cf = min(cf, a.shape[1], b.shape[1])
482
  if cf <= 0:
483
  return np.concatenate([a, b], axis=1)
484
  gain = 10 ** (MMA_CF_DB / 20.0)
485
- overlap = a[:, -cf:] * gain + b[:, :cf] * gain
 
 
 
486
  return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
487
 
488
  full_wav = seg_audios[0]
@@ -619,14 +626,17 @@ def generate_hunyuan(video_file, prompt, negative_prompt, seed_val,
619
  wav = wav[:, :seg_samples]
620
  seg_wavs.append(wav)
621
 
622
- # Stitch segments with crossfade (operates on (channels, samples) arrays)
623
  def _cf_join_stereo(a, b, cf_s, db):
624
  cf = int(round(cf_s * sr))
625
  cf = min(cf, a.shape[1], b.shape[1])
626
  if cf <= 0:
627
  return np.concatenate([a, b], axis=1)
628
  gain = 10 ** (db / 20.0)
629
- overlap = a[:, -cf:] * gain + b[:, :cf] * gain
 
 
 
630
  return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
631
 
632
  full_wav = seg_wavs[0]
 
217
  cf = min(cf, len(wav_a), len(wav_b))
218
  if cf <= 0:
219
  return np.concatenate([wav_a, wav_b])
220
+ gain = 10 ** (db_boost / 20.0)
221
+ # Equal-power fade: fade-out a, fade-in b over the overlap region
222
+ t = np.linspace(0.0, 1.0, cf, dtype=np.float32)
223
+ fade_out = np.cos(t * np.pi / 2) # 1 → 0
224
+ fade_in = np.sin(t * np.pi / 2) # 0 → 1
225
+ overlap = wav_a[-cf:] * fade_out * gain + wav_b[:cf] * fade_in * gain
226
  return np.concatenate([wav_a[:-cf], overlap, wav_b[cf:]])
227
 
228
 
 
479
  wav = wav[:, :seg_samples]
480
  seg_audios.append(wav)
481
 
482
+ # Crossfade-stitch all segments (equal-power fade)
483
  def _cf_join(a, b, cf_s):
484
  cf = int(round(cf_s * sr))
485
  cf = min(cf, a.shape[1], b.shape[1])
486
  if cf <= 0:
487
  return np.concatenate([a, b], axis=1)
488
  gain = 10 ** (MMA_CF_DB / 20.0)
489
+ t = np.linspace(0.0, 1.0, cf, dtype=np.float32)
490
+ fade_out = np.cos(t * np.pi / 2)
491
+ fade_in = np.sin(t * np.pi / 2)
492
+ overlap = a[:, -cf:] * fade_out * gain + b[:, :cf] * fade_in * gain
493
  return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
494
 
495
  full_wav = seg_audios[0]
 
626
  wav = wav[:, :seg_samples]
627
  seg_wavs.append(wav)
628
 
629
+ # Stitch segments with equal-power crossfade (operates on (channels, samples) arrays)
630
  def _cf_join_stereo(a, b, cf_s, db):
631
  cf = int(round(cf_s * sr))
632
  cf = min(cf, a.shape[1], b.shape[1])
633
  if cf <= 0:
634
  return np.concatenate([a, b], axis=1)
635
  gain = 10 ** (db / 20.0)
636
+ t = np.linspace(0.0, 1.0, cf, dtype=np.float32)
637
+ fade_out = np.cos(t * np.pi / 2)
638
+ fade_in = np.sin(t * np.pi / 2)
639
+ overlap = a[:, -cf:] * fade_out * gain + b[:, :cf] * fade_in * gain
640
  return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
641
 
642
  full_wav = seg_wavs[0]