Spaces:
Running on Zero
Running on Zero
Commit ·
1c5fa8d
1
Parent(s): a5f92a7
Fix crossfade: use equal-power fade envelopes in all 3 models
Browse files
All three models had flat summation (both segments at full volume
simultaneously) during the overlap region, causing loud bumps at
segment joints. Replace with equal-power crossfade using cos/sin
fade-out/fade-in envelopes, which maintains constant perceived
loudness through the transition. Applies to TARO, MMAudio, and
HunyuanFoley.
app.py
CHANGED
|
@@ -217,8 +217,12 @@ def _crossfade_join(wav_a: np.ndarray, wav_b: np.ndarray,
|
|
| 217 |
cf = min(cf, len(wav_a), len(wav_b))
|
| 218 |
if cf <= 0:
|
| 219 |
return np.concatenate([wav_a, wav_b])
|
| 220 |
-
gain
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
return np.concatenate([wav_a[:-cf], overlap, wav_b[cf:]])
|
| 223 |
|
| 224 |
|
|
@@ -475,14 +479,17 @@ def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
|
|
| 475 |
wav = wav[:, :seg_samples]
|
| 476 |
seg_audios.append(wav)
|
| 477 |
|
| 478 |
-
# Crossfade-stitch all segments
|
| 479 |
def _cf_join(a, b, cf_s):
|
| 480 |
cf = int(round(cf_s * sr))
|
| 481 |
cf = min(cf, a.shape[1], b.shape[1])
|
| 482 |
if cf <= 0:
|
| 483 |
return np.concatenate([a, b], axis=1)
|
| 484 |
gain = 10 ** (MMA_CF_DB / 20.0)
|
| 485 |
-
|
|
|
|
|
|
|
|
|
|
| 486 |
return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
|
| 487 |
|
| 488 |
full_wav = seg_audios[0]
|
|
@@ -619,14 +626,17 @@ def generate_hunyuan(video_file, prompt, negative_prompt, seed_val,
|
|
| 619 |
wav = wav[:, :seg_samples]
|
| 620 |
seg_wavs.append(wav)
|
| 621 |
|
| 622 |
-
# Stitch segments with crossfade (operates on (channels, samples) arrays)
|
| 623 |
def _cf_join_stereo(a, b, cf_s, db):
|
| 624 |
cf = int(round(cf_s * sr))
|
| 625 |
cf = min(cf, a.shape[1], b.shape[1])
|
| 626 |
if cf <= 0:
|
| 627 |
return np.concatenate([a, b], axis=1)
|
| 628 |
gain = 10 ** (db / 20.0)
|
| 629 |
-
|
|
|
|
|
|
|
|
|
|
| 630 |
return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
|
| 631 |
|
| 632 |
full_wav = seg_wavs[0]
|
|
|
|
| 217 |
cf = min(cf, len(wav_a), len(wav_b))
|
| 218 |
if cf <= 0:
|
| 219 |
return np.concatenate([wav_a, wav_b])
|
| 220 |
+
gain = 10 ** (db_boost / 20.0)
|
| 221 |
+
# Equal-power fade: fade-out a, fade-in b over the overlap region
|
| 222 |
+
t = np.linspace(0.0, 1.0, cf, dtype=np.float32)
|
| 223 |
+
fade_out = np.cos(t * np.pi / 2) # 1 → 0
|
| 224 |
+
fade_in = np.sin(t * np.pi / 2) # 0 → 1
|
| 225 |
+
overlap = wav_a[-cf:] * fade_out * gain + wav_b[:cf] * fade_in * gain
|
| 226 |
return np.concatenate([wav_a[:-cf], overlap, wav_b[cf:]])
|
| 227 |
|
| 228 |
|
|
|
|
| 479 |
wav = wav[:, :seg_samples]
|
| 480 |
seg_audios.append(wav)
|
| 481 |
|
| 482 |
+
# Crossfade-stitch all segments (equal-power fade)
|
| 483 |
def _cf_join(a, b, cf_s):
|
| 484 |
cf = int(round(cf_s * sr))
|
| 485 |
cf = min(cf, a.shape[1], b.shape[1])
|
| 486 |
if cf <= 0:
|
| 487 |
return np.concatenate([a, b], axis=1)
|
| 488 |
gain = 10 ** (MMA_CF_DB / 20.0)
|
| 489 |
+
t = np.linspace(0.0, 1.0, cf, dtype=np.float32)
|
| 490 |
+
fade_out = np.cos(t * np.pi / 2)
|
| 491 |
+
fade_in = np.sin(t * np.pi / 2)
|
| 492 |
+
overlap = a[:, -cf:] * fade_out * gain + b[:, :cf] * fade_in * gain
|
| 493 |
return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
|
| 494 |
|
| 495 |
full_wav = seg_audios[0]
|
|
|
|
| 626 |
wav = wav[:, :seg_samples]
|
| 627 |
seg_wavs.append(wav)
|
| 628 |
|
| 629 |
+
# Stitch segments with equal-power crossfade (operates on (channels, samples) arrays)
|
| 630 |
def _cf_join_stereo(a, b, cf_s, db):
|
| 631 |
cf = int(round(cf_s * sr))
|
| 632 |
cf = min(cf, a.shape[1], b.shape[1])
|
| 633 |
if cf <= 0:
|
| 634 |
return np.concatenate([a, b], axis=1)
|
| 635 |
gain = 10 ** (db / 20.0)
|
| 636 |
+
t = np.linspace(0.0, 1.0, cf, dtype=np.float32)
|
| 637 |
+
fade_out = np.cos(t * np.pi / 2)
|
| 638 |
+
fade_in = np.sin(t * np.pi / 2)
|
| 639 |
+
overlap = a[:, -cf:] * fade_out * gain + b[:, :cf] * fade_in * gain
|
| 640 |
return np.concatenate([a[:, :-cf], overlap, b[:, cf:]], axis=1)
|
| 641 |
|
| 642 |
full_wav = seg_wavs[0]
|