Spaces:

mvp-lab
/

audio_generation

Running on Zero

App Files Files Community

Yng314 committed on Feb 28

Commit

83f6055

1 Parent(s): 63f8087

refactor: Standardize audio preview generation for hard splice, rough, and final transitions using a shared comparison window and remove `_assemble_substitute_mix`.

Browse files

Files changed (1) hide show

pipeline/transition_generator.py +29 -18

pipeline/transition_generator.py CHANGED Viewed

@@ -1518,21 +1518,26 @@ def generate_transition_artifacts(request: TransitionRequest) -> TransitionResul
     LOGGER.info("Transition request args: %s", json.dumps(request.to_log_dict(), sort_keys=True))
     rough = _prepare_rough_transition(request)
     rough_stitched_audio = normalize_peak(
-        apply_edge_fades(rough["rough_stitched"].astype(np.float32), rough["target_sr"], fade_ms=25.0),
         peak=0.98,
     )
     write_wav(rough_stitched_path, rough_stitched_audio, rough["target_sr"])
-    # For UI clarity, keep hard-splice baseline as a short seam-focused preview:
-    # 2s before + 2s after the hard cut point.
-    hard_splice_full = np.concatenate([rough["song_a_prefix"], rough["song_b_suffix_substitute"]]).astype(np.float32)
-    seam_n = int(np.asarray(rough["song_a_prefix"]).size)
-    side_n = int(round(float(HARD_SPLICE_PREVIEW_SIDE_SEC) * float(rough["target_sr"])))
-    start_n = max(0, seam_n - side_n)
-    end_n = min(int(hard_splice_full.size), seam_n + side_n)
-    hard_splice_audio = hard_splice_full[start_n:end_n].astype(np.float32)
-    hard_splice_audio = normalize_peak(apply_edge_fades(hard_splice_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
-    write_wav(hard_splice_path, hard_splice_audio, rough["target_sr"])
     transition_audio = rough["rough_seam"]
     repaint_context_audio = rough["rough_stitched"]
@@ -1555,13 +1560,19 @@ def generate_transition_artifacts(request: TransitionRequest) -> TransitionResul
         sr=int(rough["target_sr"]),
     )
-    stitched_audio, boundary_mix_debug = _assemble_substitute_mix(
-        song_a_prefix=rough["song_a_prefix"],
-        transition=transition_audio,
-        song_b_suffix=rough["song_b_suffix_substitute"],
-        boundary_fade_n=int(rough.get("boundary_fade_n", 0)),
-        sr=int(rough["target_sr"]),
-    )
     transition_audio = normalize_peak(apply_edge_fades(transition_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
     stitched_audio = normalize_peak(apply_edge_fades(stitched_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)

     LOGGER.info("Transition request args: %s", json.dumps(request.to_log_dict(), sort_keys=True))
     rough = _prepare_rough_transition(request)
+    # Build a shared comparison window: Song A tail + (middle) + Song B head.
+    # This keeps the same start/end anchors across hard/rough/final for direct listening comparison.
+    side_n = int(round(float(HARD_SPLICE_PREVIEW_SIDE_SEC) * float(rough["target_sr"])))
+    song_a_prefix = np.asarray(rough["song_a_prefix"], dtype=np.float32)
+    song_b_suffix = np.asarray(rough["song_b_suffix_substitute"], dtype=np.float32)
+    comparison_a_context = song_a_prefix[-side_n:].astype(np.float32) if side_n > 0 else np.zeros((0,), dtype=np.float32)
+    comparison_b_context = song_b_suffix[:side_n].astype(np.float32) if side_n > 0 else np.zeros((0,), dtype=np.float32)
+    hard_splice_audio = np.concatenate([comparison_a_context, comparison_b_context]).astype(np.float32)
+    hard_splice_audio = normalize_peak(apply_edge_fades(hard_splice_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
+    write_wav(hard_splice_path, hard_splice_audio, rough["target_sr"])
+    rough_stitched_audio = np.concatenate(
+        [comparison_a_context, rough["rough_seam"].astype(np.float32), comparison_b_context]
+    ).astype(np.float32)
     rough_stitched_audio = normalize_peak(
+        apply_edge_fades(rough_stitched_audio, rough["target_sr"], fade_ms=25.0),
         peak=0.98,
     )
     write_wav(rough_stitched_path, rough_stitched_audio, rough["target_sr"])
     transition_audio = rough["rough_seam"]
     repaint_context_audio = rough["rough_stitched"]
         sr=int(rough["target_sr"]),
     )
+    stitched_audio = np.concatenate(
+        [comparison_a_context, transition_audio.astype(np.float32), comparison_b_context]
+    ).astype(np.float32)
+    boundary_mix_debug = {
+        "method": "shared-anchor-comparison-window",
+        "side_sec": float(HARD_SPLICE_PREVIEW_SIDE_SEC),
+        "song_a_context_sec": round(float(comparison_a_context.size / max(1, rough["target_sr"])), 3),
+        "transition_sec": round(float(np.asarray(transition_audio).size / max(1, rough["target_sr"])), 3),
+        "song_b_context_sec": round(float(comparison_b_context.size / max(1, rough["target_sr"])), 3),
+        "hard_splice_sec": round(float(hard_splice_audio.size / max(1, rough["target_sr"])), 3),
+        "rough_stitched_sec": round(float(rough_stitched_audio.size / max(1, rough["target_sr"])), 3),
+        "stitched_sec": round(float(stitched_audio.size / max(1, rough["target_sr"])), 3),
+    }
     transition_audio = normalize_peak(apply_edge_fades(transition_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
     stitched_audio = normalize_peak(apply_edge_fades(stitched_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)