Yng314 committed on
Commit
83f6055
·
1 Parent(s): 63f8087

refactor: Standardize audio preview generation for hard splice, rough, and final transitions using a shared comparison window and remove `_assemble_substitute_mix`.

Browse files
Files changed (1) hide show
  1. pipeline/transition_generator.py +29 -18
pipeline/transition_generator.py CHANGED
@@ -1518,21 +1518,26 @@ def generate_transition_artifacts(request: TransitionRequest) -> TransitionResul
1518
 
1519
  LOGGER.info("Transition request args: %s", json.dumps(request.to_log_dict(), sort_keys=True))
1520
  rough = _prepare_rough_transition(request)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1521
  rough_stitched_audio = normalize_peak(
1522
- apply_edge_fades(rough["rough_stitched"].astype(np.float32), rough["target_sr"], fade_ms=25.0),
1523
  peak=0.98,
1524
  )
1525
  write_wav(rough_stitched_path, rough_stitched_audio, rough["target_sr"])
1526
- # For UI clarity, keep hard-splice baseline as a short seam-focused preview:
1527
- # 2s before + 2s after the hard cut point.
1528
- hard_splice_full = np.concatenate([rough["song_a_prefix"], rough["song_b_suffix_substitute"]]).astype(np.float32)
1529
- seam_n = int(np.asarray(rough["song_a_prefix"]).size)
1530
- side_n = int(round(float(HARD_SPLICE_PREVIEW_SIDE_SEC) * float(rough["target_sr"])))
1531
- start_n = max(0, seam_n - side_n)
1532
- end_n = min(int(hard_splice_full.size), seam_n + side_n)
1533
- hard_splice_audio = hard_splice_full[start_n:end_n].astype(np.float32)
1534
- hard_splice_audio = normalize_peak(apply_edge_fades(hard_splice_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
1535
- write_wav(hard_splice_path, hard_splice_audio, rough["target_sr"])
1536
 
1537
  transition_audio = rough["rough_seam"]
1538
  repaint_context_audio = rough["rough_stitched"]
@@ -1555,13 +1560,19 @@ def generate_transition_artifacts(request: TransitionRequest) -> TransitionResul
1555
  sr=int(rough["target_sr"]),
1556
  )
1557
 
1558
- stitched_audio, boundary_mix_debug = _assemble_substitute_mix(
1559
- song_a_prefix=rough["song_a_prefix"],
1560
- transition=transition_audio,
1561
- song_b_suffix=rough["song_b_suffix_substitute"],
1562
- boundary_fade_n=int(rough.get("boundary_fade_n", 0)),
1563
- sr=int(rough["target_sr"]),
1564
- )
 
 
 
 
 
 
1565
 
1566
  transition_audio = normalize_peak(apply_edge_fades(transition_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
1567
  stitched_audio = normalize_peak(apply_edge_fades(stitched_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
 
1518
 
1519
  LOGGER.info("Transition request args: %s", json.dumps(request.to_log_dict(), sort_keys=True))
1520
  rough = _prepare_rough_transition(request)
1521
+ # Build a shared comparison window: Song A tail + (middle) + Song B head.
1522
+ # This keeps the same start/end anchors across hard/rough/final for direct listening comparison.
1523
+ side_n = int(round(float(HARD_SPLICE_PREVIEW_SIDE_SEC) * float(rough["target_sr"])))
1524
+ song_a_prefix = np.asarray(rough["song_a_prefix"], dtype=np.float32)
1525
+ song_b_suffix = np.asarray(rough["song_b_suffix_substitute"], dtype=np.float32)
1526
+ comparison_a_context = song_a_prefix[-side_n:].astype(np.float32) if side_n > 0 else np.zeros((0,), dtype=np.float32)
1527
+ comparison_b_context = song_b_suffix[:side_n].astype(np.float32) if side_n > 0 else np.zeros((0,), dtype=np.float32)
1528
+
1529
+ hard_splice_audio = np.concatenate([comparison_a_context, comparison_b_context]).astype(np.float32)
1530
+ hard_splice_audio = normalize_peak(apply_edge_fades(hard_splice_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
1531
+ write_wav(hard_splice_path, hard_splice_audio, rough["target_sr"])
1532
+
1533
+ rough_stitched_audio = np.concatenate(
1534
+ [comparison_a_context, rough["rough_seam"].astype(np.float32), comparison_b_context]
1535
+ ).astype(np.float32)
1536
  rough_stitched_audio = normalize_peak(
1537
+ apply_edge_fades(rough_stitched_audio, rough["target_sr"], fade_ms=25.0),
1538
  peak=0.98,
1539
  )
1540
  write_wav(rough_stitched_path, rough_stitched_audio, rough["target_sr"])
 
 
 
 
 
 
 
 
 
 
1541
 
1542
  transition_audio = rough["rough_seam"]
1543
  repaint_context_audio = rough["rough_stitched"]
 
1560
  sr=int(rough["target_sr"]),
1561
  )
1562
 
1563
+ stitched_audio = np.concatenate(
1564
+ [comparison_a_context, transition_audio.astype(np.float32), comparison_b_context]
1565
+ ).astype(np.float32)
1566
+ boundary_mix_debug = {
1567
+ "method": "shared-anchor-comparison-window",
1568
+ "side_sec": float(HARD_SPLICE_PREVIEW_SIDE_SEC),
1569
+ "song_a_context_sec": round(float(comparison_a_context.size / max(1, rough["target_sr"])), 3),
1570
+ "transition_sec": round(float(np.asarray(transition_audio).size / max(1, rough["target_sr"])), 3),
1571
+ "song_b_context_sec": round(float(comparison_b_context.size / max(1, rough["target_sr"])), 3),
1572
+ "hard_splice_sec": round(float(hard_splice_audio.size / max(1, rough["target_sr"])), 3),
1573
+ "rough_stitched_sec": round(float(rough_stitched_audio.size / max(1, rough["target_sr"])), 3),
1574
+ "stitched_sec": round(float(stitched_audio.size / max(1, rough["target_sr"])), 3),
1575
+ }
1576
 
1577
  transition_audio = normalize_peak(apply_edge_fades(transition_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
1578
  stitched_audio = normalize_peak(apply_edge_fades(stitched_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)