Spaces: Running on Zero
Yng314 committed on
Commit ·
83f6055
1
Parent(s): 63f8087
refactor: Standardize audio preview generation for hard splice, rough, and final transitions using a shared comparison window and remove `_assemble_substitute_mix`.
Browse files
- pipeline/transition_generator.py +29 -18
pipeline/transition_generator.py
CHANGED
|
@@ -1518,21 +1518,26 @@ def generate_transition_artifacts(request: TransitionRequest) -> TransitionResul
|
|
| 1518 |
|
| 1519 |
LOGGER.info("Transition request args: %s", json.dumps(request.to_log_dict(), sort_keys=True))
|
| 1520 |
rough = _prepare_rough_transition(request)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1521 |
rough_stitched_audio = normalize_peak(
|
| 1522 |
-
apply_edge_fades(
|
| 1523 |
peak=0.98,
|
| 1524 |
)
|
| 1525 |
write_wav(rough_stitched_path, rough_stitched_audio, rough["target_sr"])
|
| 1526 |
-
# For UI clarity, keep hard-splice baseline as a short seam-focused preview:
|
| 1527 |
-
# 2s before + 2s after the hard cut point.
|
| 1528 |
-
hard_splice_full = np.concatenate([rough["song_a_prefix"], rough["song_b_suffix_substitute"]]).astype(np.float32)
|
| 1529 |
-
seam_n = int(np.asarray(rough["song_a_prefix"]).size)
|
| 1530 |
-
side_n = int(round(float(HARD_SPLICE_PREVIEW_SIDE_SEC) * float(rough["target_sr"])))
|
| 1531 |
-
start_n = max(0, seam_n - side_n)
|
| 1532 |
-
end_n = min(int(hard_splice_full.size), seam_n + side_n)
|
| 1533 |
-
hard_splice_audio = hard_splice_full[start_n:end_n].astype(np.float32)
|
| 1534 |
-
hard_splice_audio = normalize_peak(apply_edge_fades(hard_splice_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
|
| 1535 |
-
write_wav(hard_splice_path, hard_splice_audio, rough["target_sr"])
|
| 1536 |
|
| 1537 |
transition_audio = rough["rough_seam"]
|
| 1538 |
repaint_context_audio = rough["rough_stitched"]
|
|
@@ -1555,13 +1560,19 @@ def generate_transition_artifacts(request: TransitionRequest) -> TransitionResul
|
|
| 1555 |
sr=int(rough["target_sr"]),
|
| 1556 |
)
|
| 1557 |
|
| 1558 |
-
stitched_audio
|
| 1559 |
-
|
| 1560 |
-
|
| 1561 |
-
|
| 1562 |
-
|
| 1563 |
-
|
| 1564 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1565 |
|
| 1566 |
transition_audio = normalize_peak(apply_edge_fades(transition_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
|
| 1567 |
stitched_audio = normalize_peak(apply_edge_fades(stitched_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
|
|
|
|
| 1518 |
|
| 1519 |
LOGGER.info("Transition request args: %s", json.dumps(request.to_log_dict(), sort_keys=True))
|
| 1520 |
rough = _prepare_rough_transition(request)
|
| 1521 |
+
# Build a shared comparison window: Song A tail + (middle) + Song B head.
|
| 1522 |
+
# This keeps the same start/end anchors across hard/rough/final for direct listening comparison.
|
| 1523 |
+
side_n = int(round(float(HARD_SPLICE_PREVIEW_SIDE_SEC) * float(rough["target_sr"])))
|
| 1524 |
+
song_a_prefix = np.asarray(rough["song_a_prefix"], dtype=np.float32)
|
| 1525 |
+
song_b_suffix = np.asarray(rough["song_b_suffix_substitute"], dtype=np.float32)
|
| 1526 |
+
comparison_a_context = song_a_prefix[-side_n:].astype(np.float32) if side_n > 0 else np.zeros((0,), dtype=np.float32)
|
| 1527 |
+
comparison_b_context = song_b_suffix[:side_n].astype(np.float32) if side_n > 0 else np.zeros((0,), dtype=np.float32)
|
| 1528 |
+
|
| 1529 |
+
hard_splice_audio = np.concatenate([comparison_a_context, comparison_b_context]).astype(np.float32)
|
| 1530 |
+
hard_splice_audio = normalize_peak(apply_edge_fades(hard_splice_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
|
| 1531 |
+
write_wav(hard_splice_path, hard_splice_audio, rough["target_sr"])
|
| 1532 |
+
|
| 1533 |
+
rough_stitched_audio = np.concatenate(
|
| 1534 |
+
[comparison_a_context, rough["rough_seam"].astype(np.float32), comparison_b_context]
|
| 1535 |
+
).astype(np.float32)
|
| 1536 |
rough_stitched_audio = normalize_peak(
|
| 1537 |
+
apply_edge_fades(rough_stitched_audio, rough["target_sr"], fade_ms=25.0),
|
| 1538 |
peak=0.98,
|
| 1539 |
)
|
| 1540 |
write_wav(rough_stitched_path, rough_stitched_audio, rough["target_sr"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1541 |
|
| 1542 |
transition_audio = rough["rough_seam"]
|
| 1543 |
repaint_context_audio = rough["rough_stitched"]
|
|
|
|
| 1560 |
sr=int(rough["target_sr"]),
|
| 1561 |
)
|
| 1562 |
|
| 1563 |
+
stitched_audio = np.concatenate(
|
| 1564 |
+
[comparison_a_context, transition_audio.astype(np.float32), comparison_b_context]
|
| 1565 |
+
).astype(np.float32)
|
| 1566 |
+
boundary_mix_debug = {
|
| 1567 |
+
"method": "shared-anchor-comparison-window",
|
| 1568 |
+
"side_sec": float(HARD_SPLICE_PREVIEW_SIDE_SEC),
|
| 1569 |
+
"song_a_context_sec": round(float(comparison_a_context.size / max(1, rough["target_sr"])), 3),
|
| 1570 |
+
"transition_sec": round(float(np.asarray(transition_audio).size / max(1, rough["target_sr"])), 3),
|
| 1571 |
+
"song_b_context_sec": round(float(comparison_b_context.size / max(1, rough["target_sr"])), 3),
|
| 1572 |
+
"hard_splice_sec": round(float(hard_splice_audio.size / max(1, rough["target_sr"])), 3),
|
| 1573 |
+
"rough_stitched_sec": round(float(rough_stitched_audio.size / max(1, rough["target_sr"])), 3),
|
| 1574 |
+
"stitched_sec": round(float(stitched_audio.size / max(1, rough["target_sr"])), 3),
|
| 1575 |
+
}
|
| 1576 |
|
| 1577 |
transition_audio = normalize_peak(apply_edge_fades(transition_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
|
| 1578 |
stitched_audio = normalize_peak(apply_edge_fades(stitched_audio, rough["target_sr"], fade_ms=25.0), peak=0.98)
|