| | """Lyrics-to-beat mapping: group beats into segments and assign lyrics.""" |
| |
|
| | import json |
| | from pathlib import Path |
| | from typing import Optional |
| |
|
| |
|
def segment_lyrics(
    beats: list[dict],
    lyrics: list[dict],
    beats_per_segment: int = 4,
) -> list[dict]:
    """Map timestamped lyrics onto beat-grouped segments.

    Consecutive beats are grouped into segments (e.g. 4 beats = 1 bar in
    4/4 time) and every word is assigned to the segment in which it starts.
    Adjacent segments share their boundary beat: the end of one segment is
    the start of the next. If the beat count does not divide evenly, the
    final segment is shorter.

    Word placement rules:
      - A word belongs to the first segment with ``start <= word start < end``
        (compared against the rounded segment boundaries).
      - A word starting at or after the last segment's start that matched no
        segment (i.e. after the final beat) is appended to the last segment.
      - A word starting before the first segment is not assigned anywhere.

    Args:
        beats: List of beat dicts; only the "time" key is read here.
        lyrics: List of word dicts; the "start" and "word" keys are read here.
        beats_per_segment: Number of beats per segment. 4 = one bar in 4/4 time.

    Returns:
        List of segment dicts with keys:
          - segment: 1-indexed segment number
          - start: start time, rounded to 3 decimals
          - end: end time, rounded to 3 decimals
          - duration: end minus start, rounded to 3 decimals
          - beats: rounded beat times in the segment, including both
            boundary beats
          - lyrics: words of the segment joined by single spaces (may be empty)
          - words: list of the word dicts assigned to this segment
        Fewer than two beats yield an empty list.

    Raises:
        ValueError: If beats_per_segment is less than 1.
    """
    # Fail early with a clear message instead of range()'s "arg 3 must not be
    # zero" error (0) or a silently empty result (negative values).
    if beats_per_segment < 1:
        raise ValueError(
            f"beats_per_segment must be at least 1, got {beats_per_segment!r}"
        )

    beat_times = [b["time"] for b in beats]
    last_idx = len(beat_times) - 1

    segments: list[dict] = []
    # Stop before the last beat: it can only close a segment, never open one.
    segment_starts = range(0, last_idx, beats_per_segment)
    for seg_num, first_idx in enumerate(segment_starts, start=1):
        end_idx = min(first_idx + beats_per_segment, last_idx)
        start = beat_times[first_idx]
        end = beat_times[end_idx]
        segments.append({
            "segment": seg_num,
            "start": round(start, 3),
            "end": round(end, 3),
            "duration": round(end - start, 3),
            # Both boundary beats are included, hence the inclusive slice.
            "beats": [round(t, 3) for t in beat_times[first_idx:end_idx + 1]],
            "lyrics": "",
            "words": [],
        })

    for word in lyrics:
        word_start = word["start"]
        target = next(
            (seg for seg in segments if seg["start"] <= word_start < seg["end"]),
            None,
        )
        # Words starting at/after the final beat still belong to the song's
        # tail, so they go to the last segment; earlier strays are skipped.
        if target is None and segments and word_start >= segments[-1]["start"]:
            target = segments[-1]
        if target is not None:
            target["words"].append(word)

    for seg in segments:
        seg["lyrics"] = " ".join(w["word"] for w in seg["words"])

    return segments
| |
|
| |
|
| | def save_segments( |
| | segments: list[dict], |
| | output_path: str | Path, |
| | ) -> Path: |
| | """Save segments to a JSON file. |
| | |
| | Args: |
| | segments: List of segment dicts. |
| | output_path: Path to save the JSON file. |
| | |
| | Returns: |
| | Path to the saved JSON file. |
| | """ |
| | output_path = Path(output_path) |
| | output_path.parent.mkdir(parents=True, exist_ok=True) |
| |
|
| | with open(output_path, "w") as f: |
| | json.dump(segments, f, indent=2) |
| |
|
| | return output_path |
| |
|
| |
|
def run(
    data_dir: str | Path,
    beats_per_segment: int = 4,
) -> list[dict]:
    """Full segmentation pipeline: load beats + lyrics, segment, and save.

    Reads ``beats.json`` and ``lyrics.json`` from ``data_dir``, builds the
    segments with ``segment_lyrics`` and writes them to ``segments.json`` in
    the same directory via ``save_segments``.

    Args:
        data_dir: Song data directory containing beats.json and lyrics.json
            (e.g. data/Gone/).
        beats_per_segment: Number of beats per segment (4 = one bar).

    Returns:
        List of segment dicts.
    """
    data_dir = Path(data_dir)

    # JSON is read as UTF-8 explicitly; the locale default encoding (e.g.
    # cp1252 on Windows) would garble or reject non-ASCII lyrics.
    with open(data_dir / "beats.json", encoding="utf-8") as f:
        beats = json.load(f)

    with open(data_dir / "lyrics.json", encoding="utf-8") as f:
        lyrics = json.load(f)

    segments = segment_lyrics(beats, lyrics, beats_per_segment=beats_per_segment)
    save_segments(segments, data_dir / "segments.json")

    return segments
| |
|
| |
|
if __name__ == "__main__":
    import sys

    # Command-line entry point: the first argument is the song data directory.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python -m src.segmenter <data_dir>")
        print(" e.g. python -m src.segmenter data/Gone")
        sys.exit(1)

    result = run(cli_args[0])
    print(f"Created {len(result)} segments:\n")

    # One summary line per segment; segments without words are labelled.
    for entry in result:
        if entry["lyrics"]:
            shown = f'"{entry["lyrics"]}"'
        else:
            shown = "(instrumental)"
        timing = f" Seg {entry['segment']}: {entry['start']:.3f}s - {entry['end']:.3f}s "
        print(f"{timing}({entry['duration']:.3f}s) {shown}")
| |
|