"""
Output formatters for synchronized lyrics.

Supports multiple standard formats:
- LRC (Enhanced): Word-level timestamps in LRC format
- LRC (Standard): Line-level timestamps only
- JSON: Structured word-level data
- SRT: Subtitle format (line-level)
- ASS: Advanced SubStation Alpha (word-level karaoke)
- Plain text with inline timestamps
"""
|
|
| import json |
| from typing import Optional |
|
|
| from lyric_sync.transcribe import TimedWord |
|
|
|
|
def to_enhanced_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Render timed words as Enhanced LRC (word-level timestamps).

    Each output line is produced by ``_format_lrc_line`` and looks like:
        [MM:SS.cc] <MM:SS.cc> word1 <MM:SS.cc> word2 ... <MM:SS.cc>

    Args:
        words: Timed words, in playback order.
        line_break_gap: A new line starts when the pause between the end of
            one word and the start of the next exceeds this many seconds
            (default 1.0s).

    Returns:
        The LRC lines joined with newlines, or "" when ``words`` is empty.
    """
    if not words:
        return ""

    # First pass: split the word stream into lines at every long pause.
    grouped = []
    for word in words:
        starts_new_line = (
            not grouped or word.start - grouped[-1][-1].end > line_break_gap
        )
        if starts_new_line:
            grouped.append([word])
        else:
            grouped[-1].append(word)

    # Second pass: a line is stamped with the start time of its first word.
    return "\n".join(
        _format_lrc_line(group, group[0].start) for group in grouped
    )
|
|
|
|
def _format_lrc_line(words: list[TimedWord], line_start: float) -> str:
    """
    Format a single Enhanced LRC line.

    Output shape:
        [line_ts] <ts1> word1 <ts2> word2 ... <end_ts>

    where each ``<ts>`` is the start time of the word that follows it and the
    trailing ``<end_ts>`` is the end time of the last word.

    Args:
        words: The words of one line, in order.
        line_start: Time in seconds used for the leading ``[MM:SS.cc]`` tag.

    Returns:
        The formatted line, or "" when ``words`` is empty.
    """
    if not words:
        # Nothing to render; avoids indexing words[-1] on an empty list.
        return ""

    line_ts = _format_lrc_timestamp(line_start)
    # Build the per-word segments once (previously they were computed twice
    # and the first result was discarded).
    word_parts = [f"<{_format_lrc_timestamp(w.start)}> {w.word}" for w in words]
    end_ts = _format_lrc_timestamp(words[-1].end)
    return f"[{line_ts}] {' '.join(word_parts)} <{end_ts}>"
|
|
|
|
def _format_lrc_timestamp(seconds: float) -> str:
    """
    Format seconds as MM:SS.cc (LRC standard).

    The value is rounded to whole centiseconds *before* it is split into
    minutes and seconds, so a value such as 59.999 carries into the minutes
    field ("01:00.00") instead of rendering as the invalid "00:60.00".

    Args:
        seconds: Time offset in seconds.

    Returns:
        The zero-padded "MM:SS.cc" string; the minutes field grows past two
        digits for offsets of 100 minutes or more.
    """
    total_cs = round(seconds * 100)
    minutes, rem_cs = divmod(total_cs, 6000)  # 6000 centiseconds per minute
    secs, centis = divmod(rem_cs, 100)
    return f"{minutes:02d}:{secs:02d}.{centis:02d}"
|
|
|
|
def to_standard_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Render timed words as standard LRC (one timestamp per line).

    Each output line looks like:
        [MM:SS.cc] Line of lyrics text

    Args:
        words: Timed words, in playback order.
        line_break_gap: A new line starts when the pause between the end of
            one word and the start of the next exceeds this many seconds.

    Returns:
        The LRC lines joined with newlines, or "" when ``words`` is empty.
    """
    if not words:
        return ""

    # Collect the words of each lyric line; a long pause opens a new line.
    grouped = []
    for word in words:
        if grouped and not (word.start - grouped[-1][-1].end > line_break_gap):
            grouped[-1].append(word)
        else:
            grouped.append([word])

    rendered = []
    for group in grouped:
        # The line tag carries the start time of the line's first word.
        stamp = _format_lrc_timestamp(group[0].start)
        lyric = " ".join(item.word for item in group)
        rendered.append(f"[{stamp}] {lyric}")

    return "\n".join(rendered)
|
|
|
|
def to_json(words: list[TimedWord], indent: int = 2) -> str:
    """
    Serialize timed words as a JSON array of word objects.

    Example element:
        {"word": "hello", "start": 0.123, "end": 0.456, "confidence": 0.95}

    Args:
        words: Timed words to serialize.
        indent: Indentation passed straight to ``json.dumps``.

    Returns:
        The JSON text. Numeric fields are rounded to three decimals and
        non-ASCII characters are written as-is rather than escaped.
    """

    def as_record(timed):
        # One JSON object per word, with times/confidence rounded to 3 places.
        return {
            "word": timed.word,
            "start": round(timed.start, 3),
            "end": round(timed.end, 3),
            "confidence": round(timed.confidence, 3),
        }

    payload = list(map(as_record, words))
    return json.dumps(payload, indent=indent, ensure_ascii=False)
|
|
|
|
def to_srt(words: list[TimedWord], line_break_gap: float = 1.0, max_words_per_line: int = 10) -> str:
    """
    Render timed words as SRT subtitles (one cue per lyric line).

    Each cue looks like:
        1
        00:00:01,230 --> 00:00:03,456
        Line of lyrics text

    Args:
        words: Timed words, in playback order.
        line_break_gap: A new cue starts when the pause between the end of
            one word and the start of the next exceeds this many seconds.
        max_words_per_line: A new cue also starts once the current cue
            already holds this many words.

    Returns:
        The cues separated by blank lines, or "" when ``words`` is empty.
    """
    if not words:
        return ""

    # Partition the words into cues.
    cues = []
    for word in words:
        open_cue = cues[-1] if cues else None
        if open_cue is None or (
            word.start - open_cue[-1].end > line_break_gap
            or len(open_cue) >= max_words_per_line
        ):
            cues.append([word])
        else:
            open_cue.append(word)

    # Render each cue; it spans from its first word's start to its last
    # word's end, and cues are numbered from 1.
    blocks = []
    for number, cue in enumerate(cues, start=1):
        begin = _format_srt_timestamp(cue[0].start)
        finish = _format_srt_timestamp(cue[-1].end)
        caption = " ".join(item.word for item in cue)
        blocks.append(f"{number}\n{begin} --> {finish}\n{caption}\n")

    return "\n".join(blocks)
|
|
|
|
def _format_srt_timestamp(seconds: float) -> str:
    """
    Format seconds as HH:MM:SS,mmm (SRT standard).

    The value is rounded to the nearest whole millisecond first and then
    split with integer arithmetic. Truncating the fractional part with
    ``int()`` loses a millisecond to floating-point error (1.23 would render
    as "00:00:01,229"), and rounding each field separately cannot carry.

    Args:
        seconds: Time offset in seconds.

    Returns:
        The zero-padded "HH:MM:SS,mmm" string.
    """
    total_ms = round(seconds * 1000)
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
|
|
|
|
def to_ass_karaoke(
    words: list[TimedWord],
    line_break_gap: float = 1.0,
    style_name: str = "Default",
) -> str:
    """
    Format as ASS (Advanced SubStation Alpha) with karaoke timing.

    Uses \\kf tags for word-level karaoke highlighting. Each \\kfN tag holds
    the time in centiseconds from the start of its word until the next word
    of the same line starts (for the last word: until that word ends). Pauses
    between words are therefore included, which keeps the highlight in step
    with the Start/End times of the Dialogue event.

    Args:
        words: Timed words, in playback order.
        line_break_gap: A new Dialogue event starts when the pause between
            the end of one word and the start of the next exceeds this many
            seconds.
        style_name: Name of the generated style; every Dialogue event
            references it.

    Returns:
        The ASS script (header followed by one Dialogue event per line), or
        "" when ``words`` is empty.
    """
    if not words:
        return ""

    # The trailing "" makes the header end with a newline so the first
    # Dialogue event starts on its own line.
    header = "\n".join(
        [
            "[Script Info]",
            "Title: Synced Lyrics",
            "ScriptType: v4.00+",
            "PlayResX: 1920",
            "PlayResY: 1080",
            "",
            "[V4+ Styles]",
            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
            f"Style: {style_name},Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2,1,2,10,10,40,1",
            "",
            "[Events]",
            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
            "",
        ]
    )

    # Group words into lines: a long pause starts a new Dialogue event.
    line_groups = []
    current_line = []
    for word in words:
        if current_line and word.start - current_line[-1].end > line_break_gap:
            line_groups.append(current_line)
            current_line = []
        current_line.append(word)
    if current_line:
        line_groups.append(current_line)

    events = []
    for line_words in line_groups:
        line_start = line_words[0].start
        line_end = line_words[-1].end
        start = _format_ass_timestamp(line_start)
        end = _format_ass_timestamp(line_end)

        # Highlight boundaries: every word start, then the end of the line.
        # Each boundary is rounded as an offset from the line start, so
        # per-word rounding errors cannot accumulate across the line.
        boundaries = [w.start for w in line_words]
        boundaries.append(line_end)
        offsets_cs = [round((t - line_start) * 100) for t in boundaries]

        karaoke_parts = []
        for w, begin_cs, next_cs in zip(line_words, offsets_cs, offsets_cs[1:]):
            # Clamp so overlapping timings never produce a negative duration.
            duration_cs = max(0, next_cs - begin_cs)
            karaoke_parts.append(f"{{\\kf{duration_cs}}}{w.word}")

        text = " ".join(karaoke_parts)
        events.append(f"Dialogue: 0,{start},{end},{style_name},,0,0,0,,{text}")

    return header + "\n".join(events)
|
|
|
|
def _format_ass_timestamp(seconds: float) -> str:
    """
    Format seconds as H:MM:SS.cc (ASS standard).

    The value is rounded to the nearest whole centisecond first and then
    split with integer arithmetic. Truncating the fractional part with
    ``int()`` loses a centisecond to floating-point error (1.23 would render
    as "0:00:01.22").

    Args:
        seconds: Time offset in seconds.

    Returns:
        The "H:MM:SS.cc" string; the hours field is not zero-padded.
    """
    total_cs = round(seconds * 100)
    total_secs, centis = divmod(total_cs, 100)
    total_mins, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}.{centis:02d}"
|
|
|
|
def to_plain_inline(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Plain text with an inline timestamp in front of every word.

        [00:01.23] Hello [00:01.60] world
        [00:03.45] this [00:03.70] is [00:03.90] a [00:04.10] song

    Args:
        words: Timed words, in playback order.
        line_break_gap: A new output line starts when the pause between the
            end of one word and the start of the next exceeds this many
            seconds.

    Returns:
        The text lines joined with newlines, or "" when ``words`` is empty.
        The output never starts with a blank line, even when the first word
        begins later than ``line_break_gap`` seconds into the track.
    """
    if not words:
        return ""

    lines = []
    prev_end = None  # None until the first word has been placed
    for word in words:
        token = f"[{_format_lrc_timestamp(word.start)}] {word.word}"
        # The first word always opens the first line; measuring its gap from
        # time 0.0 would emit a spurious leading newline after a long intro.
        if prev_end is None or word.start - prev_end > line_break_gap:
            lines.append([token])
        else:
            lines[-1].append(token)
        prev_end = word.end

    return "\n".join(" ".join(tokens) for tokens in lines)
|
|