lyric-sync / lyric_sync /output.py
rikhoffbauer2's picture
Upload lyric_sync/output.py
4073070 verified
"""
Output formatters for synchronized lyrics.
Supports multiple standard formats:
- LRC (Enhanced): Word-level timestamps in LRC format
- JSON: Structured word-level data
- SRT: Subtitle format (line-level)
- ASS: Advanced SubStation Alpha (word-level karaoke)
- Plain text with inline timestamps
"""
import json
from typing import Optional
from lyric_sync.transcribe import TimedWord
def to_enhanced_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Render words as Enhanced LRC, one text line per sung phrase.

    Each output line has the shape:
        [MM:SS.cc] <MM:SS.cc> word1 <MM:SS.cc> word2 <MM:SS.cc>

    Args:
        words: Timed words, in playback order.
        line_break_gap: A silence longer than this many seconds between the
            end of one word and the start of the next begins a new line.

    Returns:
        The formatted lines joined by newlines, or an empty string when
        ``words`` is empty.
    """
    if not words:
        return ""

    # Walk neighbouring pairs and open a new phrase whenever the silence
    # separating them exceeds the threshold.
    phrases = [[words[0]]]
    for previous, current in zip(words, words[1:]):
        if current.start - previous.end > line_break_gap:
            phrases.append([current])
        else:
            phrases[-1].append(current)

    # The line-level tag is the start time of the phrase's first word.
    return "\n".join(
        _format_lrc_line(phrase, phrase[0].start) for phrase in phrases
    )
def _format_lrc_line(words: list[TimedWord], line_start: float) -> str:
    """
    Format a single Enhanced LRC line.

    Layout:
        [line_start] <start1> word1 <start2> word2 ... <end_of_last_word>

    Args:
        words: The words on this line; must be non-empty, because the
            closing tag is taken from the last word.
        line_start: Time in seconds written in the leading ``[...]`` tag.

    Returns:
        The formatted line without a trailing newline.
    """
    line_ts = _format_lrc_timestamp(line_start)
    # Every word is preceded by a tag holding its own start time.
    body = " ".join(
        f"<{_format_lrc_timestamp(w.start)}> {w.word}" for w in words
    )
    # The closing tag marks where the last word ends.
    end_ts = _format_lrc_timestamp(words[-1].end)
    return f"[{line_ts}] {body} <{end_ts}>"
def _format_lrc_timestamp(seconds: float) -> str:
"""Format seconds as MM:SS.cc (LRC standard)."""
minutes = int(seconds // 60)
secs = seconds % 60
return f"{minutes:02d}:{secs:05.2f}"
def to_standard_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Render words as standard LRC, which carries line-level timestamps only.

    Each output line has the shape:
        [MM:SS.cc] Line of lyrics text

    Args:
        words: Timed words, in playback order.
        line_break_gap: A silence longer than this many seconds between the
            end of one word and the start of the next begins a new line.

    Returns:
        The formatted lines joined by newlines, or an empty string when
        ``words`` is empty.
    """
    if not words:
        return ""

    # First pass: bucket the words into phrases.
    phrases = []
    for word in words:
        starts_new_phrase = (
            not phrases or word.start - phrases[-1][-1].end > line_break_gap
        )
        if starts_new_phrase:
            phrases.append([word])
        else:
            phrases[-1].append(word)

    # Second pass: tag each phrase with the start time of its first word.
    rendered = []
    for phrase in phrases:
        stamp = _format_lrc_timestamp(phrase[0].start)
        lyric = " ".join(item.word for item in phrase)
        rendered.append(f"[{stamp}] {lyric}")
    return "\n".join(rendered)
def to_json(words: list[TimedWord], indent: int = 2) -> str:
    """
    Serialize the words as a JSON array of objects.

    Each element has the keys ``word``, ``start``, ``end`` and
    ``confidence``; the three numeric fields are rounded to 3 decimals:
        [{"word": "hello", "start": 0.123, "end": 0.456, "confidence": 0.95}, ...]

    Args:
        words: Timed words to serialize.
        indent: Indentation passed through to ``json.dumps``.

    Returns:
        The JSON text. Non-ASCII characters are written as-is rather than
        escaped.
    """

    def as_record(entry):
        # Three decimals keeps millisecond precision without float noise.
        return {
            "word": entry.word,
            "start": round(entry.start, 3),
            "end": round(entry.end, 3),
            "confidence": round(entry.confidence, 3),
        }

    payload = list(map(as_record, words))
    return json.dumps(payload, indent=indent, ensure_ascii=False)
def to_srt(words: list[TimedWord], line_break_gap: float = 1.0, max_words_per_line: int = 10) -> str:
    """
    Render words as SRT subtitles, one cue per line of lyrics.

    Each cue has the shape:
        1
        00:00:01,230 --> 00:00:03,456
        Line of lyrics text

    Args:
        words: Timed words, in playback order.
        line_break_gap: A silence longer than this many seconds between the
            end of one word and the start of the next begins a new cue.
        max_words_per_line: A cue that already holds this many words is
            closed before the next word is added.

    Returns:
        The cues separated by blank lines, or an empty string when ``words``
        is empty.
    """
    if not words:
        return ""

    # Bucket the words into cues; a cue ends on a long silence or when full.
    cues = [[words[0]]]
    for word in words[1:]:
        open_cue = cues[-1]
        long_silence = word.start - open_cue[-1].end > line_break_gap
        cue_is_full = len(open_cue) >= max_words_per_line
        if long_silence or cue_is_full:
            cues.append([word])
        else:
            open_cue.append(word)

    # Cues are numbered from 1 and span first-word start to last-word end.
    blocks = []
    for number, cue in enumerate(cues, start=1):
        begins = _format_srt_timestamp(cue[0].start)
        finishes = _format_srt_timestamp(cue[-1].end)
        caption = " ".join(item.word for item in cue)
        blocks.append(f"{number}\n{begins} --> {finishes}\n{caption}\n")
    return "\n".join(blocks)
def _format_srt_timestamp(seconds: float) -> str:
"""Format seconds as HH:MM:SS,mmm (SRT standard)."""
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = seconds % 60
millis = int((secs % 1) * 1000)
return f"{hours:02d}:{minutes:02d}:{int(secs):02d},{millis:03d}"
def to_ass_karaoke(
    words: list[TimedWord],
    line_break_gap: float = 1.0,
    style_name: str = "Default",
) -> str:
    """
    Format as ASS (Advanced SubStation Alpha) with karaoke timing.

    Every line of lyrics becomes one ``Dialogue`` event. Each word in it is
    prefixed with a ``\\kf`` tag whose value is the time, in centiseconds,
    from that word's start until the next word starts (for the last word:
    until it ends).

    Args:
        words: Timed words, in playback order.
        line_break_gap: A silence longer than this many seconds between the
            end of one word and the start of the next begins a new event.
        style_name: Name used for the generated style and referenced by
            every event.

    Returns:
        The complete ASS script (header plus events), or an empty string
        when ``words`` is empty.
    """
    if not words:
        return ""

    header = f"""[Script Info]
Title: Synced Lyrics
ScriptType: v4.00+
PlayResX: 1920
PlayResY: 1080
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: {style_name},Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2,1,2,10,10,40,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    # Group words into lines
    line_groups = []
    for word in words:
        if line_groups and word.start - line_groups[-1][-1].end <= line_break_gap:
            line_groups[-1].append(word)
        else:
            line_groups.append([word])

    events = []
    for line_words in line_groups:
        line_start = line_words[0].start
        start = _format_ass_timestamp(line_start)
        end = _format_ass_timestamp(line_words[-1].end)

        # Karaoke tags are consumed back to back from the event start, so a
        # tag has to cover the silence up to the next word as well; using
        # only the word's own duration would make later highlights run ahead
        # of the audio. Boundaries are rounded as offsets from the line
        # start so per-word rounding errors cannot accumulate.
        boundaries_cs = [round((w.start - line_start) * 100) for w in line_words]
        boundaries_cs.append(round((line_words[-1].end - line_start) * 100))

        karaoke_parts = []
        for w, begin_cs, next_cs in zip(line_words, boundaries_cs, boundaries_cs[1:]):
            # Guard against overlapping/out-of-order timings: a tag duration
            # can never be negative.
            duration_cs = max(0, next_cs - begin_cs)
            karaoke_parts.append(f"{{\\kf{duration_cs}}}{w.word}")

        text = " ".join(karaoke_parts)
        events.append(f"Dialogue: 0,{start},{end},{style_name},,0,0,0,,{text}")

    return header + "\n".join(events)
def _format_ass_timestamp(seconds: float) -> str:
"""Format seconds as H:MM:SS.cc (ASS standard)."""
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = seconds % 60
centis = int((secs % 1) * 100)
return f"{hours}:{minutes:02d}:{int(secs):02d}.{centis:02d}"
def to_plain_inline(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Plain text with an inline timestamp in front of every word.

    Example:
        [00:01.23] Hello [00:01.80] world
        [00:04.10] this [00:04.45] is [00:04.70] a [00:05.00] song

    Args:
        words: Timed words, in playback order.
        line_break_gap: A silence longer than this many seconds between the
            end of one word and the start of the next begins a new line.

    Returns:
        The text lines joined by newlines, or an empty string when ``words``
        is empty.
    """
    if not words:
        return ""

    lines = []
    prev_end = None
    for word in words:
        token = f"[{_format_lrc_timestamp(word.start)}] {word.word}"
        # The first word always opens the first line. Measuring its gap from
        # time 0 would emit a blank leading line (and a stray space) whenever
        # the lyrics start later than ``line_break_gap`` into the track.
        if prev_end is None or word.start - prev_end > line_break_gap:
            lines.append([token])
        else:
            lines[-1].append(token)
        prev_end = word.end

    return "\n".join(" ".join(line) for line in lines)