"""
Output formatters for synchronized lyrics.

Supports multiple standard formats:
- LRC (Enhanced): Word-level timestamps in LRC format
- LRC (Standard): Line-level timestamps only
- JSON: Structured word-level data
- SRT: Subtitle format (line-level)
- ASS: Advanced SubStation Alpha (word-level karaoke)
- Plain text with inline timestamps
"""

import json
from typing import Optional

from lyric_sync.transcribe import TimedWord


def _group_into_lines(
    words: list[TimedWord],
    line_break_gap: float,
    max_words: Optional[int] = None,
) -> list[list[TimedWord]]:
    """
    Split a word sequence into lines wherever a long silence occurs.

    A new line starts when the gap between the previous word's ``end`` and the
    next word's ``start`` exceeds ``line_break_gap``, or (when ``max_words`` is
    given) when the current line already holds ``max_words`` words.

    Args:
        words: Timed words in playback order
        line_break_gap: Seconds of silence that trigger a new line
        max_words: Optional cap on the number of words per line

    Returns:
        List of non-empty word groups; empty list for empty input.
    """
    groups: list[list[TimedWord]] = []
    current: list[TimedWord] = []

    for word in words:
        if current:
            silence = word.start - current[-1].end
            is_full = max_words is not None and len(current) >= max_words
            if silence > line_break_gap or is_full:
                groups.append(current)
                current = []
        current.append(word)

    if current:
        groups.append(current)

    return groups


def _to_centiseconds(seconds: float) -> int:
    """Convert seconds to a whole, non-negative number of centiseconds."""
    # Round (rather than truncate) so float noise such as 1.23 -> 122.999...
    # does not lose a unit; clamp because none of the formats allow negatives.
    return max(0, int(round(float(seconds) * 100)))


def to_enhanced_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Format as Enhanced LRC with word-level timestamps.

    Each output line looks like:
        [MM:SS.cc] <MM:SS.cc> word1 <MM:SS.cc> word2 <MM:SS.cc>

    Args:
        words: Timed words
        line_break_gap: Seconds of gap to trigger a new line (default 1.0s)

    Returns:
        Newline-joined LRC lines, or "" when ``words`` is empty.
    """
    return "\n".join(
        _format_lrc_line(group, group[0].start)
        for group in _group_into_lines(words, line_break_gap)
    )


def _format_lrc_line(words: list[TimedWord], line_start: float) -> str:
    """
    Format a single Enhanced LRC line.

    Args:
        words: Words of the line; must be non-empty
        line_start: Time (seconds) used for the leading ``[..]`` line tag

    Returns:
        The line tag, each word preceded by its ``<start>`` tag, and a closing
        ``<end>`` tag carrying the last word's end time.
    """
    line_ts = _format_lrc_timestamp(line_start)
    body = " ".join(
        f"<{_format_lrc_timestamp(w.start)}> {w.word}" for w in words
    )
    end_ts = _format_lrc_timestamp(words[-1].end)
    return f"[{line_ts}] {body} <{end_ts}>"


def _format_lrc_timestamp(seconds: float) -> str:
    """
    Format seconds as MM:SS.cc (LRC standard).

    The value is rounded to whole centiseconds *before* being split into
    fields, so 59.999 becomes ``01:00.00`` rather than the invalid ``00:60.00``.
    """
    whole_seconds, centis = divmod(_to_centiseconds(seconds), 100)
    minutes, secs = divmod(whole_seconds, 60)
    return f"{minutes:02d}:{secs:02d}.{centis:02d}"


def to_standard_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Format as standard LRC (line-level timestamps only).

    Each output line looks like:
        [MM:SS.cc] Line of lyrics text

    Args:
        words: Timed words
        line_break_gap: Seconds of gap to trigger a new line (default 1.0s)

    Returns:
        Newline-joined LRC lines, or "" when ``words`` is empty.
    """
    lines = []
    for group in _group_into_lines(words, line_break_gap):
        ts = _format_lrc_timestamp(group[0].start)
        text = " ".join(w.word for w in group)
        lines.append(f"[{ts}] {text}")
    return "\n".join(lines)


def to_json(words: list[TimedWord], indent: int = 2) -> str:
    """
    Format as JSON array of word objects.

    [{"word": "hello", "start": 0.123, "end": 0.456, "confidence": 0.95}, ...]

    Args:
        words: Timed words
        indent: Indentation passed to ``json.dumps``

    Returns:
        JSON text; numeric fields are rounded to 3 decimals and non-ASCII
        characters are emitted unescaped.
    """
    data = [
        {
            "word": w.word,
            "start": round(w.start, 3),
            "end": round(w.end, 3),
            "confidence": round(w.confidence, 3),
        }
        for w in words
    ]
    return json.dumps(data, indent=indent, ensure_ascii=False)


def to_srt(
    words: list[TimedWord],
    line_break_gap: float = 1.0,
    max_words_per_line: int = 10,
) -> str:
    """
    Format as SRT subtitles (line-level).

    Each entry looks like:
        1
        00:00:01,230 --> 00:00:03,456
        Line of lyrics text

    Args:
        words: Timed words
        line_break_gap: Seconds of gap to trigger a new subtitle entry
        max_words_per_line: Maximum number of words per subtitle entry

    Returns:
        SRT text with entries separated by a blank line, or "" when ``words``
        is empty.
    """
    entries = []
    groups = _group_into_lines(words, line_break_gap, max_words_per_line)
    for idx, group in enumerate(groups, 1):
        start_ts = _format_srt_timestamp(group[0].start)
        end_ts = _format_srt_timestamp(group[-1].end)
        text = " ".join(w.word for w in group)
        entries.append(f"{idx}\n{start_ts} --> {end_ts}\n{text}\n")
    return "\n".join(entries)


def _format_srt_timestamp(seconds: float) -> str:
    """
    Format seconds as HH:MM:SS,mmm (SRT standard).

    The value is rounded to whole milliseconds before being split into fields;
    truncating the fractional part instead turns 1.23 into ``,229`` because of
    binary floating-point representation.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    whole_seconds, millis = divmod(total_ms, 1000)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def to_ass_karaoke(
    words: list[TimedWord],
    line_break_gap: float = 1.0,
    style_name: str = "Default",
) -> str:
    """
    Format as ASS (Advanced SubStation Alpha) with karaoke timing.

    Uses \\kf tags for word-level karaoke highlighting. Each \\kfN tag
    specifies the duration in centiseconds until the next word highlights
    (for the last word of a line: until that word ends).

    Args:
        words: Timed words
        line_break_gap: Seconds of gap to trigger a new Dialogue event
        style_name: Name of the ASS style defined in the header and referenced
            by every Dialogue event

    Returns:
        Complete ASS script text (header plus one Dialogue line per lyric
        line), or "" when ``words`` is empty.
    """
    if not words:
        return ""

    header = f"""[Script Info]
Title: Synced Lyrics
ScriptType: v4.00+
PlayResX: 1920
PlayResY: 1080

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: {style_name},Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2,1,2,10,10,40,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    events = []
    for group in _group_into_lines(words, line_break_gap):
        start_ts = _format_ass_timestamp(group[0].start)
        end_ts = _format_ass_timestamp(group[-1].end)

        # Karaoke tags play back-to-back from the event start, so each word
        # must last until the NEXT word begins; using only the word's own
        # length would drop the silences and make later words light up early.
        # Working on rounded absolute boundaries also keeps the durations
        # summing exactly to the event length instead of drifting.
        boundaries = [_to_centiseconds(w.start) for w in group]
        boundaries.append(_to_centiseconds(group[-1].end))

        karaoke_parts = []
        for w, begin_cs, next_cs in zip(group, boundaries, boundaries[1:]):
            duration_cs = max(0, next_cs - begin_cs)
            karaoke_parts.append(f"{{\\kf{duration_cs}}}{w.word}")

        text = " ".join(karaoke_parts)
        events.append(
            f"Dialogue: 0,{start_ts},{end_ts},{style_name},,0,0,0,,{text}"
        )

    return header + "\n".join(events)


def _format_ass_timestamp(seconds: float) -> str:
    """
    Format seconds as H:MM:SS.cc (ASS standard).

    The value is rounded to whole centiseconds before being split into fields,
    so float noise cannot shave a centisecond off (e.g. 1.23 -> ``.22``).
    """
    whole_seconds, centis = divmod(_to_centiseconds(seconds), 100)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}.{centis:02d}"


def to_plain_inline(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Plain text with inline timestamps for readability.

    Every word is preceded by its start time:
        [00:01.23] Hello [00:01.60] world
        [00:04.45] this [00:04.70] is

    Args:
        words: Timed words
        line_break_gap: Seconds of gap to trigger a new line (default 1.0s)

    Returns:
        Newline-joined text lines, or "" when ``words`` is empty. The output
        never starts with a blank line, even when the first word begins later
        than ``line_break_gap`` seconds into the track.
    """
    lines = []
    for group in _group_into_lines(words, line_break_gap):
        lines.append(
            " ".join(
                f"[{_format_lrc_timestamp(w.start)}] {w.word}" for w in group
            )
        )
    return "\n".join(lines)