rikhoffbauer2 committed on
Commit
4073070
·
verified ·
1 Parent(s): 1be2d0f

Upload lyric_sync/output.py

Browse files
Files changed (1) hide show
  1. lyric_sync/output.py +257 -0
lyric_sync/output.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Output formatters for synchronized lyrics.
3
+
4
+ Supports multiple standard formats:
5
+ - LRC (Enhanced): Word-level timestamps in LRC format
6
+ - JSON: Structured word-level data
7
+ - SRT: Subtitle format (line-level)
8
+ - ASS: Advanced SubStation Alpha (word-level karaoke)
9
+ - Plain text with inline timestamps
10
+ """
11
+
12
+ import json
13
+ from typing import Optional
14
+
15
+ from lyric_sync.transcribe import TimedWord
16
+
17
+
18
def to_enhanced_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Render words as Enhanced LRC with word-level timestamps.

    Each output line is produced by ``_format_lrc_line`` and starts with a
    line-level tag followed by one tag per word:
    [MM:SS.cc] <MM:SS.cc> word1 <MM:SS.cc> word2 <MM:SS.cc> word3

    Args:
        words: Timed words
        line_break_gap: Seconds of silence between the end of one word and
            the start of the next that triggers a new line (default 1.0s)

    Returns:
        The formatted lines joined with newlines, or "" if ``words`` is empty.
    """
    if not words:
        return ""

    rendered = []
    phrase = [words[0]]
    for current in words[1:]:
        # A pause longer than the threshold closes the phrase in progress
        if current.start - phrase[-1].end > line_break_gap:
            rendered.append(_format_lrc_line(phrase, phrase[0].start))
            phrase = []
        phrase.append(current)

    # The phrase still open after the loop is the final line
    rendered.append(_format_lrc_line(phrase, phrase[0].start))
    return "\n".join(rendered)
53
+
54
+
55
def _format_lrc_line(words: list[TimedWord], line_start: float) -> str:
    """
    Format a single Enhanced LRC line.

    Args:
        words: Words of the line, in order. Must be non-empty (the last
            word's end time is read for the closing tag).
        line_start: Time in seconds used for the leading [MM:SS.cc] tag.

    Returns:
        "[line_ts] <ts> word <ts> word ... <end_ts>", where the final tag is
        the end time of the last word.
    """
    line_ts = _format_lrc_timestamp(line_start)
    # Build the per-word parts once and reuse them in the result
    word_parts = [f"<{_format_lrc_timestamp(w.start)}> {w.word}" for w in words]
    # Add end timestamp
    end_ts = _format_lrc_timestamp(words[-1].end)
    return f"[{line_ts}] {' '.join(word_parts)} <{end_ts}>"
65
+
66
+
67
+ def _format_lrc_timestamp(seconds: float) -> str:
68
+ """Format seconds as MM:SS.cc (LRC standard)."""
69
+ minutes = int(seconds // 60)
70
+ secs = seconds % 60
71
+ return f"{minutes:02d}:{secs:05.2f}"
72
+
73
+
74
def to_standard_lrc(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Render words as standard LRC, with one timestamp per line.

    [MM:SS.cc] Line of lyrics text

    Args:
        words: Timed words
        line_break_gap: Seconds of silence between the end of one word and
            the start of the next that triggers a new line (default 1.0s)

    Returns:
        The formatted lines joined with newlines, or "" if ``words`` is empty.
    """
    if not words:
        return ""

    # Split the word stream into phrases wherever the pause is long enough
    groups = [[words[0]]]
    for current in words[1:]:
        if current.start - groups[-1][-1].end > line_break_gap:
            groups.append([])
        groups[-1].append(current)

    # Each phrase is stamped with the start time of its first word
    return "\n".join(
        f"[{_format_lrc_timestamp(group[0].start)}] {' '.join(w.word for w in group)}"
        for group in groups
    )
105
+
106
+
107
def to_json(words: list[TimedWord], indent: int = 2) -> str:
    """
    Serialize the words as a JSON array of per-word objects.

    [{"word": "hello", "start": 0.123, "end": 0.456, "confidence": 0.95}, ...]

    Args:
        words: Timed words
        indent: Indentation passed through to ``json.dumps``

    Returns:
        The JSON text. Numeric fields are rounded to three decimals and
        non-ASCII characters are written unescaped.
    """

    def as_record(item):
        # One flat object per word, in the key order shown in the docstring
        return {
            "word": item.word,
            "start": round(item.start, 3),
            "end": round(item.end, 3),
            "confidence": round(item.confidence, 3),
        }

    records = list(map(as_record, words))
    return json.dumps(records, indent=indent, ensure_ascii=False)
123
+
124
+
125
def to_srt(words: list[TimedWord], line_break_gap: float = 1.0, max_words_per_line: int = 10) -> str:
    """
    Render words as SRT subtitle cues (line-level timing).

    1
    00:00:01,230 --> 00:00:03,456
    Line of lyrics text

    Args:
        words: Timed words
        line_break_gap: Seconds of silence between the end of one word and
            the start of the next that closes the current cue
        max_words_per_line: A cue that already holds this many words is
            closed before the next word is added

    Returns:
        The numbered cues separated by blank lines, or "" if ``words`` is
        empty.
    """
    if not words:
        return ""

    cues = []
    pending = []
    for current in words:
        # Close the open cue on a long pause or once it is full
        if pending and (
            current.start - pending[-1].end > line_break_gap
            or len(pending) >= max_words_per_line
        ):
            cues.append(pending)
            pending = []
        pending.append(current)
    cues.append(pending)

    # A cue spans from its first word's start to its last word's end
    blocks = [
        f"{number}\n"
        f"{_format_srt_timestamp(cue[0].start)} --> {_format_srt_timestamp(cue[-1].end)}\n"
        f"{' '.join(w.word for w in cue)}\n"
        for number, cue in enumerate(cues, start=1)
    ]
    return "\n".join(blocks)
160
+
161
+
162
+ def _format_srt_timestamp(seconds: float) -> str:
163
+ """Format seconds as HH:MM:SS,mmm (SRT standard)."""
164
+ hours = int(seconds // 3600)
165
+ minutes = int((seconds % 3600) // 60)
166
+ secs = seconds % 60
167
+ millis = int((secs % 1) * 1000)
168
+ return f"{hours:02d}:{minutes:02d}:{int(secs):02d},{millis:03d}"
169
+
170
+
171
def to_ass_karaoke(
    words: list[TimedWord],
    line_break_gap: float = 1.0,
    style_name: str = "Default",
) -> str:
    """
    Format as ASS (Advanced SubStation Alpha) with karaoke timing.

    Uses \\kf tags for word-level karaoke highlighting.
    Each \\kfN tag specifies the duration in centiseconds from the start of
    that word until the next word highlights; for the last word of a line it
    runs until that word ends. Pauses between words are therefore part of
    the preceding tag, which keeps the highlight aligned with the audio.

    Args:
        words: Timed words
        line_break_gap: Seconds of silence between the end of one word and
            the start of the next that triggers a new Dialogue line
        style_name: Name of the style defined in the header and referenced
            by every Dialogue line

    Returns:
        The complete ASS script (header plus one Dialogue event per line),
        or "" if ``words`` is empty.
    """
    if not words:
        return ""

    header = f"""[Script Info]
Title: Synced Lyrics
ScriptType: v4.00+
PlayResX: 1920
PlayResY: 1080

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: {style_name},Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H64000000,-1,0,0,0,100,100,0,0,1,2,1,2,10,10,40,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    # Group words into lines
    line_groups = []
    current_line = []
    for word in words:
        if current_line:
            prev_end = current_line[-1].end
            if word.start - prev_end > line_break_gap:
                line_groups.append(current_line)
                current_line = []
        current_line.append(word)
    if current_line:
        line_groups.append(current_line)

    events = []
    for line_words in line_groups:
        line_start = line_words[0].start
        line_end = line_words[-1].end
        start = _format_ass_timestamp(line_start)
        end = _format_ass_timestamp(line_end)

        # Word boundaries in centiseconds relative to the line start: one
        # per word start, plus the end of the last word. Rounding the
        # boundaries (not the individual durations) stops rounding error
        # from accumulating along the line.
        boundaries = [int(round((w.start - line_start) * 100)) for w in line_words]
        boundaries.append(int(round((line_end - line_start) * 100)))

        # Build karaoke text with \kf tags
        karaoke_parts = []
        for w, begin_cs, end_cs in zip(line_words, boundaries, boundaries[1:]):
            # Clamp so overlapping or out-of-order words never yield a
            # negative duration
            duration_cs = max(0, end_cs - begin_cs)
            karaoke_parts.append(f"{{\\kf{duration_cs}}}{w.word}")

        text = " ".join(karaoke_parts)
        events.append(f"Dialogue: 0,{start},{end},{style_name},,0,0,0,,{text}")

    return header + "\n".join(events)
227
+
228
+
229
+ def _format_ass_timestamp(seconds: float) -> str:
230
+ """Format seconds as H:MM:SS.cc (ASS standard)."""
231
+ hours = int(seconds // 3600)
232
+ minutes = int((seconds % 3600) // 60)
233
+ secs = seconds % 60
234
+ centis = int((secs % 1) * 100)
235
+ return f"{hours}:{minutes:02d}:{int(secs):02d}.{centis:02d}"
236
+
237
+
238
def to_plain_inline(words: list[TimedWord], line_break_gap: float = 1.0) -> str:
    """
    Plain text with inline timestamps for readability.

    Every word is prefixed with its own start time, and a new line begins
    whenever the pause since the previous word exceeds ``line_break_gap``:

    [00:01.23] Hello [00:01.60] world
    [00:04.10] this [00:04.35] is [00:04.50] a [00:04.70] song

    Args:
        words: Timed words
        line_break_gap: Seconds of silence between the end of one word and
            the start of the next that triggers a new line (default 1.0s)

    Returns:
        The formatted text, or "" if ``words`` is empty. The output never
        starts with a blank line, even if the first word begins later than
        ``line_break_gap`` seconds into the audio.
    """
    if not words:
        return ""

    lines = [[]]
    prev_end = None  # no previous word yet, so the first word never breaks
    for word in words:
        if prev_end is not None and word.start - prev_end > line_break_gap:
            lines.append([])
        ts = _format_lrc_timestamp(word.start)
        lines[-1].append(f"[{ts}] {word.word}")
        prev_end = word.end

    return "\n".join(" ".join(line) for line in lines)