Spaces:
Running
Running
jhj0517
committed on
Commit
·
79d567a
1
Parent(s):
3b27598
Add lrc word aligning feature
Browse files
modules/utils/subtitle_manager.py
CHANGED
|
@@ -107,12 +107,14 @@ class SubtitlesWriter(ResultWriter):
|
|
| 107 |
max_line_width: Optional[int] = None,
|
| 108 |
max_line_count: Optional[int] = None,
|
| 109 |
highlight_words: bool = False,
|
|
|
|
| 110 |
max_words_per_line: Optional[int] = None,
|
| 111 |
):
|
| 112 |
options = options or {}
|
| 113 |
max_line_width = max_line_width or options.get("max_line_width")
|
| 114 |
max_line_count = max_line_count or options.get("max_line_count")
|
| 115 |
highlight_words = highlight_words or options.get("highlight_words", False)
|
|
|
|
| 116 |
max_words_per_line = max_words_per_line or options.get("max_words_per_line")
|
| 117 |
preserve_segments = max_line_count is None or max_line_width is None
|
| 118 |
max_line_width = max_line_width or 1000
|
|
@@ -195,6 +197,14 @@ class SubtitlesWriter(ResultWriter):
|
|
| 195 |
]
|
| 196 |
)
|
| 197 |
last = end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
else:
|
| 199 |
yield subtitle_start, subtitle_end, subtitle_text
|
| 200 |
else:
|
|
@@ -291,7 +301,10 @@ class WriteLRC(SubtitlesWriter):
|
|
| 291 |
for i, (start, end, text) in enumerate(
|
| 292 |
self.iterate_result(result, options, **kwargs), start=1
|
| 293 |
):
|
| 294 |
-
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
def to_segments(self, file_path: str) -> List[Segment]:
|
| 297 |
segments = []
|
|
@@ -387,6 +400,10 @@ def generate_file(
|
|
| 387 |
|
| 388 |
file_path = os.path.join(output_dir, f"{output_file_name}.{output_format}")
|
| 389 |
file_writer = get_writer(output_format=output_format, output_dir=output_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
file_writer(result=result, output_file_name=output_file_name, **kwargs)
|
| 391 |
content = read_file(file_path)
|
| 392 |
return content, file_path
|
|
|
|
| 107 |
max_line_width: Optional[int] = None,
|
| 108 |
max_line_count: Optional[int] = None,
|
| 109 |
highlight_words: bool = False,
|
| 110 |
+
align_lrc_words: bool = False,
|
| 111 |
max_words_per_line: Optional[int] = None,
|
| 112 |
):
|
| 113 |
options = options or {}
|
| 114 |
max_line_width = max_line_width or options.get("max_line_width")
|
| 115 |
max_line_count = max_line_count or options.get("max_line_count")
|
| 116 |
highlight_words = highlight_words or options.get("highlight_words", False)
|
| 117 |
+
align_lrc_words = align_lrc_words or options.get("align_lrc_words", False)
|
| 118 |
max_words_per_line = max_words_per_line or options.get("max_words_per_line")
|
| 119 |
preserve_segments = max_line_count is None or max_line_width is None
|
| 120 |
max_line_width = max_line_width or 1000
|
|
|
|
| 197 |
]
|
| 198 |
)
|
| 199 |
last = end
|
| 200 |
+
|
| 201 |
+
if align_lrc_words:
|
| 202 |
+
lrc_aligned_words = [f"[{self.format_timestamp(sub['start'])}]{sub['word']}" for sub in subtitle]
|
| 203 |
+
l_start, l_end = self.format_timestamp(subtitle[-1]['start']), self.format_timestamp(subtitle[-1]['end'])
|
| 204 |
+
lrc_aligned_words[-1] = f"[{l_start}]{subtitle[-1]['word']}[{l_end}]"
|
| 205 |
+
lrc_aligned_words = ' '.join(lrc_aligned_words)
|
| 206 |
+
yield None, None, lrc_aligned_words
|
| 207 |
+
|
| 208 |
else:
|
| 209 |
yield subtitle_start, subtitle_end, subtitle_text
|
| 210 |
else:
|
|
|
|
| 301 |
for i, (start, end, text) in enumerate(
|
| 302 |
self.iterate_result(result, options, **kwargs), start=1
|
| 303 |
):
|
| 304 |
+
if "align_lrc_words" in kwargs and kwargs["align_lrc_words"]:
|
| 305 |
+
print(f"{text}\n", file=file, flush=True)
|
| 306 |
+
else:
|
| 307 |
+
print(f"[{start}]{text}[{end}]\n", file=file, flush=True)
|
| 308 |
|
| 309 |
def to_segments(self, file_path: str) -> List[Segment]:
|
| 310 |
segments = []
|
|
|
|
| 400 |
|
| 401 |
file_path = os.path.join(output_dir, f"{output_file_name}.{output_format}")
|
| 402 |
file_writer = get_writer(output_format=output_format, output_dir=output_dir)
|
| 403 |
+
|
| 404 |
+
if isinstance(file_writer, WriteLRC) and kwargs["highlight_words"]:
|
| 405 |
+
kwargs["highlight_words"], kwargs["align_lrc_words"] = False, True
|
| 406 |
+
|
| 407 |
file_writer(result=result, output_file_name=output_file_name, **kwargs)
|
| 408 |
content = read_file(file_path)
|
| 409 |
return content, file_path
|