File size: 16,573 Bytes
068ed60 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 |
"""
The SubtitleRefactor class in this module provides various utilities for working with subtitle files.
It can be used in different scenarios to perform tasks such as splitting ASS subtitle files, converting subtitles to different formats, and organizing files.
* Example usage for splitting an ASS subtitle file based on styles:
subtitle_tool = SubtitleRefactor("sample_subtitle.ass")
subtitle_tool.split_ass()
* Example usage for converting ASS subtitles to SRT format:
subtitle_tool = SubtitleRefactor("sample_subtitle.ass")
subtitle_tool.ass_to_srt()
* Example usage for moving an SRT subtitle file to a specified directory:
subtitle_tool = SubtitleRefactor("sample_subtitle.srt")
subtitle_tool.move_srt()
* Example usage for converting a text file to SRT format and performing other operations:
subtitle_tool = SubtitleRefactor("sample_text.txt")
subtitle_tool.txt_to_srt(1)
* Example usage for converting numbers in an SRT subtitle file to their word equivalents in Polish:
subtitle_tool = SubtitleRefactor("sample_subtitle.srt")
subtitle_tool.convert_numbers_in_srt()
* Example usage for updating subtitles in an existing ASS file using translated subtitles from an SRT file:
subtitle_tool = SubtitleRefactor("sample_subtitle.srt")
subtitle_tool.srt_to_ass()
"""
import re
from contextlib import suppress
from dataclasses import dataclass
from os import makedirs, path, remove, stat
from shutil import move
from typing import List, Tuple
from nltk.tokenize import sent_tokenize
from pyasstosrt import Subtitle
from pysubs2 import load, SSAEvent, SSAFile
from constants import (WORKING_SPACE,
WORKING_SPACE_OUTPUT,
WORKING_SPACE_TEMP,
WORKING_SPACE_TEMP_MAIN_SUBS,
WORKING_SPACE_TEMP_ALT_SUBS,
console)
from utils.number_in_words import NumberInWords
@dataclass(slots=True)
class SubtitleRefactor:
"""
The SubtitleRefactor class is used for manipulating subtitle files. It provides functionalities such as splitting ASS subtitle files based on styles, converting ASS subtitles to SRT format, and moving SRT files to a specified directory.
Attributes:
- filename (str): The name of the subtitle file to be processed.
- working_space (str, optional): The directory where the subtitle file is located.
- working_space_output (str, optional): The directory where the output files will be saved.
- working_space_temp (str, optional): The directory where temporary files will be saved during processing.
- working_space_temp_main_subs (str, optional): The directory for main subtitles during processing.
- working_space_temp_alt_subs (str, optional): The directory for alternate subtitles during processing.
Methods:
- split_ass(self) -> None: Splits an ASS subtitle file into two files based on selected styles.
- ass_to_srt(self) -> None: Converts ASS subtitle files to SRT format.
- move_srt(self) -> None: Moves an SRT subtitle file to a specified directory.
- txt_to_srt(self, lines_per_caption: int) -> None: Converts a text file to SRT (SubRip Text) format.
- convert_numbers_in_srt(self) -> None: Converts numbers in an SRT subtitle file to their word equivalents in Polish.
- srt_to_ass(self) -> None: Updates subtitles in an existing ASS file using translated subtitles from an SRT file. If the ASS file does not exist, the SRT file is moved to the output directory, and its extension is changed to .ass. After these operations, the original ASS and SRT files are deleted.
"""
filename: str
working_space: str = WORKING_SPACE
working_space_output: str = WORKING_SPACE_OUTPUT
working_space_temp: str = WORKING_SPACE_TEMP
working_space_temp_main_subs = WORKING_SPACE_TEMP_MAIN_SUBS
working_space_temp_alt_subs = WORKING_SPACE_TEMP_ALT_SUBS
def split_ass(self) -> None:
"""
Splits an ASS subtitle file into two files based on selected styles.
"""
self._create_directories()
subs: SSAFile = self._load_subs()
styles: List[str] = self._get_styles(subs)
self._display_styles(styles)
selected_styles: List[str] = self._select_styles(styles)
if not selected_styles:
self._move_subs_to_main()
return
main_subs, alt_subs = self._split_subs(subs, selected_styles)
self._copy_metadata_and_styles(
subs, main_subs, alt_subs, selected_styles)
self._save_subs(main_subs, alt_subs, subs, selected_styles)
self._remove_source_file()
console.print("Podział napisów zakończony pomyślnie.",
style='green_bold')
def _create_directories(self) -> None:
"""
Creates directories if they do not exist.
"""
if not path.exists(self.working_space_temp_main_subs):
makedirs(self.working_space_temp_main_subs, exist_ok=True)
if not path.exists(self.working_space_temp_alt_subs):
makedirs(self.working_space_temp_alt_subs, exist_ok=True)
def _load_subs(self) -> SSAFile:
"""
Loads the subtitle file.
"""
with open(path.join(self.working_space_temp, self.filename), 'r', encoding='utf-8') as file:
return SSAFile.from_file(file)
def _get_styles(self, subs: SSAFile) -> List[str]:
"""
Returns a list of styles in the subtitle file.
"""
styles: List[str] = []
for event in subs:
if event.style not in styles:
styles.append(event.style)
return styles
def _display_styles(self, styles: List[str]) -> None:
"""
Displays the styles to the user.
"""
console.print("PODZIAŁ PLIKU:", style='yellow_bold')
console.print(self.filename, style='white_bold')
console.print("Dostępne style do TTS:", style='yellow_bold')
for i, style in enumerate(styles, start=1):
console.print(
f"[yellow_bold]{i}.[/yellow_bold] {style}", style='white_bold')
def _select_styles(self, styles: List[str]) -> List[str]:
"""
Prompts the user to select styles and returns a list of selected styles.
"""
selected_styles: List[str] = []
while True:
console.print("Wybierz style do zapisu (naciśnij ENTER, aby zakończyć):",
style='green_bold', end=" ")
selection: str = input('')
if not selection:
break
with suppress(ValueError):
selected_index: int = int(selection) - 1
if 0 <= selected_index < len(styles):
selected_styles.append(styles[selected_index])
return selected_styles
def _move_subs_to_main(self) -> None:
"""
Moves the subtitle file to the main_subs directory.
"""
console.print("Nie wybrano żadnych stylów. Przeniesiono napisów do main_subs.",
style='red_bold')
makedirs(self.working_space_temp_main_subs, exist_ok=True)
alt_file_path: str = path.join(
self.working_space_temp, self.filename)
main_file_path: str = path.join(
self.working_space_temp_main_subs, self.filename)
move(alt_file_path, main_file_path)
def _split_subs(self, subs: SSAFile, selected_styles: List[str]) -> Tuple[SSAFile, SSAFile]:
"""
Splits the subtitle file into two files based on the selected styles.
"""
main_subs: SSAFile = SSAFile()
alt_subs: SSAFile = SSAFile()
for event in subs:
if event.style in selected_styles:
main_subs.append(event)
else:
alt_subs.append(event)
return main_subs, alt_subs
def _copy_metadata_and_styles(self, subs: SSAFile, main_subs: SSAFile, alt_subs: SSAFile, selected_styles: List[str]) -> None:
"""
Copies metadata and styles to the output files.
"""
main_subs.info = subs.info
alt_subs.info = subs.info
main_style_names: List[str] = [style_name for style_name in subs.styles.keys()
if style_name in selected_styles]
main_subs.styles.clear()
for style_name in main_style_names:
main_subs.styles[style_name] = subs.styles[style_name]
alt_style_names: List[str] = [style_name for style_name in subs.styles.keys()
if style_name not in selected_styles]
alt_subs.styles.clear()
for style_name in alt_style_names:
alt_subs.styles[style_name] = subs.styles[style_name]
def _save_subs(self, main_subs: SSAFile, alt_subs: SSAFile, subs: SSAFile, selected_styles: List[str]) -> None:
"""
Saves the split subtitle files.
"""
main_output_file: str = path.join(
self.working_space_temp_main_subs, self.filename)
alt_output_file: str = path.join(
self.working_space_temp_alt_subs, self.filename)
main_subs.info = subs.info
alt_subs.info = subs.info
main_style_names: List[str] = [style_name for style_name in subs.styles.keys()
if style_name in selected_styles]
main_subs.styles.clear()
for style_name in main_style_names:
main_subs.styles[style_name] = subs.styles[style_name]
alt_style_names: List[str] = [style_name for style_name in subs.styles.keys()
if style_name not in selected_styles]
alt_subs.styles.clear()
for style_name in alt_style_names:
alt_subs.styles[style_name] = subs.styles[style_name]
with open(main_output_file, 'w', encoding='utf-8') as main_file:
main_file.write(main_subs.to_string(format_='ass'))
with open(alt_output_file, 'w', encoding='utf-8') as alt_file:
alt_file.write(alt_subs.to_string(format_='ass'))
def _remove_source_file(self) -> None:
"""
Removes the source subtitle file.
"""
remove(path.join(self.working_space_temp, self.filename))
def ass_to_srt(self) -> None:
"""
Converts ASS subtitle files to SRT format.
"""
folders = [self.working_space_temp_main_subs,
self.working_space_temp_alt_subs]
for folder in folders:
file_path: str = path.join(folder, self.filename)
if not path.exists(file_path):
continue
sub: Subtitle = Subtitle(file_path)
sub.export()
console.print()
def move_srt(self) -> None:
"""
Moves an SRT subtitle file to a specified directory and removes HTML tags.
"""
target_file_path: str = path.join(
self.working_space_temp_main_subs, self.filename)
if not path.exists(self.working_space_temp_main_subs):
makedirs(self.working_space_temp_main_subs, exist_ok=True)
source_file_path: str = path.join(
self.working_space_temp, self.filename)
# Otwórz plik źródłowy i przeczytaj jego zawartość
with open(source_file_path, 'r', encoding='utf-8') as file:
file_content = file.read()
# Usuń znaczniki HTML
file_content = re.sub(r'<.*?>', '', file_content)
# Zapisz zmodyfikowaną zawartość do pliku docelowego
with open(target_file_path, 'w', encoding='utf-8') as file:
file.write(file_content)
# Usuń plik źródłowy
remove(source_file_path)
def txt_to_srt(self, lines_per_caption: int) -> None:
"""
Converts a text file to SRT (SubRip Text) format.
This method reads a text file, tokenizes the text into sentences using the NLTK library, and writes the sentences into a new SRT file. Groups of sentences are combined into a single caption based on the 'lines_per_caption' parameter. Each group becomes a separate subtitle in the SRT file. The original text file is then deleted, and the filename attribute of the class instance is updated to the new SRT file. Finally, the SRT file is moved to the 'main_subs' directory.
Args:
lines_per_caption (int): The number of sentences to include in each caption.
"""
txt_file_path: str = path.join(self.working_space_temp, self.filename)
srt_file_path: str = txt_file_path.replace('.txt', '.srt')
with open(txt_file_path, 'r', encoding='utf-8') as file:
text: str = file.read()
sentences: List[str] = sent_tokenize(text)
subs: SSAFile = SSAFile()
for i in range(0, len(sentences), lines_per_caption):
caption = ' '.join(sentences[i:i+lines_per_caption]).strip()
event: SSAEvent = SSAEvent(start=0, end=0, text=caption)
subs.append(event)
with open(srt_file_path, 'w', encoding='utf-8') as file:
file.write(subs.to_string(format_='srt'))
remove(txt_file_path)
self.filename = self.filename.replace('.txt', '.srt')
self.move_srt()
def convert_numbers_in_srt(self) -> None:
"""
Converts numbers in an SRT subtitle file to their word equivalents in Polish.
"""
srt_file_path: str = path.join(
self.working_space_temp_main_subs, self.filename)
subs = load(srt_file_path, encoding='utf-8')
number_in_words = NumberInWords()
for i, sub in enumerate(subs):
try:
sub.text = number_in_words.convert_numbers_in_text(sub.text)
except IndexError:
console.print(
f"[red_bold]Wystąpił błąd w napisie {i+1}:[/red_bold] {sub.text}.\n[red_bold]Pomijam ten napis.", style='white_bold')
subs.save(srt_file_path)
console.print(
"\nPrzekonwertowano liczby na słowa:", style='green_bold')
console.print(srt_file_path, '\n', style='white_bold')
def srt_to_ass(self) -> None:
"""
This method updates the subtitles in an existing ASS file using the translated subtitles from an SRT file.
If the ASS file does not exist, the SRT file is moved to the output directory.
After these operations, the original ASS and SRT files are deleted.
"""
srt_file_path: str = path.join(
self.working_space_temp_alt_subs, self.filename)
ass_file_path: str = path.join(
self.working_space_temp_alt_subs, self.filename.replace('.srt', '.ass'))
output_file_path: str = path.join(
self.working_space_output, self.filename.replace('.srt', '.ass'))
if stat(srt_file_path).st_size == 0:
return
srt_subs = load(srt_file_path, encoding='utf-8')
if path.exists(ass_file_path):
ass_subs = SSAFile.load(ass_file_path)
srt_index = 0
for event in ass_subs.events:
if event.type == "Dialogue" and srt_index < len(srt_subs):
srt_lines = srt_subs[srt_index].text.split('\n')
ass_lines = event.text.split('}')
if len(ass_lines) >= len(srt_lines):
for j in range(len(srt_lines)):
last_brace_position = ass_lines[j].rfind('}')
if last_brace_position != -1:
ass_lines[j] = ass_lines[j][:last_brace_position +
1] + srt_lines[j]
event.text = '}'.join(ass_lines)
srt_index += 1
ass_subs.save(output_file_path)
else:
output_file_path = output_file_path.replace('.ass', '.srt')
move(srt_file_path, output_file_path)
console.print("Przeniesiono alternatywne napisy:", style="green_bold")
console.print(output_file_path, style="white_bold")
if path.exists(srt_file_path):
remove(srt_file_path)
if path.exists(ass_file_path):
remove(ass_file_path)
|