| | import requests |
| | import time |
| | import os |
| | from datetime import datetime |
| | import gradio as gr |
| |
|
| | from modules.utils.paths import TRANSLATION_OUTPUT_DIR, DEFAULT_PARAMETERS_CONFIG_PATH |
| | from modules.utils.subtitle_manager import * |
| | from modules.utils.files_manager import load_yaml, save_yaml |
| |
|
| | """ |
| | This is written with reference to the DeepL API documentation. |
| | If you want to know the information of the DeepL API, see here: https://www.deepl.com/docs-api/documents |
| | """ |
| |
|
| | DEEPL_AVAILABLE_TARGET_LANGS = { |
| | 'Bulgarian': 'BG', |
| | 'Czech': 'CS', |
| | 'Danish': 'DA', |
| | 'German': 'DE', |
| | 'Greek': 'EL', |
| | 'English': 'EN', |
| | 'English (British)': 'EN-GB', |
| | 'English (American)': 'EN-US', |
| | 'Spanish': 'ES', |
| | 'Estonian': 'ET', |
| | 'Finnish': 'FI', |
| | 'French': 'FR', |
| | 'Hungarian': 'HU', |
| | 'Indonesian': 'ID', |
| | 'Italian': 'IT', |
| | 'Japanese': 'JA', |
| | 'Korean': 'KO', |
| | 'Lithuanian': 'LT', |
| | 'Latvian': 'LV', |
| | 'Norwegian (Bokmål)': 'NB', |
| | 'Dutch': 'NL', |
| | 'Polish': 'PL', |
| | 'Portuguese': 'PT', |
| | 'Portuguese (Brazilian)': 'PT-BR', |
| | 'Portuguese (all Portuguese varieties excluding Brazilian Portuguese)': 'PT-PT', |
| | 'Romanian': 'RO', |
| | 'Russian': 'RU', |
| | 'Slovak': 'SK', |
| | 'Slovenian': 'SL', |
| | 'Swedish': 'SV', |
| | 'Turkish': 'TR', |
| | 'Ukrainian': 'UK', |
| | 'Chinese (simplified)': 'ZH' |
| | } |
| |
|
| | DEEPL_AVAILABLE_SOURCE_LANGS = { |
| | 'Automatic Detection': None, |
| | 'Bulgarian': 'BG', |
| | 'Czech': 'CS', |
| | 'Danish': 'DA', |
| | 'German': 'DE', |
| | 'Greek': 'EL', |
| | 'English': 'EN', |
| | 'Spanish': 'ES', |
| | 'Estonian': 'ET', |
| | 'Finnish': 'FI', |
| | 'French': 'FR', |
| | 'Hungarian': 'HU', |
| | 'Indonesian': 'ID', |
| | 'Italian': 'IT', |
| | 'Japanese': 'JA', |
| | 'Korean': 'KO', |
| | 'Lithuanian': 'LT', |
| | 'Latvian': 'LV', |
| | 'Norwegian (Bokmål)': 'NB', |
| | 'Dutch': 'NL', |
| | 'Polish': 'PL', |
| | 'Portuguese (all Portuguese varieties mixed)': 'PT', |
| | 'Romanian': 'RO', |
| | 'Russian': 'RU', |
| | 'Slovak': 'SK', |
| | 'Slovenian': 'SL', |
| | 'Swedish': 'SV', |
| | 'Turkish': 'TR', |
| | 'Ukrainian': 'UK', |
| | 'Chinese': 'ZH' |
| | } |
| |
|
| |
|
| | class DeepLAPI: |
| | def __init__(self, |
| | output_dir: str = TRANSLATION_OUTPUT_DIR |
| | ): |
| | self.api_interval = 1 |
| | self.max_text_batch_size = 50 |
| | self.available_target_langs = DEEPL_AVAILABLE_TARGET_LANGS |
| | self.available_source_langs = DEEPL_AVAILABLE_SOURCE_LANGS |
| | self.output_dir = output_dir |
| |
|
| | def translate_deepl(self, |
| | auth_key: str, |
| | fileobjs: list, |
| | source_lang: str, |
| | target_lang: str, |
| | is_pro: bool = False, |
| | add_timestamp: bool = True, |
| | progress=gr.Progress()) -> list: |
| | """ |
| | Translate subtitle files using DeepL API |
| | Parameters |
| | ---------- |
| | auth_key: str |
| | API Key for DeepL from gr.Textbox() |
| | fileobjs: list |
| | List of files to transcribe from gr.Files() |
| | source_lang: str |
| | Source language of the file to transcribe from gr.Dropdown() |
| | target_lang: str |
| | Target language of the file to transcribe from gr.Dropdown() |
| | is_pro: str |
| | Boolean value that is about pro user or not from gr.Checkbox(). |
| | add_timestamp: bool |
| | Boolean value from gr.Checkbox() that determines whether to add a timestamp at the end of the filename. |
| | progress: gr.Progress |
| | Indicator to show progress directly in gradio. |
| | |
| | Returns |
| | ---------- |
| | A List of |
| | String to return to gr.Textbox() |
| | Files to return to gr.Files() |
| | """ |
| | if fileobjs and isinstance(fileobjs[0], gr.utils.NamedString): |
| | fileobjs = [fileobj.name for fileobj in fileobjs] |
| |
|
| | self.cache_parameters( |
| | api_key=auth_key, |
| | is_pro=is_pro, |
| | source_lang=source_lang, |
| | target_lang=target_lang, |
| | add_timestamp=add_timestamp |
| | ) |
| |
|
| | files_info = {} |
| | for fileobj in fileobjs: |
| | file_path = fileobj |
| | file_name, file_ext = os.path.splitext(os.path.basename(fileobj)) |
| |
|
| | if file_ext == ".srt": |
| | parsed_dicts = parse_srt(file_path=file_path) |
| |
|
| | elif file_ext == ".vtt": |
| | parsed_dicts = parse_vtt(file_path=file_path) |
| |
|
| | batch_size = self.max_text_batch_size |
| | for batch_start in range(0, len(parsed_dicts), batch_size): |
| | batch_end = min(batch_start + batch_size, len(parsed_dicts)) |
| | sentences_to_translate = [dic["sentence"] for dic in parsed_dicts[batch_start:batch_end]] |
| | translated_texts = self.request_deepl_translate(auth_key, sentences_to_translate, source_lang, |
| | target_lang, is_pro) |
| | for i, translated_text in enumerate(translated_texts): |
| | parsed_dicts[batch_start + i]["sentence"] = translated_text["text"] |
| | progress(batch_end / len(parsed_dicts), desc="Translating...") |
| |
|
| | if file_ext == ".srt": |
| | subtitle = get_serialized_srt(parsed_dicts) |
| | elif file_ext == ".vtt": |
| | subtitle = get_serialized_vtt(parsed_dicts) |
| |
|
| | if add_timestamp: |
| | timestamp = datetime.now().strftime("%m%d%H%M%S") |
| | file_name += f"-{timestamp}" |
| |
|
| | output_path = os.path.join(self.output_dir, f"{file_name}{file_ext}") |
| | write_file(subtitle, output_path) |
| |
|
| | files_info[file_name] = {"subtitle": subtitle, "path": output_path} |
| |
|
| | total_result = '' |
| | for file_name, info in files_info.items(): |
| | total_result += '------------------------------------\n' |
| | total_result += f'{file_name}\n\n' |
| | total_result += f'{info["subtitle"]}' |
| | gr_str = f"Done! Subtitle is in the outputs/translation folder.\n\n{total_result}" |
| |
|
| | output_file_paths = [item["path"] for key, item in files_info.items()] |
| | return [gr_str, output_file_paths] |
| |
|
| | def request_deepl_translate(self, |
| | auth_key: str, |
| | text: list, |
| | source_lang: str, |
| | target_lang: str, |
| | is_pro: bool = False): |
| | """Request API response to DeepL server""" |
| | if source_lang not in list(DEEPL_AVAILABLE_SOURCE_LANGS.keys()): |
| | raise ValueError(f"Source language {source_lang} is not supported." |
| | f"Use one of {list(DEEPL_AVAILABLE_SOURCE_LANGS.keys())}") |
| | if target_lang not in list(DEEPL_AVAILABLE_TARGET_LANGS.keys()): |
| | raise ValueError(f"Target language {target_lang} is not supported." |
| | f"Use one of {list(DEEPL_AVAILABLE_TARGET_LANGS.keys())}") |
| |
|
| | url = 'https://api.deepl.com/v2/translate' if is_pro else 'https://api-free.deepl.com/v2/translate' |
| | headers = { |
| | 'Authorization': f'DeepL-Auth-Key {auth_key}' |
| | } |
| | data = { |
| | 'text': text, |
| | 'source_lang': DEEPL_AVAILABLE_SOURCE_LANGS[source_lang], |
| | 'target_lang': DEEPL_AVAILABLE_TARGET_LANGS[target_lang] |
| | } |
| | response = requests.post(url, headers=headers, data=data).json() |
| | time.sleep(self.api_interval) |
| | return response["translations"] |
| |
|
| | @staticmethod |
| | def cache_parameters(api_key: str, |
| | is_pro: bool, |
| | source_lang: str, |
| | target_lang: str, |
| | add_timestamp: bool): |
| | cached_params = load_yaml(DEFAULT_PARAMETERS_CONFIG_PATH) |
| | cached_params["translation"]["deepl"] = { |
| | "api_key": api_key, |
| | "is_pro": is_pro, |
| | "source_lang": source_lang, |
| | "target_lang": target_lang |
| | } |
| | cached_params["translation"]["add_timestamp"] = add_timestamp |
| | save_yaml(cached_params, DEFAULT_PARAMETERS_CONFIG_PATH) |
| |
|