Spaces:
Configuration error
Configuration error
| import os | |
| import re | |
| from openai import OpenAI | |
# Shared OpenAI client; its API key is read from the OPENAI_KEY environment variable.
client = OpenAI(api_key=os.getenv('OPENAI_KEY'))
| from tqdm import tqdm | |
| import argparse | |
# Set your OpenAI API key in the OPENAI_KEY environment variable before running this script.
# Target languages offered for translation.
languages = [
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Arabic",
    "Japanese",
    "Indonesian",
    "Swedish",
    "Danish",
    "Korean",
    "Polish",
    "Thai",
    "Chinese",
    "Vietnamese",
]
def read_sbv_file(file_path):
    """
    Read an SBV subtitle file and return its lines.

    The file is decoded with ``utf-8-sig`` so that a leading byte-order mark
    is dropped rather than left attached to the first line. ``str.strip()``
    does not remove a BOM, so a BOM-prefixed first timestamp would otherwise
    fail to match the timestamp pattern and the first caption would be lost.
    Files without a BOM are decoded exactly as plain UTF-8.

    Args:
        file_path (str): Path to the SBV file.
    Returns:
        list: The lines of the file, each keeping its trailing newline.
    """
    with open(file_path, 'r', encoding='utf-8-sig') as sbv_file:
        return sbv_file.readlines()
def write_sbv_file(file_path, lines):
    """
    Write subtitle lines to a file as UTF-8, replacing any existing content.

    Args:
        file_path (str): Destination path.
        lines (list): Strings to write; no newline is added between them.
    """
    with open(file_path, 'w', encoding='utf-8') as destination:
        destination.writelines(lines)
def translate_text(text, target_language):
    """
    Translate text into the target language with the OpenAI chat completions API.

    The request goes through the module-level ``client`` and uses the
    "gpt-4o-mini" model. A system message asks for a translation that keeps
    the original formatting and line breaks; ``text`` is sent as the user
    message.

    Args:
        text (str): Text to translate.
        target_language (str): Name of the language to translate into.
    Returns:
        str: Content of the first returned choice, or an empty string when
        that choice carries no text content.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": f"You are a professional translator. Translate the following text to {target_language}. Preserve the original formatting and line breaks."
            },
            {"role": "user", "content": text}
        ]
    )
    content = response.choices[0].message.content
    # The API declares message content as nullable; normalise to '' so callers
    # always receive a string.
    return content if content is not None else ''
def get_base_filename(input_file):
    """
    Return the file name of a path without its directory and final extension.

    Args:
        input_file (str): Path to a file.
    Returns:
        str: The last path component with its last extension removed.
    """
    file_name = os.path.basename(input_file)
    stem, _extension = os.path.splitext(file_name)
    return stem
def create_output_directory(output_dir):
    """
    Make sure the output directory exists.

    Missing parent directories are created as well, and an already existing
    directory is not an error.

    Args:
        output_dir (str): Path of the directory to create.
    """
    os.makedirs(name=output_dir, exist_ok=True)
def parse_sbv_file(lines):
    """
    Split the lines of an SBV file into subtitle blocks.

    The lines are walked with ``parse_block``, which returns the index to
    continue from. Positions where no timestamp is found produce no block.

    Args:
        lines (list): Lines of the SBV file.
    Returns:
        list: One dict per subtitle with the keys 'timestamp' and 'text'.
    """
    parsed = []
    position = 0
    while position < len(lines):
        stamp, caption, position = parse_block(lines, position)
        if not stamp:
            continue
        parsed.append({'timestamp': stamp, 'text': caption})
    return parsed
def parse_block(lines, i):
    """
    Try to read one subtitle block starting at index i.

    The stripped line at ``i`` must be a timestamp pair of the form
    ``H:MM:SS.mmm,H:MM:SS.mmm`` (hours may have one or two digits). If it is,
    the caption text that follows is collected with ``parse_text_lines``.

    Args:
        lines (list): Lines of the SBV file.
        i (int): Index of the line to inspect.
    Returns:
        tuple: ``(timestamp, text, next_index)`` for a block, or
        ``(None, None, i + 1)`` when the line is not a timestamp.
    """
    candidate = lines[i].strip()
    is_timestamp = re.match(r'^\d{1,2}:\d{2}:\d{2}\.\d{3},\d{1,2}:\d{2}:\d{2}\.\d{3}$', candidate)
    if not is_timestamp:
        # Not the start of a block: skip this single line.
        return None, None, i + 1
    caption, next_index = parse_text_lines(lines, i + 1)
    return candidate, caption, next_index
def parse_text_lines(lines, i):
    """
    Collect the caption text of a block and skip the blank lines after it.

    Lines are gathered from index ``i`` until a blank (whitespace-only) line
    or the end of the input. Any run of blank lines that follows is skipped.

    Args:
        lines (list): Lines of the SBV file.
        i (int): Index of the first caption line.
    Returns:
        tuple: ``(text, next_index)`` where ``text`` is the gathered lines
        joined and stripped, and ``next_index`` is the first index after the
        trailing blank lines.
    """
    collected = []
    cursor = i
    # Gather caption lines up to the first blank line.
    while cursor < len(lines):
        current = lines[cursor]
        if not current.strip():
            break
        collected.append(current)
        cursor += 1
    # Step over the blank separator lines.
    while cursor < len(lines) and not lines[cursor].strip():
        cursor += 1
    return ''.join(collected).strip(), cursor
def translate_blocks(blocks, target_language):
    """
    Translate every subtitle block, showing a progress bar while doing so.

    Args:
        blocks (list): Subtitle blocks to translate.
        target_language (str): Name of the language to translate into.
    Returns:
        list: The translated blocks, in the same order as the input.
    """
    progress = tqdm(blocks, desc=f"Translating blocks for {target_language}")
    translated = []
    for block in progress:
        translated.append(translate_block(block, target_language))
    return translated
def translate_block(block, target_language):
    """
    Translate the text of one subtitle block and keep its timestamp.

    Blocks whose text is empty are not sent for translation; their text
    becomes an empty string.

    Args:
        block (dict): Block with the keys 'timestamp' and 'text'.
        target_language (str): Name of the language to translate into.
    Returns:
        dict: New block with the original 'timestamp' and the translated 'text'.
    """
    source_text = block['text']
    if source_text:
        translated_text = translate_text(source_text, target_language)
    else:
        translated_text = ''
    return {'timestamp': block['timestamp'], 'text': translated_text}
def reconstruct_sbv_content(blocks):
    """
    Turn subtitle blocks back into the lines of an SBV file.

    Each block becomes its timestamp line, its caption text (if any) and one
    blank separator line. Blank lines inside the caption text are dropped
    before writing: a blank line ends a block, so a translation that comes
    back with an empty line or a trailing newline would otherwise split the
    caption and leave part of it outside any block. Text without blank lines
    is written unchanged.

    Args:
        blocks (list): Dicts with the keys 'timestamp' and 'text'.
    Returns:
        list: Strings ready to be passed to ``writelines``.
    """
    output_lines = []
    for block in blocks:
        output_lines.append(block['timestamp'] + '\n')
        text = block['text'] or ''
        # Keep only lines with visible content so the caption stays one block.
        caption_lines = [line for line in text.split('\n') if line.strip()]
        if caption_lines:
            output_lines.append('\n'.join(caption_lines) + '\n')
        output_lines.append('\n')
    return output_lines
def get_output_file_path(output_dir, base_filename, lang):
    """
    Build the path of the translated file for one language.

    Args:
        output_dir (str): Directory that will hold the file.
        base_filename (str): Name of the input file without extension.
        lang (str): Language name appended to the base name.
    Returns:
        str: ``<output_dir>/<base_filename>_<lang>.sbv``.
    """
    file_name = f"{base_filename}_{lang}.sbv"
    return os.path.join(output_dir, file_name)
def translate_to_language(input_file, output_dir, language):
    """
    Produce a translated copy of one SBV subtitle file in one language.

    The input is read and parsed into blocks, each block is translated, and
    the result is written to the output directory (created if needed) under
    a name made of the input's base name and the language.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Directory where the translated file is saved.
        language (str): Target language for translation.
    Returns:
        str: Path to the translated output file.
    """
    source_lines = read_sbv_file(input_file)
    stem = get_base_filename(input_file)
    create_output_directory(output_dir)
    subtitle_blocks = parse_sbv_file(source_lines)

    print(f"\nStarting translation for language: {language}")
    translated = translate_blocks(subtitle_blocks, language)

    destination = get_output_file_path(output_dir, stem, language)
    write_sbv_file(destination, reconstruct_sbv_content(translated))
    print(f"Translated {language} file saved: {destination}")
    return destination
def translate_to_languages(input_file, output_dir, selected_languages):
    """
    Produce translated copies of one SBV subtitle file in several languages.

    The output directory is created first; the file is then translated into
    each language in turn while a progress bar tracks the languages.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Directory where the translated files are saved.
        selected_languages (list): Target languages for translation.
    Returns:
        list: Paths to the translated output files, in language order.
    """
    create_output_directory(output_dir)
    return [
        translate_to_language(input_file, output_dir, lang)
        for lang in tqdm(selected_languages, desc="Translating languages")
    ]
# Command-line entry point: parse the arguments and translate the input file.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Translate SBV subtitle files to multiple languages.")
    cli.add_argument("input_file", help="Path to the input .sbv file")
    cli.add_argument("output_dir", help="Path to the output directory")
    cli.add_argument(
        "--languages",
        nargs="+",
        choices=languages,
        default=languages,
        help="Languages to translate to",
    )
    options = cli.parse_args()

    created = translate_to_languages(options.input_file, options.output_dir, options.languages)
    print(f"\nTranslation completed. {len(created)} files created.")