"""Translate SBV subtitle files into one or more languages via the OpenAI API.

An SBV file is a sequence of cues. Each cue is a timestamp line of the form
``H:MM:SS.mmm,H:MM:SS.mmm`` followed by one or more text lines, and cues are
separated by blank lines. This script keeps the timestamps, translates the
text of every cue, and writes one ``<name>_<Language>.sbv`` file per language.
"""
import argparse
import os
import re

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is cosmetic; keep the tool usable without tqdm.
    def tqdm(iterable, **_kwargs):
        """Fallback for a missing tqdm: iterate without a progress bar."""
        return iterable

# The OpenAI client is created lazily by _get_client() so that importing this
# module (or running ``--help``) needs neither the SDK nor an API key.
# Set your OpenAI API key in the OPENAI_KEY environment variable.
# Assigning a ready-made client object to this name before the first
# translation is honoured.
client = None

# List of target languages
languages = [
    "Spanish", "French", "German", "Italian", "Portuguese", "Arabic",
    "Japanese", "Indonesian", "Swedish", "Danish", "Korean", "Polish",
    "Thai", "Chinese", "Vietnamese",
]

# A cue header: "start,end", each formatted as H:MM:SS.mmm (hours 1-2 digits).
_TIMESTAMP_RE = re.compile(
    r'^\d{1,2}:\d{2}:\d{2}\.\d{3},\d{1,2}:\d{2}:\d{2}\.\d{3}$'
)


def _get_client():
    """Return the shared OpenAI client, creating it on first use."""
    global client
    if client is None:
        # Imported here so the SDK is only required once a translation runs.
        from openai import OpenAI
        client = OpenAI(api_key=os.environ.get('OPENAI_KEY'))
    return client


def read_sbv_file(file_path):
    """Read an SBV file and return its lines (line endings preserved).

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark is
    removed; otherwise it would stay attached to the first timestamp line and
    the first cue would not be recognised. Files without a BOM decode exactly
    as plain UTF-8.

    Args:
        file_path (str): Path to the SBV file.

    Returns:
        list: The lines of the file.
    """
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return file.readlines()


def write_sbv_file(file_path, lines):
    """Write ``lines`` to ``file_path`` as UTF-8, replacing any existing file.

    Args:
        file_path (str): Destination path.
        lines (list): Strings to write; each must carry its own newline.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(lines)


def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with a chat completion.

    Args:
        text (str): Text of a single cue.
        target_language (str): Name of the language to translate into.

    Returns:
        str: The translated text, or ``''`` when the response carries no
        message content.
    """
    response = _get_client().chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": f"You are a professional translator. Translate the following text to {target_language}. Preserve the original formatting and line breaks."
            },
            {"role": "user", "content": text}
        ]
    )
    # The message content may be None; normalise so callers always get a str.
    return response.choices[0].message.content or ''


def get_base_filename(input_file):
    """Return the file name of ``input_file`` without directory or extension."""
    return os.path.splitext(os.path.basename(input_file))[0]


def create_output_directory(output_dir):
    """Create ``output_dir`` (and missing parents); do nothing if it exists."""
    os.makedirs(output_dir, exist_ok=True)


def parse_sbv_file(lines):
    """Split the lines of an SBV file into cues.

    Lines that are not part of a cue (anything before the first timestamp,
    stray text without a header) are skipped.

    Args:
        lines (list): Lines as returned by ``read_sbv_file``.

    Returns:
        list: One ``{'timestamp': str, 'text': str}`` dict per cue, in order.
    """
    blocks = []
    i = 0
    while i < len(lines):
        timestamp, text, i = parse_block(lines, i)
        if timestamp:
            blocks.append({'timestamp': timestamp, 'text': text})
    return blocks


def parse_block(lines, i):
    """Try to parse one cue starting at ``lines[i]``.

    Args:
        lines (list): All lines of the file.
        i (int): Index of the line to examine; must be a valid index.

    Returns:
        tuple: ``(timestamp, text, next_index)``. When ``lines[i]`` is not a
        timestamp line the result is ``(None, None, i + 1)`` so the caller
        simply advances by one line.
    """
    line = lines[i].strip()
    if _TIMESTAMP_RE.match(line):
        text, next_index = parse_text_lines(lines, i + 1)
        return line, text, next_index
    return None, None, i + 1


def parse_text_lines(lines, i):
    """Collect the text of a cue starting at ``lines[i]``.

    Reads lines up to the next blank line, then skips every blank line that
    follows so the returned index points at the next cue (or past the end).

    Args:
        lines (list): All lines of the file.
        i (int): Index of the first text line.

    Returns:
        tuple: ``(text, next_index)`` where ``text`` is the joined text lines
        with surrounding whitespace stripped.
    """
    text_lines = []
    while i < len(lines) and lines[i].strip() != '':
        text_lines.append(lines[i])
        i += 1
    while i < len(lines) and lines[i].strip() == '':
        i += 1
    return ''.join(text_lines).strip(), i


def translate_blocks(blocks, target_language):
    """Translate every cue in ``blocks``, showing a progress bar.

    Args:
        blocks (list): Cues as produced by ``parse_sbv_file``.
        target_language (str): Name of the language to translate into.

    Returns:
        list: New cue dicts with the same timestamps and translated text.
    """
    return [
        translate_block(block, target_language)
        for block in tqdm(blocks, desc=f"Translating blocks for {target_language}")
    ]


def translate_block(block, target_language):
    """Translate a single cue; cues without text are passed through untouched.

    Args:
        block (dict): A ``{'timestamp': ..., 'text': ...}`` cue.
        target_language (str): Name of the language to translate into.

    Returns:
        dict: A new cue with the original timestamp and the translated text.
    """
    # Skip the API call entirely for empty cues.
    translated_text = translate_text(block['text'], target_language) if block['text'] else ''
    return {'timestamp': block['timestamp'], 'text': translated_text}


def reconstruct_sbv_content(blocks):
    """Serialise cues back into SBV lines.

    Each cue becomes its timestamp line, its text (if any) and one blank
    separator line. Blank lines inside a cue's text are dropped: in SBV a
    blank line ends the cue, so a translation that contains one (or ends with
    a newline) would otherwise leave orphaned text in the output.

    Args:
        blocks (list): Cue dicts; ``text`` may be empty or ``None``.

    Returns:
        list: Strings suitable for ``write_sbv_file``.
    """
    output_lines = []
    for block in blocks:
        output_lines.append(block['timestamp'] + '\n')
        text = block['text'] or ''
        # Split on '\n' only, so any other characters in the text are kept.
        kept = [line for line in text.split('\n') if line.strip()]
        if kept:
            output_lines.append('\n'.join(kept) + '\n')
        output_lines.append('\n')
    return output_lines


def get_output_file_path(output_dir, base_filename, lang):
    """Return ``<output_dir>/<base_filename>_<lang>.sbv``."""
    return os.path.join(output_dir, f"{base_filename}_{lang}.sbv")


def translate_to_language(input_file, output_dir, language):
    """
    Translate an SBV subtitle file to a single language.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Path to the directory where translated file will be saved.
        language (str): Target language for translation.

    Returns:
        str: Path to the translated output file.
    """
    lines = read_sbv_file(input_file)
    base_filename = get_base_filename(input_file)
    create_output_directory(output_dir)

    blocks = parse_sbv_file(lines)
    print(f"\nStarting translation for language: {language}")
    translated_blocks = translate_blocks(blocks, language)
    output_content = reconstruct_sbv_content(translated_blocks)

    output_file = get_output_file_path(output_dir, base_filename, language)
    write_sbv_file(output_file, output_content)
    print(f"Translated {language} file saved: {output_file}")
    return output_file


def translate_to_languages(input_file, output_dir, selected_languages):
    """
    Translate an SBV subtitle file to multiple languages.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Path to the directory where translated files will be saved.
        selected_languages (list): List of target languages for translation.

    Returns:
        list: Paths to the translated output files.
    """
    create_output_directory(output_dir)
    translated_files = []
    for lang in tqdm(selected_languages, desc="Translating languages"):
        output_file = translate_to_language(input_file, output_dir, lang)
        translated_files.append(output_file)
    return translated_files


def main():
    """Command-line entry point: parse arguments and run the translations."""
    parser = argparse.ArgumentParser(description="Translate SBV subtitle files to multiple languages.")
    parser.add_argument("input_file", help="Path to the input .sbv file")
    parser.add_argument("output_dir", help="Path to the output directory")
    parser.add_argument("--languages", nargs="+", choices=languages, default=languages, help="Languages to translate to")
    args = parser.parse_args()

    translated_files = translate_to_languages(args.input_file, args.output_dir, args.languages)
    print(f"\nTranslation completed. {len(translated_files)} files created.")


if __name__ == "__main__":
    main()