Spaces:
Build error
Build error
| from transformers import MarianMTModel, MarianTokenizer | |
| from tqdm import tqdm | |
| import os | |
| import re | |
| import argparse | |
# Load Model and Tokenizer
# Both objects are created from the same checkpoint name at import time and are
# read as module-level globals by the translation code further down.
model_name = "Helsinki-NLP/opus-mt-en-es"
tokenizer, model = (
    MarianTokenizer.from_pretrained(model_name),
    MarianMTModel.from_pretrained(model_name),
)
# Extract & separate timestamp and text
# Compiled once instead of on every call. The pattern matches a leading
# "[<float>-<float>]" marker; the separator is `\s*` rather than `\s+` so that a
# marker with nothing after it (e.g. "[1.00-2.00]") or with text glued directly
# to the closing bracket is still recognised as a timestamp.
_TIMESTAMP_LINE_RE = re.compile(r'\[(\d+\.\d+-\d+\.\d+)\]\s*(.*)')


def extract_timestamp_and_text(line):
    """Split one transcript line into its timestamp marker and its text.

    Args:
        line: A single line without its newline, optionally starting with a
            marker of the form "[12.34-15.60]".

    Returns:
        A ``(timestamp, text)`` tuple. ``timestamp`` is the content between the
        brackets (without the brackets) and ``text`` is whatever follows the
        marker, with the separating whitespace removed. If the line does not
        start with such a marker, ``timestamp`` is ``''`` and ``text`` is the
        unmodified line.
    """
    match = _TIMESTAMP_LINE_RE.match(line)
    if match is None:
        return '', line
    return match.group(1), match.group(2)
# Translate text
def translate_text(text):
    """Translate a multi-line transcript line by line, keeping timestamps.

    Each line is split with ``extract_timestamp_and_text``; only the text part
    is sent through the module-level ``tokenizer`` / ``model`` pair, and the
    timestamp marker (if any) is re-attached in front of the translation.

    Args:
        text: The full source text; lines are separated by ``'\\n'``.

    Returns:
        The translated text with the same number of lines as the input.
        Blank lines stay blank, lines that carry only a timestamp are emitted
        as the bare ``[timestamp]`` marker, and lines without a timestamp are
        emitted as the translation alone.
    """
    translated_lines = []
    for line in tqdm(text.split('\n'), desc="Translating lines", leave=False):
        # Preserve blank lines so the output stays line-aligned with the input.
        if not line.strip():
            translated_lines.append('')
            continue

        timestamp, line_text = extract_timestamp_and_text(line)
        # Only emit the bracket marker when a timestamp was actually parsed;
        # otherwise plain lines would come out prefixed with a spurious "[]".
        prefix = f'[{timestamp}]' if timestamp else ''

        if not line_text.strip():
            # Nothing to translate: keep just the marker.
            translated_lines.append(prefix)
            continue

        model_inputs = tokenizer(line_text, return_tensors="pt", truncation=True, padding="longest")
        translated = model.generate(**model_inputs)
        # One input sentence was tokenized, so the first decoded entry is its translation.
        translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
        translated_lines.append(f'{prefix} {translated_text}' if prefix else translated_text)
    return '\n'.join(translated_lines)
# Main function to translate a file
def translate_file(src_file_path, dst_file_path):
    """Translate the text in one file and write the result to another.

    Args:
        src_file_path: Path of the file holding the source text.
        dst_file_path: Path the translated text is written to (overwritten if
            it already exists).

    Returns:
        None. Progress and failures are reported on standard output; any
        exception raised while reading, translating or writing is caught and
        printed rather than propagated.
    """
    try:
        # Read and write explicitly as UTF-8: the locale default encoding is
        # platform dependent and may be unable to decode the input or to
        # encode accented characters in the translation.
        with open(src_file_path, 'r', encoding='utf-8') as file:
            english_text = file.read()
        spanish_text = translate_text(english_text)
        with open(dst_file_path, 'w', encoding='utf-8') as file:
            file.write(spanish_text)
        print(f"Translation completed: {dst_file_path}")
    except Exception as e:
        # Top-level boundary of the script: report the problem instead of
        # crashing with a traceback.
        print(f"Error processing file: {e}")
def _run_cli():
    """Read the source and destination paths from the command line and translate."""
    cli = argparse.ArgumentParser(description="Translate English text to Spanish")
    # Two required positional arguments, registered in the order they are given.
    positional_args = (
        ("src_file_path", "Path to the source file with English text"),
        ("dst_file_path", "Path to save the translated Spanish text"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    translate_file(options.src_file_path, options.dst_file_path)


if __name__ == "__main__":
    _run_cli()