"""Gradio app that translates English SRT subtitles to Hindi with MarianMT.

The pipeline is: parse the SRT text into cues, translate all cue texts in
batches, trim each translation to a length budget derived from the English
text, and re-emit the cues with their original indices and timestamps.
"""

import re

import gradio as gr
from transformers import MarianMTModel, MarianTokenizer

# -------------------------
# Fast Model
# -------------------------

MODEL_NAME = "Helsinki-NLP/opus-mt-en-hi"

# Loaded once at import time and shared by every request.
tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)
model = MarianMTModel.from_pretrained(MODEL_NAME)

# A translated line may be at most this multiple of the English length
# (measured in characters via len()).
MAX_LENGTH_RATIO = 1.3

# -------------------------
# Parse SRT
# -------------------------

_SRT_TIMESTAMP = r"\d{2}:\d{2}:\d{2},\d{3}\s-->\s\d{2}:\d{2}:\d{2},\d{3}"

_SRT_BLOCK = re.compile(
    # Cue index on its own line.
    r"(\d+)\s*\n"
    # Timestamp line. Only horizontal whitespace may trail it, so the blank
    # separator line of an empty cue is not consumed here.
    r"(" + _SRT_TIMESTAMP + r")[ \t]*(?:\n|\Z)"
    # Cue text, possibly spanning several lines.
    r"(.*?)"
    # The text ends where the next cue starts (index line followed by a
    # timestamp line) or at the end of the input. Requiring the timestamp
    # keeps a purely numeric text line from being read as the next index.
    r"(?=\n\s*\d+\s*\n" + _SRT_TIMESTAMP + r"|\Z)",
    re.DOTALL,
)


def parse_srt(srt_text):
    """Split SRT text into a list of cue dictionaries.

    Args:
        srt_text: Full contents of an SRT file. ``None`` or an empty string
            is treated as "no cues".

    Returns:
        A list of dicts with the keys ``"index"`` (cue number, as a string),
        ``"timestamp"`` (the ``start --> end`` line, verbatim) and ``"text"``
        (the cue text with line breaks replaced by single spaces and
        surrounding whitespace removed). Input that contains no recognisable
        cue yields an empty list.
    """
    if not srt_text:
        return []

    # Normalise Windows/old-Mac line endings and drop a leading BOM so the
    # newline-based pattern sees the same shape for every input.
    normalized = srt_text.replace("\r\n", "\n").replace("\r", "\n")
    normalized = normalized.lstrip("\ufeff").strip()

    return [
        {
            "index": idx,
            "timestamp": timestamp,
            "text": text.replace("\n", " ").strip(),
        }
        for idx, timestamp, text in _SRT_BLOCK.findall(normalized)
    ]


# -------------------------
# Shorten Hindi
# -------------------------

def shorten_text(text, max_len):
    """Drop trailing words until ``text`` fits within ``max_len`` characters.

    Args:
        text: The string to shorten.
        max_len: Maximum allowed length, as counted by ``len()``.

    Returns:
        ``text`` unchanged when it already fits or consists of at most one
        word. Otherwise the longest prefix of whole words, joined by single
        spaces, that fits. The first word is always kept, so the result can
        still exceed ``max_len`` when that word alone is too long.
    """
    if len(text) <= max_len:
        return text

    words = text.split()
    if len(words) <= 1:
        return text

    # Build the prefix forwards while tracking its length, instead of
    # re-joining the whole list after every removed word. The last word is
    # never a candidate: an over-long text always loses at least one word.
    kept = [words[0]]
    length = len(words[0])
    for word in words[1:-1]:
        length += 1 + len(word)  # +1 for the joining space
        if length > max_len:
            break
        kept.append(word)
    return " ".join(kept)


# -------------------------
# Batch Translate
# -------------------------

def batch_translate(texts, batch_size=16):
    """Translate strings with the module-level tokenizer and model.

    Args:
        texts: Iterable of strings to translate.
        batch_size: Number of strings sent to the model per ``generate``
            call. Bounding it keeps the padded input tensor small for long
            subtitle files.

    Returns:
        A list of translations aligned with ``texts``. Blank inputs map to
        ``""`` and are never sent to the model.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    texts = list(texts)
    results = [""] * len(texts)

    # Only non-blank strings go to the model; this also means an empty
    # input list never reaches the tokenizer.
    pending = [(pos, text) for pos, text in enumerate(texts) if text.strip()]

    for start in range(0, len(pending), batch_size):
        chunk = pending[start:start + batch_size]
        inputs = tokenizer(
            [text for _, text in chunk],
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        translated = model.generate(**inputs, max_new_tokens=64)
        decoded = tokenizer.batch_decode(translated, skip_special_tokens=True)
        for (pos, _), hindi in zip(chunk, decoded):
            results[pos] = hindi

    return results


# -------------------------
# Main Function
# -------------------------

def translate_srt(srt_text):
    """Translate an English SRT document into a Hindi SRT document.

    Args:
        srt_text: English SRT text.

    Returns:
        SRT text with the original cue indices and timestamps and the
        translated, length-limited cue texts. Cues are separated by a blank
        line. Returns ``""`` when no cue could be parsed from the input.
    """
    subtitles = parse_srt(srt_text)
    if not subtitles:
        return ""

    # FAST batch translation
    hindi_texts = batch_translate([sub["text"] for sub in subtitles])

    blocks = []
    for sub, hindi in zip(subtitles, hindi_texts):
        # 130% rule: the translation may be at most 1.3x the English length.
        max_hindi_len = int(len(sub["text"]) * MAX_LENGTH_RATIO)
        hindi = shorten_text(hindi.strip(), max_hindi_len)
        blocks.append(f'{sub["index"]}\n{sub["timestamp"]}\n{hindi}\n')

    # Each block already ends with a newline, so joining with "\n" leaves
    # one blank line between cues.
    return "\n".join(blocks)


# -------------------------
# UI
# -------------------------

demo = gr.Interface(
    fn=translate_srt,
    inputs=gr.Textbox(
        lines=20,
        label="English SRT",
    ),
    outputs=gr.Textbox(
        lines=20,
        label="Hindi SRT",
    ),
    title="Fast English → Hindi SRT Translator",
    description="Ultra-fast subtitle translation with timestamp preservation and subtitle length control.",
)

if __name__ == "__main__":
    # Start the web server only when run as a script, so importing this
    # module does not launch it.
    demo.launch()