"""Gradio app that translates English text to Urdu with a MarianMT model."""

import gradio as gr
from transformers import MarianMTModel, MarianTokenizer

# Load the pre-trained model and tokenizer for English-to-Urdu translation.
# This happens once, at import time, so every request reuses the same objects.
model_name = 'Helsinki-NLP/opus-mt-en-ur'
model = MarianMTModel.from_pretrained(model_name)
tokenizer = MarianTokenizer.from_pretrained(model_name)

# Token budget used both for the encoder input and for the generated output.
MAX_TOKENS = 512


def _split_to_fit(words, max_tokens):
    """Yield space-joined pieces of *words* that each fit in *max_tokens* tokens.

    A piece that tokenizes to more than *max_tokens* is halved (by word count)
    and each half is checked again. A single word that is still too long is
    yielded as-is; the caller's truncation is the last resort for that case.
    """
    piece = ' '.join(words)
    if len(words) <= 1 or len(tokenizer(piece)["input_ids"]) <= max_tokens:
        yield piece
        return
    middle = len(words) // 2
    yield from _split_to_fit(words[:middle], max_tokens)
    yield from _split_to_fit(words[middle:], max_tokens)


def _translate_piece(piece):
    """Translate one piece of English text and return the decoded Urdu string."""
    # Calling the tokenizer (rather than ``encode``) also returns the attention
    # mask, which is forwarded to ``generate`` together with the input ids.
    inputs = tokenizer(
        piece,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=MAX_TOKENS,
    )
    output_ids = model.generate(
        **inputs,
        max_length=MAX_TOKENS,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def translate_to_urdu(text: str, chunk_size: int = 500) -> str:
    """Translate English *text* to Urdu, chunk by chunk.

    The text is split on whitespace into groups of at most *chunk_size* words.
    Because the limit that matters to the model is counted in tokens, not
    words, a group that tokenizes to more than ``MAX_TOKENS`` is split further
    instead of being truncated, so no part of the input is silently dropped.

    Args:
        text: English text to translate. ``None`` or blank text yields ``""``.
        chunk_size: Maximum number of whitespace-separated words per chunk.

    Returns:
        The translations of all chunks joined with single spaces.

    Raises:
        ValueError: If *chunk_size* is not a positive integer.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    words = text.split() if text else []
    translated_text = []
    for start in range(0, len(words), chunk_size):
        chunk_words = words[start:start + chunk_size]
        for piece in _split_to_fit(chunk_words, MAX_TOKENS):
            translated_text.append(_translate_piece(piece))

    # Join all translated chunks into one complete translation.
    return ' '.join(translated_text)


def gradio_interface(english_text):
    """Gradio callback: return the Urdu translation of *english_text*."""
    return translate_to_urdu(english_text)


# Create the Gradio interface with input and output textboxes.
iface = gr.Interface(
    fn=gradio_interface,  # Function that handles translation
    inputs=gr.Textbox(label="English Text", placeholder="Type English text here..."),
    outputs=gr.Textbox(
        label="Urdu Translation",
        placeholder="Translated Urdu text will appear here...",
        interactive=False,
    ),
    title="English to Urdu Translation",
    description="Enter English text and get the translation in Urdu. Long texts will be split into chunks for translation.",
)

# Launch the app only when run as a script, so importing this module
# (e.g. to reuse ``translate_to_urdu``) does not start a server.
if __name__ == "__main__":
    iface.launch()