# Translation / app.py
# amasood's picture
# Create app.py
# 4578245 verified
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
# Checkpoint identifier for the pre-trained English-to-Urdu translation model
model_name = 'Helsinki-NLP/opus-mt-en-ur'
# Both objects are created once at import time and shared by every request;
# the tokenizer and the model are loaded from the same checkpoint name.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
# Function to translate English text to Urdu
def translate_to_urdu(text, chunk_size=500):
    """Translate English text to Urdu, one chunk at a time.

    The text is split on whitespace into chunks of at most ``chunk_size``
    words. The tokenizer operates on sub-word pieces, so a chunk of
    ``chunk_size`` words can encode to more than the 512 tokens allowed for
    the encoded input. Such a chunk is halved repeatedly until every piece
    fits, instead of letting truncation silently drop its tail.

    Args:
        text: English text to translate. ``None``, empty, or
            whitespace-only text yields an empty string.
        chunk_size: Maximum number of whitespace-separated words per chunk.

    Returns:
        The translated chunks joined with single spaces.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Token limit used for the encoded input and for the generated output
    max_tokens = 512

    words = text.split() if text else []
    # Stack of word lists still to translate; reversed so pop() yields them
    # in their original order.
    pending = [words[i:i + chunk_size] for i in range(0, len(words), chunk_size)]
    pending.reverse()

    translated_text = []
    while pending:
        chunk_words = pending.pop()
        chunk = ' '.join(chunk_words)
        # Encode without truncation first so an over-long chunk is detected
        # rather than silently cut.
        input_ids = tokenizer.encode(chunk, return_tensors="pt")
        if input_ids.shape[-1] > max_tokens:
            if len(chunk_words) > 1:
                # Too many tokens: split in half and retry both halves,
                # pushing the second half first so the first is handled next.
                middle = len(chunk_words) // 2
                pending.append(chunk_words[middle:])
                pending.append(chunk_words[:middle])
                continue
            # A single "word" that is still too long cannot be split on
            # whitespace; fall back to truncating it.
            input_ids = tokenizer.encode(
                chunk, return_tensors="pt", truncation=True, max_length=max_tokens
            )
        chunk_translation = model.generate(
            input_ids, max_length=max_tokens, num_beams=4, early_stopping=True
        )
        # Decode and add the translated chunk to the result
        translated_text.append(
            tokenizer.decode(chunk_translation[0], skip_special_tokens=True)
        )
    # Join all translated chunks into one complete translation
    return ' '.join(translated_text)
# Gradio interface
def gradio_interface(english_text):
    """Gradio callback: pass the submitted English text to the translator.

    Returns whatever ``translate_to_urdu`` produces for ``english_text``
    with its default chunk size.
    """
    urdu_text = translate_to_urdu(english_text)
    return urdu_text
# Build the input and output textboxes first, then wire them into the interface
_english_box = gr.Textbox(
    label="English Text",
    placeholder="Type English text here...",
)
_urdu_box = gr.Textbox(
    label="Urdu Translation",
    placeholder="Translated Urdu text will appear here...",
    interactive=False,
)
iface = gr.Interface(
    fn=gradio_interface,  # Function that handles translation
    inputs=_english_box,
    outputs=_urdu_box,
    title="English to Urdu Translation",
    description="Enter English text and get the translation in Urdu. Long texts will be split into chunks for translation.",
)
# Launch the app
iface.launch()