Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import MarianMTModel, MarianTokenizer | |
# English -> Urdu MarianMT checkpoint; the tokenizer and the model are both
# loaded once at import time from the same checkpoint name.
model_name = "Helsinki-NLP/opus-mt-en-ur"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
# Cap on encoded input length (in tokens) per generate() call. This is the
# same 512 limit the tokenizer call enforces via truncation.
_MAX_TOKENS = 512


def _split_to_token_limit(words, max_tokens):
    """Yield space-joined groups of ``words`` that fit within ``max_tokens``.

    A group whose encoded length exceeds ``max_tokens`` is halved (by word
    count) repeatedly until every piece fits. Pieces are yielded in the
    original word order. A single word that is still too long is yielded
    as-is, since it cannot be split on whitespace any further.
    """
    pending = [words]
    while pending:
        group = pending.pop()
        piece = ' '.join(group)
        if len(group) == 1 or len(tokenizer.encode(piece)) <= max_tokens:
            yield piece
            continue
        middle = len(group) // 2
        # Push the right half first so the left half is processed next,
        # which keeps the output in source order.
        pending.append(group[middle:])
        pending.append(group[:middle])


def translate_to_urdu(text, chunk_size=500):
    """Translate English ``text`` to Urdu with the module-level Marian model.

    The text is split on whitespace into chunks of at most ``chunk_size``
    words. Because the tokenizer truncates anything beyond 512 tokens, a
    chunk that encodes to more tokens than that is split further before
    translation, so no part of the input is silently dropped.

    Args:
        text: English input. ``None``, empty, or whitespace-only input
            yields an empty string.
        chunk_size: Maximum number of words per chunk; must be >= 1.

    Returns:
        The translated pieces joined with single spaces.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        # A negative step would make range() empty and silently discard
        # the whole input, so reject it up front.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    words = text.split() if text else []
    if not words:
        return ''

    translated_text = []
    for start in range(0, len(words), chunk_size):
        word_chunk = words[start:start + chunk_size]
        for piece in _split_to_token_limit(word_chunk, _MAX_TOKENS):
            # truncation=True remains as a safety net for a single
            # over-long word that could not be split.
            input_ids = tokenizer.encode(
                piece,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=_MAX_TOKENS,
            )
            generated = model.generate(
                input_ids,
                max_length=_MAX_TOKENS,
                num_beams=4,
                early_stopping=True,
            )
            translated_text.append(
                tokenizer.decode(generated[0], skip_special_tokens=True)
            )

    # Join all translated pieces into one complete translation
    return ' '.join(translated_text)
# Gradio callback
def gradio_interface(english_text):
    """Translate the text submitted through the UI and return the Urdu result.

    Delegates to ``translate_to_urdu`` using its default chunk size.
    """
    urdu_text = translate_to_urdu(english_text)
    return urdu_text
# UI components: an English input box and a non-interactive Urdu output box
english_input = gr.Textbox(
    label="English Text",
    placeholder="Type English text here...",
)
urdu_output = gr.Textbox(
    label="Urdu Translation",
    placeholder="Translated Urdu text will appear here...",
    interactive=False,
)

# Wire the components to the translation callback
iface = gr.Interface(
    fn=gradio_interface,
    inputs=english_input,
    outputs=urdu_output,
    title="English to Urdu Translation",
    description="Enter English text and get the translation in Urdu. Long texts will be split into chunks for translation.",
)

# Start serving the app
iface.launch()