import gradio as gr
from transformers import MarianMTModel, MarianTokenizer

# Hugging Face checkpoint used for English -> Urdu translation.
model_name = 'Helsinki-NLP/opus-mt-en-ur'
# The tokenizer and the model are both loaded once, at import time, from the
# same checkpoint name so that they stay in sync.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Token budget for a single model call. This is the same limit that is passed
# as max_length to both the tokenizer and generate() below.
_MAX_TOKENS = 512


def _split_to_token_budget(words, max_tokens):
    """Return `words` joined into pieces that each fit within `max_tokens`.

    The words are joined with single spaces and tokenized without truncation.
    If the result is longer than `max_tokens`, the word list is halved and
    each half is processed recursively. A single word is never split further;
    if it alone exceeds the budget it is returned as-is and the caller's
    truncation applies.
    """
    piece = ' '.join(words)
    if len(words) <= 1 or len(tokenizer.encode(piece, truncation=False)) <= max_tokens:
        return [piece]
    middle = len(words) // 2
    return (_split_to_token_budget(words[:middle], max_tokens)
            + _split_to_token_budget(words[middle:], max_tokens))


# Function to translate English text to Urdu
def translate_to_urdu(text, chunk_size=500):
    """Translate English `text` to Urdu, processing it in word-based chunks.

    Args:
        text: English input. `None`, an empty string, or whitespace-only text
            yields an empty string without calling the model.
        chunk_size: Maximum number of whitespace-separated words per chunk.
            Must be at least 1.

    Returns:
        The translated chunks joined with single spaces.

    Raises:
        ValueError: If `chunk_size` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not text:
        return ''

    words = text.split()
    translated_text = []

    for start in range(0, len(words), chunk_size):
        chunk_words = words[start:start + chunk_size]
        # A chunk of `chunk_size` words can tokenize to more than _MAX_TOKENS
        # tokens, in which case truncation would silently drop its tail.
        # Splitting oversized chunks first keeps the whole input in the output.
        for piece in _split_to_token_budget(chunk_words, _MAX_TOKENS):
            # truncation stays on as a safety net for a single over-long word.
            input_ids = tokenizer.encode(
                piece, return_tensors="pt", truncation=True, max_length=_MAX_TOKENS
            )
            generated = model.generate(
                input_ids, max_length=_MAX_TOKENS, num_beams=4, early_stopping=True
            )
            # Only one sequence is passed in, so only the first result is decoded.
            translated_text.append(
                tokenizer.decode(generated[0], skip_special_tokens=True)
            )

    # Join all translated pieces into one complete translation
    return ' '.join(translated_text)

# Callback wired into the Gradio interface
def gradio_interface(english_text):
    """Return the Urdu translation of `english_text`.

    Delegates to `translate_to_urdu` using its default chunk size.
    """
    urdu_text = translate_to_urdu(english_text)
    return urdu_text

# Build the web UI: one textbox for the English input and one read-only
# textbox for the Urdu output, both routed through gradio_interface.
iface = gr.Interface(
    title="English to Urdu Translation",
    description="Enter English text and get the translation in Urdu. Long texts will be split into chunks for translation.",
    fn=gradio_interface,
    inputs=gr.Textbox(
        label="English Text",
        placeholder="Type English text here...",
    ),
    outputs=gr.Textbox(
        label="Urdu Translation",
        placeholder="Translated Urdu text will appear here...",
        interactive=False,  # output box is display-only
    ),
)

# Start serving the interface
iface.launch()