Spaces:
Sleeping
Sleeping
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
| import gradio as gr | |
# M2M100 is a many-to-many multilingual translation model; the 418M checkpoint
# is the one loaded here. Both objects are created once at import time and
# shared by every request handled below.
model_name = "facebook/m2m100_418M"
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
# Letters used in Urdu that are not part of the standard Arabic alphabet.
# Some of them (e.g. U+067E, U+0686, U+06AF, U+06CC) are shared with Persian,
# which this app does not target.
_URDU_MARKER_CHARS = frozenset(
    "\u0679\u067E\u0686\u0688\u0691\u0698\u06A9\u06AF"
    "\u06BA\u06BE\u06C1\u06C3\u06CC\u06D2\u06D3"
)


def _detect_language(text):
    """Guess the M2M100 source-language code of *text* from its script.

    Returns "ur" or "ar" for text containing Arabic-script characters, "hi"
    for Devanagari, and "en" for everything else (including empty input).
    """
    # Arabic block (U+0600-U+06FF) is checked first, as in the original logic.
    if any("\u0600" <= ch <= "\u06FF" for ch in text):
        # Arabic and Urdu share the script but are distinct M2M100 languages;
        # tell them apart by letters that only Urdu orthography uses.
        if any(ch in _URDU_MARKER_CHARS for ch in text):
            return "ur"
        return "ar"
    # Devanagari block (U+0900-U+097F) is treated as Hindi.
    if any("\u0900" <= ch <= "\u097F" for ch in text):
        return "hi"
    return "en"


def translate_to_english(text):
    """Translate an Arabic, Urdu or Hindi message to English for the chat UI.

    The source language is guessed from the Unicode script of the characters
    in ``text``. Text with no Arabic-script or Devanagari characters is
    treated as English and echoed back without calling the model.

    Args:
        text: The user's message.

    Returns:
        A three-line string: the echoed message, the detected language, and
        the English translation.
    """
    lang = _detect_language(text)

    # NOTE(review): the leading symbols in the reply strings look like
    # mis-decoded emoji; they are kept byte-for-byte here. Confirm against
    # the deployed app before changing them.
    if lang == "en":
        # Already English (or an unrecognised script): nothing to translate.
        return f"π£ You: {text}\nπ Detected: English\nβ Translation: {text}"

    # The tokenizer prepends a language token based on src_lang.
    tokenizer.src_lang = lang
    encoded = tokenizer(text, return_tensors="pt")
    # Forcing the first generated token to the English language id makes
    # English the target language.
    generated_tokens = model.generate(
        **encoded, forced_bos_token_id=tokenizer.get_lang_id("en")
    )
    translation = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]
    return f"π£ You: {text}\nπ Detected: {lang.upper()}\nβ Translation: {translation}"
# ---- Gradio UI ----
# One row holding the input box and a read-only output box; pressing Enter in
# the input box runs the translation and writes the reply to the output box.
with gr.Blocks(
    css=".gradio-container {font-family: 'Poppins', sans-serif;}"
) as demo:
    gr.Markdown("## π Multilingual β English Chatbot (Arabic, Urdu, Hindi, English)")

    with gr.Row():
        inp = gr.Textbox(
            placeholder="Type something here...",
            label="Your Message",
        )
        out = gr.Textbox(label="Chat Response", interactive=False)

    # Event listeners must be registered inside the Blocks context.
    inp.submit(fn=translate_to_english, inputs=inp, outputs=out)

# Bind to all interfaces on port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)