Spaces:
Sleeping
Sleeping
| import os | |
| os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0" | |
| from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig | |
| import gradio as gr | |
# ---- Load model & tokenizer ----
model_name = "Mudasir692/mbart-eng-ur"

# Config workaround: when the loaded config has no `early_stopping` attribute,
# or carries it as None, set it to True before the model is built from it.
config = AutoConfig.from_pretrained(model_name)
early_stopping_unset = getattr(config, "early_stopping", None) is None
if early_stopping_unset:
    setattr(config, "early_stopping", True)

tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(
    model_name,
    config=config,
)
# ---- Language mapping ----
# Display name shown in the UI -> language code handed to the tokenizer.
# Insertion order is kept as listed; the UI sorts the names itself.
# NOTE(review): "pa_IN" and "ms_MY" do not appear in the published mBART-50
# language list - confirm this checkpoint's tokenizer actually knows them.
LANG_CODES = dict(
    Urdu="ur_PK",
    Arabic="ar_AR",
    Hindi="hi_IN",
    French="fr_XX",
    German="de_DE",
    Spanish="es_XX",
    Chinese="zh_CN",
    Italian="it_IT",
    Portuguese="pt_XX",
    Russian="ru_RU",
    Japanese="ja_XX",
    Korean="ko_KR",
    Turkish="tr_TR",
    Persian="fa_IR",
    Bengali="bn_IN",
    Punjabi="pa_IN",
    Pashto="ps_AF",
    Malay="ms_MY",
    Indonesian="id_ID",
    Tamil="ta_IN",
)
# ---- Translation function ----
def _detect_source_lang(text):
    """Guess the source-language code from the Unicode blocks present in *text*."""
    # Arabic block (U+0600-U+06FF) is treated as Urdu; the same block is also
    # used by Arabic and Persian, so this is only a coarse heuristic.
    if any("\u0600" <= ch <= "\u06FF" for ch in text):
        return "ur_PK"
    # Devanagari block (U+0900-U+097F) is treated as Hindi.
    if any("\u0900" <= ch <= "\u097F" for ch in text):
        return "hi_IN"
    return "en_XX"


def translate_text(text, target_lang, auto_detect):
    """Translate *text* into the language selected in the UI.

    Args:
        text: Sentence(s) to translate. ``None``, empty, or whitespace-only
            input yields a warning message instead of a model call.
        target_lang: Display name of the target language, i.e. a key of
            ``LANG_CODES``. Unknown names fall back to Urdu ("ur_PK").
        auto_detect: When true, guess the source language from the script of
            *text*; otherwise the source is assumed to be English ("en_XX").

    Returns:
        The decoded translation, or a user-facing warning string when the
        input is empty or the target code is unknown to the tokenizer.
    """
    if not text or not text.strip():
        return "⚠️ Please enter text to translate."

    src_lang = _detect_source_lang(text) if auto_detect else "en_XX"
    tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")

    # A code missing from the vocabulary resolves to the unknown token; forcing
    # that as the first generated token would only produce garbage output.
    tgt_token_id = tokenizer.convert_tokens_to_ids(tgt_lang_code)
    if tgt_token_id is None or tgt_token_id == tokenizer.unk_token_id:
        return f"⚠️ Target language '{target_lang}' is not supported by this model."

    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang_code
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # mBART-50 picks its output language from the first generated token, so the
    # target language id must be forced here; setting `tokenizer.tgt_lang`
    # alone does not influence `generate`.
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tgt_token_id,
        max_length=256,
        num_beams=5,
        early_stopping=True,
    )
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
# ---- Examples ----
# One row per sample: [sentence, target language name, auto-detect flag].
# Every sample is English input, so auto-detection is off for all of them.
examples = [
    [sentence, language, False]
    for sentence, language in (
        ("How are you?", "Urdu"),
        ("Where are you going?", "Arabic"),
        ("This is my new project.", "Hindi"),
        ("I love learning new languages.", "French"),
        ("Can you help me?", "Spanish"),
    )
]
# ---- Gradio Interface ----
# Two-column layout: inputs and the translate button on the left, the result
# and a copy-to-clipboard button on the right, with clickable examples below.
with gr.Blocks(css="""
body {background: linear-gradient(to bottom right, #f7f9fb, #e0f7fa);}
.gr-button-primary {background-color: #1e3799 !important; color: white !important;}
""") as app:
    # The HTML stays at column 0 inside the string: Markdown would render
    # lines indented by four or more spaces as a code block.
    gr.Markdown("""
<div style='text-align:center;'>
<h2> Multi-Language Translator (mBART)</h2>
<p>Translate between English and 20+ languages using a fine-tuned mBART model.</p>
<p style='color:gray;'>Built by <b>Khurram Basharat</b> — powered by Hugging Face & Gradio.</p>
</div>
""")
    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Enter Text",
                placeholder="Type your sentence here...",
                lines=4,
            )
            target_lang = gr.Dropdown(
                sorted(LANG_CODES.keys()),
                label="Select Target Language",
                value="Urdu",
            )
            auto_detect = gr.Checkbox(label="Auto-detect Source Language", value=False)
            translate_btn = gr.Button("Translate")
        with gr.Column(scale=1):
            result_output = gr.Textbox(label="Translation", lines=4)
            copy_btn = gr.Button("📋 Copy Translation")
    gr.Examples(examples, inputs=[text_input, target_lang, auto_detect])

    # ---- Actions ----
    translate_btn.click(
        translate_text,
        inputs=[text_input, target_lang, auto_detect],
        outputs=result_output,
    )
    # Client-side only: no Python callback, the browser copies the current
    # translation to the clipboard.
    # NOTE(review): `js` is the Gradio 4+ keyword (earlier releases called it
    # `_js`) - confirm against the Gradio version pinned for this Space.
    copy_btn.click(
        None,
        inputs=result_output,
        outputs=None,
        js="(text) => navigator.clipboard.writeText(text)",
    )

# ---- Launch app ----
# Bind to all interfaces on port 7860 so the app is reachable from outside the
# container it runs in.
app.launch(server_name="0.0.0.0", server_port=7860)