Spaces:
Sleeping
Sleeping
File size: 3,823 Bytes
535e8a9 ba04c9b 8fbfae9 ba04c9b 535e8a9 ba04c9b 535e8a9 ba04c9b 535e8a9 1638a57 ba04c9b 1638a57 ba04c9b 535e8a9 4a9c2ef 535e8a9 ba04c9b f8ce75e ba04c9b a23ba4b ba04c9b 535e8a9 ba04c9b 535e8a9 ba04c9b 535e8a9 ba04c9b 535e8a9 ba04c9b 535e8a9 ba04c9b 535e8a9 ba04c9b 1638a57 ba04c9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig
import gradio as gr
# ---- Load model & tokenizer ----
# Hub repository that provides the tokenizer files, the config and the weights.
model_name = "Mudasir692/mbart-eng-ur"

tokenizer = MBart50TokenizerFast.from_pretrained(model_name)

# Fix config issue: if the downloaded config has no usable `early_stopping`
# value (attribute missing or set to None), default it to True before the
# model is instantiated from that config.
config = AutoConfig.from_pretrained(model_name)
if getattr(config, "early_stopping", None) is None:
    config.early_stopping = True

model = MBartForConditionalGeneration.from_pretrained(model_name, config=config)
# ---- Language mapping ----
# Maps the language name shown in the UI dropdown to the language code that
# is handed to the tokenizer as the translation target.
# NOTE(review): "pa_IN" and "ms_MY" do not appear in the published mBART-50
# language list -- confirm the loaded tokenizer actually recognises them.
LANG_CODES = {
    "Urdu": "ur_PK",
    "Arabic": "ar_AR",
    "Hindi": "hi_IN",
    "French": "fr_XX",
    "German": "de_DE",
    "Spanish": "es_XX",
    "Chinese": "zh_CN",
    "Italian": "it_IT",
    "Portuguese": "pt_XX",
    "Russian": "ru_RU",
    "Japanese": "ja_XX",
    "Korean": "ko_KR",
    "Turkish": "tr_TR",
    "Persian": "fa_IR",
    "Bengali": "bn_IN",
    "Punjabi": "pa_IN",
    "Pashto": "ps_AF",
    "Malay": "ms_MY",
    "Indonesian": "id_ID",
    "Tamil": "ta_IN",
}
# ---- Translation function ----
def _detect_source_lang(text):
    """Guess the source-language code from the scripts present in *text*.

    Very simple heuristic: any character in U+0600-U+06FF (Arabic block)
    selects "ur_PK"; otherwise any character in U+0900-U+097F (Devanagari
    block) selects "hi_IN"; otherwise "en_XX" is assumed.
    """
    if any("\u0600" <= ch <= "\u06FF" for ch in text):
        return "ur_PK"
    if any("\u0900" <= ch <= "\u097F" for ch in text):
        return "hi_IN"
    return "en_XX"


def translate_text(text, target_lang, auto_detect):
    """Translate *text* into the language selected in the UI.

    Args:
        text: Sentence(s) to translate. ``None``, empty or whitespace-only
            input is rejected with a warning message.
        target_lang: Display name of the target language (a key of
            ``LANG_CODES``). Unknown names fall back to Urdu ("ur_PK").
        auto_detect: When true, the source language is guessed from the
            characters in *text*; otherwise English ("en_XX") is assumed.

    Returns:
        The translated string, or a user-facing warning message when the
        input is empty or the target language is unknown to the tokenizer.
    """
    if not text or not text.strip():
        return "⚠️ Please enter text to translate."

    src_lang = _detect_source_lang(text) if auto_detect else "en_XX"
    tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")

    # Setting `tgt_lang` on the tokenizer does not influence `generate`:
    # the output language is chosen by forcing the target language token as
    # the first generated token, so resolve its id explicitly.
    tgt_lang_id = tokenizer.convert_tokens_to_ids(tgt_lang_code)
    if tgt_lang_id is None or tgt_lang_id == tokenizer.unk_token_id:
        # Forcing an unknown token would only produce garbage output.
        return f"⚠️ {target_lang} is not supported by the loaded model."

    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang_code

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tgt_lang_id,
        max_length=256,
        num_beams=5,
        early_stopping=True,
    )
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
# ---- Examples ----
# (sentence, target language) pairs offered as one-click examples in the UI.
_EXAMPLE_PAIRS = (
    ("How are you?", "Urdu"),
    ("Where are you going?", "Arabic"),
    ("This is my new project.", "Hindi"),
    ("I love learning new languages.", "French"),
    ("Can you help me?", "Spanish"),
)

# Each row supplies the three UI inputs in order: text, target language and
# the auto-detect flag (always off for the bundled examples).
examples = [[sentence, language, False] for sentence, language in _EXAMPLE_PAIRS]
# ---- Gradio Interface ----
# Two-column layout: inputs (text, target language, auto-detect) on the left,
# the translation result and a copy button on the right.
with gr.Blocks(css="""
body {background: linear-gradient(to bottom right, #f7f9fb, #e0f7fa);}
.gr-button-primary {background-color: #1e3799 !important; color: white !important;}
""") as app:
    gr.Markdown("""
<div style='text-align:center;'>
<h2> Multi-Language Translator (mBART)</h2>
<p>Translate between English and 20+ languages using a fine-tuned mBART model.</p>
<p style='color:gray;'>Built by <b>Khurram Basharat</b> — powered by Hugging Face & Gradio.</p>
</div>
""")
    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(label="Enter Text", placeholder="Type your sentence here...", lines=4)
            target_lang = gr.Dropdown(sorted(LANG_CODES), label="Select Target Language", value="Urdu")
            auto_detect = gr.Checkbox(label="Auto-detect Source Language", value=False)
            translate_btn = gr.Button("Translate")
        with gr.Column(scale=1):
            result_output = gr.Textbox(label="Translation", lines=4)
            copy_btn = gr.Button("📋 Copy Translation")
    gr.Examples(examples, inputs=[text_input, target_lang, auto_detect])

    # ---- Actions ----
    translate_btn.click(
        translate_text,
        inputs=[text_input, target_lang, auto_detect],
        outputs=result_output,
    )
    # Copy runs purely in the browser, so no Python callback is attached.
    # The listener keyword is `js` (the older `_js` spelling is not accepted
    # by current Gradio releases), which is why the button used to be inert.
    copy_btn.click(
        None,
        inputs=result_output,
        outputs=None,
        js="(text) => navigator.clipboard.writeText(text)",
    )

# ---- Launch app ----
# Bind to all interfaces on port 7860 so the app is reachable from outside
# the container it runs in.
app.launch(server_name="0.0.0.0", server_port=7860)
|