Spaces:
Running
Running
| # app.py — Gradio demo for the multilingual language classifier (Hugging Face Space) | |
| # Loads the fine-tuned XLM-RoBERTa model from the Hub and serves an interactive UI. | |
| import os | |
| import torch | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
# Hub repository to load; the MODEL_ID environment variable overrides the default.
MODEL_ID = os.getenv("MODEL_ID", "SashaSk/xlm-roberta-language-id")

# Display names for the 20 ISO language codes the model predicts.
# Codes missing from this table are shown as the raw code by `classify`.
LANG_NAMES = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "it": "Italian",
    "ja": "Japanese",
    "nl": "Dutch",
    "pl": "Polish",
    "pt": "Portuguese",
    "ru": "Russian",
    "sw": "Swahili",
    "th": "Thai",
    "tr": "Turkish",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh": "Chinese",
}
# Load the tokenizer and classifier once at import time so every request
# served by the UI reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # evaluation mode: this app only runs inference

# Class index -> label mapping taken from the model config; `classify`
# treats each label as a language code when building display names.
id2label = model.config.id2label
def classify(text: str) -> dict:
    """Score *text* against every language class of the model.

    Args:
        text: Raw user input. Only the first 256 tokens are used; longer
            input is truncated by the tokenizer.

    Returns:
        A dict mapping ``"<language name> (<code>)"`` to its softmax
        probability, with one entry per model class. The full distribution
        is returned; the ``gr.Label`` output component is what limits the
        display to the top entries. Empty or whitespace-only input yields
        an empty dict.
    """
    # Guard clause: nothing to classify, so skip the model entirely.
    if not text or not text.strip():
        return {}

    enc = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        # Batch of one -> take row 0 of the (batch, classes) probabilities.
        probs = torch.softmax(model(**enc).logits, dim=-1)[0].tolist()

    codes = (id2label[i] for i in range(len(probs)))
    # Fall back to the raw code when no display name is registered for it.
    return {
        f"{LANG_NAMES.get(code, code)} ({code})": prob
        for code, prob in zip(codes, probs)
    }
# Sample inputs for the examples panel. Each row is a one-element list:
# one value per row for the single text input the examples are bound to.
EXAMPLES = [
    [sentence]
    for sentence in (
        "Bonjour, comment allez-vous aujourd'hui ?",
        "¿Dónde está la biblioteca más cercana?",
        "こんにちは、お元気ですか?",
        "Привет, как у тебя дела?",
        "مرحبا، كيف حالك اليوم؟",
        "Guten Tag, schön Sie kennenzulernen.",
    )
]
# Page layout: intro text, then the input box and prediction label, then
# clickable example sentences.
with gr.Blocks(title="Multilingual Language Classifier") as demo:
    gr.Markdown(
        value=(
            "# 🌍 Multilingual Language Classifier\n"
            "Fine-tuned **XLM-RoBERTa** detecting one of **20 languages** — "
            "**99.6%** test accuracy. Type or pick an example below."
        )
    )

    # NOTE(review): the original indentation was lost in this copy of the
    # file; the Label is assumed to sit inside the Row next to the Textbox
    # — confirm against the deployed app.
    with gr.Row():
        inp = gr.Textbox(
            label="Text",
            lines=3,
            placeholder="Type text in any of the 20 supported languages…",
        )
        out = gr.Label(
            num_top_classes=5,
            label="Predicted language (top 5)",
        )

    # Re-run the classifier whenever the textbox content changes.
    inp.change(fn=classify, inputs=inp, outputs=out)
    gr.Examples(examples=EXAMPLES, inputs=inp)

# Start the server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()