# app.py — Gradio demo for the multilingual language classifier (Hugging Face Space)
# Pulls the fine-tuned XLM-RoBERTa checkpoint from the Hub and exposes it through
# a small interactive UI.

import os

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# The checkpoint can be swapped without touching the code by setting MODEL_ID.
MODEL_ID = os.environ.get("MODEL_ID", "SashaSk/xlm-roberta-language-id")

# Display names for the 20 ISO codes the model predicts.
LANG_NAMES = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "it": "Italian",
    "ja": "Japanese",
    "nl": "Dutch",
    "pl": "Polish",
    "pt": "Portuguese",
    "ru": "Russian",
    "sw": "Swahili",
    "th": "Thai",
    "tr": "Turkish",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh": "Chinese",
}

# Loaded once at import time; .eval() puts the model in inference mode.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID).eval()
id2label = model.config.id2label


def _display_label(code: str) -> str:
    """Format a label as ``"<name> (<code>)"``; unknown codes use the code as the name."""
    return f"{LANG_NAMES.get(code, code)} ({code})"


def classify(text: str):
    """Score ``text`` against every class of the model.

    Returns a ``{display label: probability}`` dict with one entry per model
    class, in class-index order. An empty dict is returned when ``text`` is
    empty, ``None`` or whitespace-only, so the model is never run on blank
    input. The ``gr.Label`` output this feeds is configured to show the top 5.
    """
    if not (text and text.strip()):
        return {}

    with torch.no_grad():
        # Inputs longer than 256 tokens are truncated by the tokenizer.
        encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
        logits = model(**encoded).logits
        # Single input, so take row 0 of the batch dimension.
        scores = torch.softmax(logits, dim=-1)[0]

    return {
        _display_label(id2label[idx]): score
        for idx, score in enumerate(scores.tolist())
    }


# One-click sample inputs shown under the textbox.
EXAMPLES = [
    ["Bonjour, comment allez-vous aujourd'hui ?"],
    ["¿Dónde está la biblioteca más cercana?"],
    ["こんにちは、お元気ですか?"],
    ["Привет, как у тебя дела?"],
    ["مرحبا، كيف حالك اليوم؟"],
    ["Guten Tag, schön Sie kennenzulernen."],
]

with gr.Blocks(title="Multilingual Language Classifier") as demo:
    gr.Markdown(
        "# 🌍 Multilingual Language Classifier\n"
        "Fine-tuned **XLM-RoBERTa** detecting one of **20 languages** — "
        "**99.6%** test accuracy. Type or pick an example below."
    )
    with gr.Row():
        inp = gr.Textbox(
            label="Text",
            lines=3,
            placeholder="Type text in any of the 20 supported languages…",
        )
        out = gr.Label(num_top_classes=5, label="Predicted language (top 5)")

    # Re-classify whenever the textbox content changes (typing or picking an example).
    inp.change(classify, inputs=inp, outputs=out)
    gr.Examples(EXAMPLES, inputs=inp)

if __name__ == "__main__":
    demo.launch()