Spaces:
Running
Running
File size: 2,430 Bytes
# app.py — Gradio demo for the multilingual language classifier (Hugging Face Space)
# Loads the fine-tuned XLM-RoBERTa model from the Hub and serves an interactive UI.
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Hub repository to load; the MODEL_ID environment variable overrides the default.
MODEL_ID = os.getenv("MODEL_ID", "SashaSk/xlm-roberta-language-id")

# Display names for the 20 ISO language codes the model predicts.
LANG_NAMES = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "it": "Italian",
    "ja": "Japanese",
    "nl": "Dutch",
    "pl": "Polish",
    "pt": "Portuguese",
    "ru": "Russian",
    "sw": "Swahili",
    "th": "Thai",
    "tr": "Turkish",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh": "Chinese",
}
# Load the tokenizer and the classifier once, at import time, from MODEL_ID.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # evaluation mode: this app only runs inference

# Lookup from class index to the label string stored in the model config.
id2label = model.config.id2label
def classify(text: str):
    """Score *text* against every class of the model for display in gr.Label.

    Returns a dict mapping a display label of the form "Name (code)" to the
    softmax probability of that class. The name comes from LANG_NAMES; a code
    missing from LANG_NAMES is shown as itself. Empty, whitespace-only, or
    missing input yields an empty dict without running the model.
    """
    # Guard clause: nothing to classify.
    if not (text and text.strip()):
        return {}

    # Inputs longer than 256 tokens are truncated by the tokenizer.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
    with torch.no_grad():
        logits = model(**encoded).logits
        # A single text was encoded, so take the first (only) row.
        distribution = torch.softmax(logits, dim=-1)[0]

    scores = distribution.tolist()
    codes = (id2label[index] for index in range(len(scores)))
    return {
        f"{LANG_NAMES.get(code, code)} ({code})": score
        for code, score in zip(codes, scores)
    }
# Sample sentences offered as ready-made inputs. Each row is a one-item list,
# i.e. one value per example.
EXAMPLES = [
    [sentence]
    for sentence in (
        "Bonjour, comment allez-vous aujourd'hui ?",
        "¿Dónde está la biblioteca más cercana?",
        "こんにちは、お元気ですか?",
        "Привет, как у тебя дела?",
        "مرحبا، كيف حالك اليوم؟",
        "Guten Tag, schön Sie kennenzulernen.",
    )
]
# Page layout: heading, an input/output row, the event wiring, and examples.
# NOTE(review): nesting was reconstructed from a listing with no indentation —
# confirm that the Label belongs inside the Row next to the Textbox.
with gr.Blocks(title="Multilingual Language Classifier") as demo:
    gr.Markdown(
        value=(
            "# 🌍 Multilingual Language Classifier\n"
            "Fine-tuned **XLM-RoBERTa** detecting one of **20 languages** — "
            "**99.6%** test accuracy. Type or pick an example below."
        )
    )

    with gr.Row():
        inp = gr.Textbox(
            lines=3,
            label="Text",
            placeholder="Type text in any of the 20 supported languages…",
        )
        out = gr.Label(label="Predicted language (top 5)", num_top_classes=5)

    # Re-run the classifier whenever the textbox content changes.
    inp.change(fn=classify, inputs=inp, outputs=out)

    # Choosing an example fills the textbox.
    gr.Examples(examples=EXAMPLES, inputs=inp)

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|