Spaces:

SashaSk
/

language-classifier

Running

App Files Files Community

language-classifier / app.py

SashaSk

Upload app.py with huggingface_hub

8b1d1ca verified 3 days ago

raw

history blame contribute delete

2.43 kB

	# app.py — Gradio demo for the multilingual language classifier (Hugging Face Space)
	# Loads the fine-tuned XLM-RoBERTa model from the Hub and serves an interactive UI.
	import os
	import torch
	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# Hub repository to load; the MODEL_ID environment variable overrides the default.
	MODEL_ID = os.getenv("MODEL_ID", "SashaSk/xlm-roberta-language-id")

	# Display names for the 20 ISO codes the model predicts, keyed by code.
	LANG_NAMES = {
	    "ar": "Arabic",
	    "bg": "Bulgarian",
	    "de": "German",
	    "el": "Greek",
	    "en": "English",
	    "es": "Spanish",
	    "fr": "French",
	    "hi": "Hindi",
	    "it": "Italian",
	    "ja": "Japanese",
	    "nl": "Dutch",
	    "pl": "Polish",
	    "pt": "Portuguese",
	    "ru": "Russian",
	    "sw": "Swahili",
	    "th": "Thai",
	    "tr": "Turkish",
	    "ur": "Urdu",
	    "vi": "Vietnamese",
	    "zh": "Chinese",
	}

	# Load the tokenizer and classifier for MODEL_ID once, at import time.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
	model.eval()  # evaluation mode: this script only runs inference
	# Class-index -> label mapping taken from the model config; classify() uses
	# its values as the language codes looked up in LANG_NAMES.
	id2label = model.config.id2label


	def classify(text: str):
	    """Score *text* against every class of the loaded model.

	    The input is tokenized (truncated to at most 256 tokens), run through the
	    model without gradient tracking, and the logits are turned into
	    probabilities with a softmax.

	    Returns a dict with one entry per class index, mapping a display label of
	    the form ``"<name> (<code>)"`` to that class's probability; the name falls
	    back to the raw code when LANG_NAMES has no entry for it. A missing or
	    whitespace-only input yields an empty dict. The result is shaped for
	    ``gr.Label``.
	    """
	    # Nothing to classify: skip the model entirely.
	    if not (text and text.strip()):
	        return {}

	    with torch.no_grad():
	        encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
	        logits = model(**encoded).logits
	        # [0] selects the single sequence in the batch.
	        scores = torch.softmax(logits, dim=-1)[0].tolist()

	    coded_scores = ((id2label[index], score) for index, score in enumerate(scores))
	    return {
	        f"{LANG_NAMES.get(code, code)} ({code})": score
	        for code, score in coded_scores
	    }


	# Sample inputs offered in the UI; each row is a one-element list holding
	# the value for the single text input.
	EXAMPLES = [
	    [sentence]
	    for sentence in (
	        "Bonjour, comment allez-vous aujourd'hui ?",
	        "¿Dónde está la biblioteca más cercana?",
	        "こんにちは、お元気ですか？",
	        "Привет, как у тебя дела?",
	        "مرحبا، كيف حالك اليوم؟",
	        "Guten Tag, schön Sie kennenzulernen.",
	    )
	]

	# Assemble the page: a heading, the text input next to the prediction panel,
	# re-classification on every textbox change, and clickable sample sentences.
	# NOTE(review): nesting was reconstructed from a flattened listing — confirm
	# that the event wiring and examples belong at Blocks level, not inside the Row.
	with gr.Blocks(title="Multilingual Language Classifier") as demo:
	    gr.Markdown(
	        value=(
	            "# 🌍 Multilingual Language Classifier\n"
	            "Fine-tuned XLM-RoBERTa detecting one of 20 languages — "
	            "99.6% test accuracy. Type or pick an example below."
	        )
	    )
	    with gr.Row():
	        inp = gr.Textbox(
	            placeholder="Type text in any of the 20 supported languages…",
	            lines=3,
	            label="Text",
	        )
	        # classify() returns every class; the label widget is set to show five.
	        out = gr.Label(label="Predicted language (top 5)", num_top_classes=5)
	    # Each change event on the textbox re-runs classify and refreshes the label.
	    inp.change(fn=classify, inputs=inp, outputs=out)
	    gr.Examples(examples=EXAMPLES, inputs=inp)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()