File size: 1,000 Bytes
04183ca
26d0ba9
 
 
 
04183ca
26d0ba9
 
 
 
04183ca
26d0ba9
 
04183ca
26d0ba9
 
 
 
 
 
 
 
 
04183ca
26d0ba9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Language detection model
# Checkpoint identifier handed to both from_pretrained() calls below, so the
# tokenizer and the classifier are loaded from the same checkpoint.
model_name = "papluca/xlm-roberta-base-language-detection"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Retrieve the language labels from the config
# (looked up by predicted class index in detect_language).
id2label = model.config.id2label

# Language detection function
def detect_language(text):
    """Return the most likely language of *text* together with its confidence.

    Args:
        text: The text to classify. ``None`` and empty or whitespace-only
            strings are not sent to the model.

    Returns:
        A string of the form ``"<label> (<probability as a percentage>
        confidence)"``, or ``"No text provided."`` when there is nothing to
        classify.
    """
    # torch.max below always selects some class, so blank input would
    # otherwise be reported as a language with a confidence score.
    if text is None or (isinstance(text, str) and not text.strip()):
        return "No text provided."

    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Turn the logits into probabilities and keep the highest-scoring class.
    probs = F.softmax(logits, dim=1)
    confidence, predicted_class = torch.max(probs, dim=1)
    label = id2label[predicted_class.item()]
    return f"{label} ({confidence.item():.2%} confidence)"

# Gradio interface: detect_language is exposed with a text input and a
# text output.
demo = gr.Interface(
    fn=detect_language,
    inputs="text",
    outputs="text",
    title="Language Detection",
)

# Launch the interface.
demo.launch()