File size: 1,399 Bytes
e055290
 
 
 
 
 
 
 
 
b71ebc3
 
 
 
e055290
 
 
 
 
 
 
 
 
 
 
 
 
b71ebc3
e055290
b71ebc3
 
 
 
e055290
 
 
e9a5c3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e055290
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Identifier of the pretrained checkpoint passed to both loaders below.
MODEL_NAME = "GhadeerALbadani/mmbert-Multilingual_detection_of_hate_speech"

# Loaded once at import time so every prediction request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Display names for the classifier outputs; position i names class index i.
labels = ["Not Hate Speech", "Hate Speech"]

def predict(text):
    """Score ``text`` with the module-level classifier.

    The text is tokenized (truncated, returned as PyTorch tensors), run
    through ``model`` without gradient tracking, and the logits are turned
    into probabilities with a softmax over the class dimension.

    Args:
        text: The input to classify, passed straight to ``tokenizer``.

    Returns:
        A dict mapping ``labels[0]`` and ``labels[1]`` to the probability
        (as a Python float) of class index 0 and 1 respectively.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no need to record operations for autograd.
    with torch.no_grad():
        result = model(**encoded)

    # Softmax across classes, then take the first (only) row of the batch.
    class_probs = torch.softmax(result.logits, dim=1)[0]

    return {labels[idx]: float(class_probs[idx]) for idx in (0, 1)}

# Input widget: a four-line text box with a hint for the user.
_text_input = gr.Textbox(lines=4, placeholder="Enter text here...")

# Output widget: a label view limited to the two top-scoring classes.
_label_output = gr.Label(num_top_classes=2)

# Sample inputs offered in the UI (English, Arabic and French sentences).
_examples = [
    ["I respect everyone regardless of religion."],
    ["All immigrants should leave this country."],
    ["أنا أحب جميع الناس بدون تمييز"],
    ["يجب طرد هؤلاء الناس من البلد"],
    ["Je respecte toutes les cultures."],
    ["Ces personnes ne méritent aucun respect."],
]

# Wire the prediction function to the widgets above.
demo = gr.Interface(
    fn=predict,
    inputs=_text_input,
    outputs=_label_output,
    title="Multilingual Hate Speech Detection",
    description="Detect hate speech in multiple languages using mmBERT.",
    examples=_examples,
)

# Start the web app as soon as this module is executed.
demo.launch()