MakD1227 committed on
Commit
2f923fa
·
verified ·
1 Parent(s): b8ad611

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -49
app.py CHANGED
@@ -1,10 +1,9 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import re
4
 
5
- # -----------------------------
6
- # Load classifier
7
- # -----------------------------
8
  MODEL_REPO = "MakD1227/afriberta-hsd-full"
9
 
10
  classifier = pipeline(
@@ -13,61 +12,47 @@ classifier = pipeline(
13
  tokenizer=MODEL_REPO
14
  )
15
 
16
- LABEL_MAP = {
17
- "LABEL_0": "Free",
18
- "LABEL_1": "Offensive",
19
- "LABEL_2": "Hate"
20
- }
21
 
22
- # -----------------------------
23
- # Text segmentation (simple & robust)
24
- # -----------------------------
25
- def split_text(text):
26
- # Split by punctuation and line breaks
27
- segments = re.split(r'(?<=[።.!?])\s+|\n+', text)
28
- return [seg.strip() for seg in segments if seg.strip()]
29
 
30
- # -----------------------------
31
- # Prediction with span labeling
32
- # -----------------------------
33
- def predict_with_spans(text):
34
- segments = split_text(text)
35
- highlighted = []
36
 
37
- for seg in segments:
38
- result = classifier(seg)[0]
39
- label = LABEL_MAP[result["label"]]
40
- highlighted.append((seg, label))
41
 
42
- return highlighted
43
-
44
- # -----------------------------
45
  # Gradio Interface
46
- # -----------------------------
47
  demo = gr.Interface(
48
- fn=predict_with_spans,
49
  inputs=gr.Textbox(
50
- lines=4,
51
  label="Input Text",
52
- placeholder="Enter mixed Amharic & Afan Oromo text..."
53
- ),
54
- outputs=gr.HighlightedText(
55
- label="Detected Hate / Offensive / Free Segments",
56
- color_map={
57
- "Hate": "red",
58
- "Offensive": "orange",
59
- "Free": "green"
60
- }
61
- ),
62
- title="Bilingual Hate Speech Detection (Amharic & Afan Oromo)",
63
- description=(
64
- "Fine-grained detection showing which portions of the text "
65
- "are Hate, Offensive, or Free (supports code-mixed input)."
66
  ),
 
 
 
 
 
 
 
 
 
 
 
67
  examples=[
68
- [
69
- "ኢትዮጵያ ለዘላለም ትኑር haatee sali shamtuu situ nuu beekaa"
70
- ]
71
  ]
72
  )
73
 
 
1
  import gradio as gr
2
  from transformers import pipeline
 
3
 
4
+ # ----------------------------------
5
+ # Load model from Hugging Face Hub
6
+ # ----------------------------------
7
  MODEL_REPO = "MakD1227/afriberta-hsd-full"
8
 
9
  classifier = pipeline(
 
12
  tokenizer=MODEL_REPO
13
  )
14
 
15
+ # ----------------------------------
16
+ # Prediction function
17
+ # ----------------------------------
18
+ def predict_speech(text):
19
+ results = classifier(text)
20
 
21
+ label_map = {
22
+ "LABEL_0": "Free (Neutral)",
23
+ "LABEL_1": "Offensive",
24
+ "LABEL_2": "Hate"
25
+ }
 
 
26
 
27
+ label = results[0]["label"]
28
+ score = results[0]["score"]
 
 
 
 
29
 
30
+ return label_map.get(label, label), f"{score * 100:.2f}%"
 
 
 
31
 
32
+ # ----------------------------------
 
 
33
  # Gradio Interface
34
+ # ----------------------------------
35
  demo = gr.Interface(
36
+ fn=predict_speech,
37
  inputs=gr.Textbox(
38
+ lines=2,
39
  label="Input Text",
40
+ placeholder="Enter Amharic or Afan Oromo text..."
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  ),
42
+ outputs=[
43
+ gr.Label(label="Classification"),
44
+ gr.Text(label="Confidence")
45
+ ],
46
+ title="Amharic & Afan Oromo Hate Speech Detector",
47
+ description="Classify text into Free, Offensive, or Hate Speech",
48
+ article="""
49
+ <p style='text-align:center;'>© 2025 Mequanent Degu Belete</p>
50
+ <p style='text-align:center;'>mekuanentde@gmail.com</p>
51
+ <p style='text-align:center;'>SNHCC, Academia Sinica, Taiwan</p>
52
+ """,
53
  examples=[
54
+ ["ኢትዮጵያ ለዘላለም ትኑር"],
55
+ ["haatee sali shamtuu situ nuu beekaa waa ee baalee"]
 
56
  ]
57
  )
58