MakD1227 committed on
Commit
b8ad611
·
verified ·
1 Parent(s): 5401443

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -29
app.py CHANGED
@@ -1,44 +1,75 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import os
4
 
5
- # 1. Load the classifier from your Hugging Face Repo
6
- # This replaces the /content/drive path
7
# Hugging Face repository that the classifier is loaded from.
model_repo = "MakD1227/afriberta-hsd-model"

# Text-classification pipeline built from ``model_repo``.
classifier = pipeline(task="text-classification", model=model_repo)
9
 
10
- # 2. Prediction function
11
def predict_speech(text):
    """Classify ``text`` and return a display label with its confidence.

    Args:
        text: The input passed unchanged to the module-level ``classifier``.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is the readable name of
        the first prediction's label (the raw label when it has no entry in
        the mapping); ``confidence`` is that prediction's score as a
        percentage rounded to two decimals, followed by ``%``.
    """
    # Mapping: LABEL_0 -> Free, LABEL_1 -> Offensive, LABEL_2 -> Hate
    readable_names = {
        "LABEL_0": "Free (Neutral)",
        "LABEL_1": "Offensive",
        "LABEL_2": "Hate",
    }

    top_prediction = classifier(text)[0]
    raw_label = top_prediction['label']
    percent = round(top_prediction['score'] * 100, 2)

    return readable_names.get(raw_label, raw_label), f"{percent}%"
 
 
 
20
 
21
- # 3. Gradio Interface
22
# Two-line text box that feeds predict_speech.
_text_input = gr.Textbox(
    lines=2,
    label="Input Text",
    placeholder="Enter Amharic or Afan Oromo text...",
)

# One output component per element of predict_speech's returned tuple.
_result_outputs = [
    gr.Label(label="Classification"),
    gr.Text(label="Confidence"),
]

# Footer HTML passed to the interface as ``article``.
_footer_html = (
    "<p style='text-align: center;'>@2025 Mequanent Degu Belete </p>"
    "<p style='text-align: center;'>mekuanentde@gmail.com</p>"
    "<p style='text-align: center;'>SNHCC, Academia Sinica, Taiwan</p>"
)

# Sample inputs; each inner list is one example row.
_example_rows = [
    ["ኢትዮጵያ ለዘላለም ትኑር"],
    ["haatee sali shamtuu situ nuu beekaa waa ee baalee"],
]

interface = gr.Interface(
    fn=predict_speech,
    inputs=_text_input,
    outputs=_result_outputs,
    title="Amharic & Afan Oromo Hate Speech Detector",
    description="Classify text into Free, Offensive, or Hate Speech",
    article=_footer_html,
    examples=_example_rows,
)

# Launch (No 'share=True' needed on Hugging Face Spaces)
if __name__ == "__main__":
    interface.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import re
4
 
5
+ # -----------------------------
6
+ # Load classifier
7
+ # -----------------------------
8
# Repository identifier used for both the model and the tokenizer.
MODEL_REPO = "MakD1227/afriberta-hsd-full"

# Text-classification pipeline; model and tokenizer both come from MODEL_REPO.
classifier = pipeline(
    task="text-classification",
    model=MODEL_REPO,
    tokenizer=MODEL_REPO,
)

# Display names for the raw ``LABEL_<n>`` labels looked up after prediction.
LABEL_MAP = dict(
    LABEL_0="Free",
    LABEL_1="Offensive",
    LABEL_2="Hate",
)
21
+
22
+ # -----------------------------
23
+ # Text segmentation (simple & robust)
24
+ # -----------------------------
25
# Segment boundary: whitespace that follows sentence-ending punctuation
# (Ethiopic full stop "።", Ethiopic question mark "፧", or ASCII . ! ?),
# or any run of newlines. Compiled once so each call reuses the pattern.
_SEGMENT_BOUNDARY = re.compile(r'(?<=[።፧.!?])\s+|\n+')


def split_text(text):
    """Split ``text`` into trimmed, non-empty segments.

    A segment ends at sentence-ending punctuation that is followed by
    whitespace, or at a line break. The punctuation mark stays attached to
    the segment it terminates. Punctuation that is not followed by
    whitespace (for example the dot in ``1.5``) does not split.

    Args:
        text: The string to segment. ``None`` or an empty string is treated
            as having no content.

    Returns:
        A list of segments in their original order, each stripped of
        surrounding whitespace; an empty list when there is nothing to split.
    """
    # Guard against None/empty input so callers get an empty result instead
    # of a TypeError from the regex engine.
    if not text:
        return []

    stripped = (piece.strip() for piece in _SEGMENT_BOUNDARY.split(text))
    return [piece for piece in stripped if piece]
29
 
30
+ # -----------------------------
31
+ # Prediction with span labeling
32
+ # -----------------------------
33
def predict_with_spans(text):
    """Classify each segment of ``text`` and pair it with a display label.

    The input is cut into segments by ``split_text``; every segment is passed
    to the module-level ``classifier`` on its own, and the label of the first
    prediction is translated through ``LABEL_MAP``.

    Args:
        text: The text to analyse.

    Returns:
        A list of ``(segment, label)`` tuples in the order the segments
        appear in ``text``. The list is empty when ``split_text`` yields no
        segments.
    """
    highlighted = []

    for seg in split_text(text):
        raw_label = classifier(seg)[0]["label"]
        # Fall back to the raw label instead of raising KeyError when the
        # classifier reports a label name that LABEL_MAP does not list.
        highlighted.append((seg, LABEL_MAP.get(raw_label, raw_label)))

    return highlighted
43
+
44
+ # -----------------------------
45
+ # Gradio Interface
46
+ # -----------------------------
47
# Four-line text box that feeds predict_with_spans.
_text_input = gr.Textbox(
    lines=4,
    label="Input Text",
    placeholder="Enter mixed Amharic & Afan Oromo text...",
)

# Highlighted-text output with one colour per display label.
_span_output = gr.HighlightedText(
    label="Detected Hate / Offensive / Free Segments",
    color_map={"Hate": "red", "Offensive": "orange", "Free": "green"},
)

_description = (
    "Fine-grained detection showing which portions of the text "
    "are Hate, Offensive, or Free (supports code-mixed input)."
)

# Sample inputs; each inner list is one example row.
_example_rows = [
    ["ኢትዮጵያ ለዘላለም ትኑር haatee sali shamtuu situ nuu beekaa"],
]

demo = gr.Interface(
    fn=predict_with_spans,
    inputs=_text_input,
    outputs=_span_output,
    title="Bilingual Hate Speech Detection (Amharic & Afan Oromo)",
    description=_description,
    examples=_example_rows,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()