MakD1227
/

afriberta-hsd-model

@@ -1,44 +1,75 @@
 import gradio as gr
 from transformers import pipeline
-import os
-# 1. Load the classifier from your Hugging Face Repo
-# This replaces the /content/drive path
-# The pipeline is built once at import time and reused by predict_speech.
-model_repo = "MakD1227/afriberta-hsd-model"
-classifier = pipeline("text-classification", model=model_repo)
-# 2. Prediction function
-# Classifies `text` with the module-level `classifier` and returns a
-# (display label, confidence string) pair.
-def predict_speech(text):
-    results = classifier(text)
-    # Mapping: LABEL_0 -> Free, LABEL_1 -> Offensive, LABEL_2 -> Hate
-    label_map = {"LABEL_0": "Free (Neutral)", "LABEL_1": "Offensive", "LABEL_2": "Hate"}
-    # Only the first entry of the pipeline output is used.
-    label = results[0]['label']
-    score = results[0]['score']
-    # Labels missing from label_map pass through unchanged; the score is
-    # formatted as a percentage rounded to two decimals.
-    return label_map.get(label, label), f"{round(score * 100, 2)}%"
-# 3. Gradio Interface
-# One text input feeds predict_speech; its two return values fill the two
-# output components below, in order.
-interface = gr.Interface(
-    fn=predict_speech,
     inputs=gr.Textbox(
-        lines=2,
         label="Input Text",
-        placeholder="Enter Amharic or Afan Oromo text..."
     ),
-    outputs=[
-        gr.Label(label="Classification"),
-        gr.Text(label="Confidence")
-    ],
-    title="Amharic & Afan Oromo Hate Speech Detector",
-    description="Classify text into Free, Offensive, or Hate Speech",
-    article="<p style='text-align: center;'>@2025 Mequanent Degu Belete </p><p style='text-align: center;'>mekuanentde@gmail.com</p><p style='text-align: center;'>SNHCC, Academia Sinica, Taiwan</p>",
     examples=[
-        ["ኢትዮጵያ ለዘላለም ትኑር"],
-        ["haatee sali shamtuu situ nuu beekaa waa ee baalee"]
     ]
 )
-# Launch (No 'share=True' needed on Hugging Face Spaces)
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
 from transformers import pipeline
+import re
+# -----------------------------
+# Load classifier
+# -----------------------------
+MODEL_REPO = "MakD1227/afriberta-hsd-full"
+# Raw class ids reported by the classifier -> labels shown in the UI.
+LABEL_MAP = {"LABEL_0": "Free", "LABEL_1": "Offensive", "LABEL_2": "Hate"}
+# Model and tokenizer are both resolved from the same MODEL_REPO id.
+classifier = pipeline(task="text-classification", model=MODEL_REPO, tokenizer=MODEL_REPO)
+# -----------------------------
+# Text segmentation (simple & robust)
+# -----------------------------
+# Segment boundary: whitespace that follows a terminal mark (Ethiopic ። ፧ ፨
+# or Latin . ! ?), or any run of line breaks. Compiled once at import time.
+_SEGMENT_BOUNDARY = re.compile(r'(?<=[።፧፨.!?])\s+|\n+')
+def split_text(text):
+    """Split *text* into sentence-like segments.
+
+    A boundary is whitespace that directly follows a sentence-final mark
+    (Ethiopic full stop, question mark or paragraph separator, or Latin
+    ``.``, ``!``, ``?``), or one or more newlines. Terminal marks stay
+    attached to the segment they end.
+
+    Args:
+        text: Raw user input; ``None`` and the empty string are accepted.
+
+    Returns:
+        A list of stripped, non-empty segments in their original order
+        (empty when there is nothing to classify).
+    """
+    # Guard first: re.split raises TypeError on None.
+    if not text:
+        return []
+    pieces = (piece.strip() for piece in _SEGMENT_BOUNDARY.split(text))
+    return [piece for piece in pieces if piece]
+# -----------------------------
+# Prediction with span labeling
+# -----------------------------
+def predict_with_spans(text):
+    """Classify each segment of *text* and pair it with its display label.
+
+    Args:
+        text: Raw user input from the textbox.
+
+    Returns:
+        A list of ``(segment, label)`` tuples in input order; empty when
+        the input is empty or yields no segments.
+    """
+    if not text:
+        return []
+    segments = split_text(text)
+    if not segments:
+        return []
+    # One pipeline call for all segments; truncation keeps an over-long
+    # segment from exceeding the model's maximum input length.
+    results = classifier(segments, truncation=True)
+    # Fall back to the raw label so an id missing from LABEL_MAP cannot
+    # raise KeyError.
+    return [
+        (seg, LABEL_MAP.get(res["label"], res["label"]))
+        for seg, res in zip(segments, results)
+    ]
+# -----------------------------
+# Gradio Interface
+# -----------------------------
+# Single-input, single-output app: the textbox content goes to
+# predict_with_spans, whose (segment, label) pairs feed the highlighted view.
+demo = gr.Interface(
+    fn=predict_with_spans,
     inputs=gr.Textbox(
+        lines=4,
         label="Input Text",
+        placeholder="Enter mixed Amharic & Afan Oromo text..."
+    ),
+    outputs=gr.HighlightedText(
+        label="Detected Hate / Offensive / Free Segments",
+        # Keys must match the display labels in LABEL_MAP.
+        color_map={
+            "Hate": "red",
+            "Offensive": "orange",
+            "Free": "green"
+        }
+    ),
+    title="Bilingual Hate Speech Detection (Amharic & Afan Oromo)",
+    description=(
+        "Fine-grained detection showing which portions of the text "
+        "are Hate, Offensive, or Free (supports code-mixed input)."
     ),
     examples=[
+        [
+            "ኢትዮጵያ ለዘላለም ትኑር haatee sali shamtuu situ nuu beekaa"
+        ]
     ]
 )
+# Start the app only when this file is executed as a script.
 if __name__ == "__main__":
+    demo.launch()