Spaces:

MoAmir
/

Arabic-Toxicity-Detection

Sleeping

App Files Files Community

MoAmir committed on Nov 17, 2025

Commit

481ad27

verified ·

1 Parent(s): 647aeae

Create app.py

Browse files

Files changed (1) hide show

app.py +75 -0

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import torch.nn.functional as F
+import re
+import os
+# --- 2. Model loading
+# Directory the tokenizer and classifier are loaded from first.
+model_path = "."
+# Hub checkpoint used only when loading from model_path fails.
+_FALLBACK_CHECKPOINT = "UBC-NLP/MARBERTv2"
+def _load_classifier(path):
+    """Return ``(tokenizer, model)`` loaded from *path*.
+
+    If either object fails to load from *path*, the error is printed and
+    both are loaded from ``_FALLBACK_CHECKPOINT`` instead, with a 5-label
+    classification head.
+    """
+    try:
+        print("Loading model from local directory...")
+        local_tokenizer = AutoTokenizer.from_pretrained(path)
+        local_model = AutoModelForSequenceClassification.from_pretrained(path)
+    except Exception as e:
+        print(f"Error loading local model: {e}")
+        print("Fallback to base model (Not recommended for final output)...")
+        # Backup path for when the local model files are not present.
+        # NOTE(review): presumably the fallback head is not fine-tuned for
+        # this task (hence the "Not recommended" message) - confirm.
+        return (
+            AutoTokenizer.from_pretrained(_FALLBACK_CHECKPOINT),
+            AutoModelForSequenceClassification.from_pretrained(_FALLBACK_CHECKPOINT, num_labels=5),
+        )
+    return local_tokenizer, local_model
+tokenizer, model = _load_classifier(model_path)
+# --- 3. Label mapping
+# Display labels, listed in class-index order (index 0 first).
+_LABEL_NAMES = (
+    "مسيء / كراهية (Hate)",
+    "هجومي (Offensive)",
+    "عادي / محايد (Neutral)",
+    "إهانة (Insult)",
+    "تهديد (Threat)",
+)
+# Class index -> label text returned to the UI.
+id2label = dict(enumerate(_LABEL_NAMES))
+# --- 4. Text normalization
+# Ordered (pattern, replacement) pairs applied one after another by clean_text.
+_NORMALIZATION_STEPS = (
+    (r'[\u064B-\u0652]', ''),   # strip diacritics (tashkeel)
+    (r'[أإآ]', 'ا'),            # unify alef variants
+    (r'ى', 'ي'),                # unify alef maqsura with yaa
+    (r'ة', 'ه'),                # taa marbuta -> haa
+    (r'(.)\1+', r'\1'),         # collapse elongation: any repeated run -> one char
+)
+def clean_text(text):
+    """Normalize Arabic text before it is tokenized.
+
+    Applies, in order: removal of characters U+064B-U+0652, alef
+    unification, alef-maqsura and taa-marbuta replacement, and collapsing
+    of every run of a repeated character (newline excepted) to a single
+    occurrence. The last step also shortens legitimately doubled
+    characters, e.g. "hellooo" becomes "helo".
+
+    Returns an empty string for any falsy input (``None`` or ``""``).
+    """
+    if not text:
+        return ""
+    normalized = text
+    for pattern, replacement in _NORMALIZATION_STEPS:
+        normalized = re.sub(pattern, replacement, normalized)
+    return normalized
+# --- 5. Prediction function
+def classify_text(text):
+    """Classify *text* and return a ``{label: probability}`` mapping.
+
+    The text is normalized with ``clean_text``, tokenized (truncated to
+    128 tokens) and passed through the module-level ``model``; the logits
+    are turned into probabilities with softmax. Labels come from
+    ``id2label``, falling back to ``"Class <i>"`` for indices it lacks.
+
+    Returns an empty dict when there is nothing to classify: ``None``,
+    an empty or whitespace-only string, or text that is blank once
+    cleaned. Without this guard such input would be scored as an empty
+    sequence and produce a meaningless prediction.
+    """
+    # Reject blank input before any model work is done.
+    if not text or not text.strip():
+        return {}
+    cleaned = clean_text(text)
+    # Cleaning strips diacritics, so diacritics-only input ends up blank.
+    if not cleaned.strip():
+        return {}
+    inputs = tokenizer(cleaned, return_tensors="pt", padding=True, truncation=True, max_length=128)
+    # Inference only: skip gradient tracking.
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    # Single input, so take row 0; tolist() yields plain Python floats.
+    probs = F.softmax(logits, dim=-1)[0].tolist()
+    return {id2label.get(i, f"Class {i}"): score for i, score in enumerate(probs)}
+# --- 6. Gradio interface
+# Input box for the sentence to classify.
+_text_input = gr.Textbox(label="اكتب النص هنا", placeholder="اكتب جملة باللهجة المصرية...")
+# Output widget fed by the dict that classify_text returns.
+_label_output = gr.Label(label="النتيجة")
+# Sample sentences passed to the interface as examples.
+_example_inputs = [["انت راجل محترم"], ["يا ابن الكلب"], ["دي حاجة تقرف"]]
+iface = gr.Interface(
+    fn=classify_text,
+    inputs=_text_input,
+    outputs=_label_output,
+    title="Arabic Toxicity Detection ",
+    description="نظام ذكي لاكتشاف الكلام المسيء باللهجة المصرية.",
+    examples=_example_inputs,
+)
+# Start the web app as soon as the module is executed.
+iface.launch()