MoAmir committed on
Commit
481ad27
·
verified ·
1 Parent(s): 647aeae

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
4
+ import torch.nn.functional as F
5
+ import re
6
+ import os
7
+
8
+
9
# Location the tokenizer and model are loaded from ("." = current directory).
model_path = "."

# Try the files next to this script first; if that fails for any reason, fall
# back to the public base checkpoint so the app can still start.
try:
    print("Loading model from local directory...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
except Exception as e:
    # Deliberate best-effort boundary: report the failure, then fall back
    # instead of crashing at startup.
    print(f"Error loading local model: {e}")
    print("Fallback to base model (Not recommended for final output)...")
    # Fallback path for when the local model files are not present.
    # NOTE(review): the base checkpoint presumably has no classification head
    # trained for these 5 labels, so its scores would not be meaningful --
    # confirm before relying on this path.
    tokenizer = AutoTokenizer.from_pretrained("UBC-NLP/MARBERTv2")
    model = AutoModelForSequenceClassification.from_pretrained(
        "UBC-NLP/MARBERTv2", num_labels=5
    )
22
+
23
+ # --- 3.
24
# Maps a class index (position in the model's output) to its display label.
# NOTE(review): the index order presumably mirrors the label order used when
# the checkpoint was trained -- confirm against the training config.
id2label = {
    0: "مسيء / كراهية (Hate)",
    1: "هجومي (Offensive)",
    2: "عادي / محايد (Neutral)",
    3: "إهانة (Insult)",
    4: "تهديد (Threat)",
}
31
+
32
+ # --- 4.
33
# One-pass character table used by clean_text():
#   * diacritics U+064B..U+0652 are deleted,
#   * alef variants are unified to bare alef,
#   * alef maqsura becomes ya, ta marbuta becomes ha.
_CHAR_NORMALIZATION = str.maketrans(
    {
        **dict.fromkeys(map(chr, range(0x064B, 0x0653))),  # value None => delete
        **dict.fromkeys("أإآ", "ا"),
        "ى": "ي",
        "ة": "ه",
    }
)

# Any character (newline excluded, as "." does not match it) that is
# immediately repeated one or more times.
_REPEATED_CHAR = re.compile(r"(.)\1+")


def clean_text(text):
    """Normalize Arabic text before it is tokenized.

    Steps, in order: strip diacritics, unify alef / ya / ta-marbuta variants,
    then collapse every run of an identical character down to one occurrence.

    Args:
        text: Input string; ``None`` or an empty string is accepted.

    Returns:
        The normalized string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    normalized = text.translate(_CHAR_NORMALIZATION)

    # NOTE(review): this also collapses legitimate doubled characters (for
    # example repeated digits or letters), not only elongation. It is kept
    # as-is because the model was presumably trained on text cleaned the same
    # way -- confirm before changing.
    return _REPEATED_CHAR.sub(r"\1", normalized)
41
+
42
+ # --- 5.
43
def classify_text(text):
    """Score ``text`` against every class and return label probabilities.

    The text is normalized with ``clean_text``, tokenized (truncated to 128
    tokens), run through the module-level ``model`` without gradient tracking,
    and the logits are turned into probabilities with a softmax.

    Args:
        text: Raw input string; may be ``None``, empty, or whitespace-only.

    Returns:
        A dict mapping each label string to its probability as a Python
        ``float``. Indices missing from ``id2label`` are reported as
        ``"Class <i>"``. An empty dict is returned for blank input.
    """
    # Nothing to classify: treat whitespace-only input like empty input
    # instead of sending a blank string through the model.
    if not text or not text.strip():
        return {}

    cleaned = clean_text(text)
    inputs = tokenizer(
        cleaned,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,
    )

    # Inference only, so gradient bookkeeping is skipped.
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single text was encoded, so row 0 holds its class distribution.
    probs = F.softmax(logits, dim=-1)[0].tolist()

    return {
        id2label.get(index, f"Class {index}"): float(score)
        for index, score in enumerate(probs)
    }
64
+
65
+ #
66
# Gradio UI: a single text box in, the label/probability dict returned by
# classify_text out, plus three clickable example sentences.
iface = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(label="اكتب النص هنا", placeholder="اكتب جملة باللهجة المصرية..."),
    outputs=gr.Label(label="النتيجة"),
    title="Arabic Toxicity Detection ",
    description="نظام ذكي لاكتشاف الكلام المسيء باللهجة المصرية.",
    examples=[["انت راجل محترم"], ["يا ابن الكلب"], ["دي حاجة تقرف"]],
)

# Start the web app as soon as this script is executed.
iface.launch()