kkAsmaa committed on
Commit
e111c61
·
verified ·
1 Parent(s): e8a997e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -5
app.py CHANGED
@@ -26,6 +26,7 @@ def clean_obfuscation(text):
26
  text = re.sub(r'[^\w\s\.]', ' ', text)
27
  text = re.sub(r'\s+', ' ', text)
28
  return text.strip()
 
29
  def full_preprocess(text):
30
  text_no_trickery = clean_obfuscation(text)
31
  final_text = arabic_prep.preprocess(text_no_trickery)
@@ -33,7 +34,7 @@ def full_preprocess(text):
33
 
34
  def predict_safety_api(text):
35
  """
36
- Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
37
  """
38
  print(f"[Incoming text to evaluate]: {text}")
39
  cleaned_text = full_preprocess(text)
@@ -41,6 +42,9 @@ def predict_safety_api(text):
41
  full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
42
  input_ids = full_encodings['input_ids']
43
 
 
 
 
44
  window_size = 60
45
  overlap = 20
46
  windows = []
@@ -54,11 +58,15 @@ def predict_safety_api(text):
54
  if len(window) > 0: windows.append(window)
55
  if i + window_size >= len(input_ids): break
56
 
 
 
 
57
  is_blocked = False
58
  highest_unsafe_prob = 0.0
 
 
59
 
60
  for win_ids in windows:
61
-
62
  window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
63
 
64
  inputs = tokenizer(
@@ -79,12 +87,30 @@ def predict_safety_api(text):
79
  if unsafe_p > 0.50:
80
  is_blocked = True
81
  highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
 
 
 
82
 
 
83
  if is_blocked:
84
- return {"verdict": "UNSAFE", "block": True, "confidence": f"{highest_unsafe_prob * 100:.2f}%"}
 
 
 
 
 
 
 
85
 
86
  safe_p = 1.0 - highest_unsafe_prob
87
- return {"verdict": "SAFE", "block": False, "confidence": f"{safe_p * 100:.2f}%"}
 
 
 
 
 
 
 
88
 
89
  interface = gr.Interface(
90
  fn=predict_safety_api,
@@ -95,4 +121,3 @@ interface = gr.Interface(
95
 
96
  if __name__ == "__main__":
97
  interface.launch()
98
-
 
26
  text = re.sub(r'[^\w\s\.]', ' ', text)
27
  text = re.sub(r'\s+', ' ', text)
28
  return text.strip()
29
+
30
  def full_preprocess(text):
31
  text_no_trickery = clean_obfuscation(text)
32
  final_text = arabic_prep.preprocess(text_no_trickery)
 
34
 
35
  def predict_safety_api(text):
36
  """
37
+ Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
38
  """
39
  print(f"[Incoming text to evaluate]: {text}")
40
  cleaned_text = full_preprocess(text)
 
42
  full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
43
  input_ids = full_encodings['input_ids']
44
 
45
+ # 🎯 1. حساب عدد التوكنز الكلي الفعلي للنص المدخل
46
+ total_tokens_count = len(input_ids)
47
+
48
  window_size = 60
49
  overlap = 20
50
  windows = []
 
58
  if len(window) > 0: windows.append(window)
59
  if i + window_size >= len(input_ids): break
60
 
61
+ # 🎯 2. حساب عدد النوافذ الناتجة رياضياً عن هذا النص
62
+ total_windows_count = len(windows)
63
+
64
  is_blocked = False
65
  highest_unsafe_prob = 0.0
66
+ # 🎯 3. مصفوفة مخصصة لاصطياد وحفظ النوافذ النصية التي تسببت في إطلاق الخطر
67
+ triggered_sentences = []
68
 
69
  for win_ids in windows:
 
70
  window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
71
 
72
  inputs = tokenizer(
 
87
  if unsafe_p > 0.50:
88
  is_blocked = True
89
  highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
90
+ # اصطياد النافذة الخبيثة المسببة للحظر وحفظها بدون تكرار
91
+ if window_text not in triggered_sentences:
92
+ triggered_sentences.append(window_text)
93
 
94
+ # 🎯 حقن وحفظ الحقول الحسابية الجديدة والتفسيرية مباشرة داخل كائن الرد للـ JSON
95
  if is_blocked:
96
+ return {
97
+ "verdict": "UNSAFE",
98
+ "block": True,
99
+ "confidence": f"{highest_unsafe_prob * 100:.2f}%",
100
+ "total_tokens": total_tokens_count, # عرض التوكنز الكلي
101
+ "total_windows": total_windows_count, # عرض النوافذ الكلية
102
+ "triggered_phrases": triggered_sentences # عرض الجمل المسببة للحظر
103
+ }
104
 
105
  safe_p = 1.0 - highest_unsafe_prob
106
+ return {
107
+ "verdict": "SAFE",
108
+ "block": False,
109
+ "confidence": f"{safe_p * 100:.2f}%",
110
+ "total_tokens": total_tokens_count, # عرض التوكنز الكلي
111
+ "total_windows": total_windows_count, # عرض النوافذ الكلية
112
+ "triggered_phrases": [] # فارغة لأن النص سليم وممرر
113
+ }
114
 
115
  interface = gr.Interface(
116
  fn=predict_safety_api,
 
121
 
122
  if __name__ == "__main__":
123
  interface.launch()