Spaces:

kkAsmaa
/

ChildShield-Interface

Running

App Files Files Community

kkAsmaa committed 7 days ago

Commit

e111c61

verified ·

1 Parent(s): e8a997e

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -5

app.py CHANGED Viewed

@@ -26,6 +26,7 @@ def clean_obfuscation(text):
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     text_no_trickery = clean_obfuscation(text)
     final_text = arabic_prep.preprocess(text_no_trickery)
@@ -33,7 +34,7 @@ def full_preprocess(text):
 def predict_safety_api(text):
     """
-  Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
     """
     print(f"[Incoming text to evaluate]: {text}")
     cleaned_text = full_preprocess(text)
@@ -41,6 +42,9 @@ def predict_safety_api(text):
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
     window_size = 60
     overlap = 20
     windows = []
@@ -54,11 +58,15 @@ def predict_safety_api(text):
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
     is_blocked = False
     highest_unsafe_prob = 0.0
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
         inputs = tokenizer(
@@ -79,12 +87,30 @@ def predict_safety_api(text):
         if unsafe_p > 0.50:
             is_blocked = True
             highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
     if is_blocked:
-        return {"verdict": "UNSAFE", "block": True, "confidence": f"{highest_unsafe_prob * 100:.2f}%"}
     safe_p = 1.0 - highest_unsafe_prob
-    return {"verdict": "SAFE", "block": False, "confidence": f"{safe_p * 100:.2f}%"}
 interface = gr.Interface(
     fn=predict_safety_api,
@@ -95,4 +121,3 @@ interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch()

     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     text_no_trickery = clean_obfuscation(text)
     final_text = arabic_prep.preprocess(text_no_trickery)
 def predict_safety_api(text):
     """
+    Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
     """
     print(f"[Incoming text to evaluate]: {text}")
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
+    # 🎯 1. حساب عدد التوكنز الكلي الفعلي للنص المدخل
+    total_tokens_count = len(input_ids)
     window_size = 60
     overlap = 20
     windows = []
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
+    # 🎯 2. حساب عدد النوافذ الناتجة رياضياً عن هذا النص
+    total_windows_count = len(windows)
     is_blocked = False
     highest_unsafe_prob = 0.0
+    # 🎯 3. مصفوفة مخصصة لاصطياد وحفظ النوافذ النصية التي تسببت في إطلاق الخطر
+    triggered_sentences = []
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
         inputs = tokenizer(
         if unsafe_p > 0.50:
             is_blocked = True
             highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
+            # اصطياد النافذة الخبيثة المسببة للحظر وحفظها بدون تكرار
+            if window_text not in triggered_sentences:
+                triggered_sentences.append(window_text)
+    # 🎯 حقن وحفظ الحقول الحسابية الجديدة والتفسيرية مباشرة داخل كائن الرد للـ JSON
     if is_blocked:
+        return {
+            "verdict": "UNSAFE",
+            "block": True,
+            "confidence": f"{highest_unsafe_prob * 100:.2f}%",
+            "total_tokens": total_tokens_count,        # عرض التوكنز الكلي
+            "total_windows": total_windows_count,      # عرض النوافذ الكلية
+            "triggered_phrases": triggered_sentences   # عرض الجمل المسببة للحظر
+        }
     safe_p = 1.0 - highest_unsafe_prob
+    return {
+        "verdict": "SAFE",
+        "block": False,
+        "confidence": f"{safe_p * 100:.2f}%",
+        "total_tokens": total_tokens_count,        # عرض التوكنز الكلي
+        "total_windows": total_windows_count,      # عرض النوافذ الكلية
+        "triggered_phrases": []                    # فارغة لأن النص سليم وممرر
+    }
 interface = gr.Interface(
     fn=predict_safety_api,
 if __name__ == "__main__":
     interface.launch()