Spaces:

kkAsmaa
/

ChildShield-Interface

Running

App Files Files Community

kkAsmaa committed 7 days ago

Commit

fe08a39

verified ·

1 Parent(s): 604575d

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -31

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import re
 import os
 import torch
 from transformers import BertTokenizer, AutoModelForSequenceClassification
 from arabert.preprocess import ArabertPreprocessor
@@ -26,21 +27,23 @@ def clean_obfuscation(text):
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     """Run *text* through clean_obfuscation, then through arabic_prep.preprocess, and return the result."""
     return arabic_prep.preprocess(clean_obfuscation(text))
-def predict_safety_api(text):
-    """
-  Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
-    """
-    print(f"[Incoming text to evaluate]: {text}")
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
     window_size = 60
     overlap = 20
     windows = []
@@ -54,43 +57,113 @@ def predict_safety_api(text):
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
-    is_blocked = False
-    highest_unsafe_prob = 0.0
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
-        inputs = tokenizer(
-            window_text,
-            return_tensors="pt",
-            truncation=True,
-            padding="max_length",
-            max_length=60
-        )
         with torch.no_grad():
             outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
-        unsafe_p = float(probs[1])
         if unsafe_p > 0.50:
-            is_blocked = True
-            highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
-    if is_blocked:
-        return {"verdict": "UNSAFE", "block": True, "confidence": f"{highest_unsafe_prob * 100:.2f}%"}
-    safe_p = 1.0 - highest_unsafe_prob
-    return {"verdict": "SAFE", "block": False, "confidence": f"{safe_p * 100:.2f}%"}
 interface = gr.Interface(
-    fn=predict_safety_api,
-    inputs=gr.Textbox(lines=3, placeholder="Enter text to analyze..."),
-    outputs=gr.JSON(label="Guard Response Object"),
-    title="ChildShield Production API Gate (Arabic Version)🛡️"
 )
 if __name__ == "__main__":

 import re
 import os
 import torch
+import json
 from transformers import BertTokenizer, AutoModelForSequenceClassification
 from arabert.preprocess import ArabertPreprocessor
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     """Run *text* through clean_obfuscation, then through arabic_prep.preprocess, and return the result."""
     return arabic_prep.preprocess(clean_obfuscation(text))
+def evaluate_single_text(text):
+    """دالة داخلية لتقطيع وفحص النص عبر النوافذ المنزلقة 60/20 وحساب التوكنز والنوافذ"""
+    if not text or len(text.strip()) < 2:
+        return False, 0.0, 0, 0, []
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
+    total_tokens = len(input_ids)
     window_size = 60
     overlap = 20
     windows = []
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
+    total_windows = len(windows)
+    is_unsafe = False
+    highest_prob = 0.0
+    triggered_phrases = []
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
+        inputs = tokenizer(window_text, return_tensors="pt", truncation=True, padding="max_length", max_length=60)
         with torch.no_grad():
             outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
+        unsafe_p = float(probs[1]) # قراءة معامل الخطر بدقة من الفئة رقم 1
         if unsafe_p > 0.50:
+            is_unsafe = True
+            highest_prob = max(highest_prob, unsafe_p)
+            if window_text not in triggered_phrases:
+                triggered_phrases.append(window_text)
+    return is_unsafe, highest_prob, total_tokens, total_windows, triggered_phrases
+def predict_adaptive_shield(post_text, comments_json_string):
+    """
+    البوابة الرئيسية الشاملة لاستقبال المنشور والتعليقات/الإعلانات وحساب توازن الـ 75% والإحصائيات
+    """
+    print(f"[Incoming Evaluation Request] Post length: {len(str(post_text))}")
+    # 1. فحص وتقييم المنشور الرئيسي
+    post_unsafe, post_prob, post_tokens, post_windows, post_triggered = evaluate_single_text(post_text)
+    # علم وضوح المنشور: إذا كان سيئاً أو أمانه متأرجحاً يُعتبر غامضاً
+    is_post_vague_or_unsafe = post_unsafe or (post_prob > 0.35)
+    # 2. فك وفحص التعليقات أو الإعلانات الممررة كـ JSON أو الأسطر المكتوبة يدوياً
+    comments_list = []
+    try:
+        comments_list = json.loads(comments_json_string)
+    except:
+        # لتسهيل التجربة اليدوية في واجهة غرايديو إذا كتبها المستخدم كأسطر يدوية
+        comments_list = [c.strip() for c in comments_json_string.split("\n") if c.strip()]
+    total_comments = len(comments_list)
+    unsafe_comments_count = 0
+    blurred_elements = []
+    all_triggered_phrases = list(post_triggered)
+    total_tokens_evaluated = post_tokens
+    total_windows_processed = post_windows
+    for index, element_text in enumerate(comments_list):
+        c_unsafe, c_prob, c_tokens, c_windows, c_triggered = evaluate_single_text(element_text)
+        total_tokens_evaluated += c_tokens
+        total_windows_processed += c_windows
+        all_triggered_phrases.extend(c_triggered)
+        if c_unsafe:
+            unsafe_comments_count += 1
+            # الاحتفاظ بموقع التعليق أو الإعلان السيئ لتمويهه محلياً بالـ CSS في كروم
+            blurred_elements.append({
+                "element_index": index,
+                "text": element_text,
+                "confidence": f"{c_prob * 100:.2f}%"
+            })
+    # 3. رياضيات أسماء لحساب نسبة التلوث اللغوي في البيئة المحيطة (75%)
+    unsafe_percentage = (unsafe_comments_count / total_comments * 100) if total_comments > 0 else 0.0
+    # 4. مصفوفة اتخاذ القرار التكيفية الصارمة (Decision Matrix)
+    full_page_block = False
+    verdict = "SAFE"
+    if is_post_vague_or_unsafe and unsafe_percentage >= 75.0:
+        full_page_block = True
+        verdict = "CRITICAL_UNSAFE_PAGE_BLOCKED"
+    elif unsafe_percentage > 0:
+        verdict = "PARTIAL_UNSAFE_ELEMENTS_BLURRED"
+    # تقرير المراقبة السحابي الشامل المطبوع في الـ Logs
+    print("\n========= CHILDSHIELD COMPREHENSIVE ADAPTIVE REPORT =========")
+    print(f"📝 Main Post Vague/Unsafe Flag : {is_post_vague_or_unsafe}")
+    print(f"🔑 Total Page Tokens Count      : {total_tokens_evaluated}")
+    print(f"🪟 Total Sliding Windows Run    : {total_windows_processed}")
+    print(f"💬 Unsafe Elements Density      : {unsafe_percentage:.2f}% ({unsafe_comments_count}/{total_comments})")
+    print(f"🛡️ Final Cloud Security Verdict : {verdict}")
+    print(f"🎬 Block Entire Layout Action  : {full_page_block}")
+    print("=============================================================\n")
+    return {
+        "verdict": verdict,
+        "block_entire_page": full_page_block,
+        "unsafe_elements_percentage": f"{unsafe_percentage:.2f}%",
+        "total_evaluated_tokens": total_tokens_evaluated,
+        "total_processed_windows": total_windows_processed,
+        "triggered_phrases": list(set(all_triggered_phrases)), # قائمة الجمل الفرعية المسببة للحظر
+        "elements_to_blur": blurred_elements                   # قائمة العناصر المطلوب تمويهها بالـ Blur
+    }
+# Build the interactive Gradio interface: two text inputs (main post, comments/ads) wired to predict_adaptive_shield, with a JSON output.
 interface = gr.Interface(
+    fn=predict_adaptive_shield,
+    inputs=[
+        gr.Textbox(lines=2, label="Main Post Content (نص المنشور الأساسي)"),
+        gr.Textbox(lines=4, label="Comments / Ads (التعليقات أو الإعلانات - اكتب كل نص في سطر مستقل للتجربة يدوياً)")
+    ],
+    outputs=gr.JSON(label="Adaptive Guard Response Object"),
+    title="ChildShield Adaptive Production API Gate 🛡️"
 )
 if __name__ == "__main__":