Spaces:

kkAsmaa
/

ChildShield-Interface

Running

App Files Community

kkAsmaa committed 7 days ago

Commit

1063491

verified ·

1 Parent(s): ce94a9b

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -19

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ MODEL_REPO = "kkAsmaa/ChildShield"
 MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
 SUB_FOLDER = "ChildShield"
 HF_TOKEN = os.getenv("HF_TOKEN")
-print("🔄 Loading model weights from the secured ChildShield subfolder...")
 tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
@@ -26,21 +26,19 @@ def clean_obfuscation(text):
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     """Run clean_obfuscation on text, then arabic_prep.preprocess, and return the result."""
     text_no_trickery = clean_obfuscation(text)
     final_text = arabic_prep.preprocess(text_no_trickery)
     return final_text
 def predict_safety_api(text):
-    """
-  Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
-    """
-    print(f"[Incoming text to evaluate]: {text}")
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
     window_size = 60
     overlap = 20
     windows = []
@@ -54,46 +52,65 @@ def predict_safety_api(text):
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
     is_blocked = False
     highest_unsafe_prob = 0.0
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
-        inputs = tokenizer(
-            window_text,
-            return_tensors="pt",
-            truncation=True,
-            padding="max_length",
-            max_length=60
-        )
         with torch.no_grad():
             outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
         unsafe_p = float(probs[1])
         if unsafe_p > 0.50:
             is_blocked = True
             highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
     if is_blocked:
-        return {"verdict": "UNSAFE", "block": True, "confidence": f"{highest_unsafe_prob * 100:.2f}%"}
     safe_p = 1.0 - highest_unsafe_prob
-    return {"verdict": "SAFE", "block": False, "confidence": f"{safe_p * 100:.2f}%"}
 interface = gr.Interface(
     fn=predict_safety_api,
     inputs=gr.Textbox(lines=3, placeholder="Enter text to analyze..."),
     outputs=gr.JSON(label="Guard Response Object"),
-    title="ChildShield Production API Gate (Arabic Version)🛡️"
 )
 if __name__ == "__main__":
     interface.launch()

 MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
 SUB_FOLDER = "ChildShield"
 HF_TOKEN = os.getenv("HF_TOKEN")
+print("🔄 Loading ChildShield Explainable AI Core...")
 tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     """Run clean_obfuscation on text, then arabic_prep.preprocess, and return the result."""
     text_no_trickery = clean_obfuscation(text)
     final_text = arabic_prep.preprocess(text_no_trickery)
     return final_text
 def predict_safety_api(text):
+    """بوابة الفحص الأساسية الشاملة مع سجلات الرصد الحية"""
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
+    total_tokens = len(input_ids)
     window_size = 60
     overlap = 20
     windows = []
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
+    total_windows = len(windows)
     is_blocked = False
     highest_unsafe_prob = 0.0
+    triggered_sentences = []
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
+        inputs = tokenizer(window_text, return_tensors="pt", truncation=True, padding="max_length", max_length=60)
         with torch.no_grad():
             outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
         unsafe_p = float(probs[1])
         if unsafe_p > 0.50:
             is_blocked = True
             highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
+            if window_text not in triggered_sentences:
+                triggered_sentences.append(window_text)
+    # 🎯 طباعة التقرير الشامل فوراً داخل شاشة الـ Logs السوداء ليظهر أمام الدكاترة حياً عند اتصال الامتداد
+    print("\n📊 --- ChildShield Core Inspection Report ---")
+    print(f"📥 Received Text Preview: {text[:60]}...")
+    print(f"🔑 Total Tokens Evaluated: {total_tokens}")
+    print(f"🪟 Total Windows Processed: {total_windows}")
+    print(f"🚨 Verdict: {'UNSAFE (BLOCK)' if is_blocked else 'SAFE (PASS)'}")
+    print(f"🛑 Triggered Phrases Captured: {triggered_sentences}")
+    print("---------------------------------------------\n")
     if is_blocked:
+        return {
+            "verdict": "UNSAFE",
+            "block": True,
+            "confidence": f"{highest_unsafe_prob * 100:.2f}%",
+            "evaluated_tokens": total_tokens,
+            "processed_windows": total_windows,
+            "triggered_phrases": triggered_sentences
+        }
     safe_p = 1.0 - highest_unsafe_prob
+    return {
+        "verdict": "SAFE",
+        "block": False,
+        "confidence": f"{safe_p * 100:.2f}%",
+        "evaluated_tokens": total_tokens,
+        "processed_windows": total_windows,
+        "triggered_phrases": []
+    }
 interface = gr.Interface(
     fn=predict_safety_api,
     inputs=gr.Textbox(lines=3, placeholder="Enter text to analyze..."),
     outputs=gr.JSON(label="Guard Response Object"),
+    title="ChildShield Production API Gate 🛡️"
 )
 if __name__ == "__main__":
     interface.launch()