Spaces:

kkAsmaa
/

ChildShield-Interface

Running

App Files Community

kkAsmaa committed 6 days ago

Commit

e8a997e

verified ·

1 Parent(s): ac5b8f2

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -35

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ MODEL_REPO = "kkAsmaa/ChildShield"
 MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
 SUB_FOLDER = "ChildShield"
 HF_TOKEN = os.getenv("HF_TOKEN")
-print("🔄 Loading ChildShield Robust Production Engine...")
 tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
@@ -26,19 +26,21 @@ def clean_obfuscation(text):
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     """Clean ``text`` in two stages and return the final string.
     Stage 1 passes the input through ``clean_obfuscation``; stage 2 feeds
     that result to ``arabic_prep.preprocess``.
     """
     text_no_trickery = clean_obfuscation(text)
     # NOTE(review): arabic_prep is created outside this hunk; presumably an
     # AraBERT-style preprocessor object -- confirm against the full file.
     final_text = arabic_prep.preprocess(text_no_trickery)
     return final_text
 def predict_safety_api(text):
-    """بوابة الفحص الأساسية الشاملة والمقاومة لأخطاء التضارب اللغوي للمتصفحات"""
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
-    total_tokens = len(input_ids)
     window_size = 60
     overlap = 20
     windows = []
@@ -52,61 +54,45 @@ def predict_safety_api(text):
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
-    total_windows = len(windows)
     is_blocked = False
     highest_unsafe_prob = 0.0
-    triggered_sentences = []
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
-        inputs = tokenizer(window_text, return_tensors="pt", truncation=True, padding="max_length", max_length=60)
         with torch.no_grad():
             outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
-        unsafe_p = float(probs[1]) # قراءة معامل الخطر بدقة من الفئة رقم 1
         if unsafe_p > 0.50:
             is_blocked = True
             highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
-            if window_text not in triggered_sentences:
-                triggered_sentences.append(window_text)
-    print("\n📊 --- ChildShield Core Inspection Report ---")
-    print(f"📥 Received Text Preview: {text[:60]}...")
-    print(f"🔑 Total Tokens Evaluated: {total_tokens}")
-    print(f"🪟 Total Windows Processed: {total_windows}")
-    print(f"🚨 Verdict: {'UNSAFE' if is_blocked else 'SAFE'}")
-    print("---------------------------------------------\n")
-    # 🎯 التحديث الحاسم: إرسال حالة الحظر كنص صريح يفك عقدة الجافا سكربت فوراً
     if is_blocked:
-        return {
-            "verdict": "UNSAFE",
-            "block": "true", # نص صريح صغير
-            "confidence": f"{highest_unsafe_prob * 100:.2f}%",
-            "evaluated_tokens": total_tokens,
-            "processed_windows": total_windows,
-            "triggered_phrases": triggered_sentences
-        }
     safe_p = 1.0 - highest_unsafe_prob
-    return {
-        "verdict": "SAFE",
-        "block": "false", # نص صريح صغير
-        "confidence": f"{safe_p * 100:.2f}%",
-        "evaluated_tokens": total_tokens,
-        "processed_windows": total_windows,
-        "triggered_phrases": []
-    }
 interface = gr.Interface(
     fn=predict_safety_api,
     inputs=gr.Textbox(lines=3, placeholder="Enter text to analyze..."),
     outputs=gr.JSON(label="Guard Response Object"),
-    title="ChildShield Production API Gate 🛡️"
 )
 if __name__ == "__main__":
     interface.launch()

 MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
 SUB_FOLDER = "ChildShield"
 HF_TOKEN = os.getenv("HF_TOKEN")
+print("🔄 Loading model weights from the secured ChildShield subfolder...")
 tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     """Clean ``text`` in two stages and return the final string.
     Stage 1 passes the input through ``clean_obfuscation``; stage 2 feeds
     that result to ``arabic_prep.preprocess``.
     """
     text_no_trickery = clean_obfuscation(text)
     # NOTE(review): arabic_prep is created outside this hunk; presumably an
     # AraBERT-style preprocessor object -- confirm against the full file.
     final_text = arabic_prep.preprocess(text_no_trickery)
     return final_text
 def predict_safety_api(text):
+    """
+  Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
+    """
+    print(f"[Incoming text to evaluate]: {text}")
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
     window_size = 60
     overlap = 20
     windows = []
             if len(window) > 0: windows.append(window)
             if i + window_size >= len(input_ids): break
     is_blocked = False
     highest_unsafe_prob = 0.0
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
+        inputs = tokenizer(
+            window_text,
+            return_tensors="pt",
+            truncation=True,
+            padding="max_length",
+            max_length=60
+        )
         with torch.no_grad():
             outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
+        unsafe_p = float(probs[1])
         if unsafe_p > 0.50:
             is_blocked = True
             highest_unsafe_prob = max(highest_unsafe_prob, unsafe_p)
     if is_blocked:
+        return {"verdict": "UNSAFE", "block": True, "confidence": f"{highest_unsafe_prob * 100:.2f}%"}
     safe_p = 1.0 - highest_unsafe_prob
+    return {"verdict": "SAFE", "block": False, "confidence": f"{safe_p * 100:.2f}%"}
 interface = gr.Interface(
     fn=predict_safety_api,
     inputs=gr.Textbox(lines=3, placeholder="Enter text to analyze..."),
     outputs=gr.JSON(label="Guard Response Object"),
+    title="ChildShield Production API Gate (Arabic Version)🛡️"
 )
 if __name__ == "__main__":
     interface.launch()