Spaces:

kkAsmaa
/

ChildShield-Interface

Running

App Files Community

kkAsmaa committed 9 days ago

Commit

1a9379a

verified ·

1 Parent(s): 274b868

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -17

app.py CHANGED Viewed

@@ -3,26 +3,17 @@ import re
 import os
 import torch
 from transformers import BertTokenizer, AutoModelForSequenceClassification
-# استدعاء معالج التنظيف الرسمي لـ AraBERT الخاص بكِ
 from arabert.preprocess import ArabertPreprocessor
-# معرفات المستودع والمسار الفرعي لأسماء
 MODEL_REPO = "kkAsmaa/ChildShield"
 MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
 SUB_FOLDER = "ChildShield"
-# سحب المفتاح السري تلقائياً وبأعلى درجات الأمان السيبراني
 HF_TOKEN = os.getenv("HF_TOKEN")
 print("🔄 Loading model weights from the secured ChildShield subfolder...")
-# استدعاء المترجم المستقر وتوجيه الموديل بدقة للمجلد الفرعي
 tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
 model.eval()
-# تفعيل المعالج ليتطابق مع داتا كولاب
 arabic_prep = ArabertPreprocessor(model_name=MODEL_NAME)
 def clean_obfuscation(text):
@@ -35,7 +26,6 @@ def clean_obfuscation(text):
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     text_no_trickery = clean_obfuscation(text)
     final_text = arabic_prep.preprocess(text_no_trickery)
@@ -43,16 +33,13 @@ def full_preprocess(text):
 def predict_safety_api(text):
     """
-    Main prediction core fully optimized with Asmaa's custom window configuration (60/20)
-    and standardized native padding execution.
     """
     cleaned_text = full_preprocess(text)
-    # 1. تقطيع النص الأولي لأرقام مجهولة الأبعاد بدون حشو مسبق
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
-    # تثبيت أبعاد النوافذ الذكية الخاصة بكِ (60/20)
     window_size = 60
     overlap = 20
     windows = []
@@ -70,7 +57,7 @@ def predict_safety_api(text):
     highest_unsafe_prob = 0.0
     for win_ids in windows:
-        # تحويل الأرقام الفرعية للنافذة الحالية إلى نص مجدداً ليمررها التوكنايزر بطريقتكِ الذكية
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
         inputs = tokenizer(
@@ -86,7 +73,6 @@ def predict_safety_api(text):
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
-        # 🎯 السطر الفائز المصلح: قراءة الخانة رقم 1 المخصصة لنسبة الخطر بدقة
         unsafe_p = float(probs[1])
         if unsafe_p > 0.50:
@@ -99,7 +85,6 @@ def predict_safety_api(text):
     safe_p = 1.0 - highest_unsafe_prob
     return {"verdict": "SAFE", "block": False, "confidence": f"{safe_p * 100:.2f}%"}
-# بناء واجهة Gradio الاحترافية للمشروع
 interface = gr.Interface(
     fn=predict_safety_api,
     inputs=gr.Textbox(lines=3, placeholder="Enter text to analyze..."),

 import os
 import torch
 from transformers import BertTokenizer, AutoModelForSequenceClassification
 from arabert.preprocess import ArabertPreprocessor
 MODEL_REPO = "kkAsmaa/ChildShield"
 MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
 SUB_FOLDER = "ChildShield"
 HF_TOKEN = os.getenv("HF_TOKEN")
 print("🔄 Loading model weights from the secured ChildShield subfolder...")
 tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
 model.eval()
 arabic_prep = ArabertPreprocessor(model_name=MODEL_NAME)
 def clean_obfuscation(text):
     text = re.sub(r'[^\w\s\.]', ' ', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 def full_preprocess(text):
     text_no_trickery = clean_obfuscation(text)
     final_text = arabic_prep.preprocess(text_no_trickery)
 def predict_safety_api(text):
     """
+  Arabic text classification gateway utilizing a custom sliding window configuration with 20 token overlap.
     """
     cleaned_text = full_preprocess(text)
     full_encodings = tokenizer(cleaned_text, add_special_tokens=False, return_attention_mask=False)
     input_ids = full_encodings['input_ids']
     window_size = 60
     overlap = 20
     windows = []
     highest_unsafe_prob = 0.0
     for win_ids in windows:
         window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
         inputs = tokenizer(
         probs = torch.softmax(outputs.logits, dim=-1).flatten().tolist()
         unsafe_p = float(probs[1])
         if unsafe_p > 0.50:
     safe_p = 1.0 - highest_unsafe_prob
     return {"verdict": "SAFE", "block": False, "confidence": f"{safe_p * 100:.2f}%"}
 interface = gr.Interface(
     fn=predict_safety_api,
     inputs=gr.Textbox(lines=3, placeholder="Enter text to analyze..."),