Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,13 +7,14 @@ from fastapi import FastAPI
|
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from transformers import BertTokenizer, AutoModelForSequenceClassification
|
| 9 |
from arabert.preprocess import ArabertPreprocessor
|
|
|
|
| 10 |
|
| 11 |
MODEL_REPO = "kkAsmaa/ChildShield"
|
| 12 |
MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
|
| 13 |
SUB_FOLDER = "ChildShield"
|
| 14 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 15 |
|
| 16 |
-
print("π Loading ChildShield Model Weights...")
|
| 17 |
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
|
| 18 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
|
| 19 |
model.eval()
|
|
@@ -70,6 +71,7 @@ def predict_safety_api(text):
|
|
| 70 |
highest_safe_prob = 0.0
|
| 71 |
windows_analysis = []
|
| 72 |
triggered_windows = []
|
|
|
|
| 73 |
|
| 74 |
for idx, win_ids in enumerate(windows):
|
| 75 |
window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
|
|
@@ -96,31 +98,44 @@ def predict_safety_api(text):
|
|
| 96 |
"prediction": prediction
|
| 97 |
})
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
if unsafe_prob > 0.50:
|
| 100 |
is_blocked = True
|
| 101 |
highest_unsafe_prob = max(highest_unsafe_prob, unsafe_prob)
|
| 102 |
triggered_windows.append(idx + 1)
|
| 103 |
-
|
| 104 |
else:
|
| 105 |
highest_safe_prob = max(highest_safe_prob, safe_prob)
|
| 106 |
|
| 107 |
final_prediction = "UNSAFE" if is_blocked else "SAFE"
|
| 108 |
winning_probability = highest_unsafe_prob if is_blocked else highest_safe_prob
|
| 109 |
-
formatted_confidence= f"{winning_probability *100:.2f}%"
|
| 110 |
|
| 111 |
unsafe_confidence_score = round(highest_unsafe_prob, 4)
|
| 112 |
safe_confidence_score = round(1.0 - highest_unsafe_prob, 4)
|
| 113 |
final_confidence = unsafe_confidence_score if is_blocked else safe_confidence_score
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
print(f"\n
|
| 118 |
-
print(f"
|
| 119 |
-
print(f"
|
| 120 |
-
print(f"
|
| 121 |
-
print(f"
|
| 122 |
-
print(f"
|
| 123 |
-
print("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
try:
|
| 126 |
log_file_path = "production_logs.txt"
|
|
@@ -160,3 +175,4 @@ app = gr.mount_gradio_app(app, gradio_interface, path="/")
|
|
| 160 |
|
| 161 |
if __name__ == "__main__":
|
| 162 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
|
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from transformers import BertTokenizer, AutoModelForSequenceClassification
|
| 9 |
from arabert.preprocess import ArabertPreprocessor
|
| 10 |
+
from tabulate import tabulate
|
| 11 |
|
| 12 |
MODEL_REPO = "kkAsmaa/ChildShield"
|
| 13 |
MODEL_NAME = "aubmindlab/bert-base-arabertv02-twitter"
|
| 14 |
SUB_FOLDER = "ChildShield"
|
| 15 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 16 |
|
| 17 |
+
print("π Loading ChildShield Model Weights with Server-Side Logs Dashboard...")
|
| 18 |
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
|
| 19 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO, token=HF_TOKEN, subfolder=SUB_FOLDER)
|
| 20 |
model.eval()
|
|
|
|
| 71 |
highest_safe_prob = 0.0
|
| 72 |
windows_analysis = []
|
| 73 |
triggered_windows = []
|
| 74 |
+
windows_table_data = []
|
| 75 |
|
| 76 |
for idx, win_ids in enumerate(windows):
|
| 77 |
window_text = tokenizer.decode(win_ids, skip_special_tokens=True)
|
|
|
|
| 98 |
"prediction": prediction
|
| 99 |
})
|
| 100 |
|
| 101 |
+
|
| 102 |
+
windows_table_data.append([
|
| 103 |
+
f"Win {idx + 1}",
|
| 104 |
+
window_text[:45] + "..." if len(window_text) > 45 else window_text,
|
| 105 |
+
f"{safe_prob * 100:.2f}%",
|
| 106 |
+
f"{unsafe_prob * 100:.2f}%",
|
| 107 |
+
f"β {prediction}" if prediction == "UNSAFE" else f"π {prediction}"
|
| 108 |
+
])
|
| 109 |
+
|
| 110 |
if unsafe_prob > 0.50:
|
| 111 |
is_blocked = True
|
| 112 |
highest_unsafe_prob = max(highest_unsafe_prob, unsafe_prob)
|
| 113 |
triggered_windows.append(idx + 1)
|
|
|
|
| 114 |
else:
|
| 115 |
highest_safe_prob = max(highest_safe_prob, safe_prob)
|
| 116 |
|
| 117 |
final_prediction = "UNSAFE" if is_blocked else "SAFE"
|
| 118 |
winning_probability = highest_unsafe_prob if is_blocked else highest_safe_prob
|
| 119 |
+
formatted_confidence = f"{winning_probability * 100:.2f}%"
|
| 120 |
|
| 121 |
unsafe_confidence_score = round(highest_unsafe_prob, 4)
|
| 122 |
safe_confidence_score = round(1.0 - highest_unsafe_prob, 4)
|
| 123 |
final_confidence = unsafe_confidence_score if is_blocked else safe_confidence_score
|
| 124 |
|
| 125 |
+
|
| 126 |
+
alert_banner = "π¨ [BLOCK] CHILDSHIELD AI INFERENCE REPORT" if is_blocked else "π [PASS] CHILDSHIELD AI INFERENCE REPORT"
|
| 127 |
+
print(f"\n================ {alert_banner} ================")
|
| 128 |
+
print(f"π₯ Received Original Text:\n\"{text.strip()}\"")
|
| 129 |
+
print(f"\nπ§Ή Preprocessed Cleaned Text:\n\"{cleaned_text}\"")
|
| 130 |
+
print(f"\nπ Total Page Tokens Count : {total_tokens_count}")
|
| 131 |
+
print(f"πͺ Total Sliding Windows Run : {total_windows_count} Windows (Size: 60, Overlap: 20)")
|
| 132 |
+
print(f"π― Final Security Verdict : {final_prediction}")
|
| 133 |
+
print(f"π Model Decision Confidence : {formatted_confidence}")
|
| 134 |
+
print(f"π Triggered Windows ID : {triggered_windows}")
|
| 135 |
+
print("\nπ --- Windows Detailed Semantic Analysis Table ---")
|
| 136 |
+
|
| 137 |
+
print(tabulate(windows_table_data, headers=["ID", "Window Text Preview", "Safe Prob", "Unsafe Prob", "Verdict"], tablefmt="grid"))
|
| 138 |
+
print("========================================================================\n")
|
| 139 |
|
| 140 |
try:
|
| 141 |
log_file_path = "production_logs.txt"
|
|
|
|
| 175 |
|
| 176 |
if __name__ == "__main__":
|
| 177 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 178 |
+
|