Spaces:
Sleeping
Sleeping
File size: 1,175 Bytes
6d6b8af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import re
import unicodedata
# Inclusive (first, last) code point ranges that contribute to the threat
# score when a character falls inside any of them.
DANGEROUS_RANGES = [
    # U+200B..U+200F: zero-width space/non-joiner/joiner, LRM and RLM marks.
    (0x200B, 0x200F),
    # U+202A..U+202E: bidirectional embedding/override controls and PDF.
    (0x202A, 0x202E),
    # U+1F300..U+1F9FF: emoji and pictographic symbol blocks.
    (0x1F300, 0x1F9FF),
    # U+FE00..U+FE0F: variation selectors VS1..VS16.
    (0xFE00, 0xFE0F),
    # U+FFF9..U+FFFB: interlinear annotation anchor/separator/terminator.
    (0xFFF9, 0xFFFB),
]
def is_dangerous_codepoint(cp):
    """Return True if code point *cp* lies inside any DANGEROUS_RANGES entry.

    Both bounds of every range are treated as inclusive.
    """
    for low, high in DANGEROUS_RANGES:
        if low <= cp <= high:
            return True
    return False
# Bidirectional classes (UAX #9) of the explicit embedding, override and
# isolate controls, i.e. the characters able to visually reorder text.
_BIDI_CONTROL_CLASSES = frozenset(
    {"LRE", "RLE", "LRO", "RLO", "PDF", "LRI", "RLI", "FSI", "PDI"}
)

# Substrings of a Unicode character name that earn an extra threat point.
_SUSPICIOUS_NAME_PARTS = ("ZERO WIDTH", "VARIATION SELECTOR")


def detect_unicode_threat(text):
    """Score *text* for invisible or direction-altering Unicode characters.

    Each character adds one point when its code point is accepted by
    ``is_dangerous_codepoint`` and one further point when it is a zero-width
    character or variation selector (by name) or an explicit bidirectional
    control (by bidirectional class).

    Args:
        text: The string to inspect.

    Returns:
        A dict with the keys:
            "input": the original text, unchanged.
            "threat_level": "high" for a score of 5 or more, "moderate" for
                2 or more, otherwise "low".
            "unicode_score": score divided by the text length (an empty
                text counts as length 1), rounded to two decimals.
            "suggested_action": "quarantine" when the level is "high",
                otherwise "monitor".
            "normalized": the NFKD-normalized form of the text.
    """
    threat_score = 0
    for char in text:
        if is_dangerous_codepoint(ord(char)):
            threat_score += 1

        # An empty default avoids raising ValueError for unnamed characters.
        name = unicodedata.name(char, "")
        # No bidi control is actually named "BIDI ..." (they are called
        # e.g. "RIGHT-TO-LEFT OVERRIDE"), so they are detected through
        # their bidirectional class rather than a name substring.
        if (
            any(part in name for part in _SUSPICIOUS_NAME_PARTS)
            or unicodedata.bidirectional(char) in _BIDI_CONTROL_CLASSES
        ):
            threat_score += 1

    if threat_score >= 5:
        threat_level = "high"
    elif threat_score >= 2:
        threat_level = "moderate"
    else:
        threat_level = "low"

    return {
        "input": text,
        "threat_level": threat_level,
        "unicode_score": round(threat_score / max(len(text), 1), 2),
        "suggested_action": "quarantine" if threat_level == "high" else "monitor",
        "normalized": unicodedata.normalize('NFKD', text),
    }
|