# Codette3.0 / src / security
# Raiff1982's picture
# Upload 117 files
# 6d6b8af verified
import re
import unicodedata
# Inclusive (first, last) code point ranges that is_dangerous_codepoint()
# treats as suspicious.
DANGEROUS_RANGES = [
    # Zero-width space/non-joiner/joiner and the left-to-right /
    # right-to-left marks.
    (0x200B, 0x200F),
    # Explicit bidirectional embedding and override controls
    # (LRE, RLE, PDF, LRO, RLO).
    (0x202A, 0x202E),
    # Emoji and pictograph blocks (Miscellaneous Symbols and Pictographs
    # through Supplemental Symbols and Pictographs).
    (0x1F300, 0x1F9FF),
    # Variation selectors VS1-VS16.
    (0xFE00, 0xFE0F),
    # Interlinear annotation anchor, separator and terminator.
    (0xFFF9, 0xFFFB),
]
def is_dangerous_codepoint(cp):
    """Return True when *cp* lies inside any range of ``DANGEROUS_RANGES``.

    Args:
        cp: Integer code point, e.g. the result of ``ord(char)``.

    Returns:
        True if some ``(low, high)`` pair satisfies ``low <= cp <= high``
        (both bounds inclusive), otherwise False.
    """
    for low, high in DANGEROUS_RANGES:
        if low <= cp <= high:
            return True
    return False
def detect_unicode_threat(text):
    """Score *text* for invisible or direction-altering Unicode characters.

    Every character contributes up to two points:

    * one point when ``is_dangerous_codepoint`` accepts its code point;
    * one point when it is a zero-width character, a variation selector, or
      an explicit bidirectional embedding/override/isolate control.

    Args:
        text: The string to inspect.

    Returns:
        A dict with the keys:

        * ``"input"`` - *text*, unchanged.
        * ``"threat_level"`` - ``"high"`` for a total of 5 or more points,
          ``"moderate"`` for 2 or more, otherwise ``"low"``.
        * ``"unicode_score"`` - total points divided by ``len(text)``
          (by 1 for empty text), rounded to two decimals. It can exceed 1.0
          because one character may earn two points.
        * ``"suggested_action"`` - ``"quarantine"`` when the level is
          ``"high"``, otherwise ``"monitor"``.
        * ``"normalized"`` - the NFKD normalization of *text*.
    """
    # Bidirectional classes of the explicit embedding, override and isolate
    # controls. The class is checked instead of the character name because
    # the names of these controls spell out the direction (for example
    # "RIGHT-TO-LEFT OVERRIDE") and do not contain the word "BIDI".
    bidi_control_classes = {
        "LRE", "RLE", "LRO", "RLO", "PDF",
        "LRI", "RLI", "FSI", "PDI",
    }

    threat_score = 0
    for char in text:
        if is_dangerous_codepoint(ord(char)):
            threat_score += 1

        # An empty default keeps unnamed code points from raising ValueError;
        # they simply earn no name-based point.
        char_name = unicodedata.name(char, "")
        if (
            "ZERO WIDTH" in char_name
            or "VARIATION SELECTOR" in char_name
            or unicodedata.bidirectional(char) in bidi_control_classes
        ):
            threat_score += 1

    if threat_score >= 5:
        threat_level = "high"
    elif threat_score >= 2:
        threat_level = "moderate"
    else:
        threat_level = "low"

    return {
        "input": text,
        "threat_level": threat_level,
        "unicode_score": round(threat_score / max(len(text), 1), 2),
        "suggested_action": "quarantine" if threat_level == "high" else "monitor",
        "normalized": unicodedata.normalize('NFKD', text),
    }