Spaces:
Sleeping
Sleeping
| import re | |
| import unicodedata | |
# Inclusive (first, last) code point ranges that the detector treats as
# suspicious. Order is irrelevant: lookups test every range.
DANGEROUS_RANGES = [
    # Zero-width space/joiners plus the left-to-right / right-to-left marks.
    (0x200B, 0x200F),
    # Explicit bidirectional embedding and override controls (LRE..RLO).
    (0x202A, 0x202E),
    # Emoji and pictographic symbol blocks.
    # NOTE(review): this flags ordinary emoji as well - confirm that is intended.
    (0x1F300, 0x1F9FF),
    # Variation selectors VS1-VS16.
    (0xFE00, 0xFE0F),
    # Interlinear annotation anchor / separator / terminator.
    (0xFFF9, 0xFFFB),
]
def is_dangerous_codepoint(cp: int, ranges=None) -> bool:
    """Return True when code point *cp* lies inside one of the given ranges.

    Args:
        cp: Integer Unicode code point (e.g. the result of ``ord(char)``).
        ranges: Optional iterable of inclusive ``(start, end)`` pairs to test
            against. When omitted (``None``), the module-level
            ``DANGEROUS_RANGES`` table is used. An explicitly empty iterable
            is honoured and matches nothing.

    Returns:
        True if ``start <= cp <= end`` holds for at least one pair, else False.
    """
    # Compare against None (not truthiness) so an explicitly empty table
    # is not silently replaced by the default one.
    if ranges is None:
        ranges = DANGEROUS_RANGES
    return any(start <= cp <= end for start, end in ranges)
| def detect_unicode_threat(text): | |
| threat_score = 0 | |
| confusables = [] | |
| normalized = unicodedata.normalize('NFKD', text) | |
| for c in text: | |
| cp = ord(c) | |
| if is_dangerous_codepoint(cp): | |
| threat_score += 1 | |
| try: | |
| name = unicodedata.name(c) | |
| if "ZERO WIDTH" in name or "BIDI" in name or "VARIATION SELECTOR" in name: | |
| threat_score += 1 | |
| except ValueError: | |
| continue | |
| threat_level = "low" | |
| if threat_score >= 5: | |
| threat_level = "high" | |
| elif threat_score >= 2: | |
| threat_level = "moderate" | |
| return { | |
| "input": text, | |
| "threat_level": threat_level, | |
| "unicode_score": round(threat_score / max(len(text), 1), 2), | |
| "suggested_action": "quarantine" if threat_level == "high" else "monitor", | |
| "normalized": normalized | |
| } | |