File size: 1,175 Bytes
6d6b8af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

import re
import unicodedata

# Inclusive (first, last) code point ranges consulted by
# is_dangerous_codepoint(); a character inside any of them adds to the
# threat score computed by detect_unicode_threat().
DANGEROUS_RANGES = [
    (0x200B, 0x200F),    # zero-width space/joiners, LRM and RLM marks
    (0x202A, 0x202E),    # bidi embedding and override controls
    (0x1F300, 0x1F9FF),  # pictograph / emoji blocks
    (0xFE00, 0xFE0F),    # variation selectors VS1-VS16
    (0xFFF9, 0xFFFB),    # interlinear annotation controls
]

def is_dangerous_codepoint(cp):
    """Return True when *cp* lies inside any range in DANGEROUS_RANGES.

    *cp* is an integer code point; both endpoints of every range are
    treated as inclusive.
    """
    for low, high in DANGEROUS_RANGES:
        if low <= cp <= high:
            return True
    return False

# Bidirectional classes, as returned by unicodedata.bidirectional(), of the
# explicit embedding, override and isolate formatting characters
# (U+202A-U+202E and U+2066-U+2069).
_BIDI_CONTROL_CLASSES = frozenset(
    {"LRE", "RLE", "LRO", "RLO", "PDF", "LRI", "RLI", "FSI", "PDI"}
)

# Substrings of a Unicode character name that flag an invisible modifier.
_SUSPICIOUS_NAME_MARKERS = ("ZERO WIDTH", "VARIATION SELECTOR")


def detect_unicode_threat(text):
    """Score *text* for invisible or direction-altering Unicode characters.

    Every character can add up to two points to the raw score:

    * one point when its code point satisfies ``is_dangerous_codepoint``;
    * one point when its Unicode name contains "ZERO WIDTH" or
      "VARIATION SELECTOR", or when it is an explicit bidirectional
      formatting control.

    Args:
        text: The string to inspect. An empty string is valid and yields
            a "low" result.

    Returns:
        A dict with the keys:

        * ``"input"`` - the original *text*, unchanged.
        * ``"threat_level"`` - ``"high"`` for a raw score of 5 or more,
          ``"moderate"`` for 2 to 4, otherwise ``"low"``.
        * ``"unicode_score"`` - raw score divided by the length of *text*
          (at least 1), rounded to two decimals. It can exceed 1.0 because
          one character may contribute two points.
        * ``"suggested_action"`` - ``"quarantine"`` when the level is
          ``"high"``, otherwise ``"monitor"``.
        * ``"normalized"`` - the NFKD normalization of *text*.
    """
    threat_score = 0
    for c in text:
        if is_dangerous_codepoint(ord(c)):
            threat_score += 1

        # Unnamed code points (e.g. most control characters) yield "" here
        # instead of raising ValueError, so no try/except is needed.
        name = unicodedata.name(c, "")
        has_suspicious_name = any(
            marker in name for marker in _SUSPICIOUS_NAME_MARKERS
        )
        # No Unicode character name contains the word "BIDI", so a name
        # substring test can never fire for bidi controls; their
        # bidirectional class identifies them reliably instead.
        is_bidi_control = unicodedata.bidirectional(c) in _BIDI_CONTROL_CLASSES
        if has_suspicious_name or is_bidi_control:
            threat_score += 1

    if threat_score >= 5:
        threat_level = "high"
    elif threat_score >= 2:
        threat_level = "moderate"
    else:
        threat_level = "low"

    return {
        "input": text,
        "threat_level": threat_level,
        # max(..., 1) avoids dividing by zero for empty input.
        "unicode_score": round(threat_score / max(len(text), 1), 2),
        "suggested_action": "quarantine" if threat_level == "high" else "monitor",
        "normalized": unicodedata.normalize("NFKD", text),
    }