File size: 2,835 Bytes
a4c47a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# attachment_analyzer.py
import hashlib
import re
from typing import List, Dict

# Dangerous extensions frequently abused.
# NOTE: every entry needs its own comma -- adjacent string literals are
# silently concatenated ("jar" "svg" would become the single entry "jarsvg").
DANGEROUS_EXTENSIONS = {
    "exe", "js", "vbs", "scr", "bat", "cmd", "ps1", "hta", "jar", "svg",
}

# Compressed archives and disk images.
ARCHIVE_EXTENSIONS = {"zip", "rar", "7z", "iso", "img"}
# Common document formats (not referenced by the code visible in this file).
DOCUMENT_EXTENSIONS = {"doc", "docx", "xls", "xlsx", "ppt", "pptx", "pdf", "html", "htm"}

# Matches names that end in a benign-looking extension immediately followed by
# html/exe/js/zip, e.g. "invoice.pdf.exe". Case-insensitive.
DOUBLE_EXT_REGEX = re.compile(r"\.(pdf|docx|xlsx|pptx|jpg|png)\.(html|exe|js|zip)$", re.IGNORECASE)


def sha256_hash(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a lowercase hex string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()


def analyze_attachments(attachments: List[Dict]):
    """
    Score a list of attachments against simple filename/size/type heuristics.

    attachments: list of dicts
    [
        {
            "filename": str,
            "content_type": str,
            "size": int,
            "data": bytes
        }
    ]
    Missing or None values are treated as empty / zero.

    Returns a tuple (findings, score, hashes):
        findings: human-readable strings, one per triggered heuristic
        score:    sum of the heuristic weights over all attachments,
                  capped at 100
        hashes:   SHA-256 hex digests of the attachments that carried data
                  (an attachment without data contributes no entry)

    When attachments is empty or None the result is
    (["No attachments detected."], 0, []).
    """
    # Imported locally so this function does not rely on a new file-level import.
    import mimetypes

    findings = []
    score = 0
    hashes = []

    if not attachments:
        return ["No attachments detected."], 0, []

    for att in attachments:
        # File names are compared case-insensitively throughout.
        name = (att.get("filename") or "").lower()
        ctype = att.get("content_type") or ""
        size = att.get("size") or 0
        data = att.get("data") or b""

        # Text after the last dot; empty when the name contains no dot.
        _, dot, tail = name.rpartition(".")
        ext = tail if dot else ""

        # Hashing
        if data:
            hashes.append(sha256_hash(data))

        # 1. Dangerous file extensions
        if ext in DANGEROUS_EXTENSIONS:
            findings.append(f"Attachment: Dangerous executable attachment detected ({name})")
            score += 50

        # 2. Double extension (HTML smuggling / masquerading)
        if DOUBLE_EXT_REGEX.search(name):
            findings.append(f"Attachment: Double extension detected (possible HTML smuggling): {name}")
            score += 40

        # 3. HTML attachment
        if ext in {"html", "htm"}:
            findings.append(f"Attachment: HTML attachment detected ({name}) — commonly used in phishing")
            score += 35

        # 4. Archive / container abuse
        if ext in ARCHIVE_EXTENSIONS:
            findings.append(f"Attachment: Compressed or disk image attachment detected ({name})")
            score += 25

        # 5. Office documents (macro risk)
        if ext in {"docm", "xlsm", "pptm"}:
            findings.append(f"Attachment: Macro-enabled Office document detected ({name})")
            score += 45

        # 6. Suspicious size (tiny payload delivery); a size of 0 / missing is
        #    treated as "unknown" and not flagged.
        if size and size < 10_000:
            findings.append(f"Attachment: Very small attachment size ({size} bytes) — possible loader")
            score += 15

        # 7. Content-type mismatch
        if ctype and ext:
            # Media types are case-insensitive, and anything after ";" is a
            # parameter (e.g. name="..."), not part of the type itself, so
            # compare only the normalised type/subtype.
            media_type = ctype.split(";", 1)[0].strip().lower()
            # Canonical type for this extension according to the stdlib table
            # (e.g. "jpg" -> "image/jpeg"); None when the extension is unknown.
            # A direct table lookup avoids URL parsing of an untrusted name.
            expected_type = mimetypes.types_map.get("." + ext)
            if ext not in media_type and media_type != expected_type:
                findings.append(
                    f"Attachment: Content-Type mismatch ({name} reported as {ctype})"
                )
                score += 20

    score = min(score, 100)

    return findings, score, hashes