# attachment_analyzer.py
import hashlib
import re
from typing import Dict, List, Tuple

# Dangerous extensions frequently abused
DANGEROUS_EXTENSIONS = {
    "exe",
    "js",
    "vbs",
    "scr",
    "bat",
    "cmd",
    "ps1",
    "hta",
    # Each entry needs its own comma: adjacent string literals are silently
    # concatenated, which would turn "jar" "svg" into the single entry "jarsvg".
    "jar",
    "svg",
}

ARCHIVE_EXTENSIONS = {"zip", "rar", "7z", "iso", "img"}

DOCUMENT_EXTENSIONS = {"doc", "docx", "xls", "xlsx", "ppt", "pptx", "pdf", "html", "htm"}

# A benign-looking extension immediately followed by a risky final extension,
# e.g. "invoice.pdf.exe" or "scan.jpg.html".
DOUBLE_EXT_REGEX = re.compile(
    r"\.(pdf|docx|xlsx|pptx|jpg|png)\.(html|exe|js|zip)$",
    re.IGNORECASE,
)

# Extension groups that are only used by the analyzer itself.
_HTML_EXTENSIONS = {"html", "htm"}
_MACRO_EXTENSIONS = {"docm", "xlsm", "pptm"}


def sha256_hash(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of *data*."""
    return hashlib.sha256(data).hexdigest()


def analyze_attachments(attachments: List[Dict]) -> Tuple[List[str], int, List[str]]:
    """Score a list of e-mail attachments using filename/metadata heuristics.

    Args:
        attachments: list of dicts of the form::

            [
                {
                    "filename": str,
                    "content_type": str,
                    "size": int,
                    "data": bytes,
                }
            ]

            Every key is optional; a missing or falsy value simply disables
            the checks that depend on it.

    Returns:
        A ``(findings, score, hashes)`` tuple where ``findings`` is a list of
        human-readable messages, ``score`` is the summed risk score capped at
        100, and ``hashes`` holds the SHA-256 hex digest of every attachment
        that carried non-empty ``data``. When ``attachments`` is empty or
        ``None`` the result is ``(["No attachments detected."], 0, [])``.
    """
    findings: List[str] = []
    score = 0
    hashes: List[str] = []

    if not attachments:
        return ["No attachments detected."], 0, []

    for att in attachments:
        name = (att.get("filename") or "").lower()
        ctype = att.get("content_type", "")
        size = att.get("size", 0)
        data = att.get("data", b"")

        # Text after the last dot, or "" when the name contains no dot.
        _, dot, tail = name.rpartition(".")
        ext = tail if dot else ""

        # Hashing
        if data:
            hashes.append(sha256_hash(data))

        # 1. Dangerous file extensions
        if ext in DANGEROUS_EXTENSIONS:
            findings.append(f"Attachment: Dangerous executable attachment detected ({name})")
            score += 50

        # 2. Double extension (HTML smuggling / masquerading)
        if DOUBLE_EXT_REGEX.search(name):
            findings.append(
                f"Attachment: Double extension detected (possible HTML smuggling): {name}"
            )
            score += 40

        # 3. HTML attachment
        if ext in _HTML_EXTENSIONS:
            findings.append(
                f"Attachment: HTML attachment detected ({name}) — commonly used in phishing"
            )
            score += 35

        # 4. Archive / container abuse
        if ext in ARCHIVE_EXTENSIONS:
            findings.append(
                f"Attachment: Compressed or disk image attachment detected ({name})"
            )
            score += 25

        # 5. Office documents (macro risk)
        if ext in _MACRO_EXTENSIONS:
            findings.append(f"Attachment: Macro-enabled Office document detected ({name})")
            score += 45

        # 6. Suspicious size (tiny payload delivery)
        if size and size < 10_000:
            findings.append(
                f"Attachment: Very small attachment size ({size} bytes) — possible loader"
            )
            score += 15

        # 7. Content-type mismatch
        # The extension is already lowercase, so compare against a lowercased
        # Content-Type; otherwise "Application/PDF" would be reported as a
        # mismatch for "report.pdf". The original value is kept for the message.
        # NOTE(review): this is a plain substring heuristic, so types that do
        # not embed the extension (e.g. "image/jpeg" for ".jpg") are still
        # flagged.
        if ctype and ext and ext not in ctype.lower():
            findings.append(
                f"Attachment: Content-Type mismatch ({name} reported as {ctype})"
            )
            score += 20

    # The score is reported on a 0-100 scale regardless of how many checks fired.
    score = min(score, 100)

    return findings, score, hashes