Spaces:
Sleeping
Sleeping
| # attachment_analyzer.py | |
| import hashlib | |
| import re | |
| from typing import List, Dict | |
# Extensions frequently abused to deliver executable or script payloads.
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated by Python ("jar" "svg" becomes the single entry
# "jarsvg"), which would leave both extensions undetected.
DANGEROUS_EXTENSIONS = {
    "exe", "js", "vbs", "scr", "bat", "cmd", "ps1", "hta", "jar", "svg",
}

# Archive and disk-image containers.
ARCHIVE_EXTENSIONS = {"zip", "rar", "7z", "iso", "img"}

# Common document formats.
DOCUMENT_EXTENSIONS = {"doc", "docx", "xls", "xlsx", "ppt", "pptx", "pdf", "html", "htm"}

# Matches a benign-looking document/image extension immediately followed by
# an html/exe/js/zip extension at the end of the name (e.g. "invoice.pdf.exe").
DOUBLE_EXT_REGEX = re.compile(r"\.(pdf|docx|xlsx|pptx|jpg|png)\.(html|exe|js|zip)$", re.IGNORECASE)
def sha256_hash(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a lowercase hex string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def analyze_attachments(attachments: List[Dict]):
    """
    Score e-mail attachments against a set of phishing/malware heuristics.

    attachments: list of dicts
    [
      {
        "filename": str,
        "content_type": str,
        "size": int,
        "data": bytes
      }
    ]

    Missing or ``None`` values are treated as empty/zero.

    Returns a tuple ``(findings, score, hashes)``:
      findings -- one human-readable message per heuristic that fired
      score    -- sum of the heuristic weights over all attachments,
                  capped at 100
      hashes   -- SHA-256 hex digest of every attachment that carried data

    When ``attachments`` is empty or ``None`` the result is
    ``(["No attachments detected."], 0, [])``.
    """
    # Function-scope import: only the Content-Type check needs it, and it
    # keeps this block independent of the module's import list.
    import mimetypes

    findings = []
    score = 0
    hashes = []

    if not attachments:
        return ["No attachments detected."], 0, []

    for att in attachments:
        name = (att.get("filename") or "").lower()
        ctype = att.get("content_type") or ""
        size = att.get("size") or 0
        data = att.get("data") or b""

        # Derive the extension from a copy with trailing dots/whitespace
        # removed, so names such as "payload.exe." or "payload.exe " cannot
        # slip past the extension checks. Findings still report the name as
        # received.
        effective_name = name.strip().rstrip(". ")
        ext = effective_name.rsplit(".", 1)[-1] if "." in effective_name else ""

        # Hashing
        if data:
            hashes.append(sha256_hash(data))

        # 1. Dangerous file extensions
        if ext in DANGEROUS_EXTENSIONS:
            findings.append(f"Attachment: Dangerous executable attachment detected ({name})")
            score += 50

        # 2. Double extension (HTML smuggling / masquerading)
        if DOUBLE_EXT_REGEX.search(effective_name):
            findings.append(f"Attachment: Double extension detected (possible HTML smuggling): {name}")
            score += 40

        # 3. HTML attachment
        if ext in {"html", "htm"}:
            findings.append(f"Attachment: HTML attachment detected ({name}) — commonly used in phishing")
            score += 35

        # 4. Archive / container abuse
        if ext in ARCHIVE_EXTENSIONS:
            findings.append(f"Attachment: Compressed or disk image attachment detected ({name})")
            score += 25

        # 5. Office documents (macro risk)
        if ext in {"docm", "xlsm", "pptm"}:
            findings.append(f"Attachment: Macro-enabled Office document detected ({name})")
            score += 45

        # 6. Suspicious size (tiny payload delivery)
        if size and size < 10_000:
            findings.append(f"Attachment: Very small attachment size ({size} bytes) — possible loader")
            score += 15

        # 7. Content-type mismatch
        # A bare substring test flags most legitimate files ("jpg" does not
        # occur in "image/jpeg", "txt" not in "text/plain"). Compare
        # case-insensitively, and before flagging check whether the declared
        # media type is the one the standard MIME table maps the extension to.
        declared = ctype.lower()
        if declared and ext and ext not in declared:
            media_type = declared.split(";", 1)[0].strip()  # drop "; charset=..." parameters
            expected, _ = mimetypes.guess_type(f"attachment.{ext}")
            if expected is None or expected.lower() != media_type:
                findings.append(
                    f"Attachment: Content-Type mismatch ({name} reported as {ctype})"
                )
                score += 20

    # Weights only ever add up, so a single cap after the loop is sufficient.
    score = min(score, 100)
    return findings, score, hashes