Spaces:
Sleeping
Sleeping
File size: 2,835 Bytes
a4c47a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# attachment_analyzer.py
import hashlib
import mimetypes
import re
from typing import List, Dict
# Extensions of directly executable / script content that is frequently abused.
# NOTE: every entry needs its own trailing comma -- adjacent string literals
# are silently concatenated by Python ("jar" "svg" == "jarsvg").
DANGEROUS_EXTENSIONS = {
    "exe",
    "js",
    "vbs",
    "scr",
    "bat",
    "cmd",
    "ps1",
    "hta",
    "jar",
    "svg",
}

# Archive and disk-image containers that can hide a payload.
ARCHIVE_EXTENSIONS = {"zip", "rar", "7z", "iso", "img"}

# Common document formats (not referenced by the code visible in this file).
DOCUMENT_EXTENSIONS = {"doc", "docx", "xls", "xlsx", "ppt", "pptx", "pdf", "html", "htm"}

# Matches names that end in a benign-looking extension immediately followed by
# a risky one, e.g. "invoice.pdf.exe" or "photo.jpg.html".
DOUBLE_EXT_REGEX = re.compile(
    r"\.(pdf|docx|xlsx|pptx|jpg|png)\.(html|exe|js|zip)$",
    re.IGNORECASE,
)
def sha256_hash(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a hexadecimal string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def _content_type_mismatch(ext: str, ctype: str) -> bool:
    """Return True when the declared Content-Type disagrees with *ext*.

    The declared type is compared case-insensitively and without its
    parameters (``; charset=...``). It is accepted when the extension appears
    in it (``pdf`` / ``application/pdf``) or when it equals the type the
    ``mimetypes`` registry associates with the extension (``jpg`` /
    ``image/jpeg``). Anything else counts as a mismatch.
    """
    if not ext or not ctype:
        return False
    declared = ctype.split(";", 1)[0].strip().lower()
    if ext in declared:
        return False
    # Guess from the extension alone so odd characters elsewhere in the
    # file name cannot influence the lookup.
    guessed, _encoding = mimetypes.guess_type("file." + ext)
    return guessed is None or guessed.lower() != declared


def analyze_attachments(attachments: List[Dict]):
    """Score e-mail attachments with a set of filename/metadata heuristics.

    attachments: list of dicts
    [
        {
            "filename": str,
            "content_type": str,
            "size": int,
            "data": bytes
        }
    ]

    Returns a tuple ``(findings, score, hashes)``:
      * findings -- human-readable description of every heuristic that fired
      * score    -- sum of the heuristic weights, capped at 100
      * hashes   -- SHA-256 hex digest of every attachment that carried data

    When *attachments* is empty or None the result is
    ``(["No attachments detected."], 0, [])``.
    """
    if not attachments:
        return ["No attachments detected."], 0, []

    findings = []
    score = 0
    hashes = []

    for att in attachments:
        # ``or`` (rather than a .get default) also covers keys that are
        # present but explicitly set to None.
        name = (att.get("filename") or "").lower()
        ctype = att.get("content_type") or ""
        size = att.get("size") or 0
        data = att.get("data") or b""

        # Trailing whitespace or dots would otherwise hide the real extension
        # from every check below (e.g. "invoice.pdf.exe " or "run.exe.").
        effective_name = name.rstrip(". \t\r\n")
        ext = effective_name.rsplit(".", 1)[-1] if "." in effective_name else ""

        # Hashing
        if data:
            hashes.append(sha256_hash(data))

        # 1. Dangerous file extensions
        if ext in DANGEROUS_EXTENSIONS:
            findings.append(f"Attachment: Dangerous executable attachment detected ({name})")
            score += 50

        # 2. Double extension (HTML smuggling / masquerading)
        if DOUBLE_EXT_REGEX.search(effective_name):
            findings.append(f"Attachment: Double extension detected (possible HTML smuggling): {name}")
            score += 40

        # 3. HTML attachment
        if ext in {"html", "htm"}:
            findings.append(f"Attachment: HTML attachment detected ({name}) — commonly used in phishing")
            score += 35

        # 4. Archive / container abuse
        if ext in ARCHIVE_EXTENSIONS:
            findings.append(f"Attachment: Compressed or disk image attachment detected ({name})")
            score += 25

        # 5. Office documents (macro risk)
        if ext in {"docm", "xlsm", "pptm"}:
            findings.append(f"Attachment: Macro-enabled Office document detected ({name})")
            score += 45

        # 6. Suspicious size (tiny payload delivery)
        if size and size < 10_000:
            findings.append(f"Attachment: Very small attachment size ({size} bytes) — possible loader")
            score += 15

        # 7. Content-type mismatch
        if _content_type_mismatch(ext, ctype):
            findings.append(
                f"Attachment: Content-Type mismatch ({name} reported as {ctype})"
            )
            score += 20

    score = min(score, 100)
    return findings, score, hashes
|