CySecGuardians / attachment_analyzer.py
princemaxp's picture
Create attachment_analyzer.py
a4c47a0 verified
# attachment_analyzer.py
import hashlib
import re
from typing import List, Dict
# Extensions of directly executable / script content that is frequently
# abused in malicious e-mail attachments.
# NOTE: every entry needs its own comma -- adjacent string literals are
# silently concatenated by Python ("jar" "svg" would become "jarsvg").
DANGEROUS_EXTENSIONS = {
    "exe", "js", "vbs", "scr", "bat", "cmd", "ps1", "hta", "jar", "svg",
}

# Archive and disk-image containers that can hide a payload from scanners.
ARCHIVE_EXTENSIONS = {"zip", "rar", "7z", "iso", "img"}

# Common document formats (not referenced by the code visible in this file).
DOCUMENT_EXTENSIONS = {"doc", "docx", "xls", "xlsx", "ppt", "pptx", "pdf", "html", "htm"}

# Matches a benign-looking document/image extension immediately followed by
# a risky final extension at the end of the name, e.g. "invoice.pdf.html".
DOUBLE_EXT_REGEX = re.compile(r"\.(pdf|docx|xlsx|pptx|jpg|png)\.(html|exe|js|zip)$", re.IGNORECASE)
def sha256_hash(data: bytes) -> str:
    """Return the SHA-256 digest of ``data`` as a hexadecimal string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def analyze_attachments(attachments: List[Dict]):
    """Score e-mail attachments against a set of phishing/malware heuristics.

    Args:
        attachments: List of dicts, one per attachment, shaped like::

            {
                "filename": str,
                "content_type": str,
                "size": int,
                "data": bytes,
            }

            Missing or ``None`` values are treated as empty.

    Returns:
        A ``(findings, score, hashes)`` tuple:

        * ``findings`` -- human-readable strings, one per triggered heuristic.
        * ``score`` -- sum of the triggered heuristic weights, capped at 100.
        * ``hashes`` -- SHA-256 hex digests of every attachment that has
          non-empty ``data``.

        When ``attachments`` is empty or ``None`` the result is
        ``(["No attachments detected."], 0, [])``.
    """
    if not attachments:
        return ["No attachments detected."], 0, []

    findings = []
    score = 0
    hashes = []

    for att in attachments:
        # The filename is lower-cased so every extension check below is
        # case-insensitive.
        name = (att.get("filename") or "").lower()
        ctype = att.get("content_type") or ""
        size = att.get("size", 0)
        data = att.get("data", b"")
        ext = name.split(".")[-1] if "." in name else ""

        # Hashing
        if data:
            hashes.append(sha256_hash(data))

        # 1. Dangerous file extensions
        if ext in DANGEROUS_EXTENSIONS:
            findings.append(f"Attachment: Dangerous executable attachment detected ({name})")
            score += 50

        # 2. Double extension (HTML smuggling / masquerading)
        if DOUBLE_EXT_REGEX.search(name):
            findings.append(f"Attachment: Double extension detected (possible HTML smuggling): {name}")
            score += 40

        # 3. HTML attachment
        if ext in {"html", "htm"}:
            findings.append(f"Attachment: HTML attachment detected ({name}) — commonly used in phishing")
            score += 35

        # 4. Archive / container abuse
        if ext in ARCHIVE_EXTENSIONS:
            findings.append(f"Attachment: Compressed or disk image attachment detected ({name})")
            score += 25

        # 5. Office documents (macro risk)
        if ext in {"docm", "xlsm", "pptm"}:
            findings.append(f"Attachment: Macro-enabled Office document detected ({name})")
            score += 45

        # 6. Suspicious size (tiny payload delivery); a missing/zero size is
        #    skipped rather than treated as tiny.
        if size and size < 10_000:
            findings.append(f"Attachment: Very small attachment size ({size} bytes) — possible loader")
            score += 15

        # 7. Content-type mismatch. MIME type names are case-insensitive, so
        #    compare against the lower-cased Content-Type (the extension is
        #    already lower-case); otherwise "Application/PDF" would be
        #    reported as a mismatch for "report.pdf". The finding still
        #    shows the Content-Type exactly as it was supplied.
        if ctype and ext and ext not in ctype.lower():
            findings.append(
                f"Attachment: Content-Type mismatch ({name} reported as {ctype})"
            )
            score += 20

    # Individual weights add up across attachments; clamp the total to 100.
    score = min(score, 100)
    return findings, score, hashes