Spaces:

princemaxp
/

CySecGuardians

Sleeping

App Files Files Community

princemaxp committed on Jan 8

Commit

a4c47a0

verified ·

1 Parent(s): 89a43f0

Create attachment_analyzer.py

Browse files

Files changed (1) hide show

attachment_analyzer.py +91 -0

attachment_analyzer.py ADDED Viewed

	@@ -0,0 +1,91 @@

+# attachment_analyzer.py
+import hashlib
+import re
+from typing import List, Dict
# File extensions frequently abused to deliver executable or script payloads.
# Each entry needs its own comma: adjacent string literals are silently
# concatenated by Python ("jar" "svg" would become the single entry "jarsvg").
DANGEROUS_EXTENSIONS = {
    "exe", "js", "vbs", "scr", "bat", "cmd", "ps1", "hta", "jar", "svg",
}
# Archive and disk-image container formats.
ARCHIVE_EXTENSIONS = {"zip", "rar", "7z", "iso", "img"}
# Common document formats.
DOCUMENT_EXTENSIONS = {"doc", "docx", "xls", "xlsx", "ppt", "pptx", "pdf", "html", "htm"}
# Matches a benign-looking extension immediately followed by a risky final
# extension at the end of the name, e.g. "invoice.pdf.html" (case-insensitive).
DOUBLE_EXT_REGEX = re.compile(r"\.(pdf|docx|xlsx|pptx|jpg|png)\.(html|exe|js|zip)$", re.IGNORECASE)
def sha256_hash(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a lowercase hexadecimal string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def analyze_attachments(attachments: List[Dict]):
    """
    Score e-mail attachments against a set of static risk heuristics.

    attachments: list of dicts
    [
        {
            "filename": str,
            "content_type": str,
            "size": int,
            "data": bytes
        }
    ]

    Returns a tuple (findings, score, hashes):
        findings: human-readable strings, one per triggered heuristic
        score:    sum of the heuristic weights over all attachments,
                  capped at 100
        hashes:   SHA-256 hex digests of the attachments that carry
                  non-empty "data" (attachments without data add no entry,
                  so this list may be shorter than ``attachments``)

    If ``attachments`` is empty or None the result is
    (["No attachments detected."], 0, []).
    """
    # Local import so this function does not depend on a module-level import
    # being added elsewhere in the file.
    import mimetypes

    findings = []
    score = 0
    hashes = []
    if not attachments:
        return ["No attachments detected."], 0, []
    for att in attachments:
        # "or" fallbacks also normalise keys that are present but set to None.
        name = (att.get("filename") or "").lower()
        ctype = att.get("content_type") or ""
        size = att.get("size") or 0
        data = att.get("data") or b""
        # Trailing dots/whitespace ("evil.exe." / "evil.exe ") would otherwise
        # hide the real extension from every check below, so classify on a
        # cleaned copy while still reporting the name as received.
        probe = name.strip().rstrip(". ")
        ext = probe.rsplit(".", 1)[-1] if "." in probe else ""
        # Hashing
        if data:
            hashes.append(sha256_hash(data))
        # 1️⃣ Dangerous file extensions
        if ext in DANGEROUS_EXTENSIONS:
            findings.append(f"Attachment: Dangerous executable attachment detected ({name})")
            score += 50
        # 2️⃣ Double extension (HTML smuggling / masquerading)
        if DOUBLE_EXT_REGEX.search(probe):
            findings.append(f"Attachment: Double extension detected (possible HTML smuggling): {name}")
            score += 40
        # 3️⃣ HTML attachment
        if ext in {"html", "htm"}:
            findings.append(f"Attachment: HTML attachment detected ({name}) — commonly used in phishing")
            score += 35
        # 4️⃣ Archive / container abuse
        if ext in ARCHIVE_EXTENSIONS:
            findings.append(f"Attachment: Compressed or disk image attachment detected ({name})")
            score += 25
        # 5️⃣ Office documents (macro risk)
        if ext in {"docm", "xlsm", "pptm"}:
            findings.append(f"Attachment: Macro-enabled Office document detected ({name})")
            score += 45
        # 6️⃣ Suspicious size (tiny payload delivery)
        if size and size < 10_000:
            findings.append(f"Attachment: Very small attachment size ({size} bytes) — possible loader")
            score += 15
        # 7️⃣ Content-type mismatch
        if ctype and ext:
            # Compare only the media type: drop parameters such as
            # '; name="file.docx"' (which could otherwise mask a mismatch)
            # and ignore letter case.
            declared = ctype.split(";", 1)[0].strip().lower()
            # A synthetic file name avoids guess_type() interpreting unusual
            # characters in the real name as URL syntax.
            expected = mimetypes.guess_type(f"file.{ext}")[0]
            # Accept either the original heuristic (extension appears in the
            # media type, e.g. "pdf" in "application/pdf") or an exact match
            # with the registered type (e.g. "jpg" -> "image/jpeg").
            if ext not in declared and expected != declared:
                findings.append(
                    f"Attachment: Content-Type mismatch ({name} reported as {ctype})"
                )
                score += 20
    score = min(score, 100)
    return findings, score, hashes