Spaces:

princemaxp
/

CySecGuardians

Sleeping

App Files Files Community

princemaxp committed on Jan 8

Commit

b4bbffc

verified ·

1 Parent(s): 748e27f

Update analyze_email_main.py

Browse files

Files changed (1) hide show

analyze_email_main.py +62 -71

analyze_email_main.py CHANGED Viewed

@@ -3,49 +3,39 @@ from parse_email import parse_email
 from header_analyzer import analyze_headers
 from body_analyzer import analyze_body
 from url_analyzer import analyze_urls
-import re
-def parse_auth_results(auth_results: str):
-    """Extract SPF, DKIM, and DMARC values from Authentication-Results header."""
-    results = {"spf": "unknown", "dkim": "unknown", "dmarc": "unknown"}
-    if not auth_results:
-        return results
-    auth_results = auth_results.lower()
-    for key in results.keys():
-        m = re.search(rf"{key}=([\w-]+)", auth_results)
-        if m:
-            results[key] = m.group(1)
-    return results
 def analyze(file_path):
-    # parse
-    headers, subject, body, urls, images = parse_email(file_path)
-    # header analysis
-    header_findings, header_score, auth_summary = analyze_headers(headers or {})
-    # url analysis (keeps previous checks like Safe Browsing / URLHaus)
-    url_findings, url_score = analyze_urls(urls or [])
-    # body analysis (subject, body, urls, images)
-    body_findings, body_score, highlighted_body, body_verdict = analyze_body(subject, body, urls or [], images or [])
-    # combine scores
     total_score = 0
-    total_score += (header_score or 0)
-    total_score += (body_score or 0) * 1.0  # weight body normally
-    total_score += (url_score or 0) * 1.2   # URLs a bit heavier
-    # clamp
-    try:
-        total_score = float(total_score)
-    except Exception:
-        total_score = 0.0
-    total_score = max(0.0, min(total_score, 100.0))
-    total_score_rounded = round(total_score)
-    # final verdict
     if total_score >= 70:
         verdict = "🚨 Malicious"
     elif 50 <= total_score < 70:
@@ -55,57 +45,58 @@ def analyze(file_path):
     else:
         verdict = "✅ Safe"
-    # attack type heuristics
     attack_type = "General Phishing"
-    combined_text_lower = ((subject or "") + "\n" + (body or "")).lower()
-    if any(k in combined_text_lower for k in ["invoice", "payment", "wire transfer", "bank details"]):
-        attack_type = "Invoice/Payment Fraud (BEC)"
-    elif any(k in combined_text_lower for k in ["password", "verify", "account", "login", "credentials"]):
-        attack_type = "Credential Harvesting (Phishing)"
-    elif any("reply-to domain mismatch" in f.lower() for f in header_findings):
         attack_type = "Business Email Compromise (BEC)"
-    elif any("spam" in f.lower() for f in body_findings + url_findings):
         attack_type = "Spam / Marketing"
     elif verdict == "✅ Safe":
-        attack_type = "Benign / Normal Email"
-    # tags
-    tags = []
-    for finding in (header_findings + body_findings + url_findings):
-        fl = finding.lower()
-        if "domain" in fl:
-            tags.append("Suspicious Sender Domain")
-        if "phishing" in fl or "malicious url" in fl or "urlhaus" in fl:
-            tags.append("Phishing / Malicious URL")
-        if "urgent" in fl or "suspicious phrase" in fl:
-            tags.append("Urgent Language")
-        if "spam" in fl or "marketing" in fl:
-            tags.append("Spam Tone")
         if "spf" in fl or "dkim" in fl or "dmarc" in fl:
-            tags.append("Auth Failures (SPF/DKIM/DMARC)")
-        if "ocr" in fl or "extracted text" in fl:
-            tags.append("Image-based content detected")
     summary = {
         "Final Verdict": verdict,
         "Attack Type": attack_type,
-        "Attack Score": total_score_rounded,
-        "Main Tags": ", ".join(sorted(set(tags))) if tags else "No special tags",
     }
     details = {
-        "Header Findings": header_findings or [],
-        "Body Findings": body_findings or [],
-        "URL Findings": url_findings or [],
-        "Highlighted Body": highlighted_body or "",
-        "Auth Results": auth_summary or {},   # <-- NEW: show SPF, DKIM, DMARC results
     }
     return summary, details
 if __name__ == "__main__":
-    fp = "sample.eml"
-    s, d = analyze(fp)
-    print("SUMMARY:", s)
-    print("DETAILS:", d)

 from header_analyzer import analyze_headers
 from body_analyzer import analyze_body
 from url_analyzer import analyze_urls
+from attachment_analyzer import analyze_attachments
 def analyze(file_path):
+    # ---------- PARSE ----------
+    headers, subject, body, urls, images, attachments = parse_email(file_path)
+    # ---------- HEADER ----------
+    header_findings, header_score, auth_summary = analyze_headers(headers, body)
+    # ---------- BODY ----------
+    body_findings, body_score, highlighted_body, body_verdict = analyze_body(
+        subject, body, urls, images
+    )
+    # ---------- URL ----------
+    url_findings, url_score = analyze_urls(urls)
+    # ---------- ATTACHMENTS ----------
+    attachment_findings, attachment_score, attachment_hashes = analyze_attachments(
+        attachments
+    )
+    # ---------- SCORE ENGINE (Phase 4.1 Simple Version) ----------
     total_score = 0
+    total_score += header_score * 1.0
+    total_score += body_score * 1.0
+    total_score += url_score * 1.2
+    total_score += attachment_score * 1.3
+    total_score = max(0, min(int(total_score), 100))
+    # ---------- FINAL VERDICT ----------
     if total_score >= 70:
         verdict = "🚨 Malicious"
     elif 50 <= total_score < 70:
     else:
         verdict = "✅ Safe"
+    # ---------- ATTACK TYPE ----------
     attack_type = "General Phishing"
+    if attachment_score >= 40:
+        attack_type = "Malware / Malicious Attachment"
+    elif "reply-to domain mismatch" in " ".join(header_findings).lower():
         attack_type = "Business Email Compromise (BEC)"
+    elif url_score >= 30:
+        attack_type = "Credential Harvesting / Phishing"
+    elif verdict == "📩 Spam":
         attack_type = "Spam / Marketing"
     elif verdict == "✅ Safe":
+        attack_type = "Benign"
+    # ---------- TAGGING ----------
+    tags = set()
+    for f in header_findings + body_findings + url_findings + attachment_findings:
+        fl = f.lower()
+        if "attachment" in fl:
+            tags.add("Malicious Attachment")
+        if "url" in fl:
+            tags.add("Suspicious URL")
         if "spf" in fl or "dkim" in fl or "dmarc" in fl:
+            tags.add("Auth Failure")
+        if "reply-to" in fl:
+            tags.add("BEC Indicator")
+        if "urgent" in fl:
+            tags.add("Urgency")
+    # ---------- OUTPUT ----------
     summary = {
         "Final Verdict": verdict,
         "Attack Type": attack_type,
+        "Attack Score": total_score,
+        "Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
     }
     details = {
+        "Header Findings": header_findings,
+        "Body Findings": body_findings,
+        "URL Findings": url_findings,
+        "Attachment Findings": attachment_findings,
+        "Attachment Hashes": attachment_hashes,
+        "Highlighted Body": highlighted_body,
+        "Auth Results": auth_summary,
     }
     return summary, details
 if __name__ == "__main__":
+    s, d = analyze("sample.eml")
+    print(s)
+    print(d)