princemaxp committed on
Commit
b4bbffc
·
verified ·
1 Parent(s): 748e27f

Update analyze_email_main.py

Browse files
Files changed (1) hide show
  1. analyze_email_main.py +62 -71
analyze_email_main.py CHANGED
@@ -3,49 +3,39 @@ from parse_email import parse_email
3
  from header_analyzer import analyze_headers
4
  from body_analyzer import analyze_body
5
  from url_analyzer import analyze_urls
6
- import re
7
-
8
- def parse_auth_results(auth_results: str):
9
- """Extract SPF, DKIM, and DMARC values from Authentication-Results header."""
10
- results = {"spf": "unknown", "dkim": "unknown", "dmarc": "unknown"}
11
- if not auth_results:
12
- return results
13
- auth_results = auth_results.lower()
14
- for key in results.keys():
15
- m = re.search(rf"{key}=([\w-]+)", auth_results)
16
- if m:
17
- results[key] = m.group(1)
18
- return results
19
 
20
 
21
  def analyze(file_path):
22
- # parse
23
- headers, subject, body, urls, images = parse_email(file_path)
 
 
 
24
 
25
- # header analysis
26
- header_findings, header_score, auth_summary = analyze_headers(headers or {})
 
 
27
 
28
- # url analysis (keeps previous checks like Safe Browsing / URLHaus)
29
- url_findings, url_score = analyze_urls(urls or [])
30
 
31
- # body analysis (subject, body, urls, images)
32
- body_findings, body_score, highlighted_body, body_verdict = analyze_body(subject, body, urls or [], images or [])
 
 
33
 
34
- # combine scores
35
  total_score = 0
36
- total_score += (header_score or 0)
37
- total_score += (body_score or 0) * 1.0 # weight body normally
38
- total_score += (url_score or 0) * 1.2 # URLs a bit heavier
39
-
40
- # clamp
41
- try:
42
- total_score = float(total_score)
43
- except Exception:
44
- total_score = 0.0
45
- total_score = max(0.0, min(total_score, 100.0))
46
- total_score_rounded = round(total_score)
47
-
48
- # final verdict
49
  if total_score >= 70:
50
  verdict = "🚨 Malicious"
51
  elif 50 <= total_score < 70:
@@ -55,57 +45,58 @@ def analyze(file_path):
55
  else:
56
  verdict = "✅ Safe"
57
 
58
- # attack type heuristics
59
  attack_type = "General Phishing"
60
- combined_text_lower = ((subject or "") + "\n" + (body or "")).lower()
61
- if any(k in combined_text_lower for k in ["invoice", "payment", "wire transfer", "bank details"]):
62
- attack_type = "Invoice/Payment Fraud (BEC)"
63
- elif any(k in combined_text_lower for k in ["password", "verify", "account", "login", "credentials"]):
64
- attack_type = "Credential Harvesting (Phishing)"
65
- elif any("reply-to domain mismatch" in f.lower() for f in header_findings):
66
  attack_type = "Business Email Compromise (BEC)"
67
- elif any("spam" in f.lower() for f in body_findings + url_findings):
 
 
68
  attack_type = "Spam / Marketing"
69
  elif verdict == "✅ Safe":
70
- attack_type = "Benign / Normal Email"
71
-
72
- # tags
73
- tags = []
74
- for finding in (header_findings + body_findings + url_findings):
75
- fl = finding.lower()
76
- if "domain" in fl:
77
- tags.append("Suspicious Sender Domain")
78
- if "phishing" in fl or "malicious url" in fl or "urlhaus" in fl:
79
- tags.append("Phishing / Malicious URL")
80
- if "urgent" in fl or "suspicious phrase" in fl:
81
- tags.append("Urgent Language")
82
- if "spam" in fl or "marketing" in fl:
83
- tags.append("Spam Tone")
84
  if "spf" in fl or "dkim" in fl or "dmarc" in fl:
85
- tags.append("Auth Failures (SPF/DKIM/DMARC)")
86
- if "ocr" in fl or "extracted text" in fl:
87
- tags.append("Image-based content detected")
 
 
88
 
 
89
  summary = {
90
  "Final Verdict": verdict,
91
  "Attack Type": attack_type,
92
- "Attack Score": total_score_rounded,
93
- "Main Tags": ", ".join(sorted(set(tags))) if tags else "No special tags",
94
  }
95
 
96
  details = {
97
- "Header Findings": header_findings or [],
98
- "Body Findings": body_findings or [],
99
- "URL Findings": url_findings or [],
100
- "Highlighted Body": highlighted_body or "",
101
- "Auth Results": auth_summary or {}, # <-- NEW: show SPF, DKIM, DMARC results
 
 
102
  }
103
 
104
  return summary, details
105
 
106
 
107
  if __name__ == "__main__":
108
- fp = "sample.eml"
109
- s, d = analyze(fp)
110
- print("SUMMARY:", s)
111
- print("DETAILS:", d)
 
3
  from header_analyzer import analyze_headers
4
  from body_analyzer import analyze_body
5
  from url_analyzer import analyze_urls
6
+ from attachment_analyzer import analyze_attachments
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def analyze(file_path):
10
+ # ---------- PARSE ----------
11
+ headers, subject, body, urls, images, attachments = parse_email(file_path)
12
+
13
+ # ---------- HEADER ----------
14
+ header_findings, header_score, auth_summary = analyze_headers(headers, body)
15
 
16
+ # ---------- BODY ----------
17
+ body_findings, body_score, highlighted_body, body_verdict = analyze_body(
18
+ subject, body, urls, images
19
+ )
20
 
21
+ # ---------- URL ----------
22
+ url_findings, url_score = analyze_urls(urls)
23
 
24
+ # ---------- ATTACHMENTS ----------
25
+ attachment_findings, attachment_score, attachment_hashes = analyze_attachments(
26
+ attachments
27
+ )
28
 
29
+ # ---------- SCORE ENGINE (Phase 4.1 Simple Version) ----------
30
  total_score = 0
31
+ total_score += header_score * 1.0
32
+ total_score += body_score * 1.0
33
+ total_score += url_score * 1.2
34
+ total_score += attachment_score * 1.3
35
+
36
+ total_score = max(0, min(int(total_score), 100))
37
+
38
+ # ---------- FINAL VERDICT ----------
 
 
 
 
 
39
  if total_score >= 70:
40
  verdict = "🚨 Malicious"
41
  elif 50 <= total_score < 70:
 
45
  else:
46
  verdict = "✅ Safe"
47
 
48
+ # ---------- ATTACK TYPE ----------
49
  attack_type = "General Phishing"
50
+
51
+ if attachment_score >= 40:
52
+ attack_type = "Malware / Malicious Attachment"
53
+ elif "reply-to domain mismatch" in " ".join(header_findings).lower():
 
 
54
  attack_type = "Business Email Compromise (BEC)"
55
+ elif url_score >= 30:
56
+ attack_type = "Credential Harvesting / Phishing"
57
+ elif verdict == "📩 Spam":
58
  attack_type = "Spam / Marketing"
59
  elif verdict == "✅ Safe":
60
+ attack_type = "Benign"
61
+
62
+ # ---------- TAGGING ----------
63
+ tags = set()
64
+
65
+ for f in header_findings + body_findings + url_findings + attachment_findings:
66
+ fl = f.lower()
67
+ if "attachment" in fl:
68
+ tags.add("Malicious Attachment")
69
+ if "url" in fl:
70
+ tags.add("Suspicious URL")
 
 
 
71
  if "spf" in fl or "dkim" in fl or "dmarc" in fl:
72
+ tags.add("Auth Failure")
73
+ if "reply-to" in fl:
74
+ tags.add("BEC Indicator")
75
+ if "urgent" in fl:
76
+ tags.add("Urgency")
77
 
78
+ # ---------- OUTPUT ----------
79
  summary = {
80
  "Final Verdict": verdict,
81
  "Attack Type": attack_type,
82
+ "Attack Score": total_score,
83
+ "Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
84
  }
85
 
86
  details = {
87
+ "Header Findings": header_findings,
88
+ "Body Findings": body_findings,
89
+ "URL Findings": url_findings,
90
+ "Attachment Findings": attachment_findings,
91
+ "Attachment Hashes": attachment_hashes,
92
+ "Highlighted Body": highlighted_body,
93
+ "Auth Results": auth_summary,
94
  }
95
 
96
  return summary, details
97
 
98
 
99
  if __name__ == "__main__":
100
+ s, d = analyze("sample.eml")
101
+ print(s)
102
+ print(d)