Spaces:
Running
Running
Update analyze_email_main.py
Browse files- analyze_email_main.py +43 -37
analyze_email_main.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# analyze_email_main.py
|
| 2 |
|
| 3 |
-
import time
|
| 4 |
|
| 5 |
from parse_email import parse_email
|
| 6 |
from header_analyzer import analyze_headers
|
|
@@ -8,14 +8,15 @@ from body_analyzer import analyze_body
|
|
| 8 |
from url_analyzer import analyze_urls
|
| 9 |
from attachment_analyzer import analyze_attachments
|
| 10 |
from scoring_engine import compute_final_score
|
| 11 |
-
from
|
| 12 |
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def analyze(file_path):
|
| 15 |
-
|
| 16 |
-
# ⏱️ START TIMER
|
| 17 |
-
# =========================
|
| 18 |
-
start_time = time.perf_counter()
|
| 19 |
|
| 20 |
# =========================
|
| 21 |
# 📥 PARSE EMAIL
|
|
@@ -26,43 +27,46 @@ def analyze(file_path):
|
|
| 26 |
# 🧠 ANALYZERS
|
| 27 |
# =========================
|
| 28 |
|
| 29 |
-
|
|
|
|
| 30 |
|
|
|
|
| 31 |
body_findings, body_score, highlighted_body, body_verdict = analyze_body(
|
| 32 |
subject, body, urls, images
|
| 33 |
)
|
| 34 |
|
|
|
|
| 35 |
url_findings, url_score = analyze_urls(urls)
|
| 36 |
|
|
|
|
| 37 |
attachment_findings, attachment_score, attachment_hashes = analyze_attachments(
|
| 38 |
attachments
|
| 39 |
)
|
| 40 |
|
| 41 |
# =========================
|
| 42 |
-
#
|
| 43 |
# =========================
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
header_score=header_score,
|
| 46 |
body_score=body_score,
|
| 47 |
url_score=url_score,
|
| 48 |
attachment_score=attachment_score,
|
|
|
|
| 49 |
header_findings=header_findings,
|
| 50 |
body_findings=body_findings,
|
| 51 |
url_findings=url_findings,
|
| 52 |
attachment_findings=attachment_findings,
|
| 53 |
auth_results=auth_summary,
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
# =========================
|
| 57 |
-
# 🎯 ATTACK CLASSIFICATION
|
| 58 |
-
# =========================
|
| 59 |
-
attack_type, attack_confidence, attack_reasoning = classify_attack(
|
| 60 |
-
final_score=final_score,
|
| 61 |
-
header_findings=header_findings,
|
| 62 |
-
body_findings=body_findings,
|
| 63 |
-
url_findings=url_findings,
|
| 64 |
-
attachment_findings=attachment_findings,
|
| 65 |
-
auth_results=auth_summary,
|
| 66 |
)
|
| 67 |
|
| 68 |
# =========================
|
|
@@ -75,13 +79,13 @@ def analyze(file_path):
|
|
| 75 |
):
|
| 76 |
fl = finding.lower()
|
| 77 |
|
| 78 |
-
if "attachment" in fl or "macro" in fl or "html
|
| 79 |
tags.add("Malicious Attachment")
|
| 80 |
|
| 81 |
-
if "reply-to" in fl or "
|
| 82 |
tags.add("BEC Indicator")
|
| 83 |
|
| 84 |
-
if "url" in fl or "phishing" in fl
|
| 85 |
tags.add("Malicious / Phishing URL")
|
| 86 |
|
| 87 |
if "spf" in fl or "dkim" in fl or "dmarc" in fl:
|
|
@@ -90,27 +94,28 @@ def analyze(file_path):
|
|
| 90 |
if "brand" in fl or "look-alike" in fl:
|
| 91 |
tags.add("Brand Spoofing")
|
| 92 |
|
| 93 |
-
if "urgent" in fl or "immediately" in fl
|
| 94 |
tags.add("Urgency / Social Engineering")
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
|
| 99 |
# =========================
|
| 100 |
-
#
|
| 101 |
# =========================
|
| 102 |
-
|
| 103 |
-
processing_time = round(end_time - start_time, 3) # seconds
|
| 104 |
|
| 105 |
# =========================
|
| 106 |
# 📊 SUMMARY OUTPUT
|
| 107 |
# =========================
|
| 108 |
summary = {
|
| 109 |
"Final Verdict": verdict,
|
| 110 |
-
"Attack Type":
|
| 111 |
-
"Attack
|
| 112 |
-
"
|
| 113 |
-
"Processing Time": f"{processing_time} seconds",
|
| 114 |
"Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
|
| 115 |
}
|
| 116 |
|
|
@@ -125,8 +130,9 @@ def analyze(file_path):
|
|
| 125 |
"Attachment Hashes": attachment_hashes,
|
| 126 |
"Highlighted Body": highlighted_body,
|
| 127 |
"Auth Results": auth_summary,
|
| 128 |
-
"
|
| 129 |
-
"
|
|
|
|
| 130 |
}
|
| 131 |
|
| 132 |
return summary, details
|
|
@@ -149,4 +155,4 @@ if __name__ == "__main__":
|
|
| 149 |
for item in v:
|
| 150 |
print(f" - {item}")
|
| 151 |
else:
|
| 152 |
-
print(
|
|
|
|
| 1 |
# analyze_email_main.py
|
| 2 |
|
| 3 |
+
import time
|
| 4 |
|
| 5 |
from parse_email import parse_email
|
| 6 |
from header_analyzer import analyze_headers
|
|
|
|
| 8 |
from url_analyzer import analyze_urls
|
| 9 |
from attachment_analyzer import analyze_attachments
|
| 10 |
from scoring_engine import compute_final_score
|
| 11 |
+
from behavioral_analyzer import analyze_behavior, behavioral_summary
|
| 12 |
|
| 13 |
|
| 14 |
+
# =========================
|
| 15 |
+
# MAIN ANALYSIS FUNCTION
|
| 16 |
+
# =========================
|
| 17 |
+
|
| 18 |
def analyze(file_path):
|
| 19 |
+
start_time = time.time()
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# =========================
|
| 22 |
# 📥 PARSE EMAIL
|
|
|
|
| 27 |
# 🧠 ANALYZERS
|
| 28 |
# =========================
|
| 29 |
|
| 30 |
+
# Header analysis (auth, spoofing, BEC signals)
|
| 31 |
+
header_findings, header_score, auth_summary = analyze_headers(headers)
|
| 32 |
|
| 33 |
+
# Body heuristic / NLP analysis
|
| 34 |
body_findings, body_score, highlighted_body, body_verdict = analyze_body(
|
| 35 |
subject, body, urls, images
|
| 36 |
)
|
| 37 |
|
| 38 |
+
# URL analysis
|
| 39 |
url_findings, url_score = analyze_urls(urls)
|
| 40 |
|
| 41 |
+
# Attachment analysis
|
| 42 |
attachment_findings, attachment_score, attachment_hashes = analyze_attachments(
|
| 43 |
attachments
|
| 44 |
)
|
| 45 |
|
| 46 |
# =========================
|
| 47 |
+
# 🧠 BEHAVIORAL ANALYSIS (PHASE 4.3 – CORE FIX)
|
| 48 |
# =========================
|
| 49 |
+
behavior_result = analyze_behavior(body)
|
| 50 |
+
behavior_score = behavior_result["confidence_score"]
|
| 51 |
+
behavior_attack = behavior_result["dominant_attack"]
|
| 52 |
+
behavior_verdict = behavior_result["verdict"]
|
| 53 |
+
behavior_text = behavioral_summary(behavior_result)
|
| 54 |
+
|
| 55 |
+
# =========================
|
| 56 |
+
# 🧮 FINAL CORRELATION SCORING
|
| 57 |
+
# =========================
|
| 58 |
+
final_score, verdict, reasoning = compute_final_score(
|
| 59 |
header_score=header_score,
|
| 60 |
body_score=body_score,
|
| 61 |
url_score=url_score,
|
| 62 |
attachment_score=attachment_score,
|
| 63 |
+
behavior_score=behavior_score, # 🔥 NEW
|
| 64 |
header_findings=header_findings,
|
| 65 |
body_findings=body_findings,
|
| 66 |
url_findings=url_findings,
|
| 67 |
attachment_findings=attachment_findings,
|
| 68 |
auth_results=auth_summary,
|
| 69 |
+
behavior_result=behavior_result, # 🔥 NEW
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
# =========================
|
|
|
|
| 79 |
):
|
| 80 |
fl = finding.lower()
|
| 81 |
|
| 82 |
+
if "attachment" in fl or "macro" in fl or "html" in fl:
|
| 83 |
tags.add("Malicious Attachment")
|
| 84 |
|
| 85 |
+
if "reply-to" in fl or "bec" in fl:
|
| 86 |
tags.add("BEC Indicator")
|
| 87 |
|
| 88 |
+
if "url" in fl or "phishing" in fl:
|
| 89 |
tags.add("Malicious / Phishing URL")
|
| 90 |
|
| 91 |
if "spf" in fl or "dkim" in fl or "dmarc" in fl:
|
|
|
|
| 94 |
if "brand" in fl or "look-alike" in fl:
|
| 95 |
tags.add("Brand Spoofing")
|
| 96 |
|
| 97 |
+
if "urgent" in fl or "immediately" in fl:
|
| 98 |
tags.add("Urgency / Social Engineering")
|
| 99 |
|
| 100 |
+
# Behavioral tags (VERY IMPORTANT)
|
| 101 |
+
if behavior_attack != "None":
|
| 102 |
+
tags.add(behavior_attack.upper())
|
| 103 |
+
tags.add("Behavioral Threat")
|
| 104 |
|
| 105 |
# =========================
|
| 106 |
+
# ⏱ PROCESSING TIME
|
| 107 |
# =========================
|
| 108 |
+
processing_time = round(time.time() - start_time, 2)
|
|
|
|
| 109 |
|
| 110 |
# =========================
|
| 111 |
# 📊 SUMMARY OUTPUT
|
| 112 |
# =========================
|
| 113 |
summary = {
|
| 114 |
"Final Verdict": verdict,
|
| 115 |
+
"Attack Type": behavior_attack if behavior_attack != "None" else "Undetermined",
|
| 116 |
+
"Attack Score": f"{final_score}/100",
|
| 117 |
+
"Behavior Confidence": f"{behavior_score}/100",
|
| 118 |
+
"Processing Time": f"{processing_time} seconds",
|
| 119 |
"Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
|
| 120 |
}
|
| 121 |
|
|
|
|
| 130 |
"Attachment Hashes": attachment_hashes,
|
| 131 |
"Highlighted Body": highlighted_body,
|
| 132 |
"Auth Results": auth_summary,
|
| 133 |
+
"Behavioral Analysis": behavior_result,
|
| 134 |
+
"Behavioral Summary": behavior_text,
|
| 135 |
+
"Scoring Reasoning": reasoning,
|
| 136 |
}
|
| 137 |
|
| 138 |
return summary, details
|
|
|
|
| 155 |
for item in v:
|
| 156 |
print(f" - {item}")
|
| 157 |
else:
|
| 158 |
+
print(v)
|