Spaces:
Sleeping
Sleeping
Update analyze_email_main.py
Browse files- analyze_email_main.py +20 -52
analyze_email_main.py
CHANGED
|
@@ -11,10 +11,6 @@ from scoring_engine import compute_final_score
|
|
| 11 |
from behavioral_analyzer import analyze_behavior, behavioral_summary
|
| 12 |
|
| 13 |
|
| 14 |
-
# =========================
|
| 15 |
-
# MAIN ANALYSIS FUNCTION
|
| 16 |
-
# =========================
|
| 17 |
-
|
| 18 |
def analyze(file_path):
|
| 19 |
start_time = time.time()
|
| 20 |
|
|
@@ -26,51 +22,52 @@ def analyze(file_path):
|
|
| 26 |
# =========================
|
| 27 |
# 🧠 ANALYZERS
|
| 28 |
# =========================
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
# Header analysis (auth, spoofing, BEC signals)
|
| 31 |
-
header_findings, header_score, auth_summary = analyze_headers(headers)
|
| 32 |
-
|
| 33 |
-
# Body heuristic / NLP analysis
|
| 34 |
body_findings, body_score, highlighted_body, body_verdict = analyze_body(
|
| 35 |
subject, body, urls, images
|
| 36 |
)
|
| 37 |
|
| 38 |
-
# URL analysis
|
| 39 |
url_findings, url_score = analyze_urls(urls)
|
| 40 |
|
| 41 |
-
# Attachment analysis
|
| 42 |
attachment_findings, attachment_score, attachment_hashes = analyze_attachments(
|
| 43 |
attachments
|
| 44 |
)
|
| 45 |
|
| 46 |
# =========================
|
| 47 |
-
# 🧠 BEHAVIORAL ANALYSIS
|
| 48 |
# =========================
|
| 49 |
behavior_result = analyze_behavior(body)
|
| 50 |
behavior_score = behavior_result["confidence_score"]
|
| 51 |
behavior_attack = behavior_result["dominant_attack"]
|
| 52 |
behavior_verdict = behavior_result["verdict"]
|
|
|
|
| 53 |
behavior_text = behavioral_summary(behavior_result)
|
| 54 |
|
| 55 |
# =========================
|
| 56 |
-
# 🧮 FINAL
|
| 57 |
# =========================
|
| 58 |
final_score, verdict, reasoning = compute_final_score(
|
| 59 |
header_score=header_score,
|
| 60 |
body_score=body_score,
|
| 61 |
url_score=url_score,
|
| 62 |
attachment_score=attachment_score,
|
| 63 |
-
behavior_score=behavior_score,
|
| 64 |
header_findings=header_findings,
|
| 65 |
body_findings=body_findings,
|
| 66 |
url_findings=url_findings,
|
| 67 |
attachment_findings=attachment_findings,
|
| 68 |
-
|
| 69 |
-
|
| 70 |
)
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
# =========================
|
| 73 |
-
# 🏷 TAGGING
|
| 74 |
# =========================
|
| 75 |
tags = set()
|
| 76 |
|
|
@@ -79,36 +76,30 @@ def analyze(file_path):
|
|
| 79 |
):
|
| 80 |
fl = finding.lower()
|
| 81 |
|
| 82 |
-
if "attachment" in fl or "macro" in fl
|
| 83 |
tags.add("Malicious Attachment")
|
| 84 |
-
|
| 85 |
if "reply-to" in fl or "bec" in fl:
|
| 86 |
tags.add("BEC Indicator")
|
| 87 |
-
|
| 88 |
if "url" in fl or "phishing" in fl:
|
| 89 |
-
tags.add("Malicious
|
| 90 |
-
|
| 91 |
if "spf" in fl or "dkim" in fl or "dmarc" in fl:
|
| 92 |
tags.add("Email Authentication Failure")
|
| 93 |
-
|
| 94 |
if "brand" in fl or "look-alike" in fl:
|
| 95 |
tags.add("Brand Spoofing")
|
| 96 |
-
|
| 97 |
-
if "urgent" in fl or "immediately" in fl:
|
| 98 |
tags.add("Urgency / Social Engineering")
|
| 99 |
|
| 100 |
-
# Behavioral tags (VERY IMPORTANT)
|
| 101 |
if behavior_attack != "None":
|
| 102 |
-
tags.add(behavior_attack.upper())
|
| 103 |
tags.add("Behavioral Threat")
|
|
|
|
| 104 |
|
| 105 |
# =========================
|
| 106 |
-
# ⏱
|
| 107 |
# =========================
|
| 108 |
processing_time = round(time.time() - start_time, 2)
|
| 109 |
|
| 110 |
# =========================
|
| 111 |
-
# ๐
|
| 112 |
# =========================
|
| 113 |
summary = {
|
| 114 |
"Final Verdict": verdict,
|
|
@@ -119,9 +110,6 @@ def analyze(file_path):
|
|
| 119 |
"Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
|
| 120 |
}
|
| 121 |
|
| 122 |
-
# =========================
|
| 123 |
-
# ๐ DETAILED OUTPUT
|
| 124 |
-
# =========================
|
| 125 |
details = {
|
| 126 |
"Header Findings": header_findings,
|
| 127 |
"Body Findings": body_findings,
|
|
@@ -129,30 +117,10 @@ def analyze(file_path):
|
|
| 129 |
"Attachment Findings": attachment_findings,
|
| 130 |
"Attachment Hashes": attachment_hashes,
|
| 131 |
"Highlighted Body": highlighted_body,
|
| 132 |
-
"Auth Results":
|
| 133 |
"Behavioral Analysis": behavior_result,
|
| 134 |
"Behavioral Summary": behavior_text,
|
| 135 |
"Scoring Reasoning": reasoning,
|
| 136 |
}
|
| 137 |
|
| 138 |
return summary, details
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
# =========================
|
| 142 |
-
# 🧪 LOCAL TEST
|
| 143 |
-
# =========================
|
| 144 |
-
if __name__ == "__main__":
|
| 145 |
-
summary, details = analyze("sample.eml")
|
| 146 |
-
|
| 147 |
-
print("\n===== SUMMARY =====")
|
| 148 |
-
for k, v in summary.items():
|
| 149 |
-
print(f"{k}: {v}")
|
| 150 |
-
|
| 151 |
-
print("\n===== DETAILS =====")
|
| 152 |
-
for k, v in details.items():
|
| 153 |
-
print(f"\n{k}:")
|
| 154 |
-
if isinstance(v, list):
|
| 155 |
-
for item in v:
|
| 156 |
-
print(f" - {item}")
|
| 157 |
-
else:
|
| 158 |
-
print(v)
|
|
|
|
| 11 |
from behavioral_analyzer import analyze_behavior, behavioral_summary
|
| 12 |
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def analyze(file_path):
|
| 15 |
start_time = time.time()
|
| 16 |
|
|
|
|
| 22 |
# =========================
|
| 23 |
# 🧠 ANALYZERS
|
| 24 |
# =========================
|
| 25 |
+
header_findings, header_score, auth_summary = analyze_headers(headers, body)
|
| 26 |
+
auth_results = auth_summary # must be dict
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
body_findings, body_score, highlighted_body, body_verdict = analyze_body(
|
| 29 |
subject, body, urls, images
|
| 30 |
)
|
| 31 |
|
|
|
|
| 32 |
url_findings, url_score = analyze_urls(urls)
|
| 33 |
|
|
|
|
| 34 |
attachment_findings, attachment_score, attachment_hashes = analyze_attachments(
|
| 35 |
attachments
|
| 36 |
)
|
| 37 |
|
| 38 |
# =========================
|
| 39 |
+
# 🧠 BEHAVIORAL ANALYSIS
|
| 40 |
# =========================
|
| 41 |
behavior_result = analyze_behavior(body)
|
| 42 |
behavior_score = behavior_result["confidence_score"]
|
| 43 |
behavior_attack = behavior_result["dominant_attack"]
|
| 44 |
behavior_verdict = behavior_result["verdict"]
|
| 45 |
+
behavior_findings = behavior_result.get("findings", [])
|
| 46 |
behavior_text = behavioral_summary(behavior_result)
|
| 47 |
|
| 48 |
# =========================
|
| 49 |
+
# 🧮 FINAL SCORING
|
| 50 |
# =========================
|
| 51 |
final_score, verdict, reasoning = compute_final_score(
|
| 52 |
header_score=header_score,
|
| 53 |
body_score=body_score,
|
| 54 |
url_score=url_score,
|
| 55 |
attachment_score=attachment_score,
|
| 56 |
+
behavior_score=behavior_score,
|
| 57 |
header_findings=header_findings,
|
| 58 |
body_findings=body_findings,
|
| 59 |
url_findings=url_findings,
|
| 60 |
attachment_findings=attachment_findings,
|
| 61 |
+
behavior_findings=behavior_findings,
|
| 62 |
+
auth_results=auth_results,
|
| 63 |
)
|
| 64 |
|
| 65 |
+
# ๐ Safety override
|
| 66 |
+
if behavior_verdict == "Malicious" and verdict == "✅ Safe":
|
| 67 |
+
verdict = "⚠️ Suspicious"
|
| 68 |
+
|
| 69 |
# =========================
|
| 70 |
+
# 🏷 TAGGING
|
| 71 |
# =========================
|
| 72 |
tags = set()
|
| 73 |
|
|
|
|
| 76 |
):
|
| 77 |
fl = finding.lower()
|
| 78 |
|
| 79 |
+
if "attachment" in fl or "macro" in fl:
|
| 80 |
tags.add("Malicious Attachment")
|
|
|
|
| 81 |
if "reply-to" in fl or "bec" in fl:
|
| 82 |
tags.add("BEC Indicator")
|
|
|
|
| 83 |
if "url" in fl or "phishing" in fl:
|
| 84 |
+
tags.add("Malicious URL")
|
|
|
|
| 85 |
if "spf" in fl or "dkim" in fl or "dmarc" in fl:
|
| 86 |
tags.add("Email Authentication Failure")
|
|
|
|
| 87 |
if "brand" in fl or "look-alike" in fl:
|
| 88 |
tags.add("Brand Spoofing")
|
| 89 |
+
if "urgent" in fl:
|
|
|
|
| 90 |
tags.add("Urgency / Social Engineering")
|
| 91 |
|
|
|
|
| 92 |
if behavior_attack != "None":
|
|
|
|
| 93 |
tags.add("Behavioral Threat")
|
| 94 |
+
tags.add(behavior_attack.upper())
|
| 95 |
|
| 96 |
# =========================
|
| 97 |
+
# ⏱ TIME
|
| 98 |
# =========================
|
| 99 |
processing_time = round(time.time() - start_time, 2)
|
| 100 |
|
| 101 |
# =========================
|
| 102 |
+
# ๐ OUTPUT
|
| 103 |
# =========================
|
| 104 |
summary = {
|
| 105 |
"Final Verdict": verdict,
|
|
|
|
| 110 |
"Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
|
| 111 |
}
|
| 112 |
|
|
|
|
|
|
|
|
|
|
| 113 |
details = {
|
| 114 |
"Header Findings": header_findings,
|
| 115 |
"Body Findings": body_findings,
|
|
|
|
| 117 |
"Attachment Findings": attachment_findings,
|
| 118 |
"Attachment Hashes": attachment_hashes,
|
| 119 |
"Highlighted Body": highlighted_body,
|
| 120 |
+
"Auth Results": auth_results,
|
| 121 |
"Behavioral Analysis": behavior_result,
|
| 122 |
"Behavioral Summary": behavior_text,
|
| 123 |
"Scoring Reasoning": reasoning,
|
| 124 |
}
|
| 125 |
|
| 126 |
return summary, details
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|