Phising_Mail / app.py
harshitmahour360's picture
Update app.py
e91a523 verified
import traceback
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
# ---------------------------------------------------------
# 1. CONFIG
# ---------------------------------------------------------
# Hugging Face model id used for classification. You can swap this with
# another text-classification model if you like.
# NOTE(review): the id suggests this checkpoint is fine-tuned for SMS spam
# detection rather than phishing email specifically — confirm it is a good
# fit for full email bodies.
MODEL_NAME = "mrm8488/bert-tiny-finetuned-sms-spam-detection"
# Text-classification pipeline; remains None unless the startup load succeeds.
clf_pipe = None
# Human-readable reason for a failed startup load; None when no failure was recorded.
model_load_error = None
# ---------------------------------------------------------
# 2. LOAD MODEL ON STARTUP
# ---------------------------------------------------------
# Executed once when the module is imported. A failure at any step is
# recorded in ``model_load_error`` (and printed) instead of propagating, so
# the app can still start and report the problem; ``clf_pipe`` then stays None.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    # Ask for the score of every label, not only the top-ranked one.
    clf_pipe = pipeline(
        task="text-classification",
        tokenizer=tokenizer,
        model=model,
        return_all_scores=True,
    )
    print(f"[INFO] Loaded HF model: {MODEL_NAME}")
except Exception as load_exc:
    model_load_error = f"Failed to load model {MODEL_NAME}: {load_exc}"
    print("[ERROR]", model_load_error)
# ---------------------------------------------------------
# 3. INFERENCE FUNCTION
# ---------------------------------------------------------
def _label_scores_from(raw_output):
    """Flatten a text-classification pipeline result into ``{label: score}``.

    Accepts the nested form ``[[{"label": ..., "score": ...}, ...]]``, a flat
    list of such dicts, or a single dict, so the caller does not depend on
    which of these shapes the pipeline happens to return.
    """
    rows = raw_output
    # Single-input calls may come back wrapped in an outer list.
    if isinstance(rows, (list, tuple)) and rows and isinstance(rows[0], (list, tuple)):
        rows = rows[0]
    if isinstance(rows, dict):
        rows = [rows]
    return {row["label"]: float(row["score"]) for row in rows}


def _phishing_probability(label_scores):
    """Return the phishing score in [0, 1], or ``None`` if no label is recognisable.

    Labels whose name mentions "spam" or "phish" count as phishing, unless the
    name is negated ("not_spam", "non-phishing", ...). When no such label
    exists and the model has exactly two labels, the one containing "1"
    (e.g. ``LABEL_1``) is assumed to be the positive class.
    """
    def is_phishing(label):
        name = label.strip().lower()
        # "not_spam" contains "spam" but means the opposite.
        if name.startswith(("not", "non")):
            return False
        return "spam" in name or "phish" in name

    named = [score for label, score in label_scores.items() if is_phishing(label)]
    if named:
        # Several phishing-like labels may exist; cap the sum at 1.0.
        return min(1.0, sum(named))

    if len(label_scores) == 2:
        generic = [score for label, score in label_scores.items() if "1" in label]
        if generic:
            return max(generic)

    return None


def predict_email(email_text: str):
    """Classify an email body as phishing/suspicious or likely legitimate.

    Args:
        email_text: Raw email body; ``None`` and whitespace-only input are
            treated as empty.

    Returns:
        A ``(prediction, probability, details)`` tuple: a display string, the
        phishing probability rounded to 4 decimals (``0.0`` on any error
        path), and a JSON-serialisable dict with either the raw label scores
        or an error description.

    Never raises: every failure is converted into an error tuple so the UI
    always has something to display.
    """
    try:
        text = (email_text or "").strip()
        if not text:
            return (
                "❌ No text provided",
                0.0,
                {"error": "Please paste an email body first."},
            )
        if clf_pipe is None:
            return (
                "❌ Model load error",
                0.0,
                {"error": model_load_error},
            )

        # Run model
        raw_output = clf_pipe(text, truncation=True, max_length=512)
        label_scores = _label_scores_from(raw_output)

        phishing_prob = _phishing_probability(label_scores)
        if phishing_prob is None:
            # Report the problem explicitly instead of guessing a verdict.
            return (
                "❌ Unrecognised model labels",
                0.0,
                {
                    "error": "Could not tell which model label means phishing/spam.",
                    "raw_label_scores": label_scores,
                    "model_name": MODEL_NAME,
                },
            )

        final_prob = float(phishing_prob)
        prediction = (
            "⚠️ Phishing / Suspicious"
            if final_prob >= 0.5
            else "✅ Likely Legitimate"
        )
        details = {
            "raw_label_scores": label_scores,
            "phishing_probability_used": final_prob,
            "model_name": MODEL_NAME,
        }
        return prediction, round(final_prob, 4), details
    except Exception as e:
        # Catch ANY internal error so Gradio never shows blank "Error"
        return (
            "❌ Internal error",
            0.0,
            {
                "exception": str(e),
                "traceback": traceback.format_exc(),
            },
        )
# ---------------------------------------------------------
# 4. GRADIO UI
# ---------------------------------------------------------
# Two-column layout: the email input and action buttons on the left, the
# prediction label, probability and debug JSON on the right.
with gr.Blocks(title="Phishing / Spam Email Detector (LLM)") as demo:
    # The blank line before the last sentence keeps it from rendering as a
    # continuation of the final bullet.
    gr.Markdown(
        """
# 🛡️ Phishing / Spam Email Detector (LLM-only)
- Uses a **BERT-tiny** spam/phishing-style classifier from Hugging Face
- No `.pkl` models required
- Fast and lightweight enough for CPU Spaces

Paste an email body below to analyse it.
"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("### ✉️ Email Content")
            email_input = gr.Textbox(
                lines=15,
                placeholder="Paste the full email text here...",
                label="Email body",
            )
            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze", variant="primary")
                clear_btn = gr.Button("🧹 Clear")
        with gr.Column(scale=2):
            gr.Markdown("### 🧾 Result")
            label_out = gr.Label(label="Prediction")
            prob_out = gr.Number(
                label="Phishing probability (0–1)",
                precision=4,
            )
            details_out = gr.JSON(
                label="Model details / debug",
            )

    # Analyze: run the classifier and fill all three result widgets.
    submit_btn.click(
        fn=predict_email,
        inputs=email_input,
        outputs=[label_out, prob_out, details_out],
    )
    # Clear: reset the input and every result widget, including the
    # prediction label, so no stale verdict stays on screen.
    clear_btn.click(
        fn=lambda: ("", None, None, {}),
        inputs=None,
        outputs=[email_input, label_out, prob_out, details_out],
    )

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()