Spaces:

harshitmahour360
/

Phising_Mail

Sleeping

App Files Files Community

Phising_Mail / app.py

harshitmahour360

Update app.py

e91a523 verified 5 months ago

raw

history blame contribute delete

4.84 kB

	import traceback
	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

	# ---------------------------------------------------------
	# 1. CONFIG
	# ---------------------------------------------------------

	# You can swap this with another text-classification model if you like.
	# Going by its name, this one is fine-tuned for SMS spam detection; it is
	# used here as a proxy for phishing / spam-like email detection.
	MODEL_NAME = "mrm8488/bert-tiny-finetuned-sms-spam-detection"

	# Filled in by the startup load below: on success `clf_pipe` holds the
	# ready-to-call pipeline; on failure it stays None and `model_load_error`
	# holds a human-readable message for the UI.
	clf_pipe = None
	model_load_error = None

	# ---------------------------------------------------------
	# 2. LOAD MODEL ON STARTUP
	# ---------------------------------------------------------
	# The load is wrapped in a broad try/except on purpose: a failure here must
	# not stop the app from starting, it is reported per request instead.
	try:
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
	    clf_pipe = pipeline(
	        "text-classification",
	        model=model,
	        tokenizer=tokenizer,
	        # Ask for the score of every label, not only the top one.
	        # NOTE(review): newer transformers releases reportedly deprecate
	        # this flag in favour of `top_k=None`; the output nesting may
	        # differ, so confirm against the inference code before switching.
	        return_all_scores=True,
	    )
	    print(f"[INFO] Loaded HF model: {MODEL_NAME}")
	except Exception as e:
	    model_load_error = f"Failed to load model {MODEL_NAME}: {e}"
	    print("[ERROR]", model_load_error)
	    # The one-line message often hides the real cause (network, missing
	    # weights, version mismatch), so also emit the full stack to the logs.
	    traceback.print_exc()


	# ---------------------------------------------------------
	# 3. INFERENCE FUNCTION
	# ---------------------------------------------------------
	def _scores_by_label(raw_output):
	    """Flatten text-classification pipeline output into ``{label: score}``.

	    Accepts a nested list (``[[{"label": ..., "score": ...}, ...]]``), a flat
	    list of such dicts, or a single dict, so the caller does not depend on
	    the exact nesting the pipeline happens to return.
	    """
	    rows = raw_output
	    # Peel off outer list layers until we reach the per-label dicts.
	    while isinstance(rows, (list, tuple)) and rows and isinstance(rows[0], (list, tuple)):
	        rows = rows[0]
	    if isinstance(rows, dict):
	        rows = [rows]
	    return {row["label"]: float(row["score"]) for row in rows}


	def _phishing_probability(label_scores):
	    """Return the phishing probability, or ``None`` if no label can be mapped.

	    Labels whose name contains "spam" or "phish" are summed. If there are
	    none and the model has exactly two labels, the label containing "1"
	    (e.g. ``LABEL_1``) is assumed to be the phishing class.
	    """
	    named = [
	        score
	        for label, score in label_scores.items()
	        if "spam" in label.lower() or "phish" in label.lower()
	    ]
	    if named:
	        return sum(named, 0.0)

	    if len(label_scores) == 2:
	        generic = [score for label, score in label_scores.items() if "1" in label]
	        if generic:
	            return max(generic)

	    # Nothing identifies the phishing class; let the caller report it
	    # instead of guessing (or crashing on max() of an empty sequence).
	    return None


	def predict_email(email_text: str):
	    """Classify an email body and return values for the three result widgets.

	    Args:
	        email_text: Raw email body. ``None`` or whitespace-only text is
	            treated as "no input".

	    Returns:
	        A ``(prediction, probability, details)`` tuple. ``prediction`` is a
	        display string, ``probability`` is the phishing probability rounded
	        to 4 decimals (``0.0`` in every error case) and ``details`` is a
	        dict with either the raw scores or an error description.

	    Any ``Exception`` raised while predicting is caught and reported through
	    the returned tuple rather than propagated.
	    """
	    try:
	        text = (email_text or "").strip()

	        if not text:
	            return (
	                "❌ No text provided",
	                0.0,
	                {"error": "Please paste an email body first."},
	            )

	        if clf_pipe is None:
	            return (
	                "❌ Model load error",
	                0.0,
	                {"error": model_load_error},
	            )

	        # Run model; long inputs are truncated to the 512-token limit.
	        raw_output = clf_pipe(text, truncation=True, max_length=512)
	        label_scores = _scores_by_label(raw_output)

	        phishing_prob = _phishing_probability(label_scores)
	        if phishing_prob is None:
	            return (
	                "❌ Unrecognized model labels",
	                0.0,
	                {
	                    "error": "Could not tell which model label means phishing/spam.",
	                    "raw_label_scores": label_scores,
	                    "model_name": MODEL_NAME,
	                },
	            )

	        final_prob = float(phishing_prob)
	        prediction = (
	            "⚠️ Phishing / Suspicious"
	            if final_prob >= 0.5
	            else "✅ Likely Legitimate"
	        )

	        details = {
	            "raw_label_scores": label_scores,
	            "phishing_probability_used": final_prob,
	            "model_name": MODEL_NAME,
	        }

	        return prediction, round(final_prob, 4), details

	    except Exception as e:
	        # Catch ANY internal error so Gradio never shows blank "Error"
	        return (
	            "❌ Internal error",
	            0.0,
	            {
	                "exception": str(e),
	                "traceback": traceback.format_exc(),
	            },
	        )


	# ---------------------------------------------------------
	# 4. GRADIO UI
	# ---------------------------------------------------------
	# Page layout: an intro banner, then an input column (email text plus
	# Analyze / Clear buttons) next to a result column (prediction label,
	# probability, and a JSON panel with model details).
	with gr.Blocks(title="Phishing / Spam Email Detector (LLM)") as demo:
	    gr.Markdown(
	        """
	# 🛡️ Phishing / Spam Email Detector (LLM-only)

	- Uses a BERT-tiny spam/phishing-style classifier from Hugging Face
	- No `.pkl` models required
	- Fast and lightweight enough for CPU Spaces

	Paste an email body below to analyse it.
	"""
	    )

	    with gr.Row():
	        with gr.Column(scale=3):
	            gr.Markdown("### ✉️ Email Content")
	            email_input = gr.Textbox(
	                lines=15,
	                placeholder="Paste the full email text here...",
	                label="Email body",
	            )

	            with gr.Row():
	                submit_btn = gr.Button("🔍 Analyze", variant="primary")
	                clear_btn = gr.Button("🧹 Clear")

	        with gr.Column(scale=2):
	            gr.Markdown("### 🧾 Result")

	            label_out = gr.Label(label="Prediction")
	            prob_out = gr.Number(
	                label="Phishing probability (0–1)",
	                precision=4,
	            )
	            details_out = gr.JSON(
	                label="Model details / debug",
	            )

	    # Analyze: feed the textbox to the classifier and fill all three outputs.
	    submit_btn.click(
	        fn=predict_email,
	        inputs=email_input,
	        outputs=[label_out, prob_out, details_out],
	    )

	    # Clear: reset the input and every result widget, including the
	    # prediction label, so no stale verdict stays on screen.
	    clear_btn.click(
	        fn=lambda: ("", None, None, {}),
	        inputs=None,
	        outputs=[email_input, label_out, prob_out, details_out],
	    )

	# Start the web server only when run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()