Spaces:
Sleeping
Sleeping
| import traceback | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | |
# ---------------------------------------------------------
# 1. CONFIG
# ---------------------------------------------------------
# Hugging Face Hub id of the sequence-classification checkpoint to serve.
# Any other text-classification model can be substituted here.
MODEL_NAME = "mrm8488/bert-tiny-finetuned-sms-spam-detection"

# Startup state read by the request handler: `clf_pipe` holds the ready
# pipeline when loading succeeds, `model_load_error` the failure message
# when it does not.
model_load_error = None
clf_pipe = None

# ---------------------------------------------------------
# 2. LOAD MODEL ON STARTUP
# ---------------------------------------------------------
# Loading happens once at import time. Any failure is recorded instead of
# raised so the UI can still start and report the problem per request.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    # return_all_scores=True requests a score for every label rather than
    # only the top-ranked one.
    clf_pipe = pipeline(
        "text-classification",
        tokenizer=tokenizer,
        model=model,
        return_all_scores=True,
    )
except Exception as err:
    model_load_error = f"Failed to load model {MODEL_NAME}: {err}"
    print("[ERROR]", model_load_error)
else:
    print(f"[INFO] Loaded HF model: {MODEL_NAME}")
# ---------------------------------------------------------
# 3. INFERENCE FUNCTION
# ---------------------------------------------------------
def _is_phishing_label(label):
    """Return True if *label* names the spam/phishing class itself."""
    name = label.lower().strip(" _-")
    if "spam" not in name and "phish" not in name:
        return False
    # Labels such as "not_spam", "non-phishing" or "no spam" contain the
    # keyword but denote the opposite class; counting them would push the
    # phishing probability towards 1.0 for every input.
    for prefix in ("not", "non", "no"):
        if name.startswith(prefix):
            remainder = name[len(prefix):].lstrip(" _-")
            if remainder.startswith(("spam", "phish")):
                return False
    return True


def _phishing_probability(label_scores):
    """Return the score mass on phishing labels, or None if none is identifiable."""
    named = [
        score for label, score in label_scores.items()
        if _is_phishing_label(label)
    ]
    if named:
        return sum(named)
    # Fallback for generic two-class heads (LABEL_0 / LABEL_1): assume the
    # label carrying "1" is the positive (phishing) class.
    if len(label_scores) == 2:
        generic = [score for label, score in label_scores.items() if "1" in label]
        if generic:
            return max(generic)
    return None


def predict_email(email_text: str):
    """Classify an email body as phishing/suspicious or likely legitimate.

    Args:
        email_text: Raw email body. ``None`` is treated as an empty string.

    Returns:
        A ``(prediction, probability, details)`` tuple for the Gradio outputs:
        a human-readable verdict string, the phishing probability rounded to
        four decimals (``0.0`` on any error path), and a JSON-serialisable
        dict with either the raw scores or an error description.

    The function never raises: every failure is converted into an error
    tuple so the UI always has something meaningful to display.
    """
    try:
        text = (email_text or "").strip()
        if not text:
            return (
                "❌ No text provided",
                0.0,
                {"error": "Please paste an email body first."},
            )
        if clf_pipe is None:
            return (
                "❌ Model load error",
                0.0,
                {"error": model_load_error},
            )

        # Run model
        raw = clf_pipe(text, truncation=True, max_length=512)
        # The per-label dicts may arrive nested in an outer list
        # ([[{...}, ...]]), as a flat list, or as a single dict; normalise
        # all three to one flat list of {"label": ..., "score": ...}.
        if isinstance(raw, dict):
            raw = [raw]
        outputs = raw[0] if raw and isinstance(raw[0], (list, tuple)) else raw
        label_scores = {o["label"]: float(o["score"]) for o in outputs}

        phishing_prob = _phishing_probability(label_scores)
        if phishing_prob is None:
            # Reporting 0.0 here would be a false all-clear, so surface the
            # problem explicitly instead of guessing.
            return (
                "❌ Unrecognized model labels",
                0.0,
                {
                    "error": "Could not determine which label means phishing.",
                    "raw_label_scores": label_scores,
                    "model_name": MODEL_NAME,
                },
            )

        final_prob = float(phishing_prob)
        prediction = (
            "⚠️ Phishing / Suspicious"
            if final_prob >= 0.5
            else "✅ Likely Legitimate"
        )
        details = {
            "raw_label_scores": label_scores,
            "phishing_probability_used": final_prob,
            "model_name": MODEL_NAME,
        }
        return prediction, round(final_prob, 4), details
    except Exception as e:
        # Catch ANY internal error so Gradio never shows blank "Error".
        # NOTE(review): the traceback is returned to the browser for
        # debugging; consider logging it server-side only on a public Space.
        return (
            "❌ Internal error",
            0.0,
            {
                "exception": str(e),
                "traceback": traceback.format_exc(),
            },
        )
# ---------------------------------------------------------
# 4. GRADIO UI
# ---------------------------------------------------------
# Two-column layout: email input with action buttons on the left, the
# prediction, probability and raw model details on the right.
with gr.Blocks(title="Phishing / Spam Email Detector (LLM)") as demo:
    gr.Markdown(
        """
        # 🛡️ Phishing / Spam Email Detector (LLM-only)

        - Uses a **BERT-tiny** spam/phishing-style classifier from Hugging Face
        - No `.pkl` models required
        - Fast and lightweight enough for CPU Spaces

        Paste an email body below to analyse it.
        """
    )

    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("### ✉️ Email Content")
            email_input = gr.Textbox(
                lines=15,
                placeholder="Paste the full email text here...",
                label="Email body",
            )
            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze", variant="primary")
                clear_btn = gr.Button("🧹 Clear")

        with gr.Column(scale=2):
            gr.Markdown("### 🧾 Result")
            label_out = gr.Label(label="Prediction")
            prob_out = gr.Number(
                label="Phishing probability (0–1)",
                precision=4,
            )
            details_out = gr.JSON(
                label="Model details / debug",
            )

    # predict_email returns (prediction, probability, details), matching
    # the three output components in order.
    submit_btn.click(
        fn=predict_email,
        inputs=email_input,
        outputs=[label_out, prob_out, details_out],
    )

    # Reset the input AND every result widget; leaving label_out out would
    # keep the previous verdict on screen after the form is cleared.
    clear_btn.click(
        fn=lambda: ("", None, None, {}),
        inputs=None,
        outputs=[email_input, label_out, prob_out, details_out],
    )

if __name__ == "__main__":
    demo.launch()