Spaces:
Sleeping
Sleeping
File size: 5,600 Bytes
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
# Hub identifier of the checkpoint handed to ``from_pretrained`` in load_model().
MODEL_NAME = "cybersectony/phishing-email-detection-distilbert_v2.4.1"

# Module-level handles shared by load_model() (writer) and
# predict_phishing() (reader). Both start out as None.
model, tokenizer = None, None
def load_model():
    """Populate the module-level ``model`` and ``tokenizer`` from MODEL_NAME.

    Returns:
        True when the tokenizer and the model were both loaded and the model
        was switched to evaluation mode; False if any of those steps raised
        (the error is printed, not re-raised).
    """
    global model, tokenizer

    succeeded = False
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
        # Inference only: put the network in evaluation mode.
        model.eval()
        succeeded = True
    except Exception as load_error:
        # Report and fall through so the caller can decide what to do.
        print(f"Error loading model: {load_error}")
    return succeeded
def predict_phishing(text):
    """Classify an email body or URL as phishing or legitimate.

    Args:
        text: Raw email content or URL entered by the user. ``None``, empty
            and whitespace-only values are treated as "nothing entered".

    Returns:
        A 3-tuple ``(result, confidence_data, risk_color)``:

        * ``result`` - Markdown summary of the verdict, or a plain message
          when no prediction was made.
        * ``confidence_data`` - dict mapping each label to its softmax
          probability; empty when no prediction was made.
        * ``risk_color`` - ``"red"`` (phishing), ``"green"`` (legitimate),
          ``"orange"`` (error) or ``""`` (no input).

    The function never raises: any failure during inference is reported in
    ``result`` with ``risk_color == "orange"``.
    """
    # Handle None as well as blank strings. This check sits outside the try
    # block, so calling .strip() on None would otherwise escape as an
    # unhandled AttributeError.
    if not text or not text.strip():
        return "Please enter some text to analyze", {}, ""

    # If startup loading failed, say so explicitly instead of surfacing
    # "'NoneType' object is not callable" from the calls below.
    if model is None or tokenizer is None:
        return "Error during prediction: model is not loaded", {}, "orange"

    # NOTE(review): this index -> label order is hard-coded; confirm it
    # against the checkpoint's ``config.id2label``.
    label_names = (
        "Legitimate Email",
        "Phishing URL",
        "Legitimate URL",
        "Phishing Email",
    )

    try:
        # Tokenize a single input, truncated to at most 512 tokens.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        )

        # Forward pass without gradient tracking, then logits -> probabilities.
        with torch.no_grad():
            outputs = model(**inputs)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Only one text was passed in, so take the first row.
        probs = predictions[0].tolist()

        # Pair every label with its probability; a label whose index is
        # missing from the model output gets 0 rather than an IndexError.
        confidence_data = {
            name: (probs[index] if index < len(probs) else 0)
            for index, name in enumerate(label_names)
        }

        # Highest-probability label (first one wins on ties).
        prediction, confidence = max(
            confidence_data.items(), key=lambda item: item[1]
        )

        # Either "Phishing ..." label counts as high risk.
        if "Phishing" in prediction:
            risk_level = "🚨 HIGH RISK - Potential Phishing Detected"
            risk_color = "red"
        else:
            risk_level = "✅ LOW RISK - Appears Legitimate"
            risk_color = "green"

        result = (
            f"\n### {risk_level}\n"
            f"**Primary Classification:** {prediction}\n"
            f"**Confidence:** {confidence:.1%}\n"
        )
        return result, confidence_data, risk_color
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the event handler.
        return f"Error during prediction: {str(e)}", {}, "orange"
# Load the model once at import time so the first request does not pay for it.
# A failed load is only reported here; the script carries on regardless.
print("Loading model...")
model_loaded = load_model()
if not model_loaded:
    print("Failed to load model!")
# Build the Gradio UI: input column on the left, results on the right,
# clickable examples underneath, and an "about" footer.
# NOTE(review): the component nesting below was reconstructed from a listing
# whose indentation was lost - confirm the button belongs in the input column.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="Phishing Email & URL Detective",
    css="""
.risk-high { color: #dc2626 !important; font-weight: bold; }
.risk-low { color: #16a34a !important; font-weight: bold; }
.main-container { max-width: 800px; margin: 0 auto; }
""",
) as demo:
    gr.Markdown("""
# 🛡️ Phishing Detection System
**Instantly detect phishing emails and malicious URLs using AI**
Powered by DistilBERT • 99.58% Accuracy • Real-time Analysis
""")

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="📧 Email Content or URL",
                placeholder="Paste suspicious email content or URL here...",
                lines=8,
                max_lines=15,
            )
            analyze_btn = gr.Button(
                "🔍 Analyze for Phishing",
                variant="primary",
                size="lg",
            )
        with gr.Column(scale=1):
            result_output = gr.Markdown(label="Analysis Result")
            confidence_output = gr.Label(
                label="Confidence Breakdown",
                num_top_classes=4,
            )

    # predict_phishing returns three values but only two are displayed; the
    # third (risk_color) is absorbed by one shared, invisible State component
    # instead of a fresh gr.State() per handler.
    risk_state = gr.State()
    prediction_outputs = [result_output, confidence_output, risk_state]

    # Example inputs: clicking one fills the textbox.
    gr.Markdown("### 📋 Try These Examples:")
    examples = [
        ["Dear User, Your account will be suspended! Click here immediately: http://fake-bank-login.com/urgent"],
        ["Hi Mufasa, Thanks for your email. The quarterly report is attached. Best regards, Simba"],
        ["URGENT: Verify your PayPal account now or lose access: https://paypal-security-verify.suspicious.com"],
        ["Meeting reminder: Project sync at 3 PM in conference room B. See you there!"],
    ]
    gr.Examples(
        examples=examples,
        inputs=input_text,
        outputs=[result_output, confidence_output],
    )

    # Run the classifier on button click and on Enter in the textbox.
    analyze_btn.click(
        fn=predict_phishing,
        inputs=input_text,
        outputs=prediction_outputs,
    )
    input_text.submit(
        fn=predict_phishing,
        inputs=input_text,
        outputs=prediction_outputs,
    )

    gr.Markdown("""
---
### ℹ️ About This Tool and the team.
- **Model:** DistilBERT fine-tuned for phishing detection
- **Accuracy:** 99.58% on test dataset
- **Speed:** Real-time analysis
- **Privacy:** All processing happens locally, no data stored
**⚠️ Disclaimer:** This tool is for educational purposes (Assignment) only, we currently hold no rights and responsibility to this tool. So please always verify suspicious content through official channels.
""")
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",  # listen on every network interface
        "server_port": 7860,
        "show_error": True,
        "quiet": False,
    }
    demo.launch(**launch_options)