Spaces:

Pant0x
/

Phishing_URL_Detector

Sleeping

App Files Files Community

Pant0x committed 26 days ago

Commit

1d5a1d4

verified ·

1 Parent(s): 55cc834

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -64

app.py CHANGED Viewed

@@ -1,108 +1,121 @@
 import gradio as gr
-from transformers import pipeline
-# ---------------------------------------------------------
-# 1. Load the Model
-# ---------------------------------------------------------
-# We use the pipeline from Hugging Face directly.
-# This downloads the model automatically on first run.
-print("Loading model... this may take a moment.")
-pipe = pipeline("text-classification", model="pelz-y3mi/phishing-detector")
-# ---------------------------------------------------------
-# 2. Prediction Logic
-# ---------------------------------------------------------
-def predict_phishing(url):
     """
-    Predicts if a URL is phishing or safe using the transformer model.
     """
     if not url:
         return None
-    # Get prediction
-    results = pipe(url)
-    # The pipeline usually returns a list of dicts: [{'label': 'LABEL_1', 'score': 0.99}]
-    top_result = results[0]
-    label = top_result['label']
-    score = top_result['score']
-    # Map LABEL_0/LABEL_1 to human readable text
-    # NOTE: Adjust this mapping if the model output is reversed.
-    # Usually: LABEL_1 = Phishing, LABEL_0 = Legitimate (Safe) for this specific model type
-    if label == "LABEL_1":
-        final_label = "🚨 Phishing"
-        color = "red"
     else:
-        final_label = "✅ Safe"
-        color = "green"
-    return {final_label: score}
-# ---------------------------------------------------------
-# 3. Unique UI Setup (Gradio Blocks)
-# ---------------------------------------------------------
-# Using a specific theme for a professional look
 theme = gr.themes.Soft(
     primary_hue="blue",
     secondary_hue="slate",
-).set(
-    loader_color="#FF0000",
-    slider_selected_color="#FF0000",
 )
-with gr.Blocks(theme=theme, title="PhishGuard AI") as iface:
-    # Header Section
     with gr.Row():
         gr.Markdown(
             """
-            # 🕵️‍♂️ PhishGuard AI
-            ### Advanced Neural Phishing Detection
-            Paste a URL below to analyze it using a Deep Learning Transformer model.
             """
         )
-    # Main Content Section
     with gr.Row():
-        # Left Column: Input
         with gr.Column(scale=1):
             url_input = gr.Textbox(
                 lines=3,
                 placeholder="https://example.com",
-                label="Target URL",
-                info="Enter the full link you want to inspect."
             )
-            submit_btn = gr.Button("Analyze Link 🚀", variant="primary", size="lg")
-            # Examples
             gr.Examples(
                 examples=[
-                    ["https://www.google.com"],
-                    ["http://secure-login-update.com/account"],
-                    ["https://huggingface.co"]
                 ],
                 inputs=url_input
             )
-        # Right Column: Output
         with gr.Column(scale=1):
-            output_label = gr.Label(label="Safety Confidence", num_top_classes=1)
-            # Additional info box
-            gr.Markdown(
-                """
-                > **Note:** This tool uses the `pelz-y3mi/phishing-detector` model.
-                > While highly accurate, always double-check URLs manually before entering sensitive info.
-                """
-            )
-    # Event Listener
     submit_btn.click(
         fn=predict_phishing,
         inputs=url_input,
         outputs=output_label
     )
-# Launch
-iface.launch()

 import gradio as gr
+import torch
+import numpy as np
+# -----------------------------
+# 1. Load Your Specific Model
+# -----------------------------
+MODEL_PATH = "models/phishing_rf_model.pt"
+print(f"Loading model from {MODEL_PATH}...")
+# We use torch.load because the file extension is .pt
+# map_location='cpu' ensures it works on servers without massive GPUs
+try:
+    model = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
+    print("✅ Model loaded successfully!")
+except Exception as e:
+    print(f"❌ Failed to load model: {e}")
+    raise e
+# -----------------------------
+# 2. Feature Extraction (No Scaler)
+# -----------------------------
+def extract_features(url: str) -> np.ndarray:
     """
+    Extracts the features the model expects.
+    Since we are skipping the scaler, we feed these raw numbers directly.
     """
+    length = len(url)
+    dots = url.count('.')
+    hyphens = url.count('-')
+    digits = sum(c.isdigit() for c in url)
+    at_sign = url.count('@')
+    # Create the array shape [1, 5] (1 sample, 5 features)
+    return np.array([[length, dots, hyphens, digits, at_sign]], dtype=float)
+# -----------------------------
+# 3. Prediction Logic
+# -----------------------------
+def predict_phishing(url: str):
     if not url:
         return None
+    # 1. Extract features
+    features = extract_features(url)
+    # 2. Predict
+    # We assume the model inside the .pt file is a standard sklearn model
+    # (RandomForest) that supports .predict_proba()
+    try:
+        pred_prob = model.predict_proba(features)[0]
+    except AttributeError:
+        # Fallback if the model doesn't support probabilities
+        pred = model.predict(features)[0]
+        # Mock probabilities if exact confidence isn't available
+        pred_prob = [1.0, 0.0] if pred == 0 else [0.0, 1.0]
+    # 3. Format Output
+    # Assuming Index 0 = Safe, Index 1 = Phishing
+    label_index = pred_prob.argmax()
+    confidence = float(pred_prob[label_index])
+    if label_index == 1:
+        label = "🚨 Phishing"
     else:
+        label = "✅ Safe"
+    return {label: confidence}
+# -----------------------------
+# 4. Unique Professional UI
+# -----------------------------
 theme = gr.themes.Soft(
     primary_hue="blue",
     secondary_hue="slate",
 )
+with gr.Blocks(theme=theme, title="PhishGuard Local") as iface:
+    # Header
     with gr.Row():
         gr.Markdown(
             """
+            # 🛡️ PhishGuard (Local Model)
+            ### Custom Random Forest Detector
+            Running locally using your `phishing_rf_model.pt` file.
             """
         )
+    # Main Interface
     with gr.Row():
         with gr.Column(scale=1):
             url_input = gr.Textbox(
                 lines=3,
                 placeholder="https://example.com",
+                label="Check URL",
+                info="Paste the link you want to test."
             )
+            submit_btn = gr.Button("Scan URL 🔍", variant="primary")
             gr.Examples(
                 examples=[
+                    ["https://google.com"],
+                    ["http://fake-login-secure.com/update"]
                 ],
                 inputs=url_input
             )
         with gr.Column(scale=1):
+            output_label = gr.Label(label="Result")
+            gr.Markdown("> **Note:** Running without feature scaler. Results depend on raw feature interpretation.")
+    # Actions
     submit_btn.click(
         fn=predict_phishing,
         inputs=url_input,
         outputs=output_label
     )
+iface.launch(share=True)