Spaces:

Pant0x
/

Phishing_URL_Detector

Sleeping

App Files Files Community

Pant0x committed on 28 days ago

Commit

91c1466

verified ·

1 Parent(s): 350a1b3

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -77

app.py CHANGED Viewed

@@ -1,106 +1,125 @@
 import gradio as gr
 import torch
 import numpy as np
-# -----------------------------
-# 1. Load Model (Robust)
-# -----------------------------
 MODEL_PATH = "models/phishing_rf_model.pt"
-# Start-up step: deserialize whatever object is stored at MODEL_PATH into the
-# module-level `model`; on any failure `model` is set to None so that
-# predict_phishing can report the problem in the UI.
-print(f"Attempting to load model from {MODEL_PATH}...")
 try:
-    # Load the model file, mapping its tensors onto the CPU.
-    # NOTE(review): torch.load unpickles the file, so it should only be
-    # pointed at a trusted artifact.
-    model = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
-    print(f"✅ Model loaded successfully! Type: {type(model)}")
 except Exception as e:
-    # Any load error is logged to stdout and recorded as model = None.
-    print(f"❌ Failed to load model: {e}")
-    model = None
-# -----------------------------
-# 2. Prediction Logic (Universal)
-# -----------------------------
 def predict_phishing(url):
-    # Score one URL string with the module-level `model`.
-    # Every exit returns a 2-tuple: (label data or None, status message).
-    # Safety checks
-    if model is None:
-        return {"Error": 0.0}, "Model failed to load. Check logs."
     if not url:
         return None, "Please enter a URL."
     try:
-        # --- A. Extract Features ---
-        # Five character-count features computed from the raw URL string.
-        length = len(url)
-        dots = url.count('.')
-        hyphens = url.count('-')
-        digits = sum(c.isdigit() for c in url)
-        at_sign = url.count('@')
-        # Raw features list (order: length, dots, hyphens, digits, '@' count)
-        features_list = [length, dots, hyphens, digits, at_sign]
-        # --- B. Smart Detection & Prediction ---
-        # CASE 1: the object exposes predict_proba (scikit-learn style estimator)
-        if hasattr(model, "predict_proba"):
-            # Single-row 2-D float array: one sample, five features
-            input_data = np.array([features_list], dtype=float)
-            pred_prob = model.predict_proba(input_data)[0]
-            # Assumes index 0 = safe and index 1 = phishing — TODO confirm against the model's class order
-            safe_score = float(pred_prob[0])
-            phish_score = float(pred_prob[1])
-        # CASE 2: It is a PyTorch Neural Network
-        elif isinstance(model, torch.nn.Module):
-            model.eval() # Set to evaluation mode
-            # Single-row float32 tensor built from the same five features
-            input_tensor = torch.tensor([features_list], dtype=torch.float32)
-            with torch.no_grad():
-                logits = model(input_tensor)
-                # Check output shape to decide between Softmax or Sigmoid
-                # (indexing shape[1] assumes the model returns a 2-D tensor — TODO confirm)
-                if logits.shape[1] == 1:
-                    # Binary output (Sigmoid)
-                    phish_score = torch.sigmoid(logits).item()
-                    safe_score = 1.0 - phish_score
-                else:
-                    # Multi-class output (Softmax); same class-order assumption as CASE 1
-                    probs = torch.nn.functional.softmax(logits, dim=1)
-                    safe_score = float(probs[0][0])
-                    phish_score = float(probs[0][1])
-        else:
-            # Neither interface matched: report the loaded object's type
-            return {"Error": 0}, f"Unknown model type: {type(model)}"
-        # Return results
-        return {"✅ Safe": safe_score, "🚨 Phishing": phish_score}, "Success"
     except Exception as e:
-        # Any exception raised above is printed and returned as the status message
-        error_msg = f"Crash Error: {str(e)}"
-        print(error_msg)
-        return {"Error": 0}, error_msg
-# -----------------------------
-# 3. UI Setup
-# -----------------------------
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
-    # Page layout: a title, an input row, and an output row.
-    gr.Markdown("# 🛡️ PhishGuard Debugger")
     with gr.Row():
-        input_box = gr.Textbox(label="URL", placeholder="https://google.com")
-        predict_btn = gr.Button("Scan", variant="primary")
     with gr.Row():
-        # Two outputs: one for the label, one for the status/error message
-        output_label = gr.Label(label="Prediction")
-        status_box = gr.Textbox(label="Debug Status (Read this if error)", interactive=False)
-    # Clicking the button passes the textbox value to predict_phishing and
-    # routes its two return values to the label and the status box, in order.
-    predict_btn.click(
-        fn=predict_phishing,
-        inputs=input_box,
-        outputs=[output_label, status_box]
     )
 iface.launch()

 import gradio as gr
 import torch
+import torch.nn as nn
+import pickle
 import numpy as np
+# ---------------------------------------------------------
+# 1. Define the Neural Network Architecture
+# ---------------------------------------------------------
+# Since your file is an OrderedDict, we must define the class
+# that matches the layers inside it.
+# I am assuming a standard 5-input architecture based on your feature extractor.
+class PhishingNet(nn.Module):
+    # Two-layer feed-forward network: Linear -> ReLU -> Linear, returning raw
+    # (un-normalized) scores of width `output_size`.
+    # NOTE(review): the default sizes (5 -> 10 -> 2) are the author's guess at
+    # the architecture behind the saved weights; confirm against the training code.
+    def __init__(self, input_size=5, hidden_size=10, output_size=2):
+        super(PhishingNet, self).__init__()
+        # The attribute names fc1 / fc2 determine the parameter keys
+        # (fc1.weight, fc1.bias, ...) that a loaded state_dict must contain.
+        self.fc1 = nn.Linear(input_size, hidden_size)
+        self.relu = nn.ReLU()
+        self.fc2 = nn.Linear(hidden_size, output_size)
+    def forward(self, x):
+        # x presumably has shape (batch, input_size) — TODO confirm with callers
+        out = self.fc1(x)
+        out = self.relu(out)
+        out = self.fc2(out)
+        return out
+# ---------------------------------------------------------
+# 2. Load Resources (Model + Scaler)
+# ---------------------------------------------------------
 MODEL_PATH = "models/phishing_rf_model.pt"
+SCALER_PATH = "models/scaler.pkl"
+# Module-level state read by predict_phishing; both stay None until loaded.
+model = None
+scaler = None
+# Human-readable log of the load steps, shown in the UI status box.
+load_status = ""
 try:
+    # --- Load Scaler ---
+    # NOTE(review): pickle.load can execute code embedded in the file, so
+    # SCALER_PATH must point at a trusted artifact.
+    with open(SCALER_PATH, "rb") as f:
+        scaler = pickle.load(f)
+    load_status += "✅ Scaler loaded.\n"
+    # --- Load Model Weights ---
+    # Load the weights (expected to be an OrderedDict state_dict) onto the CPU.
+    # NOTE(review): torch.load also unpickles; same trust requirement applies.
+    state_dict = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
+    # Build the network structure, then copy the saved weights into it.
+    # NOTE: if the saved shapes/keys do not match PhishingNet, load_state_dict
+    # raises and the error is caught below.
+    # NOTE(review): `model` is assigned before load_state_dict runs, so after
+    # such a failure `model` is a non-None network without the saved weights
+    # and the `model is None` check in predict_phishing will not catch it.
+    model = PhishingNet()
+    model.load_state_dict(state_dict)
+    model.eval() # Set to evaluation mode
+    load_status += "✅ Model weights loaded into Neural Net.\n"
 except Exception as e:
+    # Record the failure; the status text is printed only on this error path.
+    load_status += f"❌ LOAD ERROR: {str(e)}\n"
+    print(load_status)
+# ---------------------------------------------------------
+# 3. Feature Extraction (Must match your Scaler!)
+# ---------------------------------------------------------
+def extract_features(url: str) -> np.ndarray:
+    # Turn a URL string into one row of five character-count features, in the
+    # order: total length, '.' count, '-' count, digit count, '@' count.
+    # NOTE(review): this order must match what the scaler/model were fitted
+    # on — confirm against the training pipeline.
+    length = len(url)
+    dots = url.count('.')
+    hyphens = url.count('-')
+    digits = sum(c.isdigit() for c in url)
+    at_sign = url.count('@')
+    # Return a float array of shape [1, 5] (one sample, five features)
+    return np.array([[length, dots, hyphens, digits, at_sign]], dtype=float)
+# ---------------------------------------------------------
+# 4. Prediction Logic
+# ---------------------------------------------------------
 def predict_phishing(url):
+    # Score one URL string. Every exit returns a 2-tuple
+    # (label data or None, status text), matching the two UI outputs.
+    # Check if things loaded correctly
+    # NOTE(review): at start-up `model` is assigned before its weights are
+    # loaded, so a failed weight load can leave a non-None model that passes
+    # this check — verify the load status as well.
+    if model is None or scaler is None:
+        return {"Error": 0}, f"System not ready.\n{load_status}"
     if not url:
         return None, "Please enter a URL."
     try:
+        # 1. Extract the [1, 5] feature row
+        features = extract_features(url)
+        # 2. Scale with the loaded scaler object
+        features_scaled = scaler.transform(features)
+        # 3. Convert to a float32 Torch tensor
+        features_tensor = torch.tensor(features_scaled, dtype=torch.float32)
+        # 4. Predict without tracking gradients; softmax over dim=1 turns the
+        #    two output scores into probabilities
+        with torch.no_grad():
+            logits = model(features_tensor)
+            probs = torch.nn.functional.softmax(logits, dim=1)
+            # Assumes index 0 = safe and index 1 = phishing — TODO confirm
+            # against the label encoding used in training
+            safe_conf = float(probs[0][0])
+            phish_conf = float(probs[0][1])
+        return {"✅ Safe": safe_conf, "🚨 Phishing": phish_conf}, "Success"
     except Exception as e:
+        # Any failure above is returned as the status text rather than raised
+        return {"Error": 0}, f"Prediction Failed: {str(e)}"
+# ---------------------------------------------------------
+# 5. UI Setup
+# ---------------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
+    # Page layout: a title, an input row, and an output row.
+    gr.Markdown("# 🛡️ PhishScope (Custom Model)")
     with gr.Row():
+        url_input = gr.Textbox(label="URL to Check", placeholder="https://example.com")
+        submit_btn = gr.Button("Analyze", variant="primary")
     with gr.Row():
+        label_output = gr.Label(label="Result")
+        # Starts out showing the start-up load log; each click replaces it
+        # with the status text returned by predict_phishing.
+        debug_output = gr.Textbox(label="System Status", value=load_status, lines=4)
+    # Clicking the button passes the URL to predict_phishing and routes its
+    # two return values to the label and the status box, in order.
+    submit_btn.click(
+        fn=predict_phishing,
+        inputs=url_input,
+        outputs=[label_output, debug_output]
     )
 iface.launch()