Spaces:

Perth0603
/

Random-Forest-Model-for-PhishingDetection

Sleeping

App Files Community

Perth0603 committed on Oct 4, 2025

Commit

99ed65e

verified ·

1 Parent(s): 54fa158

Upload app.py

Browse files

Files changed (1) hide show

app.py +40 -0

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache")
 os.environ.setdefault("TORCH_HOME", "/data/.cache")
 from typing import Optional, List, Dict, Any
 import threading
 import re
 import numpy as np
@@ -87,6 +88,14 @@ _AUTOCALIB_LEGIT_URLS: List[str] = [
     "https://www.gov.uk/",
 ]
 # -------------------------
 # URL features (must match training)
 # -------------------------
@@ -339,6 +348,37 @@ def predict_url(payload: PredictUrlPayload):
             "phish_is_positive_env": URL_POSITIVE_CLASS_ENV if URL_POSITIVE_CLASS_ENV else None,
         }
         raw_p_class1_debug: Optional[float] = None
         if isinstance(model_type, str) and model_type == "xgboost_bst":

 os.environ.setdefault("TORCH_HOME", "/data/.cache")
 from typing import Optional, List, Dict, Any
+from urllib.parse import urlparse
 import threading
 import re
 import numpy as np
     "https://www.gov.uk/",
 ]
+# Known host overrides (editable): force certain domains as LEGIT or PHISH
+_KNOWN_LEGIT_HOSTS: List[str] = [
+    "cjplogger.com",
+    "www.cjplogger.com",
+]
+_KNOWN_PHISH_HOSTS: List[str] = [
+]
 # -------------------------
 # URL features (must match training)
 # -------------------------
             "phish_is_positive_env": URL_POSITIVE_CLASS_ENV if URL_POSITIVE_CLASS_ENV else None,
         }
+        # Known-domain override after polarity is resolved
+        host = (urlparse(url_str).hostname or "").lower()
+        if host:
+            override_label: Optional[str] = None
+            if host in _KNOWN_LEGIT_HOSTS:
+                override_label = "LEGIT"
+            elif host in _KNOWN_PHISH_HOSTS:
+                override_label = "PHISH"
+            if override_label is not None:
+                # Map numeric label according to resolved polarity
+                predicted_label_numeric = 1 if ((override_label == "PHISH") == bool(phish_is_positive)) else 0
+                phish_proba_override = 0.99 if override_label == "PHISH" else 0.01
+                score_override = phish_proba_override if override_label == "PHISH" else (1.0 - phish_proba_override)
+                return {
+                    "label": override_label,
+                    "predicted_label": int(predicted_label_numeric),
+                    "score": float(score_override),
+                    "phishing_probability": float(phish_proba_override),
+                    "backend": str(model_type),
+                    "threshold": 0.5,
+                    "override": {
+                        "reason": "known_host",
+                        "host": host,
+                    },
+                    "phish_is_positive": bool(phish_is_positive),
+                    "phish_is_positive_bundle": meta_phish_is_positive,
+                    "phish_is_positive_env": URL_POSITIVE_CLASS_ENV if URL_POSITIVE_CLASS_ENV else None,
+                    "feature_cols": feature_cols,
+                    "url_col": url_col,
+                }
         raw_p_class1_debug: Optional[float] = None
         if isinstance(model_type, str) and model_type == "xgboost_bst":