Perth0603 committed on
Commit
99ed65e
·
verified ·
1 Parent(s): 54fa158

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py CHANGED
@@ -6,6 +6,7 @@ os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache")
6
  os.environ.setdefault("TORCH_HOME", "/data/.cache")
7
 
8
  from typing import Optional, List, Dict, Any
 
9
  import threading
10
  import re
11
  import numpy as np
@@ -87,6 +88,14 @@ _AUTOCALIB_LEGIT_URLS: List[str] = [
87
  "https://www.gov.uk/",
88
  ]
89
 
 
 
 
 
 
 
 
 
90
  # -------------------------
91
  # URL features (must match training)
92
  # -------------------------
@@ -339,6 +348,37 @@ def predict_url(payload: PredictUrlPayload):
339
  "phish_is_positive_env": URL_POSITIVE_CLASS_ENV if URL_POSITIVE_CLASS_ENV else None,
340
  }
341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  raw_p_class1_debug: Optional[float] = None
343
 
344
  if isinstance(model_type, str) and model_type == "xgboost_bst":
 
6
  os.environ.setdefault("TORCH_HOME", "/data/.cache")
7
 
8
  from typing import Optional, List, Dict, Any
9
+ from urllib.parse import urlparse
10
  import threading
11
  import re
12
  import numpy as np
 
88
  "https://www.gov.uk/",
89
  ]
90
 
91
+ # Known host overrides (editable): a URL whose lowercased hostname exactly matches an entry is force-labelled LEGIT or PHISH
92
+ _KNOWN_LEGIT_HOSTS: List[str] = [
93
+ "cjplogger.com",
94
+ "www.cjplogger.com",
95
+ ]
96
+ _KNOWN_PHISH_HOSTS: List[str] = [
97
+ ]
98
+
99
  # -------------------------
100
  # URL features (must match training)
101
  # -------------------------
 
348
  "phish_is_positive_env": URL_POSITIVE_CLASS_ENV if URL_POSITIVE_CLASS_ENV else None,
349
  }
350
 
351
+ # Known-host override: applied once polarity is resolved; returns early, before the model-type branches below
352
+ host = (urlparse(url_str).hostname or "").lower()
353
+ if host:
354
+ override_label: Optional[str] = None
355
+ if host in _KNOWN_LEGIT_HOSTS:
356
+ override_label = "LEGIT"
357
+ elif host in _KNOWN_PHISH_HOSTS:
358
+ override_label = "PHISH"
359
+ if override_label is not None:
360
+ # Map to the numeric label under the resolved polarity: 1 when the override label is the positive class, else 0
361
+ predicted_label_numeric = 1 if ((override_label == "PHISH") == bool(phish_is_positive)) else 0
362
+ phish_proba_override = 0.99 if override_label == "PHISH" else 0.01
363
+ score_override = phish_proba_override if override_label == "PHISH" else (1.0 - phish_proba_override)
364
+ return {
365
+ "label": override_label,
366
+ "predicted_label": int(predicted_label_numeric),
367
+ "score": float(score_override),
368
+ "phishing_probability": float(phish_proba_override),
369
+ "backend": str(model_type),
370
+ "threshold": 0.5,
371
+ "override": {
372
+ "reason": "known_host",
373
+ "host": host,
374
+ },
375
+ "phish_is_positive": bool(phish_is_positive),
376
+ "phish_is_positive_bundle": meta_phish_is_positive,
377
+ "phish_is_positive_env": URL_POSITIVE_CLASS_ENV if URL_POSITIVE_CLASS_ENV else None,
378
+ "feature_cols": feature_cols,
379
+ "url_col": url_col,
380
+ }
381
+
382
  raw_p_class1_debug: Optional[float] = None
383
 
384
  if isinstance(model_type, str) and model_type == "xgboost_bst":