Perth0603 committed on
Commit
7734d6e
·
verified ·
1 Parent(s): bdde6ee

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -0
app.py CHANGED
@@ -160,6 +160,10 @@ def _load_url_model():
160
  _url_bundle = joblib.load(model_path)
161
 
162
 
 
 
 
 
163
  @app.get("/")
164
  def root():
165
  return {"status": "ok", "backend": "url-only"}
@@ -188,6 +192,28 @@ def predict_url(payload: PredictUrlPayload):
188
  if not url_str:
189
  return JSONResponse(status_code=400, content={"error": "Empty url"})
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  # Known-host override (suffix match)
192
  host = (urlparse(url_str).hostname or "").lower()
193
  if host and host_map:
 
160
  _url_bundle = joblib.load(model_path)
161
 
162
 
163
+ def _normalize_url_string(url: str) -> str:
164
+ return (url or "").strip().rstrip("/")
165
+
166
+
167
  @app.get("/")
168
  def root():
169
  return {"status": "ok", "backend": "url-only"}
 
192
  if not url_str:
193
  return JSONResponse(status_code=400, content={"error": "Empty url"})
194
 
195
+ # URL-level override via CSV lists (normalized exact match, ignoring trailing slash)
196
+ norm_url = _normalize_url_string(url_str)
197
+ phishy_set = { _normalize_url_string(u) for u in phishy_list }
198
+ legit_set = { _normalize_url_string(u) for u in legit_list }
199
+
200
+ if norm_url in phishy_set or norm_url in legit_set:
201
+ phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
202
+ label = "PHISH" if norm_url in phishy_set else "LEGIT"
203
+ predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0
204
+ phish_proba = 0.99 if label == "PHISH" else 0.01
205
+ score = phish_proba if label == "PHISH" else (1.0 - phish_proba)
206
+ return {
207
+ "label": label,
208
+ "predicted_label": int(predicted_label),
209
+ "score": float(score),
210
+ "phishing_probability": float(phish_proba),
211
+ "backend": str(model_type),
212
+ "threshold": 0.5,
213
+ "url_col": url_col,
214
+ "override": {"reason": "csv_url_match"},
215
+ }
216
+
217
  # Known-host override (suffix match)
218
  host = (urlparse(url_str).hostname or "").lower()
219
  if host and host_map: