Upload app.py
Browse files
app.py
CHANGED
|
@@ -160,6 +160,10 @@ def _load_url_model():
|
|
| 160 |
_url_bundle = joblib.load(model_path)
|
| 161 |
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
@app.get("/")
def root():
    """Return a static status payload for the root route."""
    status_payload = {"status": "ok", "backend": "url-only"}
    return status_payload
|
|
@@ -188,6 +192,28 @@ def predict_url(payload: PredictUrlPayload):
|
|
| 188 |
if not url_str:
|
| 189 |
return JSONResponse(status_code=400, content={"error": "Empty url"})
|
| 190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
# Known-host override (suffix match)
|
| 192 |
host = (urlparse(url_str).hostname or "").lower()
|
| 193 |
if host and host_map:
|
|
|
|
| 160 |
_url_bundle = joblib.load(model_path)
|
| 161 |
|
| 162 |
|
| 163 |
+
def _normalize_url_string(url: str) -> str:
|
| 164 |
+
return (url or "").strip().rstrip("/")
|
| 165 |
+
|
| 166 |
+
|
| 167 |
@app.get("/")
def root():
    """Return a static status payload for the root route."""
    status_payload = {"status": "ok", "backend": "url-only"}
    return status_payload
|
|
|
|
| 192 |
if not url_str:
|
| 193 |
return JSONResponse(status_code=400, content={"error": "Empty url"})
|
| 194 |
|
| 195 |
+
# URL-level override via CSV lists (normalized exact match, ignoring trailing slash)
|
| 196 |
+
norm_url = _normalize_url_string(url_str)
|
| 197 |
+
phishy_set = { _normalize_url_string(u) for u in phishy_list }
|
| 198 |
+
legit_set = { _normalize_url_string(u) for u in legit_list }
|
| 199 |
+
|
| 200 |
+
if norm_url in phishy_set or norm_url in legit_set:
|
| 201 |
+
phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
|
| 202 |
+
label = "PHISH" if norm_url in phishy_set else "LEGIT"
|
| 203 |
+
predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0
|
| 204 |
+
phish_proba = 0.99 if label == "PHISH" else 0.01
|
| 205 |
+
score = phish_proba if label == "PHISH" else (1.0 - phish_proba)
|
| 206 |
+
return {
|
| 207 |
+
"label": label,
|
| 208 |
+
"predicted_label": int(predicted_label),
|
| 209 |
+
"score": float(score),
|
| 210 |
+
"phishing_probability": float(phish_proba),
|
| 211 |
+
"backend": str(model_type),
|
| 212 |
+
"threshold": 0.5,
|
| 213 |
+
"url_col": url_col,
|
| 214 |
+
"override": {"reason": "csv_url_match"},
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
# Known-host override (suffix match)
|
| 218 |
host = (urlparse(url_str).hostname or "").lower()
|
| 219 |
if host and host_map:
|