Upload app.py
Browse files
app.py
CHANGED
|
@@ -346,8 +346,7 @@ def predict_url(payload: PredictUrlPayload):
|
|
| 346 |
"url_col": url_col,
|
| 347 |
}
|
| 348 |
|
| 349 |
-
# Typosquat guard:
|
| 350 |
-
# short-circuit as phishing with high confidence to match notebook demos.
|
| 351 |
try:
|
| 352 |
s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()
|
| 353 |
s_sld = s_host.split(".")[-2] if "." in s_host else s_host
|
|
@@ -365,12 +364,14 @@ def predict_url(payload: PredictUrlPayload):
|
|
| 365 |
except Exception:
|
| 366 |
from difflib import SequenceMatcher
|
| 367 |
return SequenceMatcher(None, a, b).ratio()
|
| 368 |
-
|
| 369 |
-
if s_clean and not is_exact:
|
| 370 |
best = 0.0
|
| 371 |
for b in brands:
|
| 372 |
best = max(best, _sim(s_clean, _normalize_brand(b)))
|
| 373 |
-
|
|
|
|
|
|
|
|
|
|
| 374 |
phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
|
| 375 |
label = "PHISH"
|
| 376 |
predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0
|
|
|
|
| 346 |
"url_col": url_col,
|
| 347 |
}
|
| 348 |
|
| 349 |
+
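# Typosquat guard (mirrors the notebook fallback logic): label the URL PHISH when the cleaned name is >= 0.90 similar to a known brand, the SLD contains a digit or hyphen, and the host does not end with a brand's .com domain.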
|
|
|
|
| 350 |
try:
|
| 351 |
s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()
|
| 352 |
s_sld = s_host.split(".")[-2] if "." in s_host else s_host
|
|
|
|
| 364 |
except Exception:
|
| 365 |
from difflib import SequenceMatcher
|
| 366 |
return SequenceMatcher(None, a, b).ratio()
|
| 367 |
+
if s_clean:
|
|
|
|
| 368 |
best = 0.0
|
| 369 |
for b in brands:
|
| 370 |
best = max(best, _sim(s_clean, _normalize_brand(b)))
|
| 371 |
+
has_digits = bool(re.search(r"\d", s_sld))
|
| 372 |
+
has_hyphen = ("-" in s_sld)
|
| 373 |
+
is_official = any(s_host.endswith(f"{_normalize_brand(b)}.com") for b in brands)
|
| 374 |
+
if (best >= 0.90) and (has_digits or has_hyphen) and (not is_official):
|
| 375 |
phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
|
| 376 |
label = "PHISH"
|
| 377 |
predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0
|