Perth0603 committed on
Commit
0319812
·
verified ·
1 Parent(s): 3c180d3

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -346,8 +346,7 @@ def predict_url(payload: PredictUrlPayload):
346
  "url_col": url_col,
347
  }
348
 
349
- # Typosquat guard: if SLD is very similar to common brands but not exact,
350
- # short-circuit as phishing with high confidence to match notebook demos.
351
  try:
352
  s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()
353
  s_sld = s_host.split(".")[-2] if "." in s_host else s_host
@@ -365,12 +364,14 @@ def predict_url(payload: PredictUrlPayload):
365
  except Exception:
366
  from difflib import SequenceMatcher
367
  return SequenceMatcher(None, a, b).ratio()
368
- is_exact = any(s_clean == _normalize_brand(b) for b in brands)
369
- if s_clean and not is_exact:
370
  best = 0.0
371
  for b in brands:
372
  best = max(best, _sim(s_clean, _normalize_brand(b)))
373
- if best >= 0.82:
 
 
 
374
  phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
375
  label = "PHISH"
376
  predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0
 
346
  "url_col": url_col,
347
  }
348
 
349
+ # Typosquat guard: mirror notebook fallback logic.
 
350
  try:
351
  s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()
352
  s_sld = s_host.split(".")[-2] if "." in s_host else s_host
 
364
  except Exception:
365
  from difflib import SequenceMatcher
366
  return SequenceMatcher(None, a, b).ratio()
367
+ if s_clean:
 
368
  best = 0.0
369
  for b in brands:
370
  best = max(best, _sim(s_clean, _normalize_brand(b)))
371
+ has_digits = bool(re.search(r"\d", s_sld))
372
+ has_hyphen = ("-" in s_sld)
373
+ is_official = any(s_host.endswith(f"{_normalize_brand(b)}.com") for b in brands)
374
+ if (best >= 0.90) and (has_digits or has_hyphen) and (not is_official):
375
  phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
376
  label = "PHISH"
377
  predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0