Perth0603 committed on
Commit
6e01a39
·
verified ·
1 Parent(s): aafb4b7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py CHANGED
@@ -385,6 +385,48 @@ def predict_url(payload: PredictUrlPayload):
385
  "url_col": url_col,
386
  }
387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  # Typosquat guard: mirror notebook fallback logic.
389
  try:
390
  s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()
 
385
  "url_col": url_col,
386
  }
387
 
388
+ # Lookalike character guard: detect homoglyph/lookalike attacks
389
+ try:
390
+ # Cyrillic characters that look like ASCII letters
391
+ lookalikes_cyrillic = {
392
+ 'а': 'a', 'е': 'e', 'о': 'o', 'р': 'p', 'с': 'c', 'х': 'x',
393
+ 'у': 'y', 'ч': '4', 'ы': 'b', 'ь': 'b', 'і': 'i', 'ї': 'yi',
394
+ 'ґ': 'g', 'ė': 'e', 'ń': 'n', 'ș': 's', 'ț': 't'
395
+ }
396
+
397
+ # Greek characters that look like ASCII letters
398
+ lookalikes_greek = {
399
+ 'α': 'a', 'ο': 'o', 'ν': 'v', 'τ': 't', 'ρ': 'p'
400
+ }
401
+
402
+ # Latin Extended lookalikes
403
+ lookalikes_latin = {
404
+ 'ɑ': 'a', 'ɢ': 'g', 'ᴅ': 'd', 'ɡ': 'g', 'ɪ': 'i',
405
+ 'ɴ': 'n', 'ᴘ': 'p', 'ᴠ': 'v', 'ᴡ': 'w', 'ɨ': 'i'
406
+ }
407
+
408
+ all_lookalikes = {**lookalikes_cyrillic, **lookalikes_greek, **lookalikes_latin}
409
+
410
+ for char in url_str:
411
+ if char in all_lookalikes:
412
+ phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
413
+ label = "PHISH"
414
+ predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0
415
+ phish_proba = 0.95
416
+ score = phish_proba
417
+ return {
418
+ "label": label,
419
+ "predicted_label": int(predicted_label),
420
+ "score": float(score),
421
+ "phishing_probability": float(phish_proba),
422
+ "backend": "lookalike_guard",
423
+ "threshold": 0.5,
424
+ "url_col": url_col,
425
+ "rule": "lookalike_character_detected",
426
+ }
427
+ except Exception:
428
+ pass
429
+
430
  # Typosquat guard: mirror notebook fallback logic.
431
  try:
432
  s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()