Upload app.py
Browse files
app.py
CHANGED
|
@@ -385,6 +385,48 @@ def predict_url(payload: PredictUrlPayload):
|
|
| 385 |
"url_col": url_col,
|
| 386 |
}
|
| 387 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
# Typosquat guard: mirror notebook fallback logic.
|
| 389 |
try:
|
| 390 |
s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()
|
|
|
|
| 385 |
"url_col": url_col,
|
| 386 |
}
|
| 387 |
|
| 388 |
+
# Lookalike character guard: detect homoglyph/lookalike attacks
|
| 389 |
+
try:
|
| 390 |
+
# Cyrillic characters (plus a few accented Latin letters: ė, ń, ș, ț) that look like ASCII letters
|
| 391 |
+
lookalikes_cyrillic = {
|
| 392 |
+
'а': 'a', 'е': 'e', 'о': 'o', 'р': 'p', 'с': 'c', 'х': 'x',
|
| 393 |
+
'у': 'y', 'ч': '4', 'ы': 'b', 'ь': 'b', 'і': 'i', 'ї': 'yi',
|
| 394 |
+
'ґ': 'g', 'ė': 'e', 'ń': 'n', 'ș': 's', 'ț': 't'
|
| 395 |
+
}
|
| 396 |
+
|
| 397 |
+
# Greek characters that look like ASCII letters
|
| 398 |
+
lookalikes_greek = {
|
| 399 |
+
'α': 'a', 'ο': 'o', 'ν': 'v', 'τ': 't', 'ρ': 'p'
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
# IPA / small-capital phonetic Latin letters that look like ASCII letters
|
| 403 |
+
lookalikes_latin = {
|
| 404 |
+
'ɑ': 'a', 'ɢ': 'g', 'ᴅ': 'd', 'ɡ': 'g', 'ɪ': 'i',
|
| 405 |
+
'ɴ': 'n', 'ᴘ': 'p', 'ᴠ': 'v', 'ᴡ': 'w', 'ɨ': 'i'
|
| 406 |
+
}
|
| 407 |
+
|
| 408 |
+
all_lookalikes = {**lookalikes_cyrillic, **lookalikes_greek, **lookalikes_latin}
|
| 409 |
+
|
| 410 |
+
for char in url_str:
|
| 411 |
+
if char in all_lookalikes:
|
| 412 |
+
phish_is_positive = True if URL_POSITIVE_CLASS_ENV == "" else (URL_POSITIVE_CLASS_ENV == "PHISH")
|
| 413 |
+
label = "PHISH"
|
| 414 |
+
predicted_label = 1 if ((label == "PHISH") == phish_is_positive) else 0
|
| 415 |
+
phish_proba = 0.95
|
| 416 |
+
score = phish_proba
|
| 417 |
+
return {
|
| 418 |
+
"label": label,
|
| 419 |
+
"predicted_label": int(predicted_label),
|
| 420 |
+
"score": float(score),
|
| 421 |
+
"phishing_probability": float(phish_proba),
|
| 422 |
+
"backend": "lookalike_guard",
|
| 423 |
+
"threshold": 0.5,
|
| 424 |
+
"url_col": url_col,
|
| 425 |
+
"rule": "lookalike_character_detected",
|
| 426 |
+
}
|
| 427 |
+
except Exception:
|
| 428 |
+
pass
|
| 429 |
+
|
| 430 |
# Typosquat guard: mirror notebook fallback logic.
|
| 431 |
try:
|
| 432 |
s_host = (urlparse(_ensure_scheme(url_str)).hostname or "").lower()
|