Update app.py
Browse files
app.py
CHANGED
|
@@ -252,43 +252,73 @@ def calculate_malware_risk(features):
|
|
| 252 |
# -------------------------------
|
| 253 |
def get_final_prediction(phishing_pred, malware_pred, phishing_risk, malware_risk):
|
| 254 |
"""
|
| 255 |
-
|
| 256 |
-
1.
|
| 257 |
-
2.
|
| 258 |
-
3.
|
| 259 |
-
4. Add safety thresholds for benign classification
|
| 260 |
"""
|
| 261 |
|
| 262 |
-
#
|
| 263 |
-
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
-
#
|
| 267 |
-
|
| 268 |
-
return "Benign", f"Low risk scores (Phishing: {phishing_risk}, Malware: {malware_risk}) - Safe to browse"
|
| 269 |
|
| 270 |
-
# Case
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
return "Phishing", f"Phishing
|
| 277 |
else:
|
| 278 |
-
return "
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
return "Malicious", f"Malware
|
| 284 |
else:
|
| 285 |
-
return "
|
| 286 |
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
else:
|
| 291 |
-
return "Benign", f"
|
| 292 |
|
| 293 |
def analyze_url(url):
|
| 294 |
try:
|
|
@@ -308,7 +338,7 @@ def analyze_url(url):
|
|
| 308 |
phishing_risk = calculate_phishing_risk(phishing_features)
|
| 309 |
malware_risk = calculate_malware_risk(malware_features)
|
| 310 |
|
| 311 |
-
# Get final prediction using
|
| 312 |
final_result, decision_reason = get_final_prediction(
|
| 313 |
phishing_pred, malware_pred, phishing_risk, malware_risk
|
| 314 |
)
|
|
|
|
| 252 |
# -------------------------------
|
| 253 |
def get_final_prediction(phishing_pred, malware_pred, phishing_risk, malware_risk, url=None):
    """
    Combine both model verdicts and both risk scores into one final label.

    Enhanced decision system:
    1. Whitelist protection for trusted domains (only when ``url`` is given)
    2. Prioritize model predictions first
    3. Use risk scores for confidence and tie-breaking
    4. When both models report benign, let high/medium risk scores override

    Args:
        phishing_pred: Label from the phishing model; the value "Phishing"
            is treated as a detection, anything else as no detection.
        malware_pred: Label from the malware model; the value "malicious"
            is treated as a detection, anything else as no detection.
        phishing_risk: Numeric phishing risk score (compared against the
            thresholds 15 / 25 / 40 below).
        malware_risk: Numeric malware risk score (same thresholds).
        url: Optional URL being analysed. It is needed for the whitelist
            check: the previous code looked for a local variable named
            ``url`` that never existed, so the whitelist could never match.
            Callers that omit it get the model/risk based decision only.

    Returns:
        A ``(label, reason)`` tuple where ``label`` is one of "Benign",
        "Phishing", "Malicious" or "Suspicious" and ``reason`` is a
        human-readable explanation.
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import urlparse

    # Trusted domains whitelist (exact host match, no sub-domain wildcards).
    trusted_domains = frozenset((
        'google.com', 'www.google.com', 'facebook.com', 'www.facebook.com',
        'microsoft.com', 'www.microsoft.com', 'apple.com', 'www.apple.com',
        'amazon.com', 'www.amazon.com', 'youtube.com', 'www.youtube.com',
        'twitter.com', 'www.twitter.com', 'linkedin.com', 'www.linkedin.com',
        'github.com', 'www.github.com', 'stackoverflow.com', 'www.stackoverflow.com',
    ))

    # Whitelist check. ``hostname`` (rather than ``netloc``) drops any port
    # or userinfo and is already lower-cased, so "https://GitHub.com:443/"
    # matches while "http://google.com@evil.example/" does not.
    if isinstance(url, str) and url:
        try:
            domain = urlparse(url).hostname or ""
        except ValueError:
            # Malformed URL (e.g. unbalanced IPv6 brackets): not whitelisted.
            domain = ""
        if domain in trusted_domains:
            return "Benign", f"Whitelisted trusted domain: {domain}"

    # Model prediction priorities
    RISK_BOOST_THRESHOLD = 15    # Minimum risk to boost model prediction
    HIGH_RISK_THRESHOLD = 40     # High risk threshold for override
    MEDIUM_RISK_THRESHOLD = 25   # Medium risk threshold

    phishing_detected = phishing_pred == "Phishing"
    malware_detected = malware_pred == "malicious"

    # Case 1: Both models detect threats - the larger risk score picks the
    # label; ties go to "Malicious".
    if phishing_detected and malware_detected:
        if phishing_risk > malware_risk:
            return "Phishing", f"Both models detected threat - phishing characteristics stronger (Risk: {phishing_risk} vs {malware_risk})"
        return "Malicious", f"Both models detected threat - malware characteristics stronger (Risk: {malware_risk} vs {phishing_risk})"

    # Case 2: Only phishing model detects threat. The label is always
    # "Phishing"; the risk score only changes the wording of the reason.
    if phishing_detected:
        if phishing_risk >= RISK_BOOST_THRESHOLD or phishing_risk > malware_risk:
            return "Phishing", f"Phishing model detected threat with supporting risk indicators (Risk: {phishing_risk})"
        return "Phishing", f"Phishing model detected threat (Risk score: {phishing_risk})"

    # Case 3: Only malware model detects threat (mirror of case 2).
    if malware_detected:
        if malware_risk >= RISK_BOOST_THRESHOLD or malware_risk > phishing_risk:
            return "Malicious", f"Malware model detected threat with supporting risk indicators (Risk: {malware_risk})"
        return "Malicious", f"Malware model detected threat (Risk score: {malware_risk})"

    # Case 4: Both models report benign - check high risk scores
    phishing_high = phishing_risk >= HIGH_RISK_THRESHOLD
    malware_high = malware_risk >= HIGH_RISK_THRESHOLD

    if phishing_high and malware_high:
        if phishing_risk > malware_risk:
            return "Phishing", f"Models missed but high phishing risk detected ({phishing_risk})"
        return "Malicious", f"Models missed but high malware risk detected ({malware_risk})"
    if phishing_high:
        return "Phishing", f"Models reported benign but high phishing risk indicators ({phishing_risk})"
    if malware_high:
        return "Malicious", f"Models reported benign but high malware risk indicators ({malware_risk})"
    if phishing_risk >= MEDIUM_RISK_THRESHOLD or malware_risk >= MEDIUM_RISK_THRESHOLD:
        return "Suspicious", f"Models reported benign but moderate risk present (P:{phishing_risk}, M:{malware_risk})"
    return "Benign", f"Models and risk analysis confirm safe (P:{phishing_risk}, M:{malware_risk})"
|
| 322 |
|
| 323 |
def analyze_url(url):
|
| 324 |
try:
|
|
|
|
| 338 |
phishing_risk = calculate_phishing_risk(phishing_features)
|
| 339 |
malware_risk = calculate_malware_risk(malware_features)
|
| 340 |
|
| 341 |
+
# Get final prediction using enhanced model-priority system
|
| 342 |
final_result, decision_reason = get_final_prediction(
|
| 343 |
phishing_pred, malware_pred, phishing_risk, malware_risk
|
| 344 |
)
|