quantum-drive committed on
Commit
9e9f012
·
verified ·
1 Parent(s): a77754d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -28
app.py CHANGED
@@ -252,43 +252,73 @@ def calculate_malware_risk(features):
252
  # -------------------------------
253
  def get_final_prediction(phishing_pred, malware_pred, phishing_risk, malware_risk):
254
  """
255
- Simple risk-based decision system with whitelist protection:
256
- 1. Check for trusted domains first
257
- 2. Compare risk scores directly
258
- 3. Use higher risk score for final prediction
259
- 4. Add safety thresholds for benign classification
260
  """
261
 
262
- # Safety thresholds
263
- THREAT_THRESHOLD = 30 # Increased minimum score to consider as threat
264
- HIGH_CONFIDENCE_THRESHOLD = 15 # Risk difference for high confidence
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
- # Case 1: Both risks are very low - definitely benign
267
- if phishing_risk < THREAT_THRESHOLD and malware_risk < THREAT_THRESHOLD:
268
- return "Benign", f"Low risk scores (Phishing: {phishing_risk}, Malware: {malware_risk}) - Safe to browse"
269
 
270
- # Case 2: One or both risks are above threshold
271
- risk_difference = abs(phishing_risk - malware_risk)
 
 
 
 
272
 
273
- if phishing_risk > malware_risk:
274
- if phishing_risk >= THREAT_THRESHOLD:
275
- confidence = "High" if risk_difference >= HIGH_CONFIDENCE_THRESHOLD else "Medium"
276
- return "Phishing", f"Phishing risk higher ({phishing_risk} vs {malware_risk}) - {confidence} confidence"
277
  else:
278
- return "Benign", f"Phishing risk slightly higher but below threat threshold ({phishing_risk})"
279
 
280
- elif malware_risk > phishing_risk:
281
- if malware_risk >= THREAT_THRESHOLD:
282
- confidence = "High" if risk_difference >= HIGH_CONFIDENCE_THRESHOLD else "Medium"
283
- return "Malicious", f"Malware risk higher ({malware_risk} vs {phishing_risk}) - {confidence} confidence"
284
  else:
285
- return "Benign", f"Malware risk slightly higher but below threat threshold ({malware_risk})"
286
 
287
- else: # Equal risks
288
- if phishing_risk >= THREAT_THRESHOLD:
289
- return "Suspicious", f"Equal risk scores ({phishing_risk}) - requires manual review"
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  else:
291
- return "Benign", f"Equal low risk scores ({phishing_risk}) - Safe to browse"
292
 
293
  def analyze_url(url):
294
  try:
@@ -308,7 +338,7 @@ def analyze_url(url):
308
  phishing_risk = calculate_phishing_risk(phishing_features)
309
  malware_risk = calculate_malware_risk(malware_features)
310
 
311
- # Get final prediction using simple risk-based system
312
  final_result, decision_reason = get_final_prediction(
313
  phishing_pred, malware_pred, phishing_risk, malware_risk
314
  )
 
252
  # -------------------------------
253
  def get_final_prediction(phishing_pred, malware_pred, phishing_risk, malware_risk):
254
  """
255
+ Enhanced decision system:
256
+ 1. Prioritize model predictions first
257
+ 2. Use risk scores for confidence and tie-breaking
258
+ 3. Whitelist protection for trusted domains
 
259
  """
260
 
261
+ # Trusted domains whitelist (exact match)
262
+ trusted_domains = [
263
+ 'google.com', 'www.google.com', 'facebook.com', 'www.facebook.com',
264
+ 'microsoft.com', 'www.microsoft.com', 'apple.com', 'www.apple.com',
265
+ 'amazon.com', 'www.amazon.com', 'youtube.com', 'www.youtube.com',
266
+ 'twitter.com', 'www.twitter.com', 'linkedin.com', 'www.linkedin.com',
267
+ 'github.com', 'www.github.com', 'stackoverflow.com', 'www.stackoverflow.com'
268
+ ]
269
+
270
+ # Extract domain from URL for whitelist check
271
+ from urllib.parse import urlparse
272
+ try:
273
+ parsed_url = urlparse(url if 'url' in locals() else "")
274
+ domain = parsed_url.netloc.lower()
275
+ if domain in trusted_domains:
276
+ return "Benign", f"Whitelisted trusted domain: {domain}"
277
+ except:
278
+ pass
279
 
280
+ # Model prediction priorities
281
+ RISK_BOOST_THRESHOLD = 15 # Minimum risk to boost model prediction
 
282
 
283
+ # Case 1: Both models detect threats
284
+ if phishing_pred == "Phishing" and malware_pred == "malicious":
285
+ if phishing_risk > malware_risk:
286
+ return "Phishing", f"Both models detected threat - phishing characteristics stronger (Risk: {phishing_risk} vs {malware_risk})"
287
+ else:
288
+ return "Malicious", f"Both models detected threat - malware characteristics stronger (Risk: {malware_risk} vs {phishing_risk})"
289
 
290
+ # Case 2: Only phishing model detects threat
291
+ elif phishing_pred == "Phishing" and malware_pred != "malicious":
292
+ if phishing_risk >= RISK_BOOST_THRESHOLD or phishing_risk > malware_risk:
293
+ return "Phishing", f"Phishing model detected threat with supporting risk indicators (Risk: {phishing_risk})"
294
  else:
295
+ return "Phishing", f"Phishing model detected threat (Risk score: {phishing_risk})"
296
 
297
+ # Case 3: Only malware model detects threat
298
+ elif malware_pred == "malicious" and phishing_pred != "Phishing":
299
+ if malware_risk >= RISK_BOOST_THRESHOLD or malware_risk > phishing_risk:
300
+ return "Malicious", f"Malware model detected threat with supporting risk indicators (Risk: {malware_risk})"
301
  else:
302
+ return "Malicious", f"Malware model detected threat (Risk score: {malware_risk})"
303
 
304
+ # Case 4: Both models report benign - check high risk scores
305
+ else:
306
+ HIGH_RISK_THRESHOLD = 40 # High risk threshold for override
307
+ MEDIUM_RISK_THRESHOLD = 25 # Medium risk threshold
308
+
309
+ if phishing_risk >= HIGH_RISK_THRESHOLD and malware_risk >= HIGH_RISK_THRESHOLD:
310
+ if phishing_risk > malware_risk:
311
+ return "Phishing", f"Models missed but high phishing risk detected ({phishing_risk})"
312
+ else:
313
+ return "Malicious", f"Models missed but high malware risk detected ({malware_risk})"
314
+ elif phishing_risk >= HIGH_RISK_THRESHOLD:
315
+ return "Phishing", f"Models reported benign but high phishing risk indicators ({phishing_risk})"
316
+ elif malware_risk >= HIGH_RISK_THRESHOLD:
317
+ return "Malicious", f"Models reported benign but high malware risk indicators ({malware_risk})"
318
+ elif phishing_risk >= MEDIUM_RISK_THRESHOLD or malware_risk >= MEDIUM_RISK_THRESHOLD:
319
+ return "Suspicious", f"Models reported benign but moderate risk present (P:{phishing_risk}, M:{malware_risk})"
320
  else:
321
+ return "Benign", f"Models and risk analysis confirm safe (P:{phishing_risk}, M:{malware_risk})"
322
 
323
  def analyze_url(url):
324
  try:
 
338
  phishing_risk = calculate_phishing_risk(phishing_features)
339
  malware_risk = calculate_malware_risk(malware_features)
340
 
341
+ # Get final prediction using enhanced model-priority system
342
  final_result, decision_reason = get_final_prediction(
343
  phishing_pred, malware_pred, phishing_risk, malware_risk
344
  )