padmanabhbosamia commited on
Commit
5253576
Β·
verified Β·
1 Parent(s): e25e49a

Update app.py

Browse files

Extend app.py with a Business Impact Calculator tab and a Model Drift Analysis tab.

Files changed (1) hide show
  1. app.py +420 -0
app.py CHANGED
@@ -224,6 +224,323 @@ def predict_batch(csv_file):
224
  return None, f"❌ Error: {str(e)}"
225
 
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  # Create Gradio interface
228
  with gr.Blocks(title="Fraud Detection System") as demo:
229
 
@@ -334,6 +651,109 @@ with gr.Blocks(title="Fraud Detection System") as demo:
334
  outputs=[download_file, batch_output]
335
  )
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  with gr.Tab("ℹ️ About"):
338
  gr.Markdown("""
339
  ## About This Demo
 
224
  return None, f"❌ Error: {str(e)}"
225
 
226
 
227
def calculate_business_impact(total_transactions, fraud_rate_percent, precision, recall,
                              fraud_loss_per_transaction, review_cost_per_transaction):
    """Calculate the financial business impact of a fraud detection model.

    Derives an approximate confusion matrix from the supplied precision/recall
    and volume figures, then translates it into monthly dollar amounts and a
    Markdown report.

    Args:
        total_transactions: Monthly transaction volume.
        fraud_rate_percent: Share of transactions that are fraudulent, in percent.
        precision: Model precision in [0, 1].
        recall: Model recall in [0, 1].
        fraud_loss_per_transaction: Average dollar loss per undetected fraud.
        review_cost_per_transaction: Cost of manually reviewing one flagged transaction.

    Returns:
        A Markdown-formatted report string, or an error message on failure.
    """
    try:
        # Convert fraud rate to decimal
        fraud_rate = fraud_rate_percent / 100

        # Calculate actual frauds in the dataset
        total_frauds = int(total_transactions * fraud_rate)
        total_legitimate = total_transactions - total_frauds

        # Confusion matrix components.
        # Recall = TP / (TP + FN) = TP / total_frauds, so TP = recall * total_frauds
        true_positives = int(recall * total_frauds)
        false_negatives = total_frauds - true_positives

        # Precision = TP / (TP + FP), so FP = TP / precision - TP
        false_positives = int(true_positives / precision - true_positives) if precision > 0 else 0
        true_negatives = total_legitimate - false_positives

        # Financial impact: frauds caught (prevented losses)
        fraud_losses_prevented = true_positives * fraud_loss_per_transaction

        # Frauds missed (actual losses)
        fraud_losses_incurred = false_negatives * fraud_loss_per_transaction

        # Review costs (for all flagged transactions)
        total_flagged = true_positives + false_positives
        total_review_costs = total_flagged * review_cost_per_transaction

        # Net benefit of running the model
        net_benefit = fraud_losses_prevented - fraud_losses_incurred - total_review_costs

        # Without model (baseline - all frauds go through)
        baseline_losses = total_frauds * fraud_loss_per_transaction
        savings_vs_baseline = baseline_losses - fraud_losses_incurred - total_review_costs

        # Rates, guarded against empty populations
        fraud_detection_rate = (true_positives / total_frauds * 100) if total_frauds > 0 else 0
        false_positive_rate = (false_positives / total_legitimate * 100) if total_legitimate > 0 else 0

        # BUG FIX: the original divided by fraud_losses_prevented inline and
        # unguarded, so recall == 0 or fraud_rate == 0 raised ZeroDivisionError
        # and the function returned an error message instead of a report.
        cost_per_dollar_prevented = (total_review_costs / fraud_losses_prevented) if fraud_losses_prevented > 0 else 0
        roi_pct = ((net_benefit / total_review_costs) * 100) if total_review_costs > 0 else 0

        # Format results
        results = f"""
## 💰 Business Impact Analysis

### 📊 Transaction Breakdown
- **Total Transactions:** {total_transactions:,} per month
- **Actual Frauds:** {total_frauds:,} ({fraud_rate_percent:.2f}%)
- **Legitimate Transactions:** {total_legitimate:,} ({100-fraud_rate_percent:.2f}%)

### 🎯 Model Performance
- **Precision:** {precision*100:.1f}% (of flagged, {precision*100:.1f}% are actually fraud)
- **Recall:** {recall*100:.1f}% (catches {recall*100:.1f}% of all frauds)

### 🔍 Detection Results
- **✅ True Positives (Frauds Caught):** {true_positives:,} ({fraud_detection_rate:.1f}% of frauds)
- **❌ False Negatives (Frauds Missed):** {false_negatives:,} ({100-fraud_detection_rate:.1f}% of frauds)
- **⚠️ False Positives (False Alarms):** {false_positives:,} ({false_positive_rate:.2f}% of legitimate)
- **✅ True Negatives (Correctly Allowed):** {true_negatives:,}

### 💵 Financial Impact (Monthly)

**Fraud Prevention:**
- **Losses Prevented:** ${fraud_losses_prevented:,.2f}
- ({true_positives:,} frauds caught × ${fraud_loss_per_transaction:,.2f})

**Losses Incurred:**
- **Missed Fraud Losses:** ${fraud_losses_incurred:,.2f}
- ({false_negatives:,} frauds missed × ${fraud_loss_per_transaction:,.2f})

**Operational Costs:**
- **Manual Review Costs:** ${total_review_costs:,.2f}
- ({total_flagged:,} flagged transactions × ${review_cost_per_transaction:,.2f})

### 📈 **Net Benefit: ${net_benefit:,.2f} per month**

### 🎯 **Primary Benefit:**
**The model saves ${savings_vs_baseline:,.2f} per month compared to having no fraud detection system.**

**Annual Impact:** ${net_benefit * 12:,.2f}

### 📊 Key Insights:
1. **Fraud Detection Rate:** {fraud_detection_rate:.1f}% of frauds are caught
2. **Cost Efficiency:** Every ${cost_per_dollar_prevented:.2f} spent on reviews prevents ${fraud_loss_per_transaction:.2f} in fraud
3. **ROI:** {roi_pct:.1f}% return on review investment
4. **Remaining Risk:** {false_negatives:,} frauds still slip through (${fraud_losses_incurred:,.2f} in losses)

### ⚠️ Recommendations:
- **Current Recall ({recall*100:.1f}%):** Missing {false_negatives:,} frauds costs ${fraud_losses_incurred:,.2f}/month
- Consider improving recall to reduce missed frauds
- Balance precision to control review costs
"""

        return results

    except Exception as e:
        return f"❌ Error calculating business impact: {str(e)}"
328
+
329
+
330
def analyze_model_drift(initial_precision, current_precision, months_deployed,
                        initial_recall, current_recall):
    """Analyze model drift and provide recommendations.

    Compares deployment-time precision/recall with current values, grades the
    severity of the degradation, and returns a Markdown report listing likely
    causes (ranked by probability) and prioritized remediation actions.

    Args:
        initial_precision: Precision at deployment, in [0, 1].
        current_precision: Precision now, in [0, 1].
        months_deployed: Months the model has been in production (display only).
        initial_recall: Recall at deployment, in [0, 1].
        current_recall: Recall now, in [0, 1].

    Returns:
        A Markdown-formatted report string, or an error message on failure.
    """
    try:
        precision_drop = initial_precision - current_precision
        precision_drop_pct = (precision_drop / initial_precision * 100) if initial_precision > 0 else 0

        recall_change = current_recall - initial_recall
        recall_change_pct = (recall_change / initial_recall * 100) if initial_recall > 0 else 0

        # Grade severity from the *relative* precision drop
        if precision_drop_pct > 20:
            severity = "🔴 CRITICAL"
            urgency = "Immediate action required"
        elif precision_drop_pct > 10:
            severity = "🟠 HIGH"
            urgency = "Action needed within 1-2 weeks"
        else:
            severity = "🟡 MODERATE"
            urgency = "Monitor closely, plan retraining"

        # Most likely causes (in order of probability); the top entry's wording
        # depends on how steep the drop is.
        causes = []
        if precision_drop_pct > 15:
            causes.append({
                "rank": 1,
                "cause": "**Data Drift / Distribution Shift**",
                "description": "The statistical distribution of incoming transactions has changed. Legitimate customer behavior patterns have shifted (e.g., new spending habits, new products, seasonal changes, post-pandemic behavior changes).",
                "probability": "Very High (80-90%)"
            })
        else:
            causes.append({
                "rank": 1,
                "cause": "**Data Drift / Distribution Shift**",
                "description": "Gradual changes in transaction patterns over time.",
                "probability": "High (70-80%)"
            })

        causes.append({
            "rank": 2,
            "cause": "**Concept Drift**",
            "description": "The relationship between features and fraud has changed. Fraudsters have adapted their tactics to evade detection, or new fraud patterns have emerged that weren't in training data.",
            "probability": "Medium-High (50-60%)"
        })

        causes.append({
            "rank": 3,
            "cause": "**Feature Drift**",
            "description": "Individual features have changed meaning or distribution. Examples: new payment methods, changes in merchant categories, updated transaction processing systems.",
            "probability": "Medium (30-40%)"
        })

        causes.append({
            "rank": 4,
            "cause": "**Label Quality Issues**",
            "description": "Ground truth labels may have become less accurate, or fraud definition has changed. This is less common but can cause apparent precision drops.",
            "probability": "Low (10-20%)"
        })

        # Appropriate actions, static playbook rendered in priority order
        actions = [
            {
                "priority": "🔴 IMMEDIATE",
                "action": "**Data Distribution Analysis**",
                "steps": [
                    "Compare feature distributions of recent data vs training data",
                    "Use statistical tests (KS test, PSI - Population Stability Index)",
                    "Identify which features have drifted most significantly",
                    "Check for missing values, outliers, or data quality issues"
                ]
            },
            {
                "priority": "🔴 IMMEDIATE",
                "action": "**Model Retraining**",
                "steps": [
                    "Collect recent labeled data (last 1-3 months)",
                    "Retrain model with updated dataset",
                    "Use time-based train/test splits (not random)",
                    "Consider ensemble with older model for stability",
                    "Validate on holdout set before deployment"
                ]
            },
            {
                "priority": "🟠 HIGH",
                "action": "**Implement Monitoring**",
                "steps": [
                    "Set up automated drift detection (PSI, feature drift alerts)",
                    "Track precision/recall on rolling windows (daily/weekly)",
                    "Monitor false positive rate trends",
                    "Alert when metrics drop below thresholds",
                    "Dashboard for real-time model health"
                ]
            },
            {
                "priority": "🟠 HIGH",
                "action": "**Threshold Adjustment**",
                "steps": [
                    "Temporarily adjust classification threshold to maintain precision",
                    "Use probability scores instead of binary predictions",
                    "Implement adaptive thresholds based on recent performance",
                    "Balance precision vs recall based on business needs"
                ]
            },
            {
                "priority": "🟡 MEDIUM",
                "action": "**Feature Engineering Updates**",
                "steps": [
                    "Review and update feature engineering logic",
                    "Add new features that capture current fraud patterns",
                    "Remove obsolete features",
                    "Consider interaction features or time-based features"
                ]
            },
            {
                "priority": "🟡 MEDIUM",
                "action": "**Continuous Learning Pipeline**",
                "steps": [
                    "Implement periodic retraining schedule (monthly/quarterly)",
                    "Use online learning or incremental updates if applicable",
                    "A/B test new model versions before full deployment",
                    "Maintain model versioning and rollback capability"
                ]
            }
        ]

        # NOTE: the original assigned an `impact_note` string here that was
        # never used in the report — dead code, removed.

        # Format results
        results = f"""
## 🔍 Model Drift Analysis

### 📉 Performance Degradation
- **Initial Precision:** {initial_precision*100:.1f}%
- **Current Precision:** {current_precision*100:.1f}%
- **Precision Drop:** {precision_drop*100:.1f} percentage points ({precision_drop_pct:.1f}% relative decrease)
- **Deployment Duration:** {months_deployed} months

- **Initial Recall:** {initial_recall*100:.1f}%
- **Current Recall:** {current_recall*100:.1f}%
- **Recall Change:** {recall_change*100:+.1f} percentage points ({recall_change_pct:+.1f}% relative change)

### {severity} - {urgency}

---

## 🎯 Most Likely Cause

### {causes[0]['rank']}. {causes[0]['cause']}
**Probability:** {causes[0]['probability']}

**Explanation:**
{causes[0]['description']}

**Why This Matters:**
- Lower precision = More false positives
- More legitimate transactions flagged for review
- Increased operational costs and customer friction
- Model is becoming less reliable over time

---

## 🔧 Appropriate Actions (Priority Order)

"""

        # Render each playbook entry as a heading followed by numbered steps
        for action in actions:
            results += f"""
### {action['priority']} {action['action']}
"""
            for i, step in enumerate(action['steps'], 1):
                results += f"{i}. {step}\n"
            results += "\n"

        results += """
---

## 📊 Additional Considerations

### Why Precision Drops Are Critical:
1. **Financial Impact:** More false positives = higher review costs
2. **Customer Experience:** Legitimate customers face more friction
3. **Operational Burden:** Review teams overwhelmed with false alarms
4. **Trust Erosion:** Model loses credibility if too many false alarms

### Prevention Strategy:
- **Proactive Monitoring:** Don't wait for metrics to drop
- **Regular Retraining:** Schedule periodic model updates (every 1-3 months)
- **Data Quality:** Ensure incoming data matches training data characteristics
- **Feedback Loops:** Incorporate labeled outcomes back into training data

### Expected Timeline:
- **Immediate (Week 1):** Data analysis, threshold adjustment
- **Short-term (Weeks 2-4):** Model retraining, validation
- **Long-term (Ongoing):** Continuous monitoring, scheduled retraining

---

## 💡 Key Takeaway

**The most likely cause is DATA DRIFT** - your model was trained on data from 3+ months ago, and transaction patterns have changed. The model needs to be retrained on recent data to adapt to current patterns.

**Action:** Implement a retraining pipeline with recent labeled data and set up continuous monitoring to catch drift early.
"""

        return results

    except Exception as e:
        return f"❌ Error analyzing model drift: {str(e)}"
542
+
543
+
544
  # Create Gradio interface
545
  with gr.Blocks(title="Fraud Detection System") as demo:
546
 
 
651
  outputs=[download_file, batch_output]
652
  )
653
 
654
with gr.Tab("💰 Business Impact Calculator"):
    # Tab: turn model metrics + business parameters into a dollar-impact report.
    gr.Markdown("### Calculate Financial Impact of Your Fraud Detection Model")
    gr.Markdown("Enter your model's performance metrics and business parameters to see the financial impact")

    with gr.Row():
        # Left column: all inputs; right column: rendered Markdown report.
        with gr.Column():
            gr.Markdown("#### 📊 Model Performance Metrics")
            precision_input = gr.Slider(0, 1, step=0.01, value=0.85, label="Precision (0-1)", info="Of flagged transactions, what % are actually fraud?")
            recall_input = gr.Slider(0, 1, step=0.01, value=0.90, label="Recall (0-1)", info="Of all frauds, what % does the model catch?")

            gr.Markdown("#### 🏦 Business Parameters")
            total_transactions = gr.Number(label="Total Transactions per Month", value=1000000, precision=0)
            fraud_rate = gr.Slider(0, 10, step=0.01, value=1.0, label="Fraud Rate (%)", info="Percentage of transactions that are fraudulent")

            gr.Markdown("#### 💵 Cost Parameters")
            fraud_loss = gr.Number(label="Average Fraud Loss per Transaction ($)", value=500, precision=2)
            review_cost = gr.Number(label="Manual Review Cost per Flagged Transaction ($)", value=2.00, precision=2)

            calc_button = gr.Button("💰 Calculate Business Impact", variant="primary", size="lg")

        with gr.Column():
            impact_output = gr.Markdown(label="Business Impact Analysis")

    # Button wiring: the `inputs` order must match calculate_business_impact's
    # positional signature (transactions, rate, precision, recall, loss, cost).
    calc_button.click(
        fn=calculate_business_impact,
        inputs=[total_transactions, fraud_rate, precision_input, recall_input, fraud_loss, review_cost],
        outputs=[impact_output]
    )

    gr.Markdown("---")
    gr.Markdown("""
### 📚 How to Use This Calculator

**Example Scenario:**
- Bank processes 1 million transactions/month
- Model has 85% precision and 90% recall
- 1% of transactions are fraudulent
- Average fraud loss: $500 per transaction
- Manual review cost: $2 per flagged transaction

**What This Calculates:**
1. **True Positives:** Frauds caught by the model
2. **False Negatives:** Frauds missed (costly!)
3. **False Positives:** Legitimate transactions flagged (review costs)
4. **Net Benefit:** Total financial impact of using the model

**Key Insight:** The primary benefit is the **net savings** compared to having no fraud detection system.
""")
702
+
703
with gr.Tab("📉 Model Drift Analysis"):
    # Tab: diagnose precision/recall degradation observed after deployment.
    gr.Markdown("### Analyze Model Performance Degradation")
    gr.Markdown("If your model's precision or recall has dropped over time, use this tool to identify likely causes and appropriate actions")

    with gr.Row():
        # Left column: before/after metrics; right column: rendered analysis.
        with gr.Column():
            gr.Markdown("#### 📊 Initial Performance (At Deployment)")
            initial_precision = gr.Slider(0, 1, step=0.01, value=0.85, label="Initial Precision", info="Model precision when first deployed")
            initial_recall = gr.Slider(0, 1, step=0.01, value=0.90, label="Initial Recall", info="Model recall when first deployed")

            gr.Markdown("#### 📉 Current Performance (Now)")
            current_precision = gr.Slider(0, 1, step=0.01, value=0.70, label="Current Precision", info="Model precision after deployment period")
            current_recall = gr.Slider(0, 1, step=0.01, value=0.90, label="Current Recall", info="Model recall now (may have changed)")

            gr.Markdown("#### ⏱️ Deployment Information")
            months_deployed = gr.Number(label="Months Since Deployment", value=3, precision=1, info="How long has the model been in production?")

            analyze_button = gr.Button("🔍 Analyze Model Drift", variant="primary", size="lg")

        with gr.Column():
            drift_output = gr.Markdown(label="Drift Analysis & Recommendations")

    # Button wiring: the `inputs` order must match analyze_model_drift's
    # positional signature (init prec, curr prec, months, init recall, curr recall).
    analyze_button.click(
        fn=analyze_model_drift,
        inputs=[initial_precision, current_precision, months_deployed, initial_recall, current_recall],
        outputs=[drift_output]
    )

    gr.Markdown("---")
    gr.Markdown("""
### 📚 Understanding Model Drift

**What is Model Drift?**
Model drift occurs when a machine learning model's performance degrades over time because the data it encounters in production differs from the data it was trained on.

**Common Scenarios:**
- **Precision drops from 85% to 70%** → More false positives (legitimate transactions flagged)
- **Recall drops** → More frauds missed (false negatives)
- **Both drop** → Model is becoming unreliable

**Why It Happens:**
1. Customer behavior changes (new spending patterns, seasonal trends)
2. Fraudsters adapt their tactics
3. New products/services introduced
4. Changes in transaction processing systems
5. External factors (economic changes, regulations)

**Example:**
After 3 months, precision drops from 85% to 70%. This means:
- Previously: 85 out of 100 flagged transactions were fraud
- Now: Only 70 out of 100 flagged transactions are fraud
- **30% increase in false positives** = Higher review costs, customer friction
""")
756
+
757
  with gr.Tab("ℹ️ About"):
758
  gr.Markdown("""
759
  ## About This Demo