# Author: YashChowdhary
# Commit: "Update app.py"
# Revision: f1b0880 (verified)
"""
Auto Insurance Claims Fraud Detection
=====================================
A machine learning application that trains and compares 4 different models
for detecting fraudulent insurance claims.
Models: XGBoost, LightGBM, Random Forest, Logistic Regression
"""
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# ML Libraries
from sklearn.model_selection import cross_val_score
from sklearn.metrics import (
precision_recall_curve, roc_curve, auc,
confusion_matrix, classification_report,
f1_score, precision_score, recall_score, accuracy_score
)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
# ============================================================================
# PLOT STYLE CONFIGURATION
# Use white background for universal readability in both light and dark modes
# ============================================================================
def setup_plot_style():
    """Apply the app's shared matplotlib look by updating ``plt.rcParams``.

    Sets white figure, axes and legend backgrounds, dark-gray text, ticks
    and axes edges, light-gray half-transparent grid lines, and the default
    font sizes. Mutates global matplotlib state and returns nothing.
    """
    background = 'white'
    ink = '#333333'
    soft_gray = '#cccccc'

    style = {
        # Backgrounds
        'figure.facecolor': background,
        'axes.facecolor': background,
        'legend.facecolor': background,
        # Foreground elements
        'axes.edgecolor': ink,
        'axes.labelcolor': ink,
        'text.color': ink,
        'xtick.color': ink,
        'ytick.color': ink,
        # Grid and legend frame
        'grid.color': soft_gray,
        'grid.alpha': 0.5,
        'legend.edgecolor': soft_gray,
        # Font sizes
        'font.size': 11,
        'axes.titlesize': 14,
        'axes.labelsize': 12,
    }
    plt.rcParams.update(style)


# Apply the style once at import time.
setup_plot_style()
# Named accent colors (hex strings) shared by the plotting functions;
# picked to stay vivid on a white background.
COLORS = dict(
    primary='#2563eb',  # Blue
    success='#16a34a',  # Green
    danger='#dc2626',   # Red
    warning='#f59e0b',  # Amber
    purple='#9333ea',   # Purple
    cyan='#0891b2',     # Cyan
)
# ============================================================================
# DATA LOADING AND PREPROCESSING
# ============================================================================
def load_and_prepare_data(train_path='train.csv', test_path='test.csv', target='fraud'):
    """Load the train and test CSV files and split features from the target.

    Args:
        train_path: Path of the training CSV (default ``'train.csv'``).
        test_path: Path of the test CSV (default ``'test.csv'``).
        target: Name of the label column present in both files
            (default ``'fraud'``).

    Returns:
        A 6-tuple ``(X_train, X_test, y_train, y_test, train_df, test_df)``
        where the ``X_*`` frames are the CSV contents without the target
        column, the ``y_*`` series are the target column, and the ``*_df``
        frames are the unmodified CSV contents.

    Raises:
        KeyError: If ``target`` is not a column of either file.
    """
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    # Fail early with a message that names the offending split.
    for split_name, frame in (('training', train_df), ('test', test_df)):
        if target not in frame.columns:
            raise KeyError(f"{split_name} data has no '{target}' column")

    X_train = train_df.drop(columns=target)
    y_train = train_df[target]
    X_test = test_df.drop(columns=target)
    y_test = test_df[target]
    return X_train, X_test, y_train, y_test, train_df, test_df
def apply_smote(X_train, y_train):
    """Resample the training data with SMOTE to counter class imbalance.

    Uses a fixed seed (``random_state=42``) so repeated runs produce the
    same resampled set.

    Returns:
        ``(X_resampled, y_resampled)`` as produced by ``SMOTE.fit_resample``.
    """
    sampler = SMOTE(random_state=42)
    balanced_X, balanced_y = sampler.fit_resample(X_train, y_train)
    return balanced_X, balanced_y
# ============================================================================
# MODEL DEFINITIONS
# ============================================================================
def get_models(random_state=42):
    """Build the four untrained classifiers compared by the app.

    Args:
        random_state: Seed passed to every model (default 42).

    Returns:
        A dict mapping display name to estimator, in the order
        XGBoost, LightGBM, Random Forest, Logistic Regression. The insertion
        order is what the UI dropdowns list.
    """
    return {
        'XGBoost': XGBClassifier(
            n_estimators=100,
            max_depth=4,
            learning_rate=0.1,
            # NOTE(review): this file also trains on SMOTE-resampled data, so
            # the positive class is compensated twice — confirm that is
            # intended.
            scale_pos_weight=10,
            random_state=random_state,
            # `use_label_encoder` is intentionally not passed: it is
            # deprecated/removed in current XGBoost releases.
            eval_metric='logloss',
        ),
        'LightGBM': LGBMClassifier(
            n_estimators=100,
            max_depth=4,
            learning_rate=0.1,
            class_weight='balanced',
            random_state=random_state,
            verbose=-1,  # silence LightGBM's training log
        ),
        'Random Forest': RandomForestClassifier(
            n_estimators=100,
            max_depth=6,
            class_weight='balanced',
            random_state=random_state,
            n_jobs=-1,  # use all available cores
        ),
        'Logistic Regression': LogisticRegression(
            class_weight='balanced',
            max_iter=1000,
            random_state=random_state,
        ),
    }
# ============================================================================
# MODEL TRAINING AND EVALUATION
# ============================================================================
def train_model(model, X_train, y_train):
    """Fit ``model`` in place on the given data and hand the same object back.

    The return value of ``model.fit`` is ignored; the object that was passed
    in is what gets returned.
    """
    model.fit(X_train, y_train)
    return model
def evaluate_model(model, X_test, y_test):
    """Return the model's hard predictions and positive-class scores.

    ``y_test`` is accepted for signature symmetry but is not used.

    Returns:
        ``(y_pred, y_proba)`` where ``y_pred`` is ``model.predict(X_test)``
        and ``y_proba`` is column 1 of ``model.predict_proba(X_test)``.
    """
    predicted_labels = model.predict(X_test)
    class_scores = model.predict_proba(X_test)
    positive_scores = class_scores[:, 1]
    return predicted_labels, positive_scores
def get_metrics(y_test, y_pred, y_proba):
    """Compute the five summary scores shown for each model.

    Returns:
        A dict with keys ``'Accuracy'``, ``'Precision'``, ``'Recall'``,
        ``'F1 Score'`` (all from the hard predictions ``y_pred``) and
        ``'ROC AUC'`` (area under the ROC curve built from ``y_proba``).
        Precision, recall and F1 use ``zero_division=0``.
    """
    label_scorers = (
        ('Accuracy', accuracy_score, {}),
        ('Precision', precision_score, {'zero_division': 0}),
        ('Recall', recall_score, {'zero_division': 0}),
        ('F1 Score', f1_score, {'zero_division': 0}),
    )
    results = {
        label: scorer(y_test, y_pred, **options)
        for label, scorer, options in label_scorers
    }

    # roc_curve returns (fpr, tpr, thresholds); only the first two feed auc.
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    results['ROC AUC'] = auc(fpr, tpr)
    return results
def find_optimal_threshold(y_test, y_proba, thresholds=None):
    """Find the classification threshold that maximizes the F1 score.

    Args:
        y_test: True binary labels.
        y_proba: Positive-class scores; any array-like is accepted.
        thresholds: Optional 1-D collection of candidate thresholds. When
            omitted, the grid ``np.arange(0.1, 0.9, 0.01)`` is searched.

    Returns:
        ``(best_threshold, best_f1, thresholds, f1_scores)``: the winning
        threshold, its F1 score, the grid that was searched (ndarray) and
        the list of F1 scores for every grid point. Ties go to the lowest
        index (``np.argmax`` semantics).

    Raises:
        ValueError: If ``thresholds`` is empty.
    """
    if thresholds is None:
        thresholds = np.arange(0.1, 0.9, 0.01)
    else:
        thresholds = np.asarray(thresholds, dtype=float).ravel()
    if thresholds.size == 0:
        raise ValueError("thresholds must contain at least one value")

    # Convert once so plain lists support the element-wise comparison below.
    scores = np.asarray(y_proba)
    f1_scores = [
        f1_score(y_test, (scores >= thresh).astype(int), zero_division=0)
        for thresh in thresholds
    ]

    best_idx = int(np.argmax(f1_scores))
    return thresholds[best_idx], f1_scores[best_idx], thresholds, f1_scores
# ============================================================================
# VISUALIZATION FUNCTIONS
# ============================================================================
def plot_precision_recall_curve(y_test, y_proba, model_name):
    """Draw the precision-recall curve for one model.

    The legend reports the area under the curve, and a dashed horizontal
    line marks ``y_test.mean()`` as the random baseline.

    Returns:
        The matplotlib figure.
    """
    setup_plot_style()

    precision_vals, recall_vals, _ = precision_recall_curve(y_test, y_proba)
    area = auc(recall_vals, precision_vals)
    curve_color = COLORS['primary']

    fig, ax = plt.subplots(figsize=(9, 6))
    ax.plot(
        recall_vals, precision_vals,
        color=curve_color, linewidth=2.5,
        label=f'{model_name} (AUC = {area:.3f})',
    )
    ax.fill_between(recall_vals, precision_vals, alpha=0.2, color=curve_color)

    # Reference line at the mean of the true labels.
    positive_rate = y_test.mean()
    ax.axhline(
        y=positive_rate,
        color=COLORS['danger'], linestyle='--', linewidth=2,
        label=f'Random Baseline = {positive_rate:.3f}',
    )

    ax.set_title(f'Precision-Recall Curve: {model_name}',
                 fontsize=15, fontweight='bold', pad=15)
    ax.set_xlabel('Recall (Fraud Detection Rate)', fontweight='bold')
    ax.set_ylabel('Precision (True Fraud Rate)', fontweight='bold')
    ax.legend(loc='upper right', fontsize=11, framealpha=0.95)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.4)

    plt.tight_layout()
    return fig
def plot_roc_curve(y_test, y_proba, model_name):
    """Draw the ROC curve for one model with the diagonal as reference.

    The legend reports the area under the curve.

    Returns:
        The matplotlib figure.
    """
    setup_plot_style()

    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_proba)
    area = auc(false_pos_rate, true_pos_rate)
    curve_color = COLORS['primary']

    fig, ax = plt.subplots(figsize=(9, 6))
    ax.plot(
        false_pos_rate, true_pos_rate,
        color=curve_color, linewidth=2.5,
        label=f'{model_name} (AUC = {area:.3f})',
    )
    ax.fill_between(false_pos_rate, true_pos_rate, alpha=0.2, color=curve_color)

    # Diagonal: the curve a random classifier would trace.
    ax.plot(
        [0, 1], [0, 1],
        color=COLORS['danger'], linestyle='--', linewidth=2,
        label='Random Classifier',
    )

    ax.set_title(f'ROC Curve: {model_name}', fontsize=15, fontweight='bold', pad=15)
    ax.set_xlabel('False Positive Rate', fontweight='bold')
    ax.set_ylabel('True Positive Rate (Recall)', fontweight='bold')
    ax.legend(loc='lower right', fontsize=11, framealpha=0.95)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.4)

    plt.tight_layout()
    return fig
def plot_confusion_matrix(y_test, y_pred, model_name):
    """Plot the 2x2 confusion matrix as an annotated heatmap.

    Rows are true labels and columns are predicted labels, each ordered
    Legitimate then Fraud. A side box lists the four cell counts.

    Assumes labels are encoded 0 = legitimate and 1 = fraud, which is how
    the rest of this file treats the ``fraud`` column.

    Returns:
        The matplotlib figure.
    """
    setup_plot_style()

    # Pin the label set so the matrix is always 2x2. Without it, data that
    # contains a single class yields a 1x1 matrix and the four-way unpacking
    # of cm.ravel() below raises ValueError.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    class_names = ['Legitimate', 'Fraud']

    fig, ax = plt.subplots(figsize=(9, 7))
    # Use a colormap with good contrast
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=class_names,
                yticklabels=class_names,
                annot_kws={'size': 18, 'fontweight': 'bold'},
                linewidths=2, linecolor='white',
                cbar_kws={'label': 'Count', 'shrink': 0.8})
    ax.set_xlabel('Predicted Label', fontweight='bold', fontsize=12)
    ax.set_ylabel('True Label', fontweight='bold', fontsize=12)
    ax.set_title(f'Confusion Matrix: {model_name}', fontsize=15, fontweight='bold', pad=15)

    # Summary box, placed to the right of the axes (x=1.25 in axes coords).
    tn, fp, fn, tp = cm.ravel()
    summary = f"True Neg: {tn:,}\nFalse Pos: {fp:,}\nFalse Neg: {fn:,}\nTrue Pos: {tp:,}"
    ax.text(1.25, 0.5, summary, transform=ax.transAxes, fontsize=11,
            verticalalignment='center', fontfamily='monospace',
            bbox=dict(boxstyle='round,pad=0.5', facecolor='#f0f0f0', edgecolor='#cccccc'))

    plt.tight_layout()
    return fig
def plot_feature_importance(model, feature_names, model_name):
    """Plot a horizontal bar chart of the 15 highest-scoring features.

    Scores come from ``model.feature_importances_`` when the model has that
    attribute, otherwise from the absolute values of ``model.coef_[0]``.
    When the model has neither, the figure only carries a
    "not available" message.

    Returns:
        The matplotlib figure.
    """
    setup_plot_style()
    fig, ax = plt.subplots(figsize=(10, 8))

    # Pick the score source the model exposes.
    if hasattr(model, 'feature_importances_'):
        scores = model.feature_importances_
    elif hasattr(model, 'coef_'):
        scores = np.abs(model.coef_[0])
    else:
        ax.text(0.5, 0.5, 'Feature importance not available',
                ha='center', va='center', fontsize=14)
        ax.set_facecolor('white')
        return fig

    # Ascending sort + tail keeps the 15 largest, biggest last, so the
    # biggest bar ends up at the top of the horizontal chart.
    table = pd.DataFrame({'Feature': feature_names, 'Importance': scores})
    ranked = table.sort_values('Importance', ascending=True)
    top = ranked.tail(15)

    # Light-to-dark blue gradient, one shade per bar.
    shades = plt.cm.Blues(np.linspace(0.4, 0.85, len(top)))
    bars = ax.barh(top['Feature'], top['Importance'],
                   color=shades, edgecolor='#333333', linewidth=0.5)

    # Print each score just past the end of its bar.
    for bar, score in zip(bars, top['Importance']):
        label_x = bar.get_width() + 0.001
        label_y = bar.get_y() + bar.get_height()/2
        ax.text(label_x, label_y, f'{score:.3f}', va='center', fontsize=9)

    ax.set_xlabel('Importance Score', fontweight='bold')
    ax.set_title(f'Top 15 Feature Importances: {model_name}',
                 fontsize=15, fontweight='bold', pad=15)
    ax.grid(True, alpha=0.4, axis='x')

    plt.tight_layout()
    return fig
def plot_threshold_analysis(y_test, y_proba, model_name):
    """Plot precision, recall and F1 against the classification threshold.

    Thresholds from 0.05 up to (about) 0.95 in steps of 0.01 are evaluated.
    The threshold with the highest F1 is marked with a dashed line and a
    dot; the default 0.5 threshold is marked with a dotted line.

    NOTE(review): this grid is wider than the 0.1-0.9 grid used by
    find_optimal_threshold in this file, so the optimum marked here can
    differ from the one reported in the tables — confirm which is intended.

    Returns:
        The matplotlib figure.
    """
    setup_plot_style()

    cut_points = np.arange(0.05, 0.95, 0.01)

    # One (precision, recall, f1) triple per candidate threshold.
    triples = [
        (
            precision_score(y_test, flagged, zero_division=0),
            recall_score(y_test, flagged, zero_division=0),
            f1_score(y_test, flagged, zero_division=0),
        )
        for flagged in ((y_proba >= cut).astype(int) for cut in cut_points)
    ]
    precisions, recalls, f1_scores = (list(column) for column in zip(*triples))

    peak = np.argmax(f1_scores)
    best_cut = cut_points[peak]

    fig, ax = plt.subplots(figsize=(10, 6))
    curves = (
        (precisions, 'primary', 'Precision'),
        (recalls, 'success', 'Recall'),
        (f1_scores, 'danger', 'F1 Score'),
    )
    for series, color_key, label in curves:
        ax.plot(cut_points, series, color=COLORS[color_key], linewidth=2.5, label=label)

    ax.axvline(x=best_cut, color=COLORS['warning'], linestyle='--', linewidth=2,
               label=f'Optimal = {best_cut:.2f}')
    ax.axvline(x=0.5, color='#888888', linestyle=':', linewidth=1.5, label='Default (0.5)')

    # Dot on the F1 curve at the best threshold.
    ax.scatter([best_cut], [f1_scores[peak]], color=COLORS['warning'], s=100, zorder=5)

    ax.set_title(f'Threshold Analysis: {model_name}', fontsize=15, fontweight='bold', pad=15)
    ax.set_xlabel('Classification Threshold', fontweight='bold')
    ax.set_ylabel('Score', fontweight='bold')
    ax.legend(loc='center right', fontsize=11, framealpha=0.95)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.4)

    plt.tight_layout()
    return fig
def _draw_class_pie(ax, df, title, colors, explode):
    """Draw one legitimate-vs-fraud pie with a count legend on ``ax``."""
    ax.set_title(title, fontsize=14, fontweight='bold', pad=10)

    total = len(df)
    if total == 0:
        # Nothing to divide by or draw; show a placeholder instead of failing.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center',
                fontsize=14, transform=ax.transAxes)
        ax.set_xticks([])
        ax.set_yticks([])
        return

    fraud = df['fraud'].sum()
    legit = total - fraud
    legit_pct = legit / total * 100
    fraud_pct = fraud / total * 100

    wedges, _texts, autotexts = ax.pie(
        [legit, fraud],
        explode=explode,
        colors=colors,
        autopct='%1.1f%%',
        startangle=90,
        shadow=False,
        wedgeprops={'edgecolor': 'white', 'linewidth': 2},
    )

    # Style the percentage text drawn inside the wedges.
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(14)
        autotext.set_fontweight('bold')

    # Legend with absolute counts, centered below the pie.
    ax.legend(
        wedges,
        [f'Legitimate: {legit:,} ({legit_pct:.1f}%)',
         f'Fraud: {fraud:,} ({fraud_pct:.1f}%)'],
        loc='lower center',
        bbox_to_anchor=(0.5, -0.15),
        fontsize=11,
        framealpha=0.95,
    )


def plot_class_distribution(train_df, test_df):
    """Plot side-by-side pies of the class balance in train and test data.

    Args:
        train_df: Training frame with a ``fraud`` column; its sum is used as
            the fraud count.
        test_df: Test frame with the same column.

    Returns:
        The matplotlib figure. An empty frame renders a "No data"
        placeholder in its panel.
    """
    setup_plot_style()
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    colors = [COLORS['success'], COLORS['danger']]
    explode = (0, 0.08)  # pull the fraud wedge slightly out of the pie

    _draw_class_pie(axes[0], train_df, 'Training Data Distribution', colors, explode)
    _draw_class_pie(axes[1], test_df, 'Test Data Distribution', colors, explode)

    fig.suptitle('Class Imbalance in Fraud Detection Dataset',
                 fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig
def plot_model_comparison(all_metrics):
    """Draw a grouped bar chart comparing every model on every metric.

    Args:
        all_metrics: Dict mapping model name to a dict that holds the keys
            ``'Accuracy'``, ``'Precision'``, ``'Recall'``, ``'F1 Score'``
            and ``'ROC AUC'``.

    Returns:
        The matplotlib figure. Any number of models is supported: bar width
        and tick positions scale with the model count, and colors repeat
        once the palette is exhausted. With four models the layout matches
        the original fixed 0.2-wide bars.
    """
    setup_plot_style()
    fig, ax = plt.subplots(figsize=(12, 6))

    metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']
    models_list = list(all_metrics)
    n_models = len(models_list)

    x = np.arange(len(metrics))
    # Each metric group spans 0.8 x-units in total, shared by all models.
    width = 0.8 / max(n_models, 1)
    palette = [COLORS[key] for key in
               ('primary', 'success', 'danger', 'purple', 'warning', 'cyan')]

    for i, model in enumerate(models_list):
        values = [all_metrics[model][m] for m in metrics]
        bars = ax.bar(x + i * width, values, width, label=model,
                      color=palette[i % len(palette)],
                      edgecolor='white', linewidth=0.5)
        # Add value labels just above each bar.
        for bar, v in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                    f'{v:.2f}', ha='center', va='bottom', fontsize=9, fontweight='bold')

    ax.set_ylabel('Score', fontweight='bold')
    ax.set_title('Model Performance Comparison', fontsize=15, fontweight='bold', pad=15)

    # Center each tick under its group of bars.
    group_center = width * (n_models - 1) / 2 if n_models else 0.0
    ax.set_xticks(x + group_center)
    ax.set_xticklabels(metrics, fontweight='bold')

    if n_models:
        ax.legend(loc='upper right', fontsize=10, framealpha=0.95)
    ax.set_ylim([0, 1.15])  # headroom for the value labels
    ax.grid(True, alpha=0.4, axis='y')

    plt.tight_layout()
    return fig
# ============================================================================
# LOAD DATA AND TRAIN MODELS
# ============================================================================
# Everything below runs once at import: load the CSVs, balance the training
# set, then fit and score every model so the UI callbacks only do lookups.
print("Loading data...")
X_train, X_test, y_train, y_test, train_df, test_df = load_and_prepare_data()

print("Applying SMOTE to handle class imbalance...")
X_train_balanced, y_train_balanced = apply_smote(X_train, y_train)

print("Training models...")
models = get_models()

# Per-model results, all keyed by the model's display name.
trained_models, all_metrics = {}, {}
all_predictions, all_probabilities = {}, {}

for name, model in models.items():
    print(f" Training {name}...")
    # Fit on the resampled data; evaluate on the untouched test split.
    trained_models[name] = train_model(model, X_train_balanced, y_train_balanced)
    y_pred, y_proba = evaluate_model(trained_models[name], X_test, y_test)
    all_predictions[name], all_probabilities[name] = y_pred, y_proba
    all_metrics[name] = get_metrics(y_test, y_pred, y_proba)

print("Models trained successfully!")
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def get_data_overview():
    """Build the Markdown summary for the "Data Overview" tab.

    Reads the module-level ``train_df``, ``test_df``, ``X_train`` and
    ``X_train_balanced`` and reports sample counts, class shares, the
    feature count and the training-set size after SMOTE.

    Returns:
        A Markdown string that starts and ends with a newline.
    """
    def split_section(heading, frame):
        # Three bullet lines describing one data split.
        labels = frame['fraud']
        fraud_share = labels.mean()
        return [
            f"### {heading}",
            f"- **Total Samples:** {len(frame):,}",
            f"- **Fraud Cases:** {labels.sum():,} ({fraud_share*100:.2f}%)",
            f"- **Legitimate Cases:** {(labels==0).sum():,} ({(1-fraud_share)*100:.2f}%)",
        ]

    lines = [
        "",
        "## Dataset Overview",
        *split_section("Training Data", train_df),
        *split_section("Test Data", test_df),
        "### Features",
        f"- **Number of Features:** {X_train.shape[1]}",
        "- **Feature Types:** All numeric (pre-processed)",
        "### Class Imbalance Handling",
        "- Applied **SMOTE** (Synthetic Minority Over-sampling Technique)",
        f"- Training samples after SMOTE: {len(X_train_balanced):,}",
        "",
    ]
    return "\n".join(lines)
def update_model_display(model_name):
    """Build the metrics table and classification report for one model.

    Looks up the precomputed metrics, predictions and probabilities for
    ``model_name`` and evaluates them against the module-level ``y_test``.

    Returns:
        ``(metrics_text, report_text)``: a Markdown metrics table with the
        F1-optimal threshold, and the classification report wrapped in a
        fenced code block.
    """
    scores = all_metrics[model_name]
    y_pred = all_predictions[model_name]
    y_proba = all_probabilities[model_name]
    best_thresh, best_f1, _, _ = find_optimal_threshold(y_test, y_proba)

    # One table row per metric; the row label is the metric's dict key.
    metric_rows = [
        f"| **{key}** | {scores[key]:.4f} |"
        for key in ('Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC')
    ]
    lines = [
        "",
        f"## {model_name} Performance",
        "| Metric | Score |",
        "|--------|-------|",
        *metric_rows,
        "### Threshold Optimization",
        "- **Default Threshold:** 0.50",
        f"- **Optimal Threshold:** {best_thresh:.2f}",
        f"- **F1 at Optimal:** {best_f1:.4f}",
        "",
    ]
    metrics_text = "\n".join(lines)

    report = classification_report(y_test, y_pred, target_names=['Legitimate', 'Fraud'])
    report_text = "```\n" + report + "\n```"
    return metrics_text, report_text
def get_selected_plot(model_name, plot_type):
    """Return the figure for the chosen model and visualization.

    Returns:
        The matplotlib figure, or ``None`` when ``plot_type`` is not one of
        the five known visualization names.
    """
    y_proba = all_probabilities[model_name]
    y_pred = all_predictions[model_name]

    # (label, builder) pairs; builders are lazy so only the selected plot
    # is actually rendered.
    renderers = (
        ("Precision-Recall Curve",
         lambda: plot_precision_recall_curve(y_test, y_proba, model_name)),
        ("ROC Curve",
         lambda: plot_roc_curve(y_test, y_proba, model_name)),
        ("Confusion Matrix",
         lambda: plot_confusion_matrix(y_test, y_pred, model_name)),
        ("Feature Importance",
         lambda: plot_feature_importance(trained_models[model_name], X_train.columns, model_name)),
        ("Threshold Analysis",
         lambda: plot_threshold_analysis(y_test, y_proba, model_name)),
    )
    for label, render in renderers:
        if plot_type == label:
            return render()
    return None
def get_comparison_results():
    """Assemble the contents of the "Compare Models" tab.

    Returns:
        ``(table_markdown, summary_markdown, figure)``: the full metrics
        table (models as rows, rounded to 4 decimals), a table naming the
        top-scoring model for each metric, and the comparison bar chart.
    """
    table = pd.DataFrame(all_metrics).T.round(4)
    winners = table.idxmax()  # best model name per metric column

    header = "## Best Model by Metric\n\n| Metric | Best Model | Score |\n|--------|------------|-------|\n"
    rows = [
        f"| {metric} | **{winners[metric]}** | {table.loc[winners[metric], metric]:.4f} |\n"
        for metric in table.columns
    ]
    summary = header + "".join(rows)

    return table.to_markdown(), summary, plot_model_comparison(all_metrics)
def update_threshold_plot(model_name):
    """Return the threshold-analysis figure for the selected model.

    Uses the model's precomputed test-set probabilities.
    """
    scores = all_probabilities[model_name]
    return plot_threshold_analysis(y_test, scores, model_name)
# Build UI
# Static Markdown for the page. Kept at module level so the layout code
# below stays readable. The emoji here and in the tab labels are written as
# real UTF-8 characters (they were previously mis-encoded).
_HEADER_MD = """
# 🚗 Auto Insurance Claims Fraud Detection
Machine learning models for detecting fraudulent auto insurance claims.
**Models:** XGBoost | LightGBM | Random Forest | Logistic Regression
"""

_THRESHOLD_INTRO_MD = """
## Finding the Optimal Threshold
The default 0.5 threshold often isn't optimal for imbalanced data.
We balance **Recall** (catching frauds) vs **Precision** (avoiding false alarms).
"""

_ABOUT_MD = """
## About This Project
### Business Context
Auto insurance fraud costs billions annually. This tool flags potentially fraudulent claims.
### Models
- **XGBoost:** Gradient boosting, excellent for tabular data
- **LightGBM:** Fast, memory-efficient gradient boosting
- **Random Forest:** Robust ensemble method
- **Logistic Regression:** Interpretable baseline
### Key Metrics
- **Precision:** Of flagged claims, how many are actually fraud?
- **Recall:** Of actual frauds, how many did we catch?
- **F1 Score:** Balance of precision and recall
"""

with gr.Blocks(title="Auto Insurance Fraud Detection", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MD)

    with gr.Tabs():
        # Tab 1: Data Overview
        with gr.TabItem("📊 Data Overview"):
            gr.Markdown(get_data_overview())
            gr.Plot(value=plot_class_distribution(train_df, test_df))

        # Tab 2: Model Evaluation
        with gr.TabItem("🎯 Model Evaluation"):
            with gr.Row():
                model_selector = gr.Dropdown(
                    choices=list(models.keys()),
                    value="XGBoost",
                    label="Select Model"
                )
                plot_selector = gr.Dropdown(
                    choices=["Precision-Recall Curve", "ROC Curve", "Confusion Matrix",
                             "Feature Importance", "Threshold Analysis"],
                    value="Precision-Recall Curve",
                    label="Select Visualization"
                )
            with gr.Row():
                with gr.Column(scale=1):
                    metrics_display = gr.Markdown()
                    report_display = gr.Markdown()
                with gr.Column(scale=2):
                    plot_display = gr.Plot()

            def update_all(model_name, plot_type):
                """Refresh the metrics, report and plot for the current selection."""
                metrics, report = update_model_display(model_name)
                plot = get_selected_plot(model_name, plot_type)
                return metrics, report, plot

            # Same refresh on either dropdown change and on initial page load.
            for trigger in (model_selector.change, plot_selector.change, demo.load):
                trigger(fn=update_all, inputs=[model_selector, plot_selector],
                        outputs=[metrics_display, report_display, plot_display])

        # Tab 3: Compare Models
        with gr.TabItem("📈 Compare Models"):
            comparison_table, comparison_summary, comparison_plot = get_comparison_results()
            gr.Markdown("## All Models Performance Comparison")
            gr.Markdown(comparison_summary)
            gr.Markdown(comparison_table)
            gr.Plot(value=comparison_plot)

        # Tab 4: Threshold
        with gr.TabItem("⚖️ Threshold Optimization"):
            gr.Markdown(_THRESHOLD_INTRO_MD)
            thresh_model = gr.Dropdown(choices=list(models.keys()), value="XGBoost",
                                       label="Select Model")
            thresh_plot = gr.Plot()
            for trigger in (thresh_model.change, demo.load):
                trigger(fn=update_threshold_plot, inputs=[thresh_model], outputs=[thresh_plot])

            # Thresholds table: one row per model with its F1-optimal threshold.
            threshold_rows = []
            for name in models:
                opt_t, opt_f1, _, _ = find_optimal_threshold(y_test, all_probabilities[name])
                threshold_rows.append(f"| {name} | {opt_t:.2f} | {opt_f1:.4f} |\n")
            thresh_summary = (
                "### Optimal Thresholds\n\n| Model | Threshold | F1 Score |\n|-------|-----------|----------|\n"
                + "".join(threshold_rows)
            )
            gr.Markdown(thresh_summary)

        # Tab 5: About
        with gr.TabItem("ℹ️ About"):
            gr.Markdown(_ABOUT_MD)

if __name__ == "__main__":
    demo.launch()