""" Auto Insurance Claims Fraud Detection ===================================== A machine learning application that trains and compares 4 different models for detecting fraudulent insurance claims. Models: XGBoost, LightGBM, Random Forest, Logistic Regression """ import gradio as gr import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns import warnings warnings.filterwarnings('ignore') # ML Libraries from sklearn.model_selection import cross_val_score from sklearn.metrics import ( precision_recall_curve, roc_curve, auc, confusion_matrix, classification_report, f1_score, precision_score, recall_score, accuracy_score ) from sklearn.linear_model import LogisticRegression from sklearn.ensemble import RandomForestClassifier from xgboost import XGBClassifier from lightgbm import LGBMClassifier from imblearn.over_sampling import SMOTE # ============================================================================ # PLOT STYLE CONFIGURATION # Use white background for universal readability in both light and dark modes # ============================================================================ def setup_plot_style(): """Configure matplotlib for clean, readable plots.""" plt.rcParams.update({ 'figure.facecolor': 'white', 'axes.facecolor': 'white', 'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333', 'xtick.color': '#333333', 'ytick.color': '#333333', 'grid.color': '#cccccc', 'grid.alpha': 0.5, 'legend.facecolor': 'white', 'legend.edgecolor': '#cccccc', 'font.size': 11, 'axes.titlesize': 14, 'axes.labelsize': 12, }) setup_plot_style() # Color palette - vibrant colors that work on white background COLORS = { 'primary': '#2563eb', # Blue 'success': '#16a34a', # Green 'danger': '#dc2626', # Red 'warning': '#f59e0b', # Amber 'purple': '#9333ea', # Purple 'cyan': '#0891b2', # Cyan } # ============================================================================ # DATA LOADING AND PREPROCESSING # 
def load_and_prepare_data(train_path='train.csv', test_path='test.csv'):
    """Load the train and test datasets and split features from target.

    Args:
        train_path: CSV with training claims; must contain a 'fraud' column.
        test_path: CSV with held-out claims; must contain a 'fraud' column.
        (Defaults preserve the original hard-coded file names.)

    Returns:
        Tuple of (X_train, X_test, y_train, y_test, train_df, test_df).
    """
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)
    X_train = train_df.drop('fraud', axis=1)
    y_train = train_df['fraud']
    X_test = test_df.drop('fraud', axis=1)
    y_test = test_df['fraud']
    return X_train, X_test, y_train, y_test, train_df, test_df


def apply_smote(X_train, y_train, random_state=42):
    """Apply SMOTE oversampling to handle class imbalance.

    Args:
        X_train, y_train: original (imbalanced) training data.
        random_state: seed for reproducible synthetic minority samples
            (default 42, matching the original behavior).

    Returns:
        (X_resampled, y_resampled) with the fraud class oversampled.
    """
    smote = SMOTE(random_state=random_state)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
    return X_resampled, y_resampled


# ============================================================================
# MODEL DEFINITIONS
# ============================================================================

def get_models():
    """Define the 4 models for comparison.

    Each model compensates for class imbalance: XGBoost via
    scale_pos_weight, the others via class_weight='balanced'.

    NOTE: ``use_label_encoder`` was deprecated in XGBoost 1.3 and removed
    in 2.0 — passing it triggers warnings/errors on current versions, so
    it is intentionally not set anymore.
    """
    models = {
        'XGBoost': XGBClassifier(
            n_estimators=100,
            max_depth=4,
            learning_rate=0.1,
            scale_pos_weight=10,  # up-weight the rare fraud class
            random_state=42,
            eval_metric='logloss'
        ),
        'LightGBM': LGBMClassifier(
            n_estimators=100,
            max_depth=4,
            learning_rate=0.1,
            class_weight='balanced',
            random_state=42,
            verbose=-1  # silence per-iteration logging
        ),
        'Random Forest': RandomForestClassifier(
            n_estimators=100,
            max_depth=6,
            class_weight='balanced',
            random_state=42,
            n_jobs=-1
        ),
        'Logistic Regression': LogisticRegression(
            class_weight='balanced',
            max_iter=1000,
            random_state=42
        )
    }
    return models


# ============================================================================
# MODEL TRAINING AND EVALUATION
# ============================================================================

def train_model(model, X_train, y_train):
    """Fit a model on the (SMOTE-balanced) training data and return it."""
    model.fit(X_train, y_train)
    return model


def evaluate_model(model, X_test, y_test):
    """Return hard predictions and fraud-class probabilities for X_test."""
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]  # P(fraud)
    return y_pred, y_proba


def get_metrics(y_test, y_pred, y_proba):
    """Calculate evaluation metrics for one model.

    Returns:
        Dict mapping metric name -> float score. ROC AUC is computed
        from the explicit (fpr, tpr) curve — equivalent to
        sklearn's roc_auc_score, but spelled out for clarity.
    """
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, zero_division=0),
        'Recall': recall_score(y_test, y_pred, zero_division=0),
        'F1 Score': f1_score(y_test, y_pred, zero_division=0),
        'ROC AUC': auc(fpr, tpr)
    }
    return metrics


def find_optimal_threshold(y_test, y_proba):
    """Find the classification threshold that maximizes F1 score.

    Sweeps thresholds in [0.1, 0.9) at 0.01 steps.

    Returns:
        (best_threshold, best_f1, thresholds, f1_scores) — the last two
        are the full sweep, useful for plotting.
    """
    thresholds = np.arange(0.1, 0.9, 0.01)
    f1_scores = []
    for thresh in thresholds:
        y_pred_thresh = (y_proba >= thresh).astype(int)
        f1_scores.append(f1_score(y_test, y_pred_thresh, zero_division=0))
    best_idx = np.argmax(f1_scores)
    return thresholds[best_idx], f1_scores[best_idx], thresholds, f1_scores


# ============================================================================
# VISUALIZATION FUNCTIONS
# ============================================================================

def plot_precision_recall_curve(y_test, y_proba, model_name):
    """Plot the Precision-Recall curve with its AUC and a random baseline."""
    setup_plot_style()
    precision, recall, _ = precision_recall_curve(y_test, y_proba)
    pr_auc = auc(recall, precision)

    fig, ax = plt.subplots(figsize=(9, 6))
    ax.plot(recall, precision, color=COLORS['primary'], linewidth=2.5,
            label=f'{model_name} (AUC = {pr_auc:.3f})')
    ax.fill_between(recall, precision, alpha=0.2, color=COLORS['primary'])

    # Baseline: a random classifier's precision equals the fraud prevalence
    baseline = y_test.mean()
    ax.axhline(y=baseline, color=COLORS['danger'], linestyle='--',
               linewidth=2, label=f'Random Baseline = {baseline:.3f}')

    ax.set_xlabel('Recall (Fraud Detection Rate)', fontweight='bold')
    ax.set_ylabel('Precision (True Fraud Rate)', fontweight='bold')
    ax.set_title(f'Precision-Recall Curve: {model_name}',
                 fontsize=15, fontweight='bold', pad=15)
    ax.legend(loc='upper right', fontsize=11, framealpha=0.95)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.4)
    plt.tight_layout()
    return fig


def plot_roc_curve(y_test, y_proba, model_name):
    """Plot the ROC curve with its AUC and the chance diagonal."""
    setup_plot_style()
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(9, 6))
    ax.plot(fpr, tpr, color=COLORS['primary'], linewidth=2.5,
            label=f'{model_name} (AUC = {roc_auc:.3f})')
    ax.fill_between(fpr, tpr, alpha=0.2, color=COLORS['primary'])
    ax.plot([0, 1], [0, 1], color=COLORS['danger'], linestyle='--',
            linewidth=2, label='Random Classifier')

    ax.set_xlabel('False Positive Rate', fontweight='bold')
    ax.set_ylabel('True Positive Rate (Recall)', fontweight='bold')
    ax.set_title(f'ROC Curve: {model_name}',
                 fontsize=15, fontweight='bold', pad=15)
    ax.legend(loc='lower right', fontsize=11, framealpha=0.95)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.4)
    plt.tight_layout()
    return fig


def plot_confusion_matrix(y_test, y_pred, model_name):
    """Plot a confusion-matrix heatmap with a TN/FP/FN/TP summary box."""
    setup_plot_style()
    cm = confusion_matrix(y_test, y_pred)

    fig, ax = plt.subplots(figsize=(9, 7))
    # Use a colormap with good contrast
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=['Legitimate', 'Fraud'],
                yticklabels=['Legitimate', 'Fraud'],
                annot_kws={'size': 18, 'fontweight': 'bold'},
                linewidths=2, linecolor='white',
                cbar_kws={'label': 'Count', 'shrink': 0.8})
    ax.set_xlabel('Predicted Label', fontweight='bold', fontsize=12)
    ax.set_ylabel('True Label', fontweight='bold', fontsize=12)
    ax.set_title(f'Confusion Matrix: {model_name}',
                 fontsize=15, fontweight='bold', pad=15)

    # Summary box to the right of the axes
    tn, fp, fn, tp = cm.ravel()
    summary = (f"True Neg: {tn:,}\nFalse Pos: {fp:,}\n"
               f"False Neg: {fn:,}\nTrue Pos: {tp:,}")
    ax.text(1.25, 0.5, summary, transform=ax.transAxes, fontsize=11,
            verticalalignment='center', fontfamily='monospace',
            bbox=dict(boxstyle='round,pad=0.5', facecolor='#f0f0f0',
                      edgecolor='#cccccc'))
    plt.tight_layout()
    return fig


def plot_feature_importance(model, feature_names, model_name):
    """Plot the top 15 most important features.

    Uses ``feature_importances_`` for tree models and |coef| for linear
    models; shows a placeholder message if neither is available.
    """
    setup_plot_style()
    fig, ax = plt.subplots(figsize=(10, 8))

    # Get feature importances
    if hasattr(model, 'feature_importances_'):
        importances = model.feature_importances_
    elif hasattr(model, 'coef_'):
        importances = np.abs(model.coef_[0])
    else:
        ax.text(0.5, 0.5, 'Feature importance not available',
                ha='center', va='center', fontsize=14)
        ax.set_facecolor('white')
        return fig

    # Create and sort dataframe; tail(15) keeps the 15 largest
    importance_df = pd.DataFrame({
        'Feature': feature_names,
        'Importance': importances
    }).sort_values('Importance', ascending=True).tail(15)

    # Gradient blue bars
    colors = plt.cm.Blues(np.linspace(0.4, 0.85, len(importance_df)))
    bars = ax.barh(importance_df['Feature'], importance_df['Importance'],
                   color=colors, edgecolor='#333333', linewidth=0.5)

    # Add value labels just beyond each bar end
    for bar, val in zip(bars, importance_df['Importance']):
        ax.text(bar.get_width() + 0.001,
                bar.get_y() + bar.get_height() / 2,
                f'{val:.3f}', va='center', fontsize=9)

    ax.set_xlabel('Importance Score', fontweight='bold')
    ax.set_title(f'Top 15 Feature Importances: {model_name}',
                 fontsize=15, fontweight='bold', pad=15)
    ax.grid(True, alpha=0.4, axis='x')
    plt.tight_layout()
    return fig


def plot_threshold_analysis(y_test, y_proba, model_name):
    """Plot precision/recall/F1 across thresholds, marking the F1 optimum."""
    setup_plot_style()
    thresholds = np.arange(0.05, 0.95, 0.01)
    precisions, recalls, f1_scores = [], [], []
    for thresh in thresholds:
        y_pred_thresh = (y_proba >= thresh).astype(int)
        precisions.append(precision_score(y_test, y_pred_thresh, zero_division=0))
        recalls.append(recall_score(y_test, y_pred_thresh, zero_division=0))
        f1_scores.append(f1_score(y_test, y_pred_thresh, zero_division=0))

    best_idx = np.argmax(f1_scores)
    best_threshold = thresholds[best_idx]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(thresholds, precisions, color=COLORS['primary'], linewidth=2.5,
            label='Precision')
    ax.plot(thresholds, recalls, color=COLORS['success'], linewidth=2.5,
            label='Recall')
    ax.plot(thresholds, f1_scores, color=COLORS['danger'], linewidth=2.5,
            label='F1 Score')
    ax.axvline(x=best_threshold, color=COLORS['warning'], linestyle='--',
               linewidth=2, label=f'Optimal = {best_threshold:.2f}')
    ax.axvline(x=0.5, color='#888888', linestyle=':', linewidth=1.5,
               label='Default (0.5)')

    # Mark optimal point
    ax.scatter([best_threshold], [f1_scores[best_idx]],
               color=COLORS['warning'], s=100, zorder=5)

    ax.set_xlabel('Classification Threshold', fontweight='bold')
    ax.set_ylabel('Score', fontweight='bold')
    ax.set_title(f'Threshold Analysis: {model_name}',
                 fontsize=15, fontweight='bold', pad=15)
    ax.legend(loc='center right', fontsize=11, framealpha=0.95)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.4)
    plt.tight_layout()
    return fig


def plot_class_distribution(train_df, test_df):
    """Plot train/test fraud-vs-legitimate pies with count legends."""
    setup_plot_style()
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    colors = [COLORS['success'], COLORS['danger']]
    explode = (0, 0.08)  # pull the fraud slice out slightly

    # Training data
    train_fraud = train_df['fraud'].sum()
    train_legit = len(train_df) - train_fraud
    train_sizes = [train_legit, train_fraud]
    train_pct = [train_legit / len(train_df) * 100,
                 train_fraud / len(train_df) * 100]

    wedges1, texts1, autotexts1 = axes[0].pie(
        train_sizes, explode=explode, colors=colors, autopct='%1.1f%%',
        startangle=90, shadow=False,
        wedgeprops={'edgecolor': 'white', 'linewidth': 2}
    )
    # Style the percentage text
    for autotext in autotexts1:
        autotext.set_color('white')
        autotext.set_fontsize(14)
        autotext.set_fontweight('bold')
    axes[0].set_title('Training Data Distribution',
                      fontsize=14, fontweight='bold', pad=10)
    # Add legend with counts
    axes[0].legend(
        wedges1,
        [f'Legitimate: {train_legit:,} ({train_pct[0]:.1f}%)',
         f'Fraud: {train_fraud:,} ({train_pct[1]:.1f}%)'],
        loc='lower center', bbox_to_anchor=(0.5, -0.15),
        fontsize=11, framealpha=0.95
    )

    # Test data
    test_fraud = test_df['fraud'].sum()
    test_legit = len(test_df) - test_fraud
    test_sizes = [test_legit, test_fraud]
    test_pct = [test_legit / len(test_df) * 100,
                test_fraud / len(test_df) * 100]

    wedges2, texts2, autotexts2 = axes[1].pie(
        test_sizes, explode=explode, colors=colors, autopct='%1.1f%%',
        startangle=90, shadow=False,
        wedgeprops={'edgecolor': 'white', 'linewidth': 2}
    )
    for autotext in autotexts2:
        autotext.set_color('white')
        autotext.set_fontsize(14)
        autotext.set_fontweight('bold')
    axes[1].set_title('Test Data Distribution',
                      fontsize=14, fontweight='bold', pad=10)
    axes[1].legend(
        wedges2,
        [f'Legitimate: {test_legit:,} ({test_pct[0]:.1f}%)',
         f'Fraud: {test_fraud:,} ({test_pct[1]:.1f}%)'],
        loc='lower center', bbox_to_anchor=(0.5, -0.15),
        fontsize=11, framealpha=0.95
    )

    fig.suptitle('Class Imbalance in Fraud Detection Dataset',
                 fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    return fig


def plot_model_comparison(all_metrics):
    """Grouped bar chart comparing every model on every metric."""
    setup_plot_style()
    fig, ax = plt.subplots(figsize=(12, 6))

    models_list = list(all_metrics.keys())
    metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']
    x = np.arange(len(metrics))
    width = 0.2
    colors = [COLORS['primary'], COLORS['success'],
              COLORS['danger'], COLORS['purple']]

    for i, model in enumerate(models_list):
        values = [all_metrics[model][m] for m in metrics]
        # Cycle colors so a 5th model would not raise IndexError
        bars = ax.bar(x + i * width, values, width, label=model,
                      color=colors[i % len(colors)],
                      edgecolor='white', linewidth=0.5)
        # Add value labels
        for bar, v in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width() / 2,
                    bar.get_height() + 0.02, f'{v:.2f}',
                    ha='center', va='bottom', fontsize=9, fontweight='bold')

    ax.set_ylabel('Score', fontweight='bold')
    ax.set_title('Model Performance Comparison',
                 fontsize=15, fontweight='bold', pad=15)
    ax.set_xticks(x + width * 1.5)
    ax.set_xticklabels(metrics, fontweight='bold')
    ax.legend(loc='upper right', fontsize=10, framealpha=0.95)
    ax.set_ylim([0, 1.15])
    ax.grid(True, alpha=0.4, axis='y')
    plt.tight_layout()
    return fig


# ============================================================================
# LOAD DATA AND TRAIN MODELS
# (runs at import so the Gradio UI below can reference the results)
# ============================================================================
print("Loading data...")
X_train, X_test, y_train, y_test, train_df, test_df = load_and_prepare_data()

print("Applying SMOTE to handle class imbalance...")
X_train_balanced, y_train_balanced = apply_smote(X_train, y_train)

print("Training models...")
models = get_models()
trained_models = {}
all_metrics = {}
all_predictions = {}
all_probabilities = {}

for name, model in models.items():
    print(f"  Training {name}...")
    trained_models[name] = train_model(model, X_train_balanced, y_train_balanced)
    y_pred, y_proba = evaluate_model(trained_models[name], X_test, y_test)
    all_predictions[name] = y_pred
    all_probabilities[name] = y_proba
    all_metrics[name] = get_metrics(y_test, y_pred, y_proba)

print("Models trained successfully!")


# ============================================================================
# GRADIO INTERFACE
# ============================================================================

def get_data_overview():
    """Return a markdown summary of the dataset and SMOTE resampling."""
    return f"""
## Dataset Overview

### Training Data
- **Total Samples:** {len(train_df):,}
- **Fraud Cases:** {train_df['fraud'].sum():,} ({train_df['fraud'].mean()*100:.2f}%)
- **Legitimate Cases:** {(train_df['fraud']==0).sum():,} ({(1-train_df['fraud'].mean())*100:.2f}%)

### Test Data
- **Total Samples:** {len(test_df):,}
- **Fraud Cases:** {test_df['fraud'].sum():,} ({test_df['fraud'].mean()*100:.2f}%)
- **Legitimate Cases:** {(test_df['fraud']==0).sum():,} ({(1-test_df['fraud'].mean())*100:.2f}%)

### Features
- **Number of Features:** {X_train.shape[1]}
- **Feature Types:** All numeric (pre-processed)

### Class Imbalance Handling
- Applied **SMOTE** (Synthetic Minority Over-sampling Technique)
- Training samples after SMOTE: {len(X_train_balanced):,}
"""


def update_model_display(model_name):
    """Build the metrics markdown and classification report for one model.

    Returns:
        (metrics_text, report_text) markdown strings for the UI.
    """
    metrics = all_metrics[model_name]
    y_pred = all_predictions[model_name]
    y_proba = all_probabilities[model_name]
    best_thresh, best_f1, _, _ = find_optimal_threshold(y_test, y_proba)

    metrics_text = f"""
## {model_name} Performance

| Metric | Score |
|--------|-------|
| **Accuracy** | {metrics['Accuracy']:.4f} |
| **Precision** | {metrics['Precision']:.4f} |
| **Recall** | {metrics['Recall']:.4f} |
| **F1 Score** | {metrics['F1 Score']:.4f} |
| **ROC AUC** | {metrics['ROC AUC']:.4f} |

### Threshold Optimization
- **Default Threshold:** 0.50
- **Optimal Threshold:** {best_thresh:.2f}
- **F1 at Optimal:** {best_f1:.4f}
"""
    report = classification_report(y_test, y_pred,
                                   target_names=['Legitimate', 'Fraud'])
    report_text = f"```\n{report}\n```"
    return metrics_text, report_text


def get_selected_plot(model_name, plot_type):
    """Generate the selected visualization for the selected model.

    Dispatch table replaces the original if/elif chain; returns None for
    an unknown plot_type (same as before).
    """
    y_proba = all_probabilities[model_name]
    y_pred = all_predictions[model_name]
    dispatch = {
        "Precision-Recall Curve":
            lambda: plot_precision_recall_curve(y_test, y_proba, model_name),
        "ROC Curve":
            lambda: plot_roc_curve(y_test, y_proba, model_name),
        "Confusion Matrix":
            lambda: plot_confusion_matrix(y_test, y_pred, model_name),
        "Feature Importance":
            lambda: plot_feature_importance(trained_models[model_name],
                                            X_train.columns, model_name),
        "Threshold Analysis":
            lambda: plot_threshold_analysis(y_test, y_proba, model_name),
    }
    maker = dispatch.get(plot_type)
    return maker() if maker is not None else None


def get_comparison_results():
    """Build the comparison table, best-model summary, and comparison plot."""
    comparison_df = pd.DataFrame(all_metrics).T.round(4)
    best_models = comparison_df.idxmax()  # best model per metric column

    summary = ("## Best Model by Metric\n\n"
               "| Metric | Best Model | Score |\n|--------|------------|-------|\n")
    for metric in comparison_df.columns:
        best = best_models[metric]
        score = comparison_df.loc[best, metric]
        summary += f"| {metric} | **{best}** | {score:.4f} |\n"

    return comparison_df.to_markdown(), summary, plot_model_comparison(all_metrics)


def update_threshold_plot(model_name):
    """Return the threshold-analysis plot for the selected model."""
    return plot_threshold_analysis(y_test, all_probabilities[model_name],
                                   model_name)


# Build UI
with gr.Blocks(title="Auto Insurance Fraud Detection",
               theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🚗 Auto Insurance Claims Fraud Detection

    Machine learning models for detecting fraudulent auto insurance claims.

    **Models:** XGBoost | LightGBM | Random Forest | Logistic Regression
    """)

    with gr.Tabs():
        # Tab 1: Data Overview
        with gr.TabItem("📊 Data Overview"):
            gr.Markdown(get_data_overview())
            gr.Plot(value=plot_class_distribution(train_df, test_df))

        # Tab 2: Model Evaluation
        with gr.TabItem("đŸŽ¯ Model Evaluation"):
            with gr.Row():
                model_selector = gr.Dropdown(
                    choices=list(models.keys()),
                    value="XGBoost",
                    label="Select Model"
                )
                plot_selector = gr.Dropdown(
                    choices=["Precision-Recall Curve", "ROC Curve",
                             "Confusion Matrix", "Feature Importance",
                             "Threshold Analysis"],
                    value="Precision-Recall Curve",
                    label="Select Visualization"
                )
            with gr.Row():
                with gr.Column(scale=1):
                    metrics_display = gr.Markdown()
                    report_display = gr.Markdown()
                with gr.Column(scale=2):
                    plot_display = gr.Plot()

            def update_all(model_name, plot_type):
                # Refresh metrics, report, and plot together so the
                # panels never show mismatched models.
                metrics, report = update_model_display(model_name)
                plot = get_selected_plot(model_name, plot_type)
                return metrics, report, plot

            model_selector.change(fn=update_all,
                                  inputs=[model_selector, plot_selector],
                                  outputs=[metrics_display, report_display,
                                           plot_display])
            plot_selector.change(fn=update_all,
                                 inputs=[model_selector, plot_selector],
                                 outputs=[metrics_display, report_display,
                                          plot_display])
            demo.load(fn=update_all,
                      inputs=[model_selector, plot_selector],
                      outputs=[metrics_display, report_display, plot_display])

        # Tab 3: Compare Models
        with gr.TabItem("📈 Compare Models"):
            comparison_table, comparison_summary, comparison_plot = \
                get_comparison_results()
            gr.Markdown("## All Models Performance Comparison")
            gr.Markdown(comparison_summary)
            gr.Markdown(comparison_table)
            gr.Plot(value=comparison_plot)

        # Tab 4: Threshold
        with gr.TabItem("âš–ī¸ Threshold Optimization"):
            gr.Markdown("""
            ## Finding the Optimal Threshold

            The default 0.5 threshold often isn't optimal for imbalanced data.
            We balance **Recall** (catching frauds) vs **Precision** (avoiding false alarms).
            """)
            thresh_model = gr.Dropdown(choices=list(models.keys()),
                                       value="XGBoost", label="Select Model")
            thresh_plot = gr.Plot()
            thresh_model.change(fn=update_threshold_plot,
                                inputs=[thresh_model], outputs=[thresh_plot])
            demo.load(fn=update_threshold_plot,
                      inputs=[thresh_model], outputs=[thresh_plot])

            # Thresholds table (computed once at UI build time)
            thresh_summary = ("### Optimal Thresholds\n\n"
                              "| Model | Threshold | F1 Score |\n"
                              "|-------|-----------|----------|\n")
            for name in models.keys():
                opt_t, opt_f1, _, _ = find_optimal_threshold(
                    y_test, all_probabilities[name])
                thresh_summary += f"| {name} | {opt_t:.2f} | {opt_f1:.4f} |\n"
            gr.Markdown(thresh_summary)

        # Tab 5: About
        with gr.TabItem("â„šī¸ About"):
            gr.Markdown("""
            ## About This Project

            ### Business Context
            Auto insurance fraud costs billions annually. This tool flags potentially fraudulent claims.

            ### Models
            - **XGBoost:** Gradient boosting, excellent for tabular data
            - **LightGBM:** Fast, memory-efficient gradient boosting
            - **Random Forest:** Robust ensemble method
            - **Logistic Regression:** Interpretable baseline

            ### Key Metrics
            - **Precision:** Of flagged claims, how many are actually fraud?
            - **Recall:** Of actual frauds, how many did we catch?
            - **F1 Score:** Balance of precision and recall
            """)


if __name__ == "__main__":
    demo.launch()