Spaces:

YashChowdhary
/

Auto_Insurance_Claims_Fraud_Detection

Sleeping

App Files Files Community

Auto_Insurance_Claims_Fraud_Detection / app.py

YashChowdhary

Update app.py

f1b0880 verified about 1 month ago

raw

history blame contribute delete

24.9 kB

	"""
	Auto Insurance Claims Fraud Detection
	=====================================
	A machine learning application that trains and compares 4 different models
	for detecting fraudulent insurance claims.

	Models: XGBoost, LightGBM, Random Forest, Logistic Regression
	"""

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import warnings
	warnings.filterwarnings('ignore')

	# ML Libraries
	from sklearn.model_selection import cross_val_score
	from sklearn.metrics import (
	precision_recall_curve, roc_curve, auc,
	confusion_matrix, classification_report,
	f1_score, precision_score, recall_score, accuracy_score
	)
	from sklearn.linear_model import LogisticRegression
	from sklearn.ensemble import RandomForestClassifier
	from xgboost import XGBClassifier
	from lightgbm import LGBMClassifier
	from imblearn.over_sampling import SMOTE


	# ============================================================================
	# PLOT STYLE CONFIGURATION
	# Use white background for universal readability in both light and dark modes
	# ============================================================================

	def setup_plot_style():
	    """Apply the app's matplotlib defaults to the global ``plt.rcParams``.

	    Sets white figure, axes and legend backgrounds, dark-grey text and
	    ticks, a light half-transparent grid, and the base font sizes. The
	    change is global: ``plt.rcParams`` is mutated and nothing is returned.
	    """
	    background = 'white'
	    ink = '#333333'       # axes edges, labels, text and ticks
	    hairline = '#cccccc'  # grid lines and legend border

	    style = {
	        'figure.facecolor': background,
	        'axes.facecolor': background,
	        'axes.edgecolor': ink,
	        'axes.labelcolor': ink,
	        'text.color': ink,
	        'xtick.color': ink,
	        'ytick.color': ink,
	        'grid.color': hairline,
	        'grid.alpha': 0.5,
	        'legend.facecolor': background,
	        'legend.edgecolor': hairline,
	        'font.size': 11,
	        'axes.titlesize': 14,
	        'axes.labelsize': 12,
	    }
	    plt.rcParams.update(style)

	# Install the plot style once at import time so figures built during module
	# start-up already use it.
	setup_plot_style()

	# Named palette shared by every plotting function in this file.
	COLORS = dict(
	    primary='#2563eb',  # Blue
	    success='#16a34a',  # Green
	    danger='#dc2626',   # Red
	    warning='#f59e0b',  # Amber
	    purple='#9333ea',   # Purple
	    cyan='#0891b2',     # Cyan
	)


	# ============================================================================
	# DATA LOADING AND PREPROCESSING
	# ============================================================================

	def load_and_prepare_data(train_path='train.csv', test_path='test.csv', target='fraud'):
	    """Load the train and test CSV files and split features from the target.

	    Args:
	        train_path: Path of the training CSV. Defaults to ``'train.csv'``.
	        test_path: Path of the test CSV. Defaults to ``'test.csv'``.
	        target: Name of the label column present in both files.

	    Returns:
	        Tuple ``(X_train, X_test, y_train, y_test, train_df, test_df)`` where
	        the ``X_*`` frames are the inputs without the target column, the
	        ``y_*`` series are the target column, and ``train_df`` / ``test_df``
	        are the unmodified frames as read from disk.

	    Raises:
	        KeyError: If ``target`` is missing from either file.
	    """
	    train_df = pd.read_csv(train_path)
	    test_df = pd.read_csv(test_path)

	    # Fail with a message that names the offending file instead of the
	    # generic "not found in axis" error raised by DataFrame.drop.
	    for label, frame in (('training', train_df), ('test', test_df)):
	        if target not in frame.columns:
	            raise KeyError(f"Target column {target!r} not found in {label} data")

	    X_train = train_df.drop(target, axis=1)
	    y_train = train_df[target]
	    X_test = test_df.drop(target, axis=1)
	    y_test = test_df[target]

	    return X_train, X_test, y_train, y_test, train_df, test_df


	def apply_smote(X_train, y_train):
	    """Oversample the training set with SMOTE (fixed ``random_state=42``).

	    Args:
	        X_train: Training features.
	        y_train: Training labels.

	    Returns:
	        Tuple ``(X_resampled, y_resampled)`` as produced by
	        ``SMOTE.fit_resample``.
	    """
	    oversampler = SMOTE(random_state=42)
	    X_balanced, y_balanced = oversampler.fit_resample(X_train, y_train)
	    return X_balanced, y_balanced


	# ============================================================================
	# MODEL DEFINITIONS
	# ============================================================================

	def get_models():
	    """Build fresh, unfitted instances of the four classifiers to compare.

	    Returns:
	        Dict mapping display name to estimator, in the order XGBoost,
	        LightGBM, Random Forest, Logistic Regression. Every estimator uses
	        ``random_state=42`` and carries its own class-imbalance setting
	        (``scale_pos_weight`` or ``class_weight='balanced'``).
	    """
	    seed = 42
	    # Settings shared by the two gradient-boosting models.
	    boosting_params = dict(
	        n_estimators=100,
	        max_depth=4,
	        learning_rate=0.1,
	        random_state=seed,
	    )

	    xgboost_clf = XGBClassifier(
	        scale_pos_weight=10,
	        use_label_encoder=False,
	        eval_metric='logloss',
	        **boosting_params,
	    )
	    lightgbm_clf = LGBMClassifier(
	        class_weight='balanced',
	        verbose=-1,
	        **boosting_params,
	    )
	    forest_clf = RandomForestClassifier(
	        n_estimators=100,
	        max_depth=6,
	        class_weight='balanced',
	        random_state=seed,
	        n_jobs=-1,
	    )
	    logistic_clf = LogisticRegression(
	        class_weight='balanced',
	        max_iter=1000,
	        random_state=seed,
	    )

	    return {
	        'XGBoost': xgboost_clf,
	        'LightGBM': lightgbm_clf,
	        'Random Forest': forest_clf,
	        'Logistic Regression': logistic_clf,
	    }


	# ============================================================================
	# MODEL TRAINING AND EVALUATION
	# ============================================================================

	def train_model(model, X_train, y_train):
	    """Fit ``model`` in place on the given data and hand the same object back.

	    Args:
	        model: Any object exposing ``fit(X, y)``.
	        X_train: Training features passed to ``fit``.
	        y_train: Training labels passed to ``fit``.

	    Returns:
	        The ``model`` argument itself (whatever ``fit`` returns is ignored).
	    """
	    estimator = model
	    estimator.fit(X_train, y_train)
	    return estimator


	def evaluate_model(model, X_test, y_test):
	    """Return hard predictions and positive-class scores for ``X_test``.

	    Args:
	        model: Fitted object exposing ``predict`` and ``predict_proba``.
	        X_test: Features to score.
	        y_test: Accepted for signature symmetry; not used here.

	    Returns:
	        Tuple ``(y_pred, y_proba)`` where ``y_proba`` is column 1 of the
	        ``predict_proba`` output.
	    """
	    predicted_labels = model.predict(X_test)
	    class_probabilities = model.predict_proba(X_test)
	    return predicted_labels, class_probabilities[:, 1]


	def get_metrics(y_test, y_pred, y_proba):
	    """Compute the five headline metrics for one model.

	    Args:
	        y_test: True labels.
	        y_pred: Predicted labels.
	        y_proba: Positive-class scores, used only for ROC AUC.

	    Returns:
	        Dict with keys ``'Accuracy'``, ``'Precision'``, ``'Recall'``,
	        ``'F1 Score'`` and ``'ROC AUC'``, in that order.
	    """
	    label_scorers = {
	        'Precision': precision_score,
	        'Recall': recall_score,
	        'F1 Score': f1_score,
	    }

	    results = {'Accuracy': accuracy_score(y_test, y_pred)}
	    for metric_name, scorer in label_scorers.items():
	        # zero_division=0 yields 0 instead of a warning when a class is never predicted.
	        results[metric_name] = scorer(y_test, y_pred, zero_division=0)

	    fpr, tpr = roc_curve(y_test, y_proba)[:2]
	    results['ROC AUC'] = auc(fpr, tpr)
	    return results


	def find_optimal_threshold(y_test, y_proba):
	    """Scan cut-offs from 0.10 up to (but excluding) 0.90 and pick the best F1.

	    Args:
	        y_test: True labels.
	        y_proba: Positive-class scores; must support ``>=`` and ``.astype``.

	    Returns:
	        Tuple ``(best_threshold, best_f1, thresholds, f1_scores)``, where
	        ``thresholds`` is the scanned grid (step 0.01) and ``f1_scores`` is
	        the list of F1 values at each grid point.
	    """
	    thresholds = np.arange(0.1, 0.9, 0.01)
	    f1_scores = [
	        f1_score(y_test, (y_proba >= cutoff).astype(int), zero_division=0)
	        for cutoff in thresholds
	    ]

	    # argmax returns the first maximum, so ties resolve to the lowest cut-off.
	    winner = np.argmax(f1_scores)
	    return thresholds[winner], f1_scores[winner], thresholds, f1_scores


	# ============================================================================
	# VISUALIZATION FUNCTIONS
	# ============================================================================

	def plot_precision_recall_curve(y_test, y_proba, model_name):
	    """Draw the precision-recall curve for one model.

	    Args:
	        y_test: True labels; their mean is drawn as the baseline precision.
	        y_proba: Positive-class scores.
	        model_name: Name used in the title and legend.

	    Returns:
	        The matplotlib ``Figure``. It is already unregistered from pyplot,
	        so repeated calls do not accumulate open figures.
	    """
	    setup_plot_style()
	    precision, recall, _ = precision_recall_curve(y_test, y_proba)
	    pr_auc = auc(recall, precision)

	    fig, ax = plt.subplots(figsize=(9, 6))

	    ax.plot(recall, precision, color=COLORS['primary'], linewidth=2.5,
	            label=f'{model_name} (AUC = {pr_auc:.3f})')
	    ax.fill_between(recall, precision, alpha=0.2, color=COLORS['primary'])

	    # Baseline: the positive-class share of the labels.
	    baseline = float(np.mean(y_test))
	    ax.axhline(y=baseline, color=COLORS['danger'], linestyle='--', linewidth=2,
	               label=f'Random Baseline = {baseline:.3f}')

	    ax.set_xlabel('Recall (Fraud Detection Rate)', fontweight='bold')
	    ax.set_ylabel('Precision (True Fraud Rate)', fontweight='bold')
	    ax.set_title(f'Precision-Recall Curve: {model_name}', fontsize=15, fontweight='bold', pad=15)
	    ax.legend(loc='upper right', fontsize=11, framealpha=0.95)
	    ax.set_xlim([0, 1])
	    ax.set_ylim([0, 1])
	    ax.grid(True, alpha=0.4)

	    fig.tight_layout()
	    # pyplot keeps every figure it creates alive until closed. This function
	    # runs on every UI callback, so release it; the Figure object returned
	    # to the caller stays usable.
	    plt.close(fig)
	    return fig


	def plot_roc_curve(y_test, y_proba, model_name):
	    """Draw the ROC curve for one model next to the chance diagonal.

	    Args:
	        y_test: True labels.
	        y_proba: Positive-class scores.
	        model_name: Name used in the title and legend.

	    Returns:
	        The matplotlib ``Figure``. It is already unregistered from pyplot,
	        so repeated calls do not accumulate open figures.
	    """
	    setup_plot_style()
	    fpr, tpr, _ = roc_curve(y_test, y_proba)
	    roc_auc = auc(fpr, tpr)

	    fig, ax = plt.subplots(figsize=(9, 6))

	    ax.plot(fpr, tpr, color=COLORS['primary'], linewidth=2.5,
	            label=f'{model_name} (AUC = {roc_auc:.3f})')
	    ax.fill_between(fpr, tpr, alpha=0.2, color=COLORS['primary'])
	    ax.plot([0, 1], [0, 1], color=COLORS['danger'], linestyle='--', linewidth=2,
	            label='Random Classifier')

	    ax.set_xlabel('False Positive Rate', fontweight='bold')
	    ax.set_ylabel('True Positive Rate (Recall)', fontweight='bold')
	    ax.set_title(f'ROC Curve: {model_name}', fontsize=15, fontweight='bold', pad=15)
	    ax.legend(loc='lower right', fontsize=11, framealpha=0.95)
	    ax.set_xlim([0, 1])
	    ax.set_ylim([0, 1])
	    ax.grid(True, alpha=0.4)

	    fig.tight_layout()
	    # Release the figure from pyplot's registry so callbacks do not leak
	    # one figure per call; the returned Figure stays usable.
	    plt.close(fig)
	    return fig


	def plot_confusion_matrix(y_test, y_pred, model_name):
	    """Draw the 2x2 confusion matrix as an annotated heatmap.

	    Args:
	        y_test: True labels, expected to be 0 (legitimate) / 1 (fraud).
	        y_pred: Predicted labels in the same encoding.
	        model_name: Name used in the title.

	    Returns:
	        The matplotlib ``Figure``. It is already unregistered from pyplot,
	        so repeated calls do not accumulate open figures.
	    """
	    setup_plot_style()
	    # Pin the label set so the matrix is always 2x2. Without it, data with
	    # a single class yields a 1x1 matrix and the 4-way unpack below fails.
	    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

	    fig, ax = plt.subplots(figsize=(9, 7))

	    class_names = ['Legitimate', 'Fraud']
	    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
	                xticklabels=class_names,
	                yticklabels=class_names,
	                annot_kws={'size': 18, 'fontweight': 'bold'},
	                linewidths=2, linecolor='white',
	                cbar_kws={'label': 'Count', 'shrink': 0.8})

	    ax.set_xlabel('Predicted Label', fontweight='bold', fontsize=12)
	    ax.set_ylabel('True Label', fontweight='bold', fontsize=12)
	    ax.set_title(f'Confusion Matrix: {model_name}', fontsize=15, fontweight='bold', pad=15)

	    # Summary box to the right of the heatmap (x=1.25 in axes coordinates).
	    tn, fp, fn, tp = cm.ravel()
	    summary = f"True Neg: {tn:,}\nFalse Pos: {fp:,}\nFalse Neg: {fn:,}\nTrue Pos: {tp:,}"
	    ax.text(1.25, 0.5, summary, transform=ax.transAxes, fontsize=11,
	            verticalalignment='center', fontfamily='monospace',
	            bbox=dict(boxstyle='round,pad=0.5', facecolor='#f0f0f0', edgecolor='#cccccc'))

	    fig.tight_layout()
	    # Release the figure from pyplot's registry so callbacks do not leak
	    # one figure per call; the returned Figure stays usable.
	    plt.close(fig)
	    return fig


	def plot_feature_importance(model, feature_names, model_name, top_n=15):
	    """Draw a horizontal bar chart of the model's most important features.

	    Importances come from ``model.feature_importances_`` when present,
	    otherwise from the absolute values of ``model.coef_[0]``. If the model
	    has neither, the figure only shows a "not available" message.

	    Args:
	        model: Fitted estimator.
	        feature_names: Names aligned with the importance vector.
	        model_name: Name used in the title.
	        top_n: How many of the highest-ranked features to show (default 15).

	    Returns:
	        The matplotlib ``Figure``. It is already unregistered from pyplot,
	        so repeated calls do not accumulate open figures.
	    """
	    setup_plot_style()
	    fig, ax = plt.subplots(figsize=(10, 8))

	    if hasattr(model, 'feature_importances_'):
	        importances = model.feature_importances_
	    elif hasattr(model, 'coef_'):
	        importances = np.abs(model.coef_[0])
	    else:
	        ax.text(0.5, 0.5, 'Feature importance not available',
	                ha='center', va='center', fontsize=14)
	        ax.set_facecolor('white')
	        plt.close(fig)
	        return fig

	    # Ascending sort + tail keeps the largest values, with the biggest bar
	    # drawn last (topmost in a barh chart).
	    importance_df = pd.DataFrame({
	        'Feature': feature_names,
	        'Importance': importances
	    }).sort_values('Importance', ascending=True).tail(top_n)

	    # Gradient blue bars
	    colors = plt.cm.Blues(np.linspace(0.4, 0.85, len(importance_df)))
	    bars = ax.barh(importance_df['Feature'], importance_df['Importance'],
	                   color=colors, edgecolor='#333333', linewidth=0.5)

	    # Offset the value labels relative to the longest bar. A fixed absolute
	    # gap disappears when importances are large numbers rather than fractions.
	    longest = float(importance_df['Importance'].max()) if len(importance_df) else 0.0
	    label_gap = 0.01 * longest
	    for bar, val in zip(bars, importance_df['Importance']):
	        ax.text(bar.get_width() + label_gap, bar.get_y() + bar.get_height() / 2,
	                f'{val:.3f}', va='center', fontsize=9)

	    ax.set_xlabel('Importance Score', fontweight='bold')
	    ax.set_title(f'Top {top_n} Feature Importances: {model_name}', fontsize=15, fontweight='bold', pad=15)
	    ax.grid(True, alpha=0.4, axis='x')

	    fig.tight_layout()
	    # Release the figure from pyplot's registry so callbacks do not leak
	    # one figure per call; the returned Figure stays usable.
	    plt.close(fig)
	    return fig


	def plot_threshold_analysis(y_test, y_proba, model_name):
	    """Plot precision, recall and F1 against the classification threshold.

	    Thresholds are scanned from 0.05 up to (but excluding) 0.95 in steps of
	    0.01. The threshold with the highest F1 on that grid is marked, along
	    with the default 0.5.

	    Args:
	        y_test: True labels.
	        y_proba: Positive-class scores; must support ``>=`` and ``.astype``.
	        model_name: Name used in the title.

	    Returns:
	        The matplotlib ``Figure``. It is already unregistered from pyplot,
	        so repeated calls do not accumulate open figures.
	    """
	    setup_plot_style()
	    thresholds = np.arange(0.05, 0.95, 0.01)
	    precisions, recalls, f1_scores = [], [], []

	    for thresh in thresholds:
	        y_pred_thresh = (y_proba >= thresh).astype(int)
	        precisions.append(precision_score(y_test, y_pred_thresh, zero_division=0))
	        recalls.append(recall_score(y_test, y_pred_thresh, zero_division=0))
	        f1_scores.append(f1_score(y_test, y_pred_thresh, zero_division=0))

	    best_idx = np.argmax(f1_scores)
	    best_threshold = thresholds[best_idx]

	    fig, ax = plt.subplots(figsize=(10, 6))

	    ax.plot(thresholds, precisions, color=COLORS['primary'], linewidth=2.5, label='Precision')
	    ax.plot(thresholds, recalls, color=COLORS['success'], linewidth=2.5, label='Recall')
	    ax.plot(thresholds, f1_scores, color=COLORS['danger'], linewidth=2.5, label='F1 Score')

	    ax.axvline(x=best_threshold, color=COLORS['warning'], linestyle='--', linewidth=2,
	               label=f'Optimal = {best_threshold:.2f}')
	    ax.axvline(x=0.5, color='#888888', linestyle=':', linewidth=1.5, label='Default (0.5)')

	    # Mark the F1 maximum on the curve.
	    ax.scatter([best_threshold], [f1_scores[best_idx]], color=COLORS['warning'], s=100, zorder=5)

	    ax.set_xlabel('Classification Threshold', fontweight='bold')
	    ax.set_ylabel('Score', fontweight='bold')
	    ax.set_title(f'Threshold Analysis: {model_name}', fontsize=15, fontweight='bold', pad=15)
	    ax.legend(loc='center right', fontsize=11, framealpha=0.95)
	    ax.set_xlim([0, 1])
	    ax.set_ylim([0, 1])
	    ax.grid(True, alpha=0.4)

	    fig.tight_layout()
	    # Release the figure from pyplot's registry so callbacks do not leak
	    # one figure per call; the returned Figure stays usable.
	    plt.close(fig)
	    return fig


	def plot_class_distribution(train_df, test_df):
	    """Draw side-by-side pie charts of legitimate vs. fraud counts.

	    Args:
	        train_df: Training frame containing a ``'fraud'`` column.
	        test_df: Test frame containing a ``'fraud'`` column.

	    Returns:
	        The matplotlib ``Figure`` with the training pie on the left and the
	        test pie on the right. It is already unregistered from pyplot.
	    """
	    setup_plot_style()
	    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

	    _draw_class_pie(axes[0], train_df, 'Training Data Distribution')
	    _draw_class_pie(axes[1], test_df, 'Test Data Distribution')

	    fig.suptitle('Class Imbalance in Fraud Detection Dataset', fontsize=16, fontweight='bold', y=1.02)
	    fig.tight_layout()
	    # Release the figure from pyplot's registry; the returned Figure stays usable.
	    plt.close(fig)
	    return fig


	def _draw_class_pie(ax, df, title):
	    """Render one legitimate/fraud pie with a count-and-percent legend on ``ax``."""
	    total = len(df)
	    fraud = df['fraud'].sum()
	    legit = total - fraud
	    legit_pct = legit / total * 100
	    fraud_pct = fraud / total * 100

	    wedges, _labels, autotexts = ax.pie(
	        [legit, fraud],
	        explode=(0, 0.08),  # pull the fraud slice out slightly
	        colors=[COLORS['success'], COLORS['danger']],
	        autopct='%1.1f%%',
	        startangle=90,
	        shadow=False,
	        wedgeprops={'edgecolor': 'white', 'linewidth': 2}
	    )

	    # White bold percentages stay readable on the saturated slice colors.
	    for autotext in autotexts:
	        autotext.set_color('white')
	        autotext.set_fontsize(14)
	        autotext.set_fontweight('bold')

	    ax.set_title(title, fontsize=14, fontweight='bold', pad=10)

	    # Legend below the pie carries the absolute counts.
	    ax.legend(
	        wedges,
	        [f'Legitimate: {legit:,} ({legit_pct:.1f}%)',
	         f'Fraud: {fraud:,} ({fraud_pct:.1f}%)'],
	        loc='lower center',
	        bbox_to_anchor=(0.5, -0.15),
	        fontsize=11,
	        framealpha=0.95
	    )


	def plot_model_comparison(all_metrics):
	    """Draw a grouped bar chart comparing every model on every metric.

	    Args:
	        all_metrics: Dict mapping model name to a dict holding the keys
	            ``'Accuracy'``, ``'Precision'``, ``'Recall'``, ``'F1 Score'``
	            and ``'ROC AUC'``.

	    Returns:
	        The matplotlib ``Figure``. It is already unregistered from pyplot,
	        so repeated calls do not accumulate open figures.
	    """
	    setup_plot_style()
	    fig, ax = plt.subplots(figsize=(12, 6))

	    models_list = list(all_metrics)
	    metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']

	    x = np.arange(len(metrics))
	    # Each metric group spans 0.8 of a unit split evenly between models;
	    # with four models this gives the original bar width of 0.2.
	    n_models = max(len(models_list), 1)
	    width = 0.8 / n_models

	    # Cycle through the palette so more than four models cannot overrun it.
	    palette = [COLORS[key] for key in
	               ('primary', 'success', 'danger', 'purple', 'warning', 'cyan')]

	    for i, model in enumerate(models_list):
	        values = [all_metrics[model][m] for m in metrics]
	        bars = ax.bar(x + i * width, values, width, label=model,
	                      color=palette[i % len(palette)],
	                      edgecolor='white', linewidth=0.5)

	        # Value label just above each bar.
	        for bar, v in zip(bars, values):
	            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
	                    f'{v:.2f}', ha='center', va='bottom', fontsize=9, fontweight='bold')

	    ax.set_ylabel('Score', fontweight='bold')
	    ax.set_title('Model Performance Comparison', fontsize=15, fontweight='bold', pad=15)
	    # Centre each tick under its group of bars.
	    ax.set_xticks(x + width * (n_models - 1) / 2)
	    ax.set_xticklabels(metrics, fontweight='bold')
	    if models_list:
	        ax.legend(loc='upper right', fontsize=10, framealpha=0.95)
	    ax.set_ylim([0, 1.15])  # headroom for the value labels
	    ax.grid(True, alpha=0.4, axis='y')

	    fig.tight_layout()
	    # Release the figure from pyplot's registry; the returned Figure stays usable.
	    plt.close(fig)
	    return fig


	# ============================================================================
	# LOAD DATA AND TRAIN MODELS
	# ============================================================================

	# Everything below runs once at import time. The module-level names it
	# defines (data splits, fitted models, cached predictions and metrics) are
	# read by the UI callbacks further down.
	print("Loading data...")
	X_train, X_test, y_train, y_test, train_df, test_df = load_and_prepare_data()

	print("Applying SMOTE to handle class imbalance...")
	X_train_balanced, y_train_balanced = apply_smote(X_train, y_train)

	print("Training models...")
	models = get_models()
	trained_models, all_metrics, all_predictions, all_probabilities = {}, {}, {}, {}

	for name, model in models.items():
	    print(f" Training {name}...")
	    # Fit on the SMOTE-balanced training data, score on the untouched test set.
	    trained_models[name] = train_model(model, X_train_balanced, y_train_balanced)
	    y_pred, y_proba = evaluate_model(trained_models[name], X_test, y_test)
	    all_predictions[name], all_probabilities[name] = y_pred, y_proba
	    all_metrics[name] = get_metrics(y_test, y_pred, y_proba)

	print("Models trained successfully!")


	# ============================================================================
	# GRADIO INTERFACE
	# ============================================================================

	def get_data_overview():
	    """Build the markdown summary of the train/test data held in module globals.

	    Reads ``train_df``, ``test_df``, ``X_train`` and ``X_train_balanced``.

	    Returns:
	        Markdown text with sample counts, class shares, feature count and
	        the training-set size after SMOTE.
	    """
	    train_labels = train_df['fraud']
	    test_labels = test_df['fraud']

	    train_fraud = train_labels.sum()
	    train_legit = (train_labels == 0).sum()
	    train_rate = train_labels.mean()

	    test_fraud = test_labels.sum()
	    test_legit = (test_labels == 0).sum()
	    test_rate = test_labels.mean()

	    return f"""
	## Dataset Overview

	### Training Data
	- Total Samples: {len(train_df):,}
	- Fraud Cases: {train_fraud:,} ({train_rate*100:.2f}%)
	- Legitimate Cases: {train_legit:,} ({(1-train_rate)*100:.2f}%)

	### Test Data
	- Total Samples: {len(test_df):,}
	- Fraud Cases: {test_fraud:,} ({test_rate*100:.2f}%)
	- Legitimate Cases: {test_legit:,} ({(1-test_rate)*100:.2f}%)

	### Features
	- Number of Features: {X_train.shape[1]}
	- Feature Types: All numeric (pre-processed)

	### Class Imbalance Handling
	- Applied SMOTE (Synthetic Minority Over-sampling Technique)
	- Training samples after SMOTE: {len(X_train_balanced):,}
	"""


	def update_model_display(model_name):
	    """Build the metrics table and classification report for one model.

	    Reads the cached results in ``all_metrics``, ``all_predictions`` and
	    ``all_probabilities`` plus the global ``y_test``.

	    Args:
	        model_name: Key of the model in the cached result dicts.

	    Returns:
	        Tuple ``(metrics_text, report_text)``: a markdown metrics table with
	        the F1-optimal threshold, and the scikit-learn classification
	        report wrapped in a fenced code block.
	    """
	    metrics = all_metrics[model_name]
	    y_pred = all_predictions[model_name]
	    y_proba = all_probabilities[model_name]

	    best_thresh, best_f1, _, _ = find_optimal_threshold(y_test, y_proba)

	    # Table cells are delimited with plain pipes. A backslash-escaped pipe
	    # is a literal character in markdown (so the table would not render)
	    # and an invalid escape sequence in a Python string.
	    metrics_text = f"""
	## {model_name} Performance

	| Metric | Score |
	|--------|-------|
	| Accuracy | {metrics['Accuracy']:.4f} |
	| Precision | {metrics['Precision']:.4f} |
	| Recall | {metrics['Recall']:.4f} |
	| F1 Score | {metrics['F1 Score']:.4f} |
	| ROC AUC | {metrics['ROC AUC']:.4f} |

	### Threshold Optimization
	- Default Threshold: 0.50
	- Optimal Threshold: {best_thresh:.2f}
	- F1 at Optimal: {best_f1:.4f}
	"""

	    report = classification_report(y_test, y_pred, target_names=['Legitimate', 'Fraud'])
	    report_text = f"```\n{report}\n```"

	    return metrics_text, report_text


	def get_selected_plot(model_name, plot_type):
	    """Render the visualization chosen in the UI for the chosen model.

	    Args:
	        model_name: Key of the model in the cached result dicts.
	        plot_type: One of the visualization labels offered by the dropdown.

	    Returns:
	        The figure produced by the matching ``plot_*`` function, or ``None``
	        when ``plot_type`` is not a known label.
	    """
	    y_proba = all_probabilities[model_name]
	    y_pred = all_predictions[model_name]

	    # Lazy dispatch table: only the selected renderer is executed.
	    renderers = {
	        "Precision-Recall Curve":
	            lambda: plot_precision_recall_curve(y_test, y_proba, model_name),
	        "ROC Curve":
	            lambda: plot_roc_curve(y_test, y_proba, model_name),
	        "Confusion Matrix":
	            lambda: plot_confusion_matrix(y_test, y_pred, model_name),
	        "Feature Importance":
	            lambda: plot_feature_importance(trained_models[model_name], X_train.columns, model_name),
	        "Threshold Analysis":
	            lambda: plot_threshold_analysis(y_test, y_proba, model_name),
	    }

	    render = renderers.get(plot_type)
	    if render is None:
	        return None
	    return render()


	def get_comparison_results():
	    """Build the cross-model comparison table, winners summary and chart.

	    Reads the module-level ``all_metrics`` dict.

	    Returns:
	        Tuple ``(table_markdown, summary_markdown, figure)``: the full
	        metrics table (models as rows, rounded to 4 decimals), a table
	        naming the best model per metric, and the grouped bar chart from
	        ``plot_model_comparison``.
	    """
	    comparison_df = pd.DataFrame(all_metrics).T.round(4)
	    best_models = comparison_df.idxmax()

	    # Rows use plain pipes: an escaped pipe is a literal character in
	    # markdown and would stop the table from rendering.
	    lines = [
	        "## Best Model by Metric",
	        "",
	        "| Metric | Best Model | Score |",
	        "|--------|------------|-------|",
	    ]
	    for metric in comparison_df.columns:
	        best = best_models[metric]
	        score = comparison_df.loc[best, metric]
	        lines.append(f"| {metric} | {best} | {score:.4f} |")
	    summary = "\n".join(lines) + "\n"

	    return comparison_df.to_markdown(), summary, plot_model_comparison(all_metrics)


	def update_threshold_plot(model_name):
	    """Return the threshold-analysis figure for ``model_name``.

	    Uses the cached scores in ``all_probabilities`` and the global ``y_test``.
	    """
	    cached_scores = all_probabilities[model_name]
	    return plot_threshold_analysis(y_test, cached_scores, model_name)


	# Build UI
	with gr.Blocks(title="Auto Insurance Fraud Detection", theme=gr.themes.Soft()) as demo:
	    # NOTE(review): the nesting below was reconstructed from a source whose
	    # indentation had been stripped; confirm the row/column layout against
	    # the deployed app.

	    gr.Markdown("""
	# 🚗 Auto Insurance Claims Fraud Detection

	Machine learning models for detecting fraudulent auto insurance claims.

	Models: XGBoost | LightGBM | Random Forest | Logistic Regression
	""")

	    with gr.Tabs():
	        # Tab 1: Data Overview (static content built once at start-up)
	        with gr.TabItem("📊 Data Overview"):
	            gr.Markdown(get_data_overview())
	            gr.Plot(value=plot_class_distribution(train_df, test_df))

	        # Tab 2: Model Evaluation
	        with gr.TabItem("🎯 Model Evaluation"):
	            with gr.Row():
	                model_selector = gr.Dropdown(
	                    choices=list(models),
	                    value="XGBoost",
	                    label="Select Model"
	                )
	                plot_selector = gr.Dropdown(
	                    choices=["Precision-Recall Curve", "ROC Curve", "Confusion Matrix",
	                             "Feature Importance", "Threshold Analysis"],
	                    value="Precision-Recall Curve",
	                    label="Select Visualization"
	                )

	            with gr.Row():
	                with gr.Column(scale=1):
	                    metrics_display = gr.Markdown()
	                    report_display = gr.Markdown()
	                with gr.Column(scale=2):
	                    plot_display = gr.Plot()

	            def update_all(model_name, plot_type):
	                """Refresh metrics, report and plot for the current selection."""
	                metrics, report = update_model_display(model_name)
	                plot = get_selected_plot(model_name, plot_type)
	                return metrics, report, plot

	            # Either dropdown changing, or the page loading, refreshes all
	            # three outputs together.
	            model_selector.change(fn=update_all, inputs=[model_selector, plot_selector],
	                                  outputs=[metrics_display, report_display, plot_display])
	            plot_selector.change(fn=update_all, inputs=[model_selector, plot_selector],
	                                 outputs=[metrics_display, report_display, plot_display])
	            demo.load(fn=update_all, inputs=[model_selector, plot_selector],
	                      outputs=[metrics_display, report_display, plot_display])

	        # Tab 3: Compare Models (static content built once at start-up)
	        with gr.TabItem("📈 Compare Models"):
	            comparison_table, comparison_summary, comparison_plot = get_comparison_results()
	            gr.Markdown("## All Models Performance Comparison")
	            gr.Markdown(comparison_summary)
	            gr.Markdown(comparison_table)
	            gr.Plot(value=comparison_plot)

	        # Tab 4: Threshold
	        with gr.TabItem("⚖️ Threshold Optimization"):
	            gr.Markdown("""
	## Finding the Optimal Threshold

	The default 0.5 threshold often isn't optimal for imbalanced data.
	We balance Recall (catching frauds) vs Precision (avoiding false alarms).
	""")

	            thresh_model = gr.Dropdown(choices=list(models), value="XGBoost",
	                                       label="Select Model")
	            thresh_plot = gr.Plot()

	            thresh_model.change(fn=update_threshold_plot, inputs=[thresh_model], outputs=[thresh_plot])
	            demo.load(fn=update_threshold_plot, inputs=[thresh_model], outputs=[thresh_plot])

	            # Thresholds table. Cells are delimited with plain pipes; an
	            # escaped pipe is a literal character in markdown and would stop
	            # the table from rendering.
	            threshold_rows = [
	                "### Optimal Thresholds",
	                "",
	                "| Model | Threshold | F1 Score |",
	                "|-------|-----------|----------|",
	            ]
	            for name in models:
	                opt_t, opt_f1, _, _ = find_optimal_threshold(y_test, all_probabilities[name])
	                threshold_rows.append(f"| {name} | {opt_t:.2f} | {opt_f1:.4f} |")
	            thresh_summary = "\n".join(threshold_rows) + "\n"
	            gr.Markdown(thresh_summary)

	        # Tab 5: About
	        with gr.TabItem("ℹ️ About"):
	            gr.Markdown("""
	## About This Project

	### Business Context
	Auto insurance fraud costs billions annually. This tool flags potentially fraudulent claims.

	### Models
	- XGBoost: Gradient boosting, excellent for tabular data
	- LightGBM: Fast, memory-efficient gradient boosting
	- Random Forest: Robust ensemble method
	- Logistic Regression: Interpretable baseline

	### Key Metrics
	- Precision: Of flagged claims, how many are actually fraud?
	- Recall: Of actual frauds, how many did we catch?
	- F1 Score: Balance of precision and recall
	""")

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()