Spaces:

0xnu
/

fraud-detection

Sleeping

App Files Files Community

fraud-detection / app.py

0xnu

Update app.py

99159af verified 6 months ago

raw

history blame contribute delete

18.6 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import joblib
	from model_wrapper import FraudDetectionModel
	from preprocessor import FraudDataPreprocessor
	from feature_utils import fill_missing_features
	import os

	# Shared model wrapper used by both prediction handlers defined below
	fraud_model = FraudDetectionModel()

	# Attempt to load the trained artifacts; `model_loaded` records the outcome
	try:
	    import sys

	    # Expose the preprocessor class on __main__ before loading
	    # (presumably the saved preprocessor references it there — verify against training script)
	    setattr(sys.modules['__main__'], 'FraudDataPreprocessor', FraudDataPreprocessor)

	    # Artifact file names produced by the XGBoost training run
	    model_path = "fraud_detection_model_xgboost_20250727_145448.joblib"
	    preprocessor_path = "preprocessor_20250727_145448.joblib"
	    metadata_path = "model_metadata_20250727_145448.joblib"

	    # Model and preprocessor are mandatory; metadata is optional
	    required_present = all(
	        os.path.exists(artifact) for artifact in (model_path, preprocessor_path)
	    )

	    if not required_present:
	        model_loaded = False
	        print("❌ Model files not found. Please upload the following files:")
	        print("- fraud_detection_model_xgboost_20250727_145448.joblib")
	        print("- preprocessor_20250727_145448.joblib")
	        print("- model_metadata_20250727_145448.joblib")
	    else:
	        if not os.path.exists(metadata_path):
	            fraud_model.load_model(model_path, preprocessor_path)
	        else:
	            fraud_model.load_model(model_path, preprocessor_path, metadata_path)
	        model_loaded = True
	        print("✅ Model loaded successfully!")
	except Exception as e:
	    # Any failure leaves the app running with predictions disabled
	    model_loaded = False
	    print(f"❌ Error loading model: {e}")

	def predict_single_transaction(
	    transaction_id,
	    transaction_dt,
	    transaction_amt,
	    product_cd,
	    card1,
	    card2,
	    card3,
	    card4,
	    card5,
	    card6,
	    addr1,
	    addr2,
	    p_emaildomain
	):
	    """Score one transaction and format the result for the four output boxes.

	    Args:
	        transaction_id: Transaction identifier; 123456 is used when missing.
	        transaction_dt: Transaction time in seconds; 18403200 is used when missing.
	        transaction_amt: Transaction amount (required).
	        product_cd: Product code, passed through unchanged.
	        card1: Card field cast to int, or None when missing.
	        card2: Card field cast to float, or None when missing.
	        card3: Card field cast to float, or None when missing.
	        card4: Card type, passed through unchanged.
	        card5: Card field cast to float, or None when missing.
	        card6: Card category, passed through unchanged.
	        addr1: Address field cast to float, or None when missing.
	        addr2: Address field cast to float, or None when missing.
	        p_emaildomain: Email domain; also sent as ``R_emaildomain``.

	    Returns:
	        A 4-tuple ``(risk_indicator, probability, risk_level, recommendation)``.
	        ``probability`` is formatted to four decimals; ``risk_level`` and
	        ``recommendation`` are passed through from the model result. On any
	        failure the first element is a "❌ ..." message and the other three
	        are empty strings.
	    """

	    def _as_number(value, cast, default=None):
	        """Cast *value* with *cast*; only None or a blank string counts as missing."""
	        if value is None or (isinstance(value, str) and not value.strip()):
	            return default
	        return cast(value)

	    if not model_loaded:
	        return "❌ Model not loaded. Please contact administrator.", "", "", ""

	    try:
	        amount = _as_number(transaction_amt, float)
	        if amount is None:
	            # Fail with a readable message instead of a raw float() TypeError
	            return "❌ Error: Transaction amount is required.", "", "", ""

	        # Prepare transaction data exactly as API expects.
	        # Explicit missing-value checks keep legitimate zeros (e.g. addr2 == 0)
	        # from being replaced by None / the defaults.
	        transaction_data = {
	            'TransactionID': _as_number(transaction_id, int, 123456),
	            'TransactionDT': _as_number(transaction_dt, int, 18403200),
	            'TransactionAmt': amount,
	            'ProductCD': product_cd,
	            'card1': _as_number(card1, int),
	            'card2': _as_number(card2, float),
	            'card3': _as_number(card3, float),
	            'card4': card4,
	            'card5': _as_number(card5, float),
	            'card6': card6,
	            'addr1': _as_number(addr1, float),
	            'addr2': _as_number(addr2, float),
	            'P_emaildomain': p_emaildomain,
	            'R_emaildomain': p_emaildomain  # Often same as P_emaildomain
	        }

	        # Fill missing features with defaults
	        complete_data = fill_missing_features(transaction_data)

	        # Make prediction
	        result = fraud_model.predict_single_transaction(complete_data)

	        if 'error' in result:
	            return f"❌ {result['error']}", "", "", ""

	        # Format results
	        probability = result['fraud_probability']
	        risk_level = result['risk_level']
	        recommendation = result['recommendation']

	        # Risk banner thresholds mirror the legend shown in the UI header
	        if probability >= 0.8:
	            risk_indicator = f"🔴 HIGH RISK ({probability:.1%})"
	        elif probability >= 0.5:
	            risk_indicator = f"🟡 MEDIUM RISK ({probability:.1%})"
	        elif probability >= 0.2:
	            risk_indicator = f"🟠 LOW RISK ({probability:.1%})"
	        else:
	            risk_indicator = f"🟢 VERY LOW RISK ({probability:.1%})"

	        return risk_indicator, f"{probability:.4f}", risk_level, recommendation

	    except Exception as e:
	        # UI boundary: report the failure in the first output box instead of crashing
	        return f"❌ Error: {str(e)}", "", "", ""

	def predict_batch_from_csv(file):
	    """Score every row of an uploaded CSV and write the results to a new CSV.

	    Args:
	        file: Upload from the file component — either an object whose ``name``
	            attribute holds the path, or the path itself. ``None`` when
	            nothing was uploaded.

	    Returns:
	        ``(summary, output_path)``. ``summary`` is a human-readable report (or
	        a "❌ ..." message); ``output_path`` is the results CSV, or ``None``
	        when no results file was produced.
	    """
	    import tempfile  # local import: only needed for the per-request output directory

	    if not model_loaded:
	        return "❌ Model not loaded. Please contact administrator.", None

	    if file is None:
	        return "❌ Please upload a CSV file.", None

	    def _failed_row(row, message):
	        """Return a copy of *row* carrying the error placeholders and *message*."""
	        failed = row.copy()
	        failed['fraud_probability'] = None
	        failed['risk_level'] = 'Error'
	        failed['recommendation'] = message
	        failed['is_suspicious'] = False
	        return failed

	    try:
	        # Accept both a file-like upload (path in .name) and a plain path string
	        csv_path = getattr(file, "name", file)
	        df = pd.read_csv(csv_path)

	        # Validate required columns
	        required_cols = ['TransactionAmt']
	        missing_cols = [col for col in required_cols if col not in df.columns]

	        if missing_cols:
	            return f"❌ Missing required columns: {missing_cols}. Please ensure your CSV has at least 'TransactionAmt' column.", None

	        # A header-only file would otherwise fail later with an opaque KeyError
	        if df.empty:
	            return "❌ The uploaded CSV contains no transactions.", None

	        # Add default TransactionID if not present
	        if 'TransactionID' not in df.columns:
	            df['TransactionID'] = range(1, len(df) + 1)

	        # Process each row and make predictions
	        results = []

	        for idx, row in df.iterrows():
	            try:
	                # Fill missing features for this row
	                complete_data = fill_missing_features(row.to_dict())

	                # Make prediction
	                result = fraud_model.predict_single_transaction(complete_data)

	                if 'error' in result:
	                    row_result = _failed_row(row, result.get('error', 'Prediction failed'))
	                else:
	                    # Add results to original row data
	                    row_result = row.copy()
	                    row_result['fraud_probability'] = result['fraud_probability']
	                    row_result['risk_level'] = result['risk_level']
	                    row_result['recommendation'] = result['recommendation']
	                    row_result['is_suspicious'] = result['is_suspicious']

	            except Exception as e:
	                # One bad row must not abort the whole batch
	                row_result = _failed_row(row, f'Processing error: {str(e)}')

	            results.append(row_result)

	        # Create results DataFrame
	        results_df = pd.DataFrame(results)

	        # Write to a per-request directory so concurrent batches cannot
	        # overwrite (or download) each other's results; the file name is
	        # kept so the download keeps its familiar name.
	        output_dir = tempfile.mkdtemp(prefix="fraud_batch_")
	        output_path = os.path.join(output_dir, "fraud_predictions_batch.csv")
	        results_df.to_csv(output_path, index=False)

	        # Create summary
	        valid_predictions = results_df[results_df['fraud_probability'].notna()]
	        total_transactions = len(results_df)
	        valid_count = len(valid_predictions)
	        output_name = os.path.basename(output_path)

	        if valid_count > 0:
	            probs = valid_predictions['fraud_probability']
	            high_risk = int((probs >= 0.8).sum())
	            medium_risk = int(((probs >= 0.5) & (probs < 0.8)).sum())
	            low_risk = int(((probs >= 0.2) & (probs < 0.5)).sum())
	            very_low_risk = int((probs < 0.2).sum())

	            summary = f"""
	📊 Batch Prediction Summary

	Total Transactions: {total_transactions}
	Successfully Processed: {valid_count}
	Errors: {total_transactions - valid_count}

	Risk Distribution:
	🔴 High Risk: {high_risk} ({high_risk/valid_count:.1%})
	🟡 Medium Risk: {medium_risk} ({medium_risk/valid_count:.1%})
	🟠 Low Risk: {low_risk} ({low_risk/valid_count:.1%})
	🟢 Very Low Risk: {very_low_risk} ({very_low_risk/valid_count:.1%})

	Results saved to: {output_name}
	"""
	        else:
	            summary = f"""
	❌ Batch Processing Failed

	Total Transactions: {total_transactions}
	Successfully Processed: 0
	All transactions encountered errors.

	Please check your CSV format and try again.
	"""

	        # The per-row error details are still downloadable when every row failed
	        return summary, output_path

	    except Exception as e:
	        # UI boundary: report the failure in the summary box instead of crashing
	        return f"❌ Error processing CSV: {str(e)}", None

	# Assemble the Gradio UI: an intro banner followed by four tabs
	# (single prediction, batch CSV, sample data, model information).
	with gr.Blocks(theme=gr.themes.Soft(), title="Fraud Detection System") as app:

	    # Banner with the risk-level legend
	    intro_md = """
	# 🔒 Credit Card Fraud Detection System

	This system uses XGBoost machine learning to assess the risk of credit card transactions being fraudulent.
	Enter transaction details for single prediction or upload CSV for batch processing.

	Risk Levels:
	- 🔴 High Risk (≥80%): Block transaction immediately
	- 🟡 Medium Risk (50-79%): Manual review required
	- 🟠 Low Risk (20-49%): Monitor transaction
	- 🟢 Very Low Risk (<20%): Process normally
	"""
	    gr.Markdown(intro_md)

	    with gr.Tabs():

	        # ---- Tab 1: score a single transaction entered by hand ----
	        with gr.TabItem("🔍 Single Transaction"):
	            gr.Markdown("""
	### Single Transaction Fraud Detection
	Enter the transaction details below for instant fraud risk assessment.
	""")

	            # Options offered by the three dropdowns
	            product_choices = ["W", "C", "S", "R", "H"]
	            card_type_choices = ["visa", "mastercard", "american express", "discover"]
	            card_category_choices = ["credit", "debit"]

	            with gr.Row():
	                # Left column: transaction basics and the first card fields
	                with gr.Column():
	                    gr.Markdown("### 📝 Transaction Information")
	                    transaction_id = gr.Number(value=123456, precision=0, label="Transaction ID")
	                    transaction_dt = gr.Number(value=18403200, precision=0, label="Transaction DateTime (seconds)")
	                    transaction_amt = gr.Number(value=150.00, label="Transaction Amount ($)")
	                    product_cd = gr.Dropdown(
	                        label="Product Code",
	                        choices=product_choices,
	                        value="W"
	                    )

	                    gr.Markdown("### 💳 Card Information")
	                    card1 = gr.Number(value=4532015112830366, precision=0, label="Card 1")
	                    card2 = gr.Number(value=404.0, label="Card 2")
	                    card3 = gr.Number(value=150.0, label="Card 3")

	                # Right column: remaining card fields, address and email
	                with gr.Column():
	                    gr.Markdown("### 💳 Card Details")
	                    card4 = gr.Dropdown(
	                        label="Card Type",
	                        choices=card_type_choices,
	                        value="visa"
	                    )
	                    card5 = gr.Number(value=142.0, label="Card 5")
	                    card6 = gr.Dropdown(
	                        label="Card Category",
	                        choices=card_category_choices,
	                        value="credit"
	                    )

	                    gr.Markdown("### 📍 Address Information")
	                    addr1 = gr.Number(value=315.0, label="Address 1")
	                    addr2 = gr.Number(value=87.0, label="Address 2")

	                    gr.Markdown("### 📧 Email Information")
	                    p_emaildomain = gr.Textbox(value="gmail.com", label="Email Domain")

	            predict_btn = gr.Button("🔍 Analyze Transaction", size="lg", variant="primary")

	            gr.Markdown("### 📊 Prediction Results")
	            with gr.Row():
	                risk_output = gr.Textbox(lines=1, label="Risk Assessment")
	                probability_output = gr.Textbox(lines=1, label="Fraud Probability")

	            with gr.Row():
	                risk_level_output = gr.Textbox(lines=1, label="Risk Level")
	                recommendation_output = gr.Textbox(lines=2, label="Recommendation")

	            # Input order must match the handler's positional parameters
	            single_inputs = [
	                transaction_id, transaction_dt, transaction_amt, product_cd,
	                card1, card2, card3, card4, card5, card6,
	                addr1, addr2, p_emaildomain
	            ]
	            # Output order must match the handler's 4-tuple return value
	            single_outputs = [
	                risk_output, probability_output,
	                risk_level_output, recommendation_output
	            ]
	            predict_btn.click(
	                predict_single_transaction,
	                inputs=single_inputs,
	                outputs=single_outputs
	            )

	        # ---- Tab 2: score an uploaded CSV ----
	        with gr.TabItem("📁 Batch Processing"):
	            gr.Markdown("""
	### CSV Batch Processing

	Upload a CSV file containing multiple transactions for batch fraud detection.

	Required CSV Columns:
	- `TransactionAmt` (required)
	- `TransactionID` (optional - will be auto-generated)
	- `TransactionDT`, `ProductCD`, `card1-6`, `addr1-2`, `P_emaildomain` (optional - smart defaults used)

	Example CSV Format:
	```
	TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
	123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
	123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
	```
	""")

	            file_upload = gr.File(
	                elem_id="csv-upload",
	                file_types=[".csv"],
	                label="Upload CSV File"
	            )
	            batch_btn = gr.Button("🔍 Process Batch", size="lg", variant="primary")

	            gr.Markdown("### 📊 Batch Results")
	            batch_output = gr.Textbox(lines=12, label="Processing Summary")
	            download_file = gr.File(label="Download Results CSV")

	            # Handler returns (summary text, path of the results CSV)
	            batch_btn.click(
	                predict_batch_from_csv,
	                inputs=[file_upload],
	                outputs=[batch_output, download_file]
	            )

	        # ---- Tab 3: static example payloads ----
	        with gr.TabItem("📋 Sample Data"):
	            gr.Markdown("""
	### Sample Transaction Data

	Use these examples to test the system or as a template for your CSV files.
	""")

	            gr.Markdown("""
	#### Example 1: Low Risk Transaction
	```json
	{
	"TransactionID": 123456,
	"TransactionDT": 18403200,
	"TransactionAmt": 150.00,
	"ProductCD": "W",
	"card1": 4532015112830366,
	"card2": 404.0,
	"card3": 150.0,
	"card4": "visa",
	"card5": 142.0,
	"card6": "credit",
	"addr1": 315.0,
	"addr2": 87.0,
	"P_emaildomain": "gmail.com"
	}
	```

	#### Example 2: Higher Risk Transaction
	```json
	{
	"TransactionID": 123457,
	"TransactionDT": 18403300,
	"TransactionAmt": 2500.00,
	"ProductCD": "C",
	"card1": 5555555555554444,
	"card2": 555.0,
	"card3": 200.0,
	"card4": "mastercard",
	"card5": 224.0,
	"card6": "credit",
	"addr1": 420.0,
	"addr2": 95.0,
	"P_emaildomain": "yahoo.com"
	}
	```

	#### CSV Sample File
	You can copy this into a CSV file for batch testing:
	```
	TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
	123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
	123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
	123458,18403400,75.50,W,4111111111111111,300.0,75.0,visa,100.0,debit,200.0,50.0,hotmail.com
	```
	""")

	        # ---- Tab 4: model status, rendered once when the UI is built ----
	        with gr.TabItem("ℹ️ Model Information"):
	            if not (model_loaded and fraud_model.metadata):
	                # No metadata available: show only a generic status page
	                status_text = '✅ Basic Model Loaded' if model_loaded else '❌ Not Loaded'
	                gr.Markdown(f"""
	### Model Status
	Status: {status_text}

	### About This Model
	This fraud detection system uses advanced machine learning algorithms to assess transaction risk.
	The model processes transactions with the same field structure as the API endpoints.

	### Features
	- Single transaction analysis
	- Batch CSV processing
	- Real-time risk assessment
	- API-compatible field structure
	""")
	            else:
	                # Pull each displayed field once; 'N/A' covers absent keys
	                model_info = fraud_model.get_model_info()
	                info_name = model_info.get('model_name', 'XGBoost')
	                info_auc = model_info.get('auc_score', 'N/A')
	                info_trained = model_info.get('training_timestamp', 'N/A')
	                info_features = model_info.get('feature_count', 'N/A')
	                gr.Markdown(f"""
	### Model Status
	Status: ✅ {info_name} Model Loaded
	AUC Score: {info_auc}
	Training Date: {info_trained}
	Features: {info_features}

	### About This Model
	This fraud detection system uses an XGBoost classifier trained on a comprehensive dataset
	of credit card transactions. The model achieved high performance with advanced feature engineering
	and ensemble learning techniques.

	### Supported Fields
	- TransactionID: Unique transaction identifier
	- TransactionDT: Transaction datetime (seconds)
	- TransactionAmt: Transaction amount in USD
	- ProductCD: Product code (W, C, S, R, H)
	- card1-6: Card-related features
	- addr1-2: Address information
	- P_emaildomain: Primary email domain

	### Model Performance
	- Algorithm: XGBoost (Extreme Gradient Boosting)
	- AUC Score: {info_auc}
	- Features Used: {info_features} engineered features
	- Training Method: Cross-validation with stratified sampling
	- Speed: Real-time predictions (<100ms)
	""")

	# Start the server only when this file is run as a script
	if __name__ == "__main__":
	    app.launch()