Spaces:
Sleeping
Sleeping
import os
import tempfile

import gradio as gr
import joblib
import numpy as np
import pandas as pd

from feature_utils import fill_missing_features
from model_wrapper import FraudDetectionModel
from preprocessor import FraudDataPreprocessor
# Module-level bootstrap: build the model wrapper and try to load the trained
# artifacts once, at import time. `model_loaded` is the flag the prediction
# handlers and the UI consult afterwards.
fraud_model = FraudDetectionModel()

# Artifact file names produced by the training run.
model_path = "fraud_detection_model_xgboost_20250727_145448.joblib"
preprocessor_path = "preprocessor_20250727_145448.joblib"
metadata_path = "model_metadata_20250727_145448.joblib"

# Pessimistic default; only flipped once load_model() has returned.
model_loaded = False
try:
    import sys

    # Expose the preprocessor class on __main__ before loading.
    # NOTE(review): presumably the preprocessor artifact was pickled from a
    # script where the class lived in __main__ - confirm against training code.
    setattr(sys.modules['__main__'], 'FraudDataPreprocessor', FraudDataPreprocessor)

    if not (os.path.exists(model_path) and os.path.exists(preprocessor_path)):
        print("β Model files not found. Please upload the following files:")
        for _artifact in (model_path, preprocessor_path, metadata_path):
            print(f"- {_artifact}")
    else:
        # The metadata file is optional: pass it only when it is present.
        _load_args = [model_path, preprocessor_path]
        if os.path.exists(metadata_path):
            _load_args.append(metadata_path)
        fraud_model.load_model(*_load_args)
        model_loaded = True
        print("β Model loaded successfully!")
except Exception as e:
    # Any failure leaves the app running with the model marked unavailable.
    model_loaded = False
    print(f"β Error loading model: {e}")
def _coerce_field(value, caster, default=None):
    """Convert an optional form value with *caster*.

    Only ``None`` and the empty string count as "not provided"; a literal
    ``0`` is a real value and is converted instead of being replaced by
    *default*.
    """
    if value is None or value == "":
        return default
    return caster(value)


def predict_single_transaction(
    transaction_id,
    transaction_dt,
    transaction_amt,
    product_cd,
    card1,
    card2,
    card3,
    card4,
    card5,
    card6,
    addr1,
    addr2,
    p_emaildomain
):
    """Score one transaction and format the result for the four output boxes.

    Args:
        transaction_id: Transaction identifier; 123456 is used when blank.
        transaction_dt: Transaction time value; 18403200 is used when blank.
        transaction_amt: Transaction amount (required).
        product_cd: Product code, passed through unchanged.
        card1: Card feature converted to ``int``; ``None`` when blank.
        card2, card3, card5: Card features converted to ``float``; ``None``
            when blank.
        card4, card6: Card type / category, passed through unchanged.
        addr1, addr2: Address features converted to ``float``; ``None`` when
            blank.
        p_emaildomain: Email domain; also sent as ``R_emaildomain``.

    Returns:
        A 4-tuple ``(risk_indicator, probability, risk_level,
        recommendation)``. On any failure the first element carries the error
        message and the other three are empty strings.
    """
    if not model_loaded:
        return "β Model not loaded. Please contact administrator.", "", "", ""
    try:
        # Fail with a readable message instead of float(None)'s TypeError text.
        if transaction_amt is None or transaction_amt == "":
            raise ValueError("Transaction amount is required")

        # Field names follow the API payload. Blank fields fall back to a
        # default; an explicit 0 is kept as 0.
        transaction_data = {
            'TransactionID': _coerce_field(transaction_id, int, 123456),
            'TransactionDT': _coerce_field(transaction_dt, int, 18403200),
            'TransactionAmt': float(transaction_amt),
            'ProductCD': product_cd,
            'card1': _coerce_field(card1, int),
            'card2': _coerce_field(card2, float),
            'card3': _coerce_field(card3, float),
            'card4': card4,
            'card5': _coerce_field(card5, float),
            'card6': card6,
            'addr1': _coerce_field(addr1, float),
            'addr2': _coerce_field(addr2, float),
            'P_emaildomain': p_emaildomain,
            'R_emaildomain': p_emaildomain  # Often same as P_emaildomain
        }

        # Let the feature helper supply whatever the form does not collect.
        complete_data = fill_missing_features(transaction_data)
        result = fraud_model.predict_single_transaction(complete_data)
        if 'error' in result:
            return f"β {result['error']}", "", "", ""

        probability = result['fraud_probability']
        risk_level = result['risk_level']
        recommendation = result['recommendation']

        # Banner thresholds mirror the risk levels listed in the UI header.
        if probability >= 0.8:
            risk_indicator = f"π΄ HIGH RISK ({probability:.1%})"
        elif probability >= 0.5:
            risk_indicator = f"π‘ MEDIUM RISK ({probability:.1%})"
        elif probability >= 0.2:
            risk_indicator = f"π LOW RISK ({probability:.1%})"
        else:
            risk_indicator = f"π’ VERY LOW RISK ({probability:.1%})"
        return risk_indicator, f"{probability:.4f}", risk_level, recommendation
    except Exception as e:
        # UI boundary: surface the problem in the first output box rather
        # than letting the handler raise.
        return f"β Error: {str(e)}", "", "", ""
def _failed_prediction_row(row, message):
    """Return a copy of *row* marked as an unscored (errored) transaction."""
    failed = row.copy()
    failed['fraud_probability'] = None
    failed['risk_level'] = 'Error'
    failed['recommendation'] = message
    failed['is_suspicious'] = False
    return failed


def predict_batch_from_csv(file):
    """Score every row of an uploaded CSV and write the results to a new CSV.

    Args:
        file: The upload handed over by the file component: either a
            filesystem path or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.

    Returns:
        A 2-tuple ``(summary, output_path)``. ``summary`` is the text shown
        to the user; ``output_path`` is the path of the results CSV, or
        ``None`` when the batch could not be processed at all.
    """
    if not model_loaded:
        return "β Model not loaded. Please contact administrator.", None
    if file is None:
        return "β Please upload a CSV file.", None
    try:
        # Accept both a plain path and a tempfile-style wrapper with `.name`.
        csv_path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(csv_path)

        required_cols = ['TransactionAmt']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            return f"β Missing required columns: {missing_cols}. Please ensure your CSV has at least 'TransactionAmt' column.", None

        # A header-only file would otherwise fail later with an opaque
        # KeyError on the (empty) results frame.
        if df.empty:
            return "β Error processing CSV: the file contains no transaction rows.", None

        if 'TransactionID' not in df.columns:
            df['TransactionID'] = range(1, len(df) + 1)

        results = []
        for _, row in df.iterrows():
            # One bad row must not abort the batch: it is recorded as an
            # error row and processing continues.
            try:
                complete_data = fill_missing_features(row.to_dict())
                result = fraud_model.predict_single_transaction(complete_data)
                if 'error' in result:
                    row_result = _failed_prediction_row(
                        row, result.get('error', 'Prediction failed')
                    )
                else:
                    row_result = row.copy()
                    for key in ('fraud_probability', 'risk_level',
                                'recommendation', 'is_suspicious'):
                        row_result[key] = result[key]
            except Exception as e:
                row_result = _failed_prediction_row(row, f'Processing error: {str(e)}')
            results.append(row_result)

        results_df = pd.DataFrame(results)

        # Write to a unique file per request so concurrent users cannot
        # overwrite (or download) each other's results.
        with tempfile.NamedTemporaryFile(
            mode="w",
            prefix="fraud_predictions_batch_",
            suffix=".csv",
            delete=False,
            newline="",
            encoding="utf-8",
        ) as handle:
            results_df.to_csv(handle, index=False)
            output_path = handle.name

        # Rows that failed carry no probability and are excluded from the
        # risk distribution.
        probabilities = results_df['fraud_probability'].dropna().astype(float)
        total_transactions = len(results_df)
        valid_count = len(probabilities)

        if valid_count > 0:
            high_risk = int((probabilities >= 0.8).sum())
            medium_risk = int(((probabilities >= 0.5) & (probabilities < 0.8)).sum())
            low_risk = int(((probabilities >= 0.2) & (probabilities < 0.5)).sum())
            very_low_risk = int((probabilities < 0.2).sum())
            summary = "\n".join([
                "",
                "π **Batch Prediction Summary**",
                f"Total Transactions: {total_transactions}",
                f"Successfully Processed: {valid_count}",
                f"Errors: {total_transactions - valid_count}",
                "**Risk Distribution:**",
                f"π΄ High Risk: {high_risk} ({high_risk/valid_count:.1%})",
                f"π‘ Medium Risk: {medium_risk} ({medium_risk/valid_count:.1%})",
                f"π Low Risk: {low_risk} ({low_risk/valid_count:.1%})",
                f"π’ Very Low Risk: {very_low_risk} ({very_low_risk/valid_count:.1%})",
                f"Results saved to: {output_path}",
                "",
            ])
        else:
            summary = "\n".join([
                "",
                "β **Batch Processing Failed**",
                f"Total Transactions: {total_transactions}",
                "Successfully Processed: 0",
                "All transactions encountered errors.",
                "Please check your CSV format and try again.",
                "",
            ])
        return summary, output_path
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"β Error processing CSV: {str(e)}", None
# Create Gradio interface
#
# The whole UI is declared inside one gr.Blocks context bound to `app`.
# Components appear in the order they are created within each context
# manager, so statement order below is the layout.
# NOTE(review): the source this was recovered from had no indentation; the
# nesting shown here is reconstructed from the component structure - confirm
# against the deployed app.
with gr.Blocks(title="Fraud Detection System", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
# π Credit Card Fraud Detection System
This system uses **XGBoost machine learning** to assess the risk of credit card transactions being fraudulent.
Enter transaction details for single prediction or upload CSV for batch processing.
**Risk Levels:**
- π΄ High Risk (β₯80%): Block transaction immediately
- π‘ Medium Risk (50-79%): Manual review required
- π Low Risk (20-49%): Monitor transaction
- π’ Very Low Risk (<20%): Process normally
""")
    with gr.Tabs():
        # Single Transaction Tab
        with gr.TabItem("π Single Transaction"):
            gr.Markdown("""
### Single Transaction Fraud Detection
Enter the transaction details below for instant fraud risk assessment.
""")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### π Transaction Information")
                    transaction_id = gr.Number(label="Transaction ID", value=123456, precision=0)
                    transaction_dt = gr.Number(label="Transaction DateTime (seconds)", value=18403200, precision=0)
                    transaction_amt = gr.Number(label="Transaction Amount ($)", value=150.00)
                    product_cd = gr.Dropdown(
                        choices=["W", "C", "S", "R", "H"],
                        label="Product Code",
                        value="W"
                    )
                    gr.Markdown("### π³ Card Information")
                    card1 = gr.Number(label="Card 1", value=4532015112830366, precision=0)
                    card2 = gr.Number(label="Card 2", value=404.0)
                    card3 = gr.Number(label="Card 3", value=150.0)
                with gr.Column():
                    gr.Markdown("### π³ Card Details")
                    card4 = gr.Dropdown(
                        choices=["visa", "mastercard", "american express", "discover"],
                        label="Card Type",
                        value="visa"
                    )
                    card5 = gr.Number(label="Card 5", value=142.0)
                    card6 = gr.Dropdown(
                        choices=["credit", "debit"],
                        label="Card Category",
                        value="credit"
                    )
                    gr.Markdown("### π Address Information")
                    addr1 = gr.Number(label="Address 1", value=315.0)
                    addr2 = gr.Number(label="Address 2", value=87.0)
                    gr.Markdown("### π§ Email Information")
                    p_emaildomain = gr.Textbox(label="Email Domain", value="gmail.com")
            predict_btn = gr.Button("π Analyze Transaction", variant="primary", size="lg")
            gr.Markdown("### π Prediction Results")
            with gr.Row():
                risk_output = gr.Textbox(label="Risk Assessment", lines=1)
                probability_output = gr.Textbox(label="Fraud Probability", lines=1)
            with gr.Row():
                risk_level_output = gr.Textbox(label="Risk Level", lines=1)
                recommendation_output = gr.Textbox(label="Recommendation", lines=2)
            # Inputs are passed positionally, so this list must stay in the
            # same order as predict_single_transaction's parameters; the four
            # outputs receive the 4-tuple it returns.
            predict_btn.click(
                predict_single_transaction,
                inputs=[
                    transaction_id, transaction_dt, transaction_amt, product_cd,
                    card1, card2, card3, card4, card5, card6,
                    addr1, addr2, p_emaildomain
                ],
                outputs=[risk_output, probability_output, risk_level_output, recommendation_output]
            )
        # Batch Processing Tab
        with gr.TabItem("π Batch Processing"):
            gr.Markdown("""
### CSV Batch Processing
Upload a CSV file containing multiple transactions for batch fraud detection.
**Required CSV Columns:**
- `TransactionAmt` (required)
- `TransactionID` (optional - will be auto-generated)
- `TransactionDT`, `ProductCD`, `card1-6`, `addr1-2`, `P_emaildomain` (optional - smart defaults used)
**Example CSV Format:**
```
TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
```
""")
            file_upload = gr.File(
                label="Upload CSV File",
                file_types=[".csv"],
                elem_id="csv-upload"
            )
            batch_btn = gr.Button("π Process Batch", variant="primary", size="lg")
            gr.Markdown("### π Batch Results")
            batch_output = gr.Textbox(label="Processing Summary", lines=12)
            download_file = gr.File(label="Download Results CSV")
            # predict_batch_from_csv returns (summary text, results path or
            # None), matching the two outputs in order.
            batch_btn.click(
                predict_batch_from_csv,
                inputs=[file_upload],
                outputs=[batch_output, download_file]
            )
        # Sample Data Tab
        # Static reference content only; no event handlers are attached here.
        with gr.TabItem("π Sample Data"):
            gr.Markdown("""
### Sample Transaction Data
Use these examples to test the system or as a template for your CSV files.
""")
            gr.Markdown("""
#### Example 1: Low Risk Transaction
```json
{
"TransactionID": 123456,
"TransactionDT": 18403200,
"TransactionAmt": 150.00,
"ProductCD": "W",
"card1": 4532015112830366,
"card2": 404.0,
"card3": 150.0,
"card4": "visa",
"card5": 142.0,
"card6": "credit",
"addr1": 315.0,
"addr2": 87.0,
"P_emaildomain": "gmail.com"
}
```
#### Example 2: Higher Risk Transaction
```json
{
"TransactionID": 123457,
"TransactionDT": 18403300,
"TransactionAmt": 2500.00,
"ProductCD": "C",
"card1": 5555555555554444,
"card2": 555.0,
"card3": 200.0,
"card4": "mastercard",
"card5": 224.0,
"card6": "credit",
"addr1": 420.0,
"addr2": 95.0,
"P_emaildomain": "yahoo.com"
}
```
#### CSV Sample File
You can copy this into a CSV file for batch testing:
```
TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
123458,18403400,75.50,W,4111111111111111,300.0,75.0,visa,100.0,debit,200.0,50.0,hotmail.com
```
""")
        # Model Info Tab
        # This branch runs once, while the UI is being built, so the tab shows
        # the load state as of startup. The detailed view is used only when
        # the model loaded and the wrapper's `metadata` is truthy.
        with gr.TabItem("βΉοΈ Model Information"):
            if model_loaded and fraud_model.metadata:
                model_info = fraud_model.get_model_info()
                gr.Markdown(f"""
### Model Status
**Status:** β {model_info.get('model_name', 'XGBoost')} Model Loaded
**AUC Score:** {model_info.get('auc_score', 'N/A')}
**Training Date:** {model_info.get('training_timestamp', 'N/A')}
**Features:** {model_info.get('feature_count', 'N/A')}
### About This Model
This fraud detection system uses an **XGBoost classifier** trained on a comprehensive dataset
of credit card transactions. The model achieved high performance with advanced feature engineering
and ensemble learning techniques.
### Supported Fields
- **TransactionID**: Unique transaction identifier
- **TransactionDT**: Transaction datetime (seconds)
- **TransactionAmt**: Transaction amount in USD
- **ProductCD**: Product code (W, C, S, R, H)
- **card1-6**: Card-related features
- **addr1-2**: Address information
- **P_emaildomain**: Primary email domain
### Model Performance
- **Algorithm**: XGBoost (Extreme Gradient Boosting)
- **AUC Score**: {model_info.get('auc_score', 'N/A')}
- **Features Used**: {model_info.get('feature_count', 'N/A')} engineered features
- **Training Method**: Cross-validation with stratified sampling
- **Speed**: Real-time predictions (<100ms)
""")
            else:
                gr.Markdown(f"""
### Model Status
**Status:** {'β Basic Model Loaded' if model_loaded else 'β Not Loaded'}
### About This Model
This fraud detection system uses advanced machine learning algorithms to assess transaction risk.
The model processes transactions with the same field structure as the API endpoints.
### Features
- Single transaction analysis
- Batch CSV processing
- Real-time risk assessment
- API-compatible field structure
""")
# Launch the app
# Only start the server when this file is executed as a script; importing the
# module builds `app` without launching it.
if __name__ == "__main__":
    app.launch()