Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import numpy as np
|
|
| 4 |
import joblib
|
| 5 |
from model_wrapper import FraudDetectionModel
|
| 6 |
from preprocessor import FraudDataPreprocessor
|
|
|
|
| 7 |
import os
|
| 8 |
|
| 9 |
# Initialize the fraud detection model
|
|
@@ -11,6 +12,10 @@ fraud_model = FraudDetectionModel()
|
|
| 11 |
|
| 12 |
# Load model if files exist
|
| 13 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Load the specific XGBoost model files from your training
|
| 15 |
model_path = "fraud_detection_model_xgboost_20250727_145448.joblib"
|
| 16 |
preprocessor_path = "preprocessor_20250727_145448.joblib"
|
|
@@ -22,83 +27,61 @@ try:
|
|
| 22 |
else:
|
| 23 |
fraud_model.load_model(model_path, preprocessor_path)
|
| 24 |
model_loaded = True
|
|
|
|
| 25 |
else:
|
| 26 |
model_loaded = False
|
| 27 |
-
print("Model files not found. Please upload the following files:")
|
| 28 |
print("- fraud_detection_model_xgboost_20250727_145448.joblib")
|
| 29 |
print("- preprocessor_20250727_145448.joblib")
|
| 30 |
print("- model_metadata_20250727_145448.joblib")
|
| 31 |
except Exception as e:
|
| 32 |
model_loaded = False
|
| 33 |
-
print(f"Error loading model: {e}")
|
| 34 |
|
| 35 |
-
def
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
addr1,
|
| 41 |
-
addr2,
|
| 42 |
card1,
|
| 43 |
card2,
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
| 48 |
):
|
| 49 |
-
"""Predict fraud risk for a transaction"""
|
| 50 |
|
| 51 |
if not model_loaded:
|
| 52 |
return "β Model not loaded. Please contact administrator.", "", "", ""
|
| 53 |
|
| 54 |
try:
|
| 55 |
-
# Prepare transaction data
|
| 56 |
transaction_data = {
|
| 57 |
-
'
|
| 58 |
-
'
|
| 59 |
-
'
|
| 60 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
'addr1': float(addr1) if addr1 else None,
|
| 62 |
'addr2': float(addr2) if addr2 else None,
|
| 63 |
-
'
|
| 64 |
-
'
|
| 65 |
-
'card3': float(transaction_amount), # Often similar to transaction amount
|
| 66 |
-
'card5': 142.0, # Default value
|
| 67 |
-
'card6': 'credit', # Default value
|
| 68 |
-
'dist1': float(dist1) if dist1 else None,
|
| 69 |
-
'dist2': float(dist1) if dist1 else None, # Often similar to dist1
|
| 70 |
-
'C1': float(c1),
|
| 71 |
-
'C2': float(c2),
|
| 72 |
-
'C3': float(c3),
|
| 73 |
-
'C4': float(c4),
|
| 74 |
-
'C5': float(c5),
|
| 75 |
-
'C6': float(c6),
|
| 76 |
-
'C7': 0.0,
|
| 77 |
-
'C8': 0.0,
|
| 78 |
-
'C9': 1.0,
|
| 79 |
-
'C10': 0.0,
|
| 80 |
-
'C11': 1.0,
|
| 81 |
-
'C12': 1.0,
|
| 82 |
-
'C13': 1.0,
|
| 83 |
-
'C14': 1.0,
|
| 84 |
-
'D1': float(d1),
|
| 85 |
-
'D2': float(d2),
|
| 86 |
-
'D3': float(d3),
|
| 87 |
-
'D4': float(d4),
|
| 88 |
-
'D5': float(d5),
|
| 89 |
-
'D10': 0.0,
|
| 90 |
-
'D15': 0.0,
|
| 91 |
-
'M1': m1,
|
| 92 |
-
'M2': m2,
|
| 93 |
-
'M3': m3,
|
| 94 |
-
'M4': m4,
|
| 95 |
-
'M5': m5,
|
| 96 |
-
'M6': m6,
|
| 97 |
-
'TransactionDT': transaction_hour * 3600 # Convert hour to seconds
|
| 98 |
}
|
| 99 |
|
|
|
|
|
|
|
|
|
|
| 100 |
# Make prediction
|
| 101 |
-
result = fraud_model.predict_single_transaction(
|
| 102 |
|
| 103 |
if 'error' in result:
|
| 104 |
return f"β {result['error']}", "", "", ""
|
|
@@ -123,44 +106,111 @@ def predict_fraud_risk(
|
|
| 123 |
except Exception as e:
|
| 124 |
return f"β Error: {str(e)}", "", "", ""
|
| 125 |
|
| 126 |
-
def
|
| 127 |
"""Predict fraud risk for multiple transactions from CSV"""
|
| 128 |
|
| 129 |
if not model_loaded:
|
| 130 |
-
return "β Model not loaded. Please contact administrator."
|
| 131 |
|
| 132 |
if file is None:
|
| 133 |
-
return "β Please upload a CSV file."
|
| 134 |
|
| 135 |
try:
|
| 136 |
# Read CSV file
|
| 137 |
df = pd.read_csv(file.name)
|
| 138 |
|
| 139 |
-
#
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
# Save results
|
| 143 |
-
output_path = "
|
| 144 |
results_df.to_csv(output_path, index=False)
|
| 145 |
|
| 146 |
# Create summary
|
|
|
|
| 147 |
total_transactions = len(results_df)
|
| 148 |
-
|
| 149 |
-
medium_risk = len(results_df[(results_df['fraud_probability'] >= 0.5) & (results_df['fraud_probability'] < 0.8)])
|
| 150 |
-
low_risk = len(results_df[(results_df['fraud_probability'] >= 0.2) & (results_df['fraud_probability'] < 0.5)])
|
| 151 |
-
very_low_risk = len(results_df[results_df['fraud_probability'] < 0.2])
|
| 152 |
-
|
| 153 |
-
summary = f"""
|
| 154 |
-
π **Batch Prediction Summary**
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
return summary, output_path
|
| 166 |
|
|
@@ -173,8 +223,8 @@ with gr.Blocks(title="Fraud Detection System", theme=gr.themes.Soft()) as app:
|
|
| 173 |
gr.Markdown("""
|
| 174 |
# π Credit Card Fraud Detection System
|
| 175 |
|
| 176 |
-
This system uses machine learning to assess the risk of credit card transactions being fraudulent.
|
| 177 |
-
Enter transaction details
|
| 178 |
|
| 179 |
**Risk Levels:**
|
| 180 |
- 🔴 High Risk (≥80%): Block transaction immediately
|
|
@@ -186,52 +236,53 @@ with gr.Blocks(title="Fraud Detection System", theme=gr.themes.Soft()) as app:
|
|
| 186 |
with gr.Tabs():
|
| 187 |
|
| 188 |
# Single Transaction Tab
|
| 189 |
-
with gr.TabItem("Single Transaction"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
with gr.Row():
|
| 191 |
with gr.Column():
|
| 192 |
-
gr.Markdown("### Transaction
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
|
|
|
|
|
|
| 198 |
)
|
| 199 |
-
email_domain = gr.Textbox(label="Email Domain", value="gmail.com")
|
| 200 |
-
transaction_hour = gr.Slider(0, 23, label="Transaction Hour", value=12)
|
| 201 |
|
| 202 |
-
gr.Markdown("###
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
card2 = gr.Number(label="Card 2", value=150.0)
|
| 207 |
-
dist1 = gr.Number(label="Distance 1", value=19.0)
|
| 208 |
|
| 209 |
with gr.Column():
|
| 210 |
-
gr.Markdown("###
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
-
gr.Markdown("###
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
d3 = gr.Number(label="D3", value=0.0)
|
| 222 |
-
d4 = gr.Number(label="D4", value=0.0)
|
| 223 |
-
d5 = gr.Number(label="D5", value=20.0)
|
| 224 |
|
| 225 |
-
gr.Markdown("###
|
| 226 |
-
|
| 227 |
-
m2 = gr.Dropdown(choices=["T", "F"], label="M2", value="T")
|
| 228 |
-
m3 = gr.Dropdown(choices=["T", "F"], label="M3", value="T")
|
| 229 |
-
m4 = gr.Dropdown(choices=["M0", "M1", "M2"], label="M4", value="M0")
|
| 230 |
-
m5 = gr.Dropdown(choices=["T", "F"], label="M5", value="F")
|
| 231 |
-
m6 = gr.Dropdown(choices=["T", "F"], label="M6", value="F")
|
| 232 |
|
| 233 |
predict_btn = gr.Button("π Analyze Transaction", variant="primary", size="lg")
|
| 234 |
|
|
|
|
| 235 |
with gr.Row():
|
| 236 |
risk_output = gr.Textbox(label="Risk Assessment", lines=1)
|
| 237 |
probability_output = gr.Textbox(label="Fraud Probability", lines=1)
|
|
@@ -241,74 +292,146 @@ with gr.Blocks(title="Fraud Detection System", theme=gr.themes.Soft()) as app:
|
|
| 241 |
recommendation_output = gr.Textbox(label="Recommendation", lines=2)
|
| 242 |
|
| 243 |
predict_btn.click(
|
| 244 |
-
|
| 245 |
inputs=[
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
d1, d2, d3, d4, d5,
|
| 250 |
-
m1, m2, m3, m4, m5, m6
|
| 251 |
],
|
| 252 |
outputs=[risk_output, probability_output, risk_level_output, recommendation_output]
|
| 253 |
)
|
| 254 |
|
| 255 |
# Batch Processing Tab
|
| 256 |
-
with gr.TabItem("Batch Processing"):
|
| 257 |
gr.Markdown("""
|
| 258 |
-
###
|
|
|
|
|
|
|
| 259 |
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
""")
|
| 263 |
|
| 264 |
-
file_upload = gr.File(
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
|
|
|
| 269 |
|
| 270 |
batch_btn.click(
|
| 271 |
-
|
| 272 |
inputs=[file_upload],
|
| 273 |
outputs=[batch_output, download_file]
|
| 274 |
)
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
# Model Info Tab
|
| 277 |
-
with gr.TabItem("Model Information"):
|
| 278 |
if model_loaded and fraud_model.metadata:
|
| 279 |
model_info = fraud_model.get_model_info()
|
| 280 |
gr.Markdown(f"""
|
| 281 |
### Model Status
|
| 282 |
-
**Status:** ✅ {model_info.get('model_name', 'XGBoost')} Model Loaded
|
| 283 |
-
**AUC Score:** {model_info.get('auc_score', 'N/A')}
|
| 284 |
-
**Training Date:** {model_info.get('training_timestamp', 'N/A')}
|
| 285 |
-
**Features:** {model_info.get('feature_count', 'N/A')}
|
| 286 |
|
| 287 |
### About This Model
|
| 288 |
This fraud detection system uses an **XGBoost classifier** trained on a comprehensive dataset
|
| 289 |
of credit card transactions. The model achieved high performance with advanced feature engineering
|
| 290 |
and ensemble learning techniques.
|
| 291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
### Model Performance
|
| 293 |
- **Algorithm**: XGBoost (Extreme Gradient Boosting)
|
| 294 |
- **AUC Score**: {model_info.get('auc_score', 'N/A')}
|
| 295 |
- **Features Used**: {model_info.get('feature_count', 'N/A')} engineered features
|
| 296 |
- **Training Method**: Cross-validation with stratified sampling
|
| 297 |
- **Speed**: Real-time predictions (<100ms)
|
| 298 |
-
|
| 299 |
-
### Features Used
|
| 300 |
-
The model processes over 40 features including:
|
| 301 |
-
- **Transaction Details**: Amount, timing, frequency patterns
|
| 302 |
-
- **Card Information**: Type, issuer details, security features
|
| 303 |
-
- **User Behaviour**: Email domains, address patterns, historical counts
|
| 304 |
-
- **Device & Session**: Geographic data, device fingerprinting
|
| 305 |
-
- **Engineered Features**: Ratios, transformations, temporal patterns
|
| 306 |
-
|
| 307 |
-
### XGBoost Advantages
|
| 308 |
-
- **High Accuracy**: Excellent performance on tabular data
|
| 309 |
-
- **Feature Importance**: Clear understanding of decision factors
|
| 310 |
-
- **Robustness**: Handles missing values and outliers well
|
| 311 |
-
- **Scalability**: Efficient training and inference
|
| 312 |
""")
|
| 313 |
else:
|
| 314 |
gr.Markdown(f"""
|
|
@@ -317,21 +440,13 @@ with gr.Blocks(title="Fraud Detection System", theme=gr.themes.Soft()) as app:
|
|
| 317 |
|
| 318 |
### About This Model
|
| 319 |
This fraud detection system uses advanced machine learning algorithms to assess transaction risk.
|
| 320 |
-
The model
|
| 321 |
-
including transaction amount, card details, user behaviour patterns, and timing information.
|
| 322 |
-
|
| 323 |
-
### Features Used
|
| 324 |
-
- Transaction amount and timing
|
| 325 |
-
- Card information (type, numbers)
|
| 326 |
-
- Email domain patterns
|
| 327 |
-
- Address information
|
| 328 |
-
- User behaviour counts
|
| 329 |
-
- Device and session data
|
| 330 |
|
| 331 |
-
###
|
| 332 |
-
-
|
| 333 |
-
-
|
| 334 |
-
-
|
|
|
|
| 335 |
""")
|
| 336 |
|
| 337 |
# Launch the app
|
|
|
|
| 4 |
import joblib
|
| 5 |
from model_wrapper import FraudDetectionModel
|
| 6 |
from preprocessor import FraudDataPreprocessor
|
| 7 |
+
from feature_utils import fill_missing_features
|
| 8 |
import os
|
| 9 |
|
| 10 |
# Initialize the fraud detection model
|
|
|
|
| 12 |
|
| 13 |
# Load model if files exist
|
| 14 |
try:
|
| 15 |
+
# First, ensure the FraudDataPreprocessor class is available
|
| 16 |
+
import sys
|
| 17 |
+
sys.modules['__main__'].FraudDataPreprocessor = FraudDataPreprocessor
|
| 18 |
+
|
| 19 |
# Load the specific XGBoost model files from your training
|
| 20 |
model_path = "fraud_detection_model_xgboost_20250727_145448.joblib"
|
| 21 |
preprocessor_path = "preprocessor_20250727_145448.joblib"
|
|
|
|
| 27 |
else:
|
| 28 |
fraud_model.load_model(model_path, preprocessor_path)
|
| 29 |
model_loaded = True
|
| 30 |
+
print(f"✅ Model loaded successfully!")
|
| 31 |
else:
|
| 32 |
model_loaded = False
|
| 33 |
+
print("β Model files not found. Please upload the following files:")
|
| 34 |
print("- fraud_detection_model_xgboost_20250727_145448.joblib")
|
| 35 |
print("- preprocessor_20250727_145448.joblib")
|
| 36 |
print("- model_metadata_20250727_145448.joblib")
|
| 37 |
except Exception as e:
|
| 38 |
model_loaded = False
|
| 39 |
+
print(f"β Error loading model: {e}")
|
| 40 |
|
| 41 |
+
def predict_single_transaction(
|
| 42 |
+
transaction_id,
|
| 43 |
+
transaction_dt,
|
| 44 |
+
transaction_amt,
|
| 45 |
+
product_cd,
|
|
|
|
|
|
|
| 46 |
card1,
|
| 47 |
card2,
|
| 48 |
+
card3,
|
| 49 |
+
card4,
|
| 50 |
+
card5,
|
| 51 |
+
card6,
|
| 52 |
+
addr1,
|
| 53 |
+
addr2,
|
| 54 |
+
p_emaildomain
|
| 55 |
):
|
| 56 |
+
"""Predict fraud risk for a single transaction with exact API fields"""
|
| 57 |
|
| 58 |
if not model_loaded:
|
| 59 |
return "β Model not loaded. Please contact administrator.", "", "", ""
|
| 60 |
|
| 61 |
try:
|
| 62 |
+
# Prepare transaction data exactly as API expects
|
| 63 |
transaction_data = {
|
| 64 |
+
'TransactionID': int(transaction_id) if transaction_id else 123456,
|
| 65 |
+
'TransactionDT': int(transaction_dt) if transaction_dt else 18403200,
|
| 66 |
+
'TransactionAmt': float(transaction_amt),
|
| 67 |
+
'ProductCD': product_cd,
|
| 68 |
+
'card1': int(card1) if card1 else None,
|
| 69 |
+
'card2': float(card2) if card2 else None,
|
| 70 |
+
'card3': float(card3) if card3 else None,
|
| 71 |
+
'card4': card4,
|
| 72 |
+
'card5': float(card5) if card5 else None,
|
| 73 |
+
'card6': card6,
|
| 74 |
'addr1': float(addr1) if addr1 else None,
|
| 75 |
'addr2': float(addr2) if addr2 else None,
|
| 76 |
+
'P_emaildomain': p_emaildomain,
|
| 77 |
+
'R_emaildomain': p_emaildomain # Often same as P_emaildomain
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
}
|
| 79 |
|
| 80 |
+
# Fill missing features with defaults
|
| 81 |
+
complete_data = fill_missing_features(transaction_data)
|
| 82 |
+
|
| 83 |
# Make prediction
|
| 84 |
+
result = fraud_model.predict_single_transaction(complete_data)
|
| 85 |
|
| 86 |
if 'error' in result:
|
| 87 |
return f"β {result['error']}", "", "", ""
|
|
|
|
| 106 |
except Exception as e:
|
| 107 |
return f"β Error: {str(e)}", "", "", ""
|
| 108 |
|
| 109 |
+
def predict_batch_from_csv(file):
|
| 110 |
"""Predict fraud risk for multiple transactions from CSV"""
|
| 111 |
|
| 112 |
if not model_loaded:
|
| 113 |
+
return "β Model not loaded. Please contact administrator.", None
|
| 114 |
|
| 115 |
if file is None:
|
| 116 |
+
return "β Please upload a CSV file.", None
|
| 117 |
|
| 118 |
try:
|
| 119 |
# Read CSV file
|
| 120 |
df = pd.read_csv(file.name)
|
| 121 |
|
| 122 |
+
# Validate required columns
|
| 123 |
+
required_cols = ['TransactionAmt']
|
| 124 |
+
missing_cols = [col for col in required_cols if col not in df.columns]
|
| 125 |
+
|
| 126 |
+
if missing_cols:
|
| 127 |
+
return f"β Missing required columns: {missing_cols}. Please ensure your CSV has at least 'TransactionAmt' column.", None
|
| 128 |
+
|
| 129 |
+
# Add default TransactionID if not present
|
| 130 |
+
if 'TransactionID' not in df.columns:
|
| 131 |
+
df['TransactionID'] = range(1, len(df) + 1)
|
| 132 |
+
|
| 133 |
+
# Process each row and make predictions
|
| 134 |
+
results = []
|
| 135 |
+
|
| 136 |
+
for idx, row in df.iterrows():
|
| 137 |
+
try:
|
| 138 |
+
# Fill missing features for this row
|
| 139 |
+
transaction_data = row.to_dict()
|
| 140 |
+
complete_data = fill_missing_features(transaction_data)
|
| 141 |
+
|
| 142 |
+
# Make prediction
|
| 143 |
+
result = fraud_model.predict_single_transaction(complete_data)
|
| 144 |
+
|
| 145 |
+
if 'error' not in result:
|
| 146 |
+
# Add results to original row data
|
| 147 |
+
row_result = row.copy()
|
| 148 |
+
row_result['fraud_probability'] = result['fraud_probability']
|
| 149 |
+
row_result['risk_level'] = result['risk_level']
|
| 150 |
+
row_result['recommendation'] = result['recommendation']
|
| 151 |
+
row_result['is_suspicious'] = result['is_suspicious']
|
| 152 |
+
else:
|
| 153 |
+
# Handle prediction error
|
| 154 |
+
row_result = row.copy()
|
| 155 |
+
row_result['fraud_probability'] = None
|
| 156 |
+
row_result['risk_level'] = 'Error'
|
| 157 |
+
row_result['recommendation'] = result.get('error', 'Prediction failed')
|
| 158 |
+
row_result['is_suspicious'] = False
|
| 159 |
+
|
| 160 |
+
results.append(row_result)
|
| 161 |
+
|
| 162 |
+
except Exception as e:
|
| 163 |
+
# Handle row processing error
|
| 164 |
+
row_result = row.copy()
|
| 165 |
+
row_result['fraud_probability'] = None
|
| 166 |
+
row_result['risk_level'] = 'Error'
|
| 167 |
+
row_result['recommendation'] = f'Processing error: {str(e)}'
|
| 168 |
+
row_result['is_suspicious'] = False
|
| 169 |
+
results.append(row_result)
|
| 170 |
+
|
| 171 |
+
# Create results DataFrame
|
| 172 |
+
results_df = pd.DataFrame(results)
|
| 173 |
|
| 174 |
# Save results
|
| 175 |
+
output_path = "fraud_predictions_batch.csv"
|
| 176 |
results_df.to_csv(output_path, index=False)
|
| 177 |
|
| 178 |
# Create summary
|
| 179 |
+
valid_predictions = results_df[results_df['fraud_probability'].notna()]
|
| 180 |
total_transactions = len(results_df)
|
| 181 |
+
valid_count = len(valid_predictions)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
+
if valid_count > 0:
|
| 184 |
+
high_risk = len(valid_predictions[valid_predictions['fraud_probability'] >= 0.8])
|
| 185 |
+
medium_risk = len(valid_predictions[(valid_predictions['fraud_probability'] >= 0.5) & (valid_predictions['fraud_probability'] < 0.8)])
|
| 186 |
+
low_risk = len(valid_predictions[(valid_predictions['fraud_probability'] >= 0.2) & (valid_predictions['fraud_probability'] < 0.5)])
|
| 187 |
+
very_low_risk = len(valid_predictions[valid_predictions['fraud_probability'] < 0.2])
|
| 188 |
+
|
| 189 |
+
summary = f"""
|
| 190 |
+
π **Batch Prediction Summary**
|
| 191 |
+
|
| 192 |
+
Total Transactions: {total_transactions}
|
| 193 |
+
Successfully Processed: {valid_count}
|
| 194 |
+
Errors: {total_transactions - valid_count}
|
| 195 |
+
|
| 196 |
+
**Risk Distribution:**
|
| 197 |
+
🔴 High Risk: {high_risk} ({high_risk/valid_count:.1%})
|
| 198 |
+
🟡 Medium Risk: {medium_risk} ({medium_risk/valid_count:.1%})
|
| 199 |
+
🟠 Low Risk: {low_risk} ({low_risk/valid_count:.1%})
|
| 200 |
+
🟢 Very Low Risk: {very_low_risk} ({very_low_risk/valid_count:.1%})
|
| 201 |
+
|
| 202 |
+
Results saved to: {output_path}
|
| 203 |
+
"""
|
| 204 |
+
else:
|
| 205 |
+
summary = f"""
|
| 206 |
+
β **Batch Processing Failed**
|
| 207 |
+
|
| 208 |
+
Total Transactions: {total_transactions}
|
| 209 |
+
Successfully Processed: 0
|
| 210 |
+
All transactions encountered errors.
|
| 211 |
+
|
| 212 |
+
Please check your CSV format and try again.
|
| 213 |
+
"""
|
| 214 |
|
| 215 |
return summary, output_path
|
| 216 |
|
|
|
|
| 223 |
gr.Markdown("""
|
| 224 |
# π Credit Card Fraud Detection System
|
| 225 |
|
| 226 |
+
This system uses **XGBoost machine learning** to assess the risk of credit card transactions being fraudulent.
|
| 227 |
+
Enter transaction details for single prediction or upload CSV for batch processing.
|
| 228 |
|
| 229 |
**Risk Levels:**
|
| 230 |
- 🔴 High Risk (≥80%): Block transaction immediately
|
|
|
|
| 236 |
with gr.Tabs():
|
| 237 |
|
| 238 |
# Single Transaction Tab
|
| 239 |
+
with gr.TabItem("π Single Transaction"):
|
| 240 |
+
gr.Markdown("""
|
| 241 |
+
### Single Transaction Fraud Detection
|
| 242 |
+
Enter the transaction details below for instant fraud risk assessment.
|
| 243 |
+
""")
|
| 244 |
+
|
| 245 |
with gr.Row():
|
| 246 |
with gr.Column():
|
| 247 |
+
gr.Markdown("### π Transaction Information")
|
| 248 |
+
transaction_id = gr.Number(label="Transaction ID", value=123456, precision=0)
|
| 249 |
+
transaction_dt = gr.Number(label="Transaction DateTime (seconds)", value=18403200, precision=0)
|
| 250 |
+
transaction_amt = gr.Number(label="Transaction Amount ($)", value=150.00)
|
| 251 |
+
product_cd = gr.Dropdown(
|
| 252 |
+
choices=["W", "C", "S", "R", "H"],
|
| 253 |
+
label="Product Code",
|
| 254 |
+
value="W"
|
| 255 |
)
|
|
|
|
|
|
|
| 256 |
|
| 257 |
+
gr.Markdown("### 💳 Card Information")
|
| 258 |
+
card1 = gr.Number(label="Card 1", value=4532015112830366, precision=0)
|
| 259 |
+
card2 = gr.Number(label="Card 2", value=404.0)
|
| 260 |
+
card3 = gr.Number(label="Card 3", value=150.0)
|
|
|
|
|
|
|
| 261 |
|
| 262 |
with gr.Column():
|
| 263 |
+
gr.Markdown("### 💳 Card Details")
|
| 264 |
+
card4 = gr.Dropdown(
|
| 265 |
+
choices=["visa", "mastercard", "american express", "discover"],
|
| 266 |
+
label="Card Type",
|
| 267 |
+
value="visa"
|
| 268 |
+
)
|
| 269 |
+
card5 = gr.Number(label="Card 5", value=142.0)
|
| 270 |
+
card6 = gr.Dropdown(
|
| 271 |
+
choices=["credit", "debit"],
|
| 272 |
+
label="Card Category",
|
| 273 |
+
value="credit"
|
| 274 |
+
)
|
| 275 |
|
| 276 |
+
gr.Markdown("### π Address Information")
|
| 277 |
+
addr1 = gr.Number(label="Address 1", value=315.0)
|
| 278 |
+
addr2 = gr.Number(label="Address 2", value=87.0)
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
+
gr.Markdown("### 📧 Email Information")
|
| 281 |
+
p_emaildomain = gr.Textbox(label="Email Domain", value="gmail.com")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
predict_btn = gr.Button("π Analyze Transaction", variant="primary", size="lg")
|
| 284 |
|
| 285 |
+
gr.Markdown("### π Prediction Results")
|
| 286 |
with gr.Row():
|
| 287 |
risk_output = gr.Textbox(label="Risk Assessment", lines=1)
|
| 288 |
probability_output = gr.Textbox(label="Fraud Probability", lines=1)
|
|
|
|
| 292 |
recommendation_output = gr.Textbox(label="Recommendation", lines=2)
|
| 293 |
|
| 294 |
predict_btn.click(
|
| 295 |
+
predict_single_transaction,
|
| 296 |
inputs=[
|
| 297 |
+
transaction_id, transaction_dt, transaction_amt, product_cd,
|
| 298 |
+
card1, card2, card3, card4, card5, card6,
|
| 299 |
+
addr1, addr2, p_emaildomain
|
|
|
|
|
|
|
| 300 |
],
|
| 301 |
outputs=[risk_output, probability_output, risk_level_output, recommendation_output]
|
| 302 |
)
|
| 303 |
|
| 304 |
# Batch Processing Tab
|
| 305 |
+
with gr.TabItem("π Batch Processing"):
|
| 306 |
gr.Markdown("""
|
| 307 |
+
### CSV Batch Processing
|
| 308 |
+
|
| 309 |
+
Upload a CSV file containing multiple transactions for batch fraud detection.
|
| 310 |
|
| 311 |
+
**Required CSV Columns:**
|
| 312 |
+
- `TransactionAmt` (required)
|
| 313 |
+
- `TransactionID` (optional - will be auto-generated)
|
| 314 |
+
- `TransactionDT`, `ProductCD`, `card1-6`, `addr1-2`, `P_emaildomain` (optional - smart defaults used)
|
| 315 |
+
|
| 316 |
+
**Example CSV Format:**
|
| 317 |
+
```
|
| 318 |
+
TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
|
| 319 |
+
123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
|
| 320 |
+
123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
|
| 321 |
+
```
|
| 322 |
""")
|
| 323 |
|
| 324 |
+
file_upload = gr.File(
|
| 325 |
+
label="Upload CSV File",
|
| 326 |
+
file_types=[".csv"],
|
| 327 |
+
elem_id="csv-upload"
|
| 328 |
+
)
|
| 329 |
+
batch_btn = gr.Button("π Process Batch", variant="primary", size="lg")
|
| 330 |
|
| 331 |
+
gr.Markdown("### π Batch Results")
|
| 332 |
+
batch_output = gr.Textbox(label="Processing Summary", lines=12)
|
| 333 |
+
download_file = gr.File(label="Download Results CSV")
|
| 334 |
|
| 335 |
batch_btn.click(
|
| 336 |
+
predict_batch_from_csv,
|
| 337 |
inputs=[file_upload],
|
| 338 |
outputs=[batch_output, download_file]
|
| 339 |
)
|
| 340 |
|
| 341 |
+
# Sample Data Tab
|
| 342 |
+
with gr.TabItem("π Sample Data"):
|
| 343 |
+
gr.Markdown("""
|
| 344 |
+
### Sample Transaction Data
|
| 345 |
+
|
| 346 |
+
Use these examples to test the system or as a template for your CSV files.
|
| 347 |
+
""")
|
| 348 |
+
|
| 349 |
+
gr.Markdown("""
|
| 350 |
+
#### Example 1: Low Risk Transaction
|
| 351 |
+
```json
|
| 352 |
+
{
|
| 353 |
+
"TransactionID": 123456,
|
| 354 |
+
"TransactionDT": 18403200,
|
| 355 |
+
"TransactionAmt": 150.00,
|
| 356 |
+
"ProductCD": "W",
|
| 357 |
+
"card1": 4532015112830366,
|
| 358 |
+
"card2": 404.0,
|
| 359 |
+
"card3": 150.0,
|
| 360 |
+
"card4": "visa",
|
| 361 |
+
"card5": 142.0,
|
| 362 |
+
"card6": "credit",
|
| 363 |
+
"addr1": 315.0,
|
| 364 |
+
"addr2": 87.0,
|
| 365 |
+
"P_emaildomain": "gmail.com"
|
| 366 |
+
}
|
| 367 |
+
```
|
| 368 |
+
|
| 369 |
+
#### Example 2: Higher Risk Transaction
|
| 370 |
+
```json
|
| 371 |
+
{
|
| 372 |
+
"TransactionID": 123457,
|
| 373 |
+
"TransactionDT": 18403300,
|
| 374 |
+
"TransactionAmt": 2500.00,
|
| 375 |
+
"ProductCD": "C",
|
| 376 |
+
"card1": 5555555555554444,
|
| 377 |
+
"card2": 555.0,
|
| 378 |
+
"card3": 200.0,
|
| 379 |
+
"card4": "mastercard",
|
| 380 |
+
"card5": 224.0,
|
| 381 |
+
"card6": "credit",
|
| 382 |
+
"addr1": 420.0,
|
| 383 |
+
"addr2": 95.0,
|
| 384 |
+
"P_emaildomain": "yahoo.com"
|
| 385 |
+
}
|
| 386 |
+
```
|
| 387 |
+
|
| 388 |
+
#### CSV Sample File
|
| 389 |
+
You can copy this into a CSV file for batch testing:
|
| 390 |
+
```
|
| 391 |
+
TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,P_emaildomain
|
| 392 |
+
123456,18403200,150.00,W,4532015112830366,404.0,150.0,visa,142.0,credit,315.0,87.0,gmail.com
|
| 393 |
+
123457,18403300,2500.00,C,5555555555554444,555.0,200.0,mastercard,224.0,credit,420.0,95.0,yahoo.com
|
| 394 |
+
123458,18403400,75.50,W,4111111111111111,300.0,75.0,visa,100.0,debit,200.0,50.0,hotmail.com
|
| 395 |
+
```
|
| 396 |
+
""")
|
| 397 |
+
|
| 398 |
# Model Info Tab
|
| 399 |
+
with gr.TabItem("ℹ️ Model Information"):
|
| 400 |
if model_loaded and fraud_model.metadata:
|
| 401 |
model_info = fraud_model.get_model_info()
|
| 402 |
gr.Markdown(f"""
|
| 403 |
### Model Status
|
| 404 |
+
**Status:** ✅ {model_info.get('model_name', 'XGBoost')} Model Loaded
|
| 405 |
+
**AUC Score:** {model_info.get('auc_score', 'N/A')}
|
| 406 |
+
**Training Date:** {model_info.get('training_timestamp', 'N/A')}
|
| 407 |
+
**Features:** {model_info.get('feature_count', 'N/A')}
|
| 408 |
|
| 409 |
### About This Model
|
| 410 |
This fraud detection system uses an **XGBoost classifier** trained on a comprehensive dataset
|
| 411 |
of credit card transactions. The model achieved high performance with advanced feature engineering
|
| 412 |
and ensemble learning techniques.
|
| 413 |
|
| 414 |
+
### API Compatible Interface
|
| 415 |
+
This interface matches the exact field structure expected by the fraud detection API:
|
| 416 |
+
|
| 417 |
+
**Single Prediction Endpoint:** `/v1/predict`
|
| 418 |
+
**Batch Prediction Endpoint:** `/v1/predict/batch`
|
| 419 |
+
|
| 420 |
+
### Supported Fields
|
| 421 |
+
- **TransactionID**: Unique transaction identifier
|
| 422 |
+
- **TransactionDT**: Transaction datetime (seconds)
|
| 423 |
+
- **TransactionAmt**: Transaction amount in USD
|
| 424 |
+
- **ProductCD**: Product code (W, C, S, R, H)
|
| 425 |
+
- **card1-6**: Card-related features
|
| 426 |
+
- **addr1-2**: Address information
|
| 427 |
+
- **P_emaildomain**: Primary email domain
|
| 428 |
+
|
| 429 |
### Model Performance
|
| 430 |
- **Algorithm**: XGBoost (Extreme Gradient Boosting)
|
| 431 |
- **AUC Score**: {model_info.get('auc_score', 'N/A')}
|
| 432 |
- **Features Used**: {model_info.get('feature_count', 'N/A')} engineered features
|
| 433 |
- **Training Method**: Cross-validation with stratified sampling
|
| 434 |
- **Speed**: Real-time predictions (<100ms)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
""")
|
| 436 |
else:
|
| 437 |
gr.Markdown(f"""
|
|
|
|
| 440 |
|
| 441 |
### About This Model
|
| 442 |
This fraud detection system uses advanced machine learning algorithms to assess transaction risk.
|
| 443 |
+
The model processes transactions with the same field structure as the API endpoints.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
+
### Features
|
| 446 |
+
- Single transaction analysis
|
| 447 |
+
- Batch CSV processing
|
| 448 |
+
- Real-time risk assessment
|
| 449 |
+
- API-compatible field structure
|
| 450 |
""")
|
| 451 |
|
| 452 |
# Launch the app
|