padmanabhbosamia committed on
Commit
aa1323d
·
verified ·
1 Parent(s): 1402c39

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +375 -0
app.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import pickle
import tempfile

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
9
+
10
# Module-level state shared by the Gradio callbacks in this file.
# `model` is the fitted classifier; it stays None until training succeeds.
# `feature_columns` is the list of feature names the model was trained on.
model = None
feature_columns = None
13
+
14
def load_and_train_model(csv_file):
    """Train a Random Forest fraud classifier from an uploaded CSV file.

    The ``fraud`` column is the target; every other column except
    ``transaction_id`` is used as a feature. The data is split 80/20
    (stratified on the target), the classifier is fitted on the training
    part and evaluated on the test part. Only after all of that succeeds
    are the module-level ``model`` and ``feature_columns`` replaced.

    Args:
        csv_file: The uploaded file. Either an object exposing its path as
            ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.

    Returns:
        A Markdown string: the training report on success, otherwise a
        message starting with "❌ Error:".
    """
    global model, feature_columns

    if csv_file is None:
        return "❌ Error: Please upload a training CSV file first."

    try:
        # Accept both upload objects (path in `.name`) and plain path strings.
        df = pd.read_csv(getattr(csv_file, "name", csv_file))

        if 'fraud' not in df.columns:
            return "❌ Error: CSV must contain a 'fraud' column as the target variable."

        # Separate features and target.
        X = df.drop(['fraud', 'transaction_id'], axis=1, errors='ignore')
        y = df['fraud']

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        # Fit into a local variable: assigning the global before `fit` would
        # publish an unfitted classifier if training raised.
        classifier = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=10)
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        # Pin the label order so the matrix is always 2x2, even when the
        # test split happens to contain a single class.
        cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
        tn, fp, fn, tp = (int(count) for count in cm.ravel())

        report = [
            "✅ **Model Trained Successfully!**",
            "",
            "📊 **Dataset Information:**",
            f"- Total Samples: {len(df)}",
            f"- Training Samples: {len(X_train)}",
            f"- Test Samples: {len(X_test)}",
            f"- Fraud Cases: {y.sum()} ({y.mean()*100:.1f}%)",
            f"- Legitimate Cases: {(y==0).sum()} ({(y==0).mean()*100:.1f}%)",
            "",
            "📈 **Model Performance:**",
            f"- **Accuracy:** {accuracy*100:.2f}%",
            f"- **Precision:** {precision*100:.2f}%",
            f"- **Recall:** {recall*100:.2f}%",
            f"- **F1-Score:** {f1*100:.2f}%",
            "",
            "🔢 **Confusion Matrix:**",
            "```",
            "                  Predicted",
            "               Fraud  Legitimate",
            f"Actual Fraud  {tp:>6}  {fn:>10}",
            f"       Legit  {fp:>6}  {tn:>10}",
            "```",
            "",
            "**Key Metrics Explained:**",
            f"- **True Positives (TP):** {tp} frauds correctly detected",
            f"- **False Negatives (FN):** {fn} frauds missed (⚠️ costly!)",
            f"- **False Positives (FP):** {fp} false alarms",
            f"- **True Negatives (TN):** {tn} legitimate transactions correctly identified",
            "",
            "✅ Model is ready! You can now make predictions below.",
        ]

        # Publish the model and its column list together, only on success.
        model = classifier
        feature_columns = X.columns.tolist()

        return "\n".join(report)

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"❌ Error: {e}"
88
+
89
+
90
def predict_single_transaction(amount, hour, dist_home, dist_last, ratio_median,
                               repeat_retailer, used_chip, used_pin, online_order):
    """Classify one transaction with the trained model and describe the result.

    Args:
        amount: Transaction amount in dollars.
        hour: Hour of day of the transaction (0-23).
        dist_home: Distance from home in km.
        dist_last: Distance from the last transaction in km.
        ratio_median: Ratio of the amount to the median purchase.
        repeat_retailer: Truthy if the retailer was used before.
        used_chip: Truthy if the chip was used.
        used_pin: Truthy if a PIN was entered.
        online_order: Truthy if the transaction was an online order.

    Returns:
        A ``(details, label)`` tuple of Markdown strings. ``details`` is the
        full report (or a warning/error message) and ``label`` is the short
        verdict; ``label`` is ``""`` when no prediction could be made.
    """
    if model is None:
        return "⚠️ Please upload and train a model first!", ""

    # A cleared numeric field arrives as None; report it plainly instead of
    # letting the number formatting below fail with a cryptic message.
    if any(value is None for value in (amount, hour, dist_home, dist_last, ratio_median)):
        return "❌ Error: Please fill in all numeric fields.", ""

    try:
        # Flags are sent as 0/1 (presumably matching the training CSV encoding).
        input_data = pd.DataFrame({
            'transaction_amount': [amount],
            'transaction_hour': [hour],
            'distance_from_home_km': [dist_home],
            'distance_from_last_transaction_km': [dist_last],
            'ratio_to_median_purchase': [ratio_median],
            'repeat_retailer': [int(bool(repeat_retailer))],
            'used_chip': [int(bool(used_chip))],
            'used_pin': [int(bool(used_pin))],
            'online_order': [int(bool(online_order))],
        })

        # Present exactly the columns the model was trained on, in that order.
        if feature_columns is not None:
            missing = [col for col in feature_columns if col not in input_data.columns]
            if missing:
                return (
                    "❌ Error: The model was trained with columns this form "
                    f"does not provide: {', '.join(missing)}",
                    "",
                )
            input_data = input_data[feature_columns]

        prediction = model.predict(input_data)[0]
        probability = model.predict_proba(input_data)[0]

        # predict_proba columns follow model.classes_, so look the classes up
        # by label rather than assuming both are present at fixed positions.
        class_probability = dict(zip(model.classes_, probability))
        fraud_prob = class_probability.get(1, 0.0) * 100
        legit_prob = class_probability.get(0, 0.0) * 100

        if prediction == 1:
            result = "🚨 **FRAUD DETECTED**"
            confidence = fraud_prob
        else:
            result = "✅ **LEGITIMATE TRANSACTION**"
            confidence = legit_prob

        if fraud_prob > 70:
            risk_level = "🔴 HIGH"
        elif fraud_prob > 40:
            risk_level = "🟡 MEDIUM"
        else:
            risk_level = "🟢 LOW"

        details = "\n".join([
            result,
            "",
            f"**Confidence:** {confidence:.1f}%",
            "",
            "**Probability Distribution:**",
            f"- Fraud: {fraud_prob:.1f}%",
            f"- Legitimate: {legit_prob:.1f}%",
            "",
            f"**Risk Level:** {risk_level}",
            "",
            "**Transaction Details:**",
            f"- Amount: ${amount:,.2f}",
            f"- Time: {int(hour):02d}:00",
            f"- Distance from home: {dist_home:.1f} km",
            f"- Distance from last transaction: {dist_last:.1f} km",
            f"- Ratio to median: {ratio_median:.2f}x",
            f"- Repeat retailer: {'Yes' if repeat_retailer else 'No'}",
            f"- Used chip: {'Yes' if used_chip else 'No'}",
            f"- Used PIN: {'Yes' if used_pin else 'No'}",
            f"- Online order: {'Yes' if online_order else 'No'}",
        ])

        return details, result

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"❌ Error: {e}", ""
156
+
157
+
158
def predict_batch(csv_file):
    """Score every transaction in an uploaded CSV with the trained model.

    The ``fraud`` and ``transaction_id`` columns are never used as
    features. When the training column list is known, the CSV must contain
    all of those columns; they are selected in training order. The
    original rows are written back out with three extra columns:
    ``predicted_fraud``, ``fraud_probability`` (percent) and ``confidence``
    (highest class probability, percent). If the CSV contains a ``fraud``
    column, accuracy, precision, recall and F1 are reported as well.

    Args:
        csv_file: The uploaded file. Either an object exposing its path as
            ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.

    Returns:
        A ``(output_path, message)`` tuple. ``output_path`` is the path of
        the results CSV, or ``None`` on failure; ``message`` is a Markdown
        summary or an error/warning message.
    """
    if model is None:
        return None, "⚠️ Please upload and train a model first!"

    if csv_file is None:
        return None, "❌ Error: Please upload a CSV file with transactions to score."

    try:
        # Accept both upload objects (path in `.name`) and plain path strings.
        df = pd.read_csv(getattr(csv_file, "name", csv_file))

        if df.empty:
            return None, "❌ Error: The CSV file contains no transactions."

        X = df.drop(['fraud', 'transaction_id'], axis=1, errors='ignore')

        # Present exactly the columns the model was trained on, in that order.
        if feature_columns is not None:
            missing = [col for col in feature_columns if col not in X.columns]
            if missing:
                return None, (
                    "❌ Error: CSV is missing columns the model was trained on: "
                    f"{', '.join(missing)}"
                )
            X = X[feature_columns]

        predictions = model.predict(X)
        probabilities = model.predict_proba(X)

        # predict_proba columns follow model.classes_; find the fraud class
        # by label instead of assuming it is always column 1.
        class_labels = list(model.classes_)
        if 1 in class_labels:
            fraud_probability = probabilities[:, class_labels.index(1)]
        else:
            fraud_probability = np.zeros(len(df))

        results_df = df.copy()
        results_df['predicted_fraud'] = predictions
        results_df['fraud_probability'] = fraud_probability * 100
        results_df['confidence'] = np.max(probabilities, axis=1) * 100

        summary = [
            "📊 **Batch Prediction Results:**",
            "",
            f"- Total Transactions: {len(df)}",
            f"- Predicted Fraud: {predictions.sum()} ({predictions.mean()*100:.1f}%)",
            f"- Predicted Legitimate: {(predictions==0).sum()} ({(predictions==0).mean()*100:.1f}%)",
        ]

        # Ground truth is optional; report metrics only when it is present.
        if 'fraud' in df.columns:
            actual = df['fraud']
            accuracy = accuracy_score(actual, predictions)
            precision = precision_score(actual, predictions, zero_division=0)
            recall = recall_score(actual, predictions, zero_division=0)
            f1 = f1_score(actual, predictions, zero_division=0)
            summary += [
                "",
                "📈 **Performance Metrics:**",
                f"- Accuracy: {accuracy*100:.2f}%",
                f"- Precision: {precision*100:.2f}%",
                f"- Recall: {recall*100:.2f}%",
                f"- F1-Score: {f1*100:.2f}%",
            ]

        summary += ["", "✅ Results are ready for download!"]

        # One temp file per request: a fixed file name would be overwritten
        # by any other request running at the same time.
        with tempfile.NamedTemporaryFile(
            mode="w", prefix="predictions_output_", suffix=".csv",
            delete=False, newline="", encoding="utf-8",
        ) as handle:
            results_df.to_csv(handle, index=False)

        return handle.name, "\n".join(summary)

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return None, f"❌ Error: {e}"
225
+
226
+
227
# Create Gradio interface: four tabs (train, single prediction, batch
# prediction, about) wired to the callback functions defined in this file.
# NOTE(review): the Markdown literals are indented along with the code;
# Gradio's Markdown component is expected to strip the common indentation —
# confirm for the Gradio version this app is pinned to.
with gr.Blocks(title="Fraud Detection System") as demo:

    gr.Markdown("""
    # 💳 Credit Card Fraud Detection System
    ### AI Infinity Programme | TalentSprint

    This interactive demo allows you to train a fraud detection model and make predictions on credit card transactions.

    **How to use:**
    1. Upload your training dataset (CSV file)
    2. Train the model
    3. Make single predictions or batch predictions
    """)

    with gr.Tab("📤 Upload & Train Model"):
        gr.Markdown("### Step 1: Upload Training Dataset")
        gr.Markdown("Upload a CSV file containing transaction data with a 'fraud' column (0 = legitimate, 1 = fraud)")

        with gr.Row():
            with gr.Column():
                train_file = gr.File(label="Upload Training CSV", file_types=[".csv"])
                train_button = gr.Button("🚀 Train Model", variant="primary", size="lg")

            with gr.Column():
                train_output = gr.Markdown(label="Training Results")

        # Training replaces the module-level model used by the other tabs.
        train_button.click(
            fn=load_and_train_model,
            inputs=[train_file],
            outputs=[train_output]
        )

        gr.Markdown("""
        ---
        **Expected CSV format:**
        - `transaction_amount`, `transaction_hour`, `distance_from_home_km`, `distance_from_last_transaction_km`,
        - `ratio_to_median_purchase`, `repeat_retailer`, `used_chip`, `used_pin`, `online_order`, `fraud`
        """)

    with gr.Tab("🔍 Single Prediction"):
        gr.Markdown("### Test Individual Transactions")
        gr.Markdown("Enter transaction details to check if it's fraudulent")

        with gr.Row():
            with gr.Column():
                amount = gr.Number(label="Transaction Amount ($)", value=100)
                hour = gr.Slider(0, 23, step=1, label="Transaction Hour (0-23)", value=14)
                dist_home = gr.Number(label="Distance from Home (km)", value=10)
                dist_last = gr.Number(label="Distance from Last Transaction (km)", value=5)
                ratio_median = gr.Number(label="Ratio to Median Purchase", value=1.0)

            with gr.Column():
                repeat_retailer = gr.Checkbox(label="Repeat Retailer", value=True)
                used_chip = gr.Checkbox(label="Used Chip", value=True)
                used_pin = gr.Checkbox(label="Used PIN", value=True)
                online_order = gr.Checkbox(label="Online Order", value=False)

        predict_button = gr.Button("🔮 Predict", variant="primary", size="lg")

        with gr.Row():
            prediction_output = gr.Markdown(label="Prediction Result")
            prediction_label = gr.Markdown(label="Quick Result")

        # The input order must match predict_single_transaction's parameters.
        predict_button.click(
            fn=predict_single_transaction,
            inputs=[amount, hour, dist_home, dist_last, ratio_median,
                    repeat_retailer, used_chip, used_pin, online_order],
            outputs=[prediction_output, prediction_label]
        )

        gr.Markdown("---")
        gr.Markdown("### 🧪 Quick Test Scenarios")

        with gr.Row():
            gr.Markdown("""
            **Scenario 1: Obvious Fraud**
            - Amount: $4500, Hour: 3, Dist Home: 800km
            - New retailer, no chip/PIN, online
            """)
            gr.Markdown("""
            **Scenario 2: Normal Transaction**
            - Amount: $45, Hour: 14, Dist Home: 5km
            - Repeat retailer, chip + PIN, in-person
            """)
            gr.Markdown("""
            **Scenario 3: Suspicious**
            - Amount: $350, Hour: 22, Dist Home: 60km
            - New retailer, chip but no PIN, online
            """)

    with gr.Tab("📊 Batch Predictions"):
        gr.Markdown("### Upload Multiple Transactions")
        gr.Markdown("Upload a CSV file with multiple transactions to get predictions for all of them")

        with gr.Row():
            with gr.Column():
                batch_file = gr.File(label="Upload Test CSV", file_types=[".csv"])
                batch_button = gr.Button("📈 Predict Batch", variant="primary", size="lg")

            with gr.Column():
                batch_output = gr.Markdown(label="Batch Results")
                download_file = gr.File(label="Download Results CSV")

        # predict_batch returns (file path, summary), hence this output order.
        batch_button.click(
            fn=predict_batch,
            inputs=[batch_file],
            outputs=[download_file, batch_output]
        )

    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## About This Demo

        This fraud detection system uses a **Random Forest Classifier** to identify potentially fraudulent credit card transactions.

        ### Features Used:
        1. **transaction_amount**: Transaction value in dollars
        2. **transaction_hour**: Hour of day (0-23)
        3. **distance_from_home_km**: Distance from cardholder's home
        4. **distance_from_last_transaction_km**: Distance from previous transaction
        5. **ratio_to_median_purchase**: Ratio compared to typical spending
        6. **repeat_retailer**: Whether customer used this merchant before
        7. **used_chip**: Whether chip card was used
        8. **used_pin**: Whether PIN was entered
        9. **online_order**: Whether transaction was online

        ### Model Performance:
        The model is trained to maximize **recall** (catching frauds) while maintaining reasonable **precision** (avoiding false alarms).

        ### Important Metrics:
        - **Precision**: Of flagged transactions, how many are actually fraud?
        - **Recall**: Of all frauds, how many do we catch?
        - **F1-Score**: Balance between precision and recall

        ### Business Impact:
        - **False Negative (missed fraud)**: Very costly - customer loses money
        - **False Positive (false alarm)**: Moderately costly - customer inconvenience

        ---

        **Created for:** AI Infinity Programme | TalentSprint
        **Target Audience:** Software engineers transitioning to AI roles
        **Educational Purpose:** Understanding classification, metrics, and business logic
        """)
372
+
373
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()