sourize committed on
Commit
7448648
·
1 Parent(s): fb38b84

Initial Commit

Browse files
app.py CHANGED
@@ -1,49 +1,442 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- # Page config
4
  st.set_page_config(
5
- page_title="๐Ÿ” FraudLens: An E-Commerce Fraud Detection",
6
- page_icon="๐Ÿ›ก๏ธ",
7
  layout="wide",
8
  initial_sidebar_state="expanded"
9
  )
10
 
11
- # Custom CSS with enhanced styling
12
  st.markdown("""
13
  <style>
14
  .main-header {
15
  font-size: 2.5rem;
 
16
  color: #1f77b4;
17
  text-align: center;
18
  margin-bottom: 2rem;
19
- text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
21
  </style>
22
  """, unsafe_allow_html=True)
23
 
24
- st.markdown('''
25
- <div class="main-header">
26
- ๐Ÿ›ก๏ธ E-Commerce Fraud Detection System
27
- </div>
28
- ''', unsafe_allow_html=True)
 
 
 
 
 
 
29
 
30
- st.markdown("""
31
- ## ๐ŸŽฏ Welcome to FraudLens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- This is the landing page for the E-Commerce Fraud Detection System. Use the navigation menu on the left to explore:
34
- - ๐Ÿ” Fraud Detection
35
- - ๐Ÿ“Š Model Insights
36
- - ๐Ÿ“ˆ Analytics Dashboard
 
 
 
37
 
38
- Protecting businesses and customers from fraudulent transactions with Explainable AI.
39
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- st.markdown("---")
42
- st.markdown("""
43
- <div style='text-align: center; padding: 30px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
44
- border-radius: 15px; color: white; margin-top: 2rem;'>
45
- <h3>๐Ÿ›ก๏ธ E-Commerce Fraud Detection System</h3>
46
- <p>Powered by <strong>Explainable AI</strong> โ€ข Built by <strong>FraudNinjas</strong> for Security</p>
47
- <p><em>Protecting businesses and customers from fraudulent transactions</em></p>
48
- </div>
49
- """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import shap
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ from datetime import datetime, time
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ from plotly.subplots import make_subplots
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
 
15
+ # Configure Streamlit page
16
  st.set_page_config(
17
+ page_title="Fraud Detection System",
18
+ page_icon="๐Ÿ”",
19
  layout="wide",
20
  initial_sidebar_state="expanded"
21
  )
22
 
23
+ # Custom CSS for better styling
24
  st.markdown("""
25
  <style>
26
  .main-header {
27
  font-size: 2.5rem;
28
+ font-weight: bold;
29
  color: #1f77b4;
30
  text-align: center;
31
  margin-bottom: 2rem;
32
+ }
33
+ .prediction-box {
34
+ padding: 1rem;
35
+ border-radius: 10px;
36
+ margin: 1rem 0;
37
+ text-align: center;
38
+ font-size: 1.2rem;
39
+ font-weight: bold;
40
+ }
41
+ .fraud-box {
42
+ background-color: #ffebee;
43
+ border: 2px solid #f44336;
44
+ color: #c62828;
45
+ }
46
+ .legitimate-box {
47
+ background-color: #e8f5e8;
48
+ border: 2px solid #4caf50;
49
+ color: #2e7d32;
50
+ }
51
+ .metric-card {
52
+ background-color: #f8f9fa;
53
+ padding: 1rem;
54
+ border-radius: 8px;
55
+ border-left: 4px solid #1f77b4;
56
  }
57
  </style>
58
  """, unsafe_allow_html=True)
59
 
60
@st.cache_resource
def load_models():
    """Load the pickled classifier and the customer-location label encoder.

    Both artifacts are read with ``joblib.load`` from file names given
    relative to the current working directory. The function is wrapped in
    ``st.cache_resource``.

    Returns:
        tuple: ``(model, label_encoder)`` as loaded from
        ``lightgbm_model.pkl`` and ``customer_loc.pkl``.

    If either file is missing, two error messages are rendered in the app
    and the current script run is halted via ``st.stop()``.
    """
    try:
        classifier = joblib.load('lightgbm_model.pkl')
        location_encoder = joblib.load('customer_loc.pkl')
    except FileNotFoundError as missing:
        # Tell the user what is missing, then abort this run of the script.
        st.error(f"Model files not found: {missing}")
        st.error("Please ensure 'lightgbm_model.pkl' and 'customer_loc.pkl' are in the same directory as this app.")
        st.stop()
    else:
        return classifier, location_encoder
71
 
72
def preprocess_data(transaction_amount, transaction_date, customer_age,
                    customer_location, account_age_days, transaction_time,
                    label_encoder):
    """Turn raw form inputs into the single-row feature frame fed to the model.

    Args:
        transaction_amount: Transaction amount, passed through unchanged.
        transaction_date: Anything ``pd.Timestamp`` accepts (e.g. a
            ``datetime.date``); converted to a day count since 1899-12-30
            (the Excel serial-date epoch).
        customer_age: Customer age, passed through unchanged.
        customer_location: Location label to encode with ``label_encoder``.
        account_age_days: Account age in days, passed through unchanged.
        transaction_time: Object exposing ``hour``, ``minute`` and ``second``
            (e.g. ``datetime.time``); converted to a fraction of a day.
        label_encoder: Object whose ``transform([label])`` returns the integer
            code for a known label and raises ``ValueError`` for an unknown one.

    Returns:
        pandas.DataFrame: One row with the columns ``Transaction Amount``,
        ``Transaction Date``, ``Customer Age``, ``Account Age Days``,
        ``Transaction Time`` and ``Customer Location Encoded``, in that order.
        NOTE(review): the column order presumably mirrors the training data --
        confirm against the training pipeline.
    """
    # Days since 1899-12-30, i.e. the Excel serial date of the transaction.
    reference_date = pd.Timestamp("1899-12-30")
    transaction_date_serial = (pd.Timestamp(transaction_date) - reference_date).days

    # Time of day as a fraction of 24h (86400 seconds): 12:00:00 -> 0.5.
    seconds_into_day = (transaction_time.hour * 3600
                        + transaction_time.minute * 60
                        + transaction_time.second)
    transaction_time_fraction = seconds_into_day / 86400

    # Try the label exactly as given first; if that fails and the label is a
    # string with stray surrounding whitespace (easy to introduce through the
    # free-text location box), retry with the whitespace stripped instead of
    # silently mis-encoding a known location.
    candidates = [customer_location]
    if isinstance(customer_location, str):
        stripped = customer_location.strip()
        if stripped != customer_location:
            candidates.append(stripped)

    location_encoded = None
    for candidate in candidates:
        try:
            location_encoded = label_encoder.transform([candidate])[0]
            break
        except ValueError:
            continue

    if location_encoded is None:
        # Unknown label: fall back to encoded class 0. This is simply the first
        # encoder class, NOT the most frequent location in the training data.
        st.warning(f"Location '{customer_location}' not seen during training. Using fallback encoding.")
        location_encoded = 0  # Default fallback

    # Single-row frame; column names and order are what the model receives.
    features = pd.DataFrame({
        'Transaction Amount': [transaction_amount],
        'Transaction Date': [transaction_date_serial],
        'Customer Age': [customer_age],
        'Account Age Days': [account_age_days],
        'Transaction Time': [transaction_time_fraction],
        'Customer Location Encoded': [location_encoded]
    })

    return features
107
 
108
@st.cache_data
def get_sample_locations(_label_encoder):
    """Return up to the first 100 location labels known to the encoder.

    Args:
        _label_encoder: Object exposing a sliceable ``classes_`` attribute
            (e.g. a fitted scikit-learn ``LabelEncoder``). The leading
            underscore in the parameter name tells ``st.cache_data`` not to
            hash this argument.

    Returns:
        list: ``classes_[:100]`` as a plain list, or ``["Unknown Location"]``
        when the encoder has no usable ``classes_`` attribute.
    """
    try:
        return list(_label_encoder.classes_[:100])  # First 100 locations for dropdown
    except (AttributeError, TypeError):
        # Missing or unsliceable ``classes_`` (e.g. encoder is None or not
        # fitted). A bare ``except`` here would also swallow BaseException
        # subclasses such as KeyboardInterrupt, so only the failures that
        # mean "no classes available" are caught.
        return ["Unknown Location"]
115
 
116
def create_shap_plots(model, features, feature_names):
    """Compute SHAP values for ``features`` and select the fraud-class slice.

    Despite its name this function does not draw anything; it only prepares
    the numbers that the plotting code consumes.

    Args:
        model: Tree model accepted by ``shap.TreeExplainer``.
        features: Feature rows to explain (the app passes a one-row DataFrame).
        feature_names: Unused; kept so existing callers keep working.

    Returns:
        tuple: ``(shap_values_fraud, expected_value, explainer)`` where
        ``shap_values_fraud`` holds the contributions towards class 1 (fraud)
        and ``expected_value`` is the matching base value. When the explainer
        reports one base value per class, the class-1 entry is returned as a
        plain ``float``.
    """
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(features)
    # Read the base value only AFTER shap_values(): the original code did the
    # same, and some explainers update expected_value during that call.
    expected_value = explainer.expected_value

    if isinstance(shap_values, list):
        # One array per class: take class 1 (fraud).
        shap_values_fraud = shap_values[1]
    elif np.ndim(shap_values) == 3:
        # Single (samples, features, classes) array: take the class-1 slice.
        shap_values_fraud = shap_values[:, :, 1]
    else:
        # Already a single (samples, features) array for the positive class.
        shap_values_fraud = shap_values

    if np.ndim(expected_value) > 0:
        # Per-class base values: pick class 1 when present, otherwise the only
        # entry, and return a scalar so callers can format it with ``:.3f``.
        per_class = np.ravel(expected_value)
        expected_value = float(per_class[1] if per_class.size > 1 else per_class[0])

    return shap_values_fraud, expected_value, explainer
132
 
133
def plot_shap_waterfall(shap_values, expected_value, features, feature_names):
    """Build a bar chart of the running SHAP total for one prediction.

    The chart has one bar for the base value, one bar per feature showing the
    cumulative total after adding that feature's SHAP value, and a final bar
    for the resulting score. All bars are drawn from zero.

    Args:
        shap_values: Indexable whose first element holds the per-feature SHAP
            values of the row being explained.
        expected_value: Base value the contributions are added to.
        features: DataFrame whose first row holds the raw feature values.
        feature_names: Names used for the per-feature tick labels.

    Returns:
        matplotlib.figure.Figure: The figure containing the chart.
    """
    figure, axis = plt.subplots(figsize=(10, 6))

    # Only the first (single) row is explained.
    row_values = features.iloc[0].values
    contributions = shap_values[0]

    # Bar 0: the base value.
    running_total = expected_value
    bar_heights = [expected_value]
    tick_labels = [f"Base Value\n{expected_value:.3f}"]
    bar_colors = ['gray']

    # One bar per feature, at the running total after its contribution.
    for name, contribution, raw_value in zip(feature_names, contributions, row_values):
        bar_heights.append(running_total + contribution)
        tick_labels.append(f"{name}\n{raw_value:.3f}\nSHAP: {contribution:.3f}")
        bar_colors.append('red' if contribution > 0 else 'blue')
        running_total += contribution

    # Base and feature bars occupy consecutive slots starting at 0.
    bar_positions = list(range(len(bar_heights)))

    # Last bar: the final score.
    bar_positions.append(len(feature_names) + 1)
    bar_heights.append(running_total)
    tick_labels.append(f"Final Score\n{running_total:.3f}")
    bar_colors.append('green' if running_total > 0 else 'orange')

    axis.bar(bar_positions, bar_heights, color=bar_colors, alpha=0.7)

    # Dashed connector from the top of each bar towards the next bar.
    for left, right, height in zip(bar_positions, bar_positions[1:], bar_heights):
        axis.plot([left + 0.4, right - 0.4], [height, height], 'k--', alpha=0.5)

    axis.set_xticks(bar_positions)
    axis.set_xticklabels(tick_labels, rotation=45, ha='right')
    axis.set_ylabel('SHAP Value Contribution')
    axis.set_title('SHAP Waterfall Plot - Feature Contributions to Fraud Prediction')
    axis.grid(True, alpha=0.3)
    axis.axhline(y=0, color='black', linestyle='-', alpha=0.5)

    plt.tight_layout()
    return figure
185
+
186
def main():
    """Render the fraud-detection page.

    Flow:
      1. Load the model and label encoder and build the location dropdown.
      2. Collect the transaction details from sidebar widgets.
      3. When the sidebar button is pressed: preprocess the inputs, run
         ``predict_proba``/``predict``, show the verdict, the entered values,
         a risk gauge, and SHAP-based explanations.
      4. Otherwise show an introductory "about" view.

    NOTE(review): the nesting below was reconstructed from the statement
    structure (``with``/``try``/``else`` pairing); confirm the layout of the
    SHAP section against the running app.
    """
    st.markdown('<div class="main-header">๐Ÿ” Fraud Detection System</div>', unsafe_allow_html=True)

    # Load models
    model, label_encoder = load_models()

    # Get sample locations for dropdown
    sample_locations = get_sample_locations(label_encoder)

    # Sidebar for input
    st.sidebar.header("Transaction Details")

    # Input fields
    transaction_amount = st.sidebar.number_input(
        "Transaction Amount ($)",
        min_value=0.01,
        max_value=10000.0,
        value=100.0,
        step=0.01,
        help="Enter the transaction amount in dollars"
    )

    transaction_date = st.sidebar.date_input(
        "Transaction Date",
        value=datetime.now().date(),
        help="Select the date of the transaction"
    )

    transaction_time = st.sidebar.time_input(
        "Transaction Time",
        value=time(12, 0),
        help="Select the time of the transaction"
    )

    customer_age = st.sidebar.slider(
        "Customer Age",
        min_value=16,
        max_value=100,
        value=35,
        help="Customer's age in years"
    )

    account_age_days = st.sidebar.number_input(
        "Account Age (Days)",
        min_value=1,
        max_value=3650,
        value=365,
        help="How many days old is the customer's account"
    )

    customer_location = st.sidebar.selectbox(
        "Customer Location",
        options=sample_locations,
        index=0,
        help="Select customer's location"
    )

    # Alternative: Allow manual location input
    manual_location = st.sidebar.text_input(
        "Or enter location manually:",
        placeholder="Type location name",
        help="Enter a specific location if not in dropdown"
    )

    # A non-empty manual entry takes precedence over the dropdown choice.
    if manual_location:
        customer_location = manual_location

    # Prediction button
    if st.sidebar.button("๐Ÿ” Analyze Transaction", type="primary"):

        # Preprocess data
        features = preprocess_data(
            transaction_amount, transaction_date, customer_age,
            customer_location, account_age_days, transaction_time, label_encoder
        )

        # Make prediction; index 1 of predict_proba is used as the fraud class.
        prediction_proba = model.predict_proba(features)[0]
        prediction = model.predict(features)[0]
        fraud_probability = prediction_proba[1]

        # Main content area
        col1, col2 = st.columns([2, 1])

        with col1:
            # Display prediction
            if prediction == 1:
                st.markdown(
                    f'<div class="prediction-box fraud-box">โš ๏ธ FRAUD DETECTED<br>'
                    f'Fraud Probability: {fraud_probability:.2%}</div>',
                    unsafe_allow_html=True
                )
            else:
                st.markdown(
                    f'<div class="prediction-box legitimate-box">โœ… LEGITIMATE TRANSACTION<br>'
                    f'Fraud Probability: {fraud_probability:.2%}</div>',
                    unsafe_allow_html=True
                )

            # Feature importance
            st.subheader("๐Ÿ“Š Feature Analysis")

            # Display input features (human-readable, not the encoded values)
            st.write("**Input Features:**")
            feature_df = pd.DataFrame({
                'Feature': ['Transaction Amount', 'Transaction Date', 'Customer Age',
                            'Account Age Days', 'Transaction Time', 'Customer Location'],
                'Value': [f"${transaction_amount:.2f}", str(transaction_date), f"{customer_age} years",
                          f"{account_age_days} days", str(transaction_time), customer_location]
            })
            st.dataframe(feature_df, use_container_width=True)

        with col2:
            # Risk metrics
            st.subheader("๐ŸŽฏ Risk Metrics")

            # Risk level: bucket the fraud probability into four tiers.
            if fraud_probability >= 0.8:
                risk_level = "๐Ÿ”ด Very High"
                risk_color = "#f44336"
            elif fraud_probability >= 0.6:
                risk_level = "๐ŸŸ  High"
                risk_color = "#ff9800"
            elif fraud_probability >= 0.4:
                risk_level = "๐ŸŸก Medium"
                risk_color = "#ffc107"
            else:
                risk_level = "๐ŸŸข Low"
                risk_color = "#4caf50"

            st.markdown(f"**Risk Level:** {risk_level}")
            # Confidence is the probability of whichever class is more likely.
            st.markdown(f"**Confidence:** {max(fraud_probability, 1-fraud_probability):.2%}")

            # Probability gauge
            fig_gauge = go.Figure(go.Indicator(
                mode="gauge+number+delta",
                value=fraud_probability * 100,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': "Fraud Probability (%)"},
                delta={'reference': 50},
                gauge={
                    'axis': {'range': [None, 100]},
                    'bar': {'color': risk_color},
                    'steps': [
                        {'range': [0, 25], 'color': "lightgray"},
                        {'range': [25, 50], 'color': "gray"},
                        {'range': [50, 75], 'color': "orange"},
                        {'range': [75, 100], 'color': "red"}
                    ],
                    'threshold': {
                        'line': {'color': "red", 'width': 4},
                        'thickness': 0.75,
                        'value': 90
                    }
                }
            ))
            fig_gauge.update_layout(height=300)
            st.plotly_chart(fig_gauge, use_container_width=True)

        # SHAP Explanations
        st.subheader("๐ŸŽฏ AI Explanation (SHAP)")

        try:
            # Create SHAP plots
            shap_values, expected_value, explainer = create_shap_plots(
                model, features, features.columns.tolist()
            )

            # Feature importance plot
            shap_left, shap_right = st.columns(2)

            with shap_left:
                st.write("**Feature Contributions:**")

                # Create a simple bar plot of SHAP values
                shap_df = pd.DataFrame({
                    'Feature': features.columns,
                    'SHAP Value': shap_values[0],
                    'Feature Value': features.iloc[0].values
                })
                # Order rows by absolute contribution, largest first.
                shap_df = shap_df.reindex(shap_df['SHAP Value'].abs().sort_values(ascending=False).index)

                fig_bar = px.bar(
                    shap_df,
                    x='SHAP Value',
                    y='Feature',
                    orientation='h',
                    color='SHAP Value',
                    color_continuous_scale=['blue', 'white', 'red'],
                    title="SHAP Feature Importance"
                )
                fig_bar.update_layout(height=400)
                st.plotly_chart(fig_bar, use_container_width=True)

            with shap_right:
                st.write("**Waterfall Explanation:**")

                # Create waterfall plot
                fig_waterfall = plot_shap_waterfall(
                    shap_values, expected_value, features, features.columns.tolist()
                )
                try:
                    st.pyplot(fig_waterfall)
                finally:
                    # Figures created through pyplot stay registered (and in
                    # memory) until explicitly closed; without this, every
                    # analysis leaks one figure for the life of the server.
                    plt.close(fig_waterfall)

            # Explanation text
            st.write("**How to interpret SHAP values:**")
            st.write("- ๐Ÿ”ด **Positive values (red)**: Push prediction towards FRAUD")
            st.write("- ๐Ÿ”ต **Negative values (blue)**: Push prediction towards LEGITIMATE")
            st.write("- **Magnitude**: Larger absolute values have stronger influence")

            # Top contributing features (shap_df is already sorted by |SHAP|)
            top_features = shap_df.head(3)
            st.write("**Top 3 Contributing Features:**")
            for _, row in top_features.iterrows():
                direction = "towards FRAUD" if row['SHAP Value'] > 0 else "towards LEGITIMATE"
                st.write(f"โ€ข **{row['Feature']}** (value: {row['Feature Value']:.3f}): "
                         f"Contributes {abs(row['SHAP Value']):.3f} {direction}")

        except Exception as e:
            # UI boundary: explanations are optional, so report the failure
            # instead of taking the already-rendered prediction down with it.
            st.error(f"Error generating SHAP explanations: {str(e)}")
            st.write("SHAP explanations are not available, but the prediction is still valid.")

    else:
        # Default view when no prediction is made
        st.info("๐Ÿ‘ˆ Enter transaction details in the sidebar and click 'Analyze Transaction' to get started!")

        # Show some information about the model
        st.subheader("โ„น๏ธ About This System")

        col1, col2, col3 = st.columns(3)

        with col1:
            st.markdown("""
            **๐Ÿค– Model Information**
            - Algorithm: LightGBM
            - Training: SMOTE-balanced data
            - Features: 6 key transaction attributes
            """)

        with col2:
            st.markdown("""
            **๐ŸŽฏ Key Features**
            - Transaction amount & timing
            - Customer demographics
            - Account age
            - Geographic location
            """)

        with col3:
            st.markdown("""
            **๐Ÿ” AI Explainability**
            - SHAP values for interpretability
            - Feature contribution analysis
            - Waterfall explanations
            """)
440
+
441
+ if __name__ == "__main__":
442
+ main()
pages/๐Ÿ  Home.py DELETED
@@ -1,58 +0,0 @@
1
- import streamlit as st
2
- st.set_page_config(page_title="๐Ÿ  Home")
3
-
4
- def home_page():
5
- col1, col2 = st.columns([2, 1])
6
- with col1:
7
- st.markdown("""
8
- ## ๐ŸŽฏ Welcome to Our AI-Powered Fraud Detection System
9
- Our cutting-edge system combines **Machine Learning** and **Explainable AI** to protect
10
- e-commerce platforms from fraudulent transactions.
11
- ### โœจ Key Features
12
- ๐Ÿค– **Advanced ML Model**: LightGBM classifier with 75.2% ROC AUC
13
- ๐Ÿ” **Real-time Detection**: Instant fraud risk assessment
14
- ๐Ÿ“Š **Explainable AI**: SHAP-based feature impact analysis
15
- ๐Ÿ“ˆ **Interactive Dashboard**: Comprehensive analytics and insights
16
- ๐Ÿ›ก๏ธ **Robust Security**: Production-ready fraud prevention
17
- ### ๐Ÿš€ How It Works
18
- 1. **Input Transaction Data**: Enter transaction details
19
- 2. **AI Analysis**: Our model processes 6 key features
20
- 3. **Risk Assessment**: Get instant fraud probability
21
- 4. **Explanation**: Understand why decisions are made
22
- """)
23
- with col2:
24
- st.markdown("### ๐Ÿ“Š Model Performance")
25
- metrics = [
26
- ("๐ŸŽฏ ROC AUC Score", "75.2%", "#1f77b4"),
27
- ("๐ŸŽฒ Precision", "19.0%", "#ff7f0e"),
28
- ("๐Ÿ” Recall", "58.0%", "#2ca02c"),
29
- ("โš–๏ธ F1-Score", "29.0%", "#d62728")
30
- ]
31
- for metric, value, color in metrics:
32
- st.markdown(f"""
33
- <div style=\"background: linear-gradient(135deg, {color}20, {color}10);
34
- padding: 1rem; border-radius: 10px; margin: 0.5rem 0;
35
- border-left: 4px solid {color};\">
36
- <h4 style=\"margin: 0; color: {color};\">{metric}</h4>
37
- <h2 style=\"margin: 0; color: {color};\">{value}</h2>
38
- </div>
39
- """, unsafe_allow_html=True)
40
- st.markdown("---")
41
- st.markdown("### ๐Ÿ”ง Technology Stack")
42
- tech_cols = st.columns(4)
43
- technologies = [
44
- ("๐Ÿค– Machine Learning", "LightGBM\nScikit-learn\nIMBLEARN"),
45
- ("๐Ÿง  Explainable AI", "SHAP\nDiCE-ML\nSurrogate Models"),
46
- ("๐Ÿ“Š Visualization", "Plotly\nMatplotlib\nSeaborn"),
47
- ("๐Ÿš€ Deployment", "Streamlit\nPandas\nNumPy")
48
- ]
49
- for i, (title, tech) in enumerate(technologies):
50
- with tech_cols[i]:
51
- st.markdown(f"""
52
- <div style=\"text-align: center; padding: 1rem; background: #f0f4ff; border-radius: 10px; height: 120px; color: #222;\">
53
- <h4>{title}</h4>
54
- <p style=\"font-size: 0.9em; color: #333;\">{tech}</p>
55
- </div>
56
- """, unsafe_allow_html=True)
57
-
58
- home_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/๐Ÿ“ˆ Analytics Dashboard.py DELETED
@@ -1,111 +0,0 @@
1
- import streamlit as st
2
- st.set_page_config(page_title="๐Ÿ“ˆ Analytics Dashboard")
3
- import numpy as np
4
- import pandas as pd
5
- import plotly.express as px
6
-
7
- def analytics_dashboard_page():
8
- st.markdown("## ๐Ÿ“ˆ Fraud Analytics Dashboard")
9
- st.markdown("*Simulated data for demonstration purposes*")
10
- np.random.seed(42)
11
- n_transactions = 5000
12
- dates = pd.date_range('2024-01-01', periods=n_transactions, freq='15min')
13
- hours = dates.hour
14
- fraud_prob_base = 0.02
15
- fraud_prob_night = np.where((hours < 6) | (hours > 22), 0.08, fraud_prob_base)
16
- transactions = pd.DataFrame({
17
- 'Date': dates,
18
- 'Hour': hours,
19
- 'Amount': np.random.lognormal(4, 1.2, n_transactions),
20
- 'Customer_Age': np.random.normal(40, 15, n_transactions).clip(18, 80),
21
- 'Account_Age': np.random.exponential(200, n_transactions).clip(1, 2000),
22
- 'Is_Fraud': np.random.binomial(1, fraud_prob_night)
23
- })
24
- high_amount_mask = transactions['Amount'] > transactions['Amount'].quantile(0.9)
25
- transactions.loc[high_amount_mask, 'Is_Fraud'] = np.random.binomial(
26
- 1, 0.15, high_amount_mask.sum()
27
- )
28
- total_transactions = len(transactions)
29
- fraud_count = transactions['Is_Fraud'].sum()
30
- fraud_rate = fraud_count / total_transactions
31
- total_amount = transactions['Amount'].sum()
32
- fraud_amount = transactions[transactions['Is_Fraud'] == 1]['Amount'].sum()
33
- kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)
34
- with kpi_col1:
35
- st.metric("๐Ÿ“Š Total Transactions", f"{total_transactions:,}")
36
- with kpi_col2:
37
- st.metric("๐Ÿšจ Fraud Cases", f"{fraud_count:,}", delta=f"{fraud_rate:.2%}")
38
- with kpi_col3:
39
- st.metric("๐Ÿ’ฐ Total Volume", f"โ‚น{total_amount:,.0f}")
40
- with kpi_col4:
41
- st.metric("โš ๏ธ Fraud Loss", f"โ‚น{fraud_amount:,.0f}")
42
- st.markdown("---")
43
- st.markdown("### โฐ Time-Based Fraud Patterns")
44
- col1, col2 = st.columns(2)
45
- with col1:
46
- hourly_stats = transactions.groupby('Hour').agg({
47
- 'Is_Fraud': ['count', 'sum', 'mean']
48
- }).round(3)
49
- hourly_stats.columns = ['Total_Transactions', 'Fraud_Count', 'Fraud_Rate']
50
- hourly_stats = hourly_stats.reset_index()
51
- st.write("hourly_stats", hourly_stats) # Debug output
52
- fig = px.line(
53
- hourly_stats,
54
- x='Hour',
55
- y='Fraud_Rate',
56
- title="Fraud Rate by Hour of Day",
57
- markers=True
58
- )
59
- fig.update_layout(height=400)
60
- st.plotly_chart(fig, use_container_width=True)
61
- with col2:
62
- fig = px.bar(
63
- hourly_stats,
64
- x='Hour',
65
- y='Total_Transactions',
66
- title="Transaction Volume by Hour",
67
- color='Fraud_Rate',
68
- color_continuous_scale='reds'
69
- )
70
- fig.update_layout(height=400)
71
- st.plotly_chart(fig, use_container_width=True)
72
- st.markdown("### ๐Ÿ’ต Transaction Amount Analysis")
73
- col1, col2 = st.columns(2)
74
- with col1:
75
- st.write("transactions", transactions) # Debug output
76
- fig = px.histogram(
77
- transactions,
78
- x='Amount',
79
- color='Is_Fraud',
80
- nbins=50,
81
- title="Transaction Amount Distribution",
82
- labels={'Is_Fraud': 'Fraud Status'},
83
- marginal="box"
84
- )
85
- st.plotly_chart(fig, use_container_width=True)
86
- with col2:
87
- fig = px.box(
88
- transactions,
89
- x='Is_Fraud',
90
- y='Amount',
91
- title="Amount Distribution: Normal vs Fraud",
92
- labels={'Is_Fraud': 'Fraud Status', 'Amount': 'Transaction Amount (โ‚น)'}
93
- )
94
- st.plotly_chart(fig, use_container_width=True)
95
- st.markdown("### ๐Ÿ‘ฅ Customer Demographics & Fraud Risk")
96
- age_bins = pd.cut(transactions['Customer_Age'], bins=6, precision=0)
97
- age_stats = transactions.groupby(age_bins)['Is_Fraud'].agg(['count', 'sum', 'mean']).reset_index()
98
- age_stats.columns = ['Age_Group', 'Total', 'Fraud_Count', 'Fraud_Rate']
99
- age_stats['Age_Group'] = age_stats['Age_Group'].astype(str) # Fix Interval serialization
100
- st.write("age_stats", age_stats) # Debug output
101
- fig = px.bar(
102
- age_stats,
103
- x='Age_Group',
104
- y='Fraud_Rate',
105
- title="Fraud Rate by Customer Age Group",
106
- color='Fraud_Rate',
107
- color_continuous_scale='reds'
108
- )
109
- st.plotly_chart(fig, use_container_width=True)
110
-
111
- analytics_dashboard_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/๐Ÿ“Š Model Insights.py DELETED
@@ -1,78 +0,0 @@
1
- import streamlit as st
2
- st.set_page_config(page_title="๐Ÿ“Š Model Insights")
3
- import numpy as np
4
- import pandas as pd
5
- import plotly.express as px
6
- import plotly.graph_objects as go
7
- from utils.model_utils import create_demo_model
8
-
9
- def model_insights_page():
10
- # Use a demo model for feature importance display
11
- model, _ = create_demo_model()
12
- st.markdown("## ๐Ÿ“Š Model Performance & Insights")
13
- feature_names = ['Transaction Amount', 'Transaction Date', 'Customer Age',
14
- 'Account Age Days', 'Transaction Time', 'Customer Location Encoded']
15
- try:
16
- if hasattr(model, 'feature_importances_'):
17
- importance = model.feature_importances_
18
- else:
19
- importance = np.random.rand(len(feature_names))
20
- importance = importance / importance.sum()
21
- importance_df = pd.DataFrame({
22
- 'Feature': feature_names,
23
- 'Importance': importance
24
- }).sort_values('Importance', ascending=True)
25
- col1, col2 = st.columns(2)
26
- with col1:
27
- st.markdown("### ๐ŸŽฏ Feature Importance Ranking")
28
- fig = px.bar(
29
- importance_df,
30
- x='Importance',
31
- y='Feature',
32
- orientation='h',
33
- color='Importance',
34
- color_continuous_scale='blues',
35
- title="How Much Each Feature Influences Predictions"
36
- )
37
- fig.update_layout(height=400)
38
- st.plotly_chart(fig, use_container_width=True)
39
- with col2:
40
- st.markdown("### ๐Ÿฅง Feature Distribution")
41
- fig = px.pie(
42
- importance_df,
43
- values='Importance',
44
- names='Feature',
45
- title="Relative Feature Importance",
46
- color_discrete_sequence=px.colors.qualitative.Set3
47
- )
48
- st.plotly_chart(fig, use_container_width=True)
49
- except Exception as e:
50
- st.error(f"Error displaying feature importance: {e}")
51
- st.markdown("---")
52
- st.markdown("### ๐Ÿ† Model Performance Dashboard")
53
- metrics_data = {
54
- 'Metric': ['ROC AUC', 'Precision (Fraud)', 'Recall (Fraud)', 'F1-Score (Fraud)', 'Accuracy'],
55
- 'Score': [0.752, 0.19, 0.58, 0.29, 0.86],
56
- 'Benchmark': [0.7, 0.2, 0.5, 0.3, 0.85]
57
- }
58
- col1, col2 = st.columns(2)
59
- with col1:
60
- fig = go.Figure()
61
- fig.add_trace(go.Bar(name='Our Model', x=metrics_data['Metric'], y=metrics_data['Score']))
62
- fig.add_trace(go.Bar(name='Industry Benchmark', x=metrics_data['Metric'], y=metrics_data['Benchmark']))
63
- fig.update_layout(
64
- title="Model vs Industry Benchmark",
65
- barmode='group',
66
- height=400
67
- )
68
- st.plotly_chart(fig, use_container_width=True)
69
- with col2:
70
- for metric, score, benchmark in zip(metrics_data['Metric'], metrics_data['Score'], metrics_data['Benchmark']):
71
- delta = score - benchmark
72
- st.metric(
73
- metric,
74
- f"{score:.3f}",
75
- delta=f"{delta:+.3f}" if delta != 0 else None
76
- )
77
-
78
- model_insights_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/๐Ÿ” Fraud Detection.py DELETED
@@ -1,130 +0,0 @@
1
- import streamlit as st
2
- st.set_page_config(page_title="๐Ÿ” Fraud Detection")
3
- import pandas as pd
4
- from utils.preprocessing import get_location_options, preprocess_inputs
5
- from utils.visualization import create_risk_gauge, explain_prediction_simple
6
- from utils.model_utils import load_models
7
-
8
- def fraud_detection_page():
9
- model, label_encoder, models_loaded = load_models()
10
- st.markdown("## ๐Ÿ” Real-Time Fraud Detection")
11
- st.markdown("Enter transaction details below to get instant fraud risk assessment:")
12
- location_options = get_location_options(label_encoder)
13
- with st.form("fraud_detection_form", clear_on_submit=False):
14
- col1, col2, col3 = st.columns(3)
15
- with col1:
16
- st.markdown("### ๐Ÿ’ฐ Transaction Info")
17
- amount = st.number_input(
18
- "Transaction Amount (โ‚น)",
19
- min_value=0.01, max_value=50000.0, value=150.0, step=0.01,
20
- help="Enter the transaction amount in INR"
21
- )
22
- date = st.date_input(
23
- "Transaction Date",
24
- value=pd.Timestamp.now().date(),
25
- help="Select the date of transaction"
26
- )
27
- with col2:
28
- st.markdown("### ๐Ÿ‘ค Customer Info")
29
- age = st.number_input(
30
- "Customer Age",
31
- min_value=16, max_value=100, value=35, step=1,
32
- help="Age of the customer making the transaction"
33
- )
34
- account_age = st.number_input(
35
- "Account Age (Days)",
36
- min_value=1, max_value=3650, value=180, step=1,
37
- help="How many days since account was created"
38
- )
39
- with col3:
40
- st.markdown("### ๐Ÿ“ Additional Details")
41
- trans_time = st.time_input(
42
- "Transaction Time",
43
- value=pd.Timestamp.now().time().replace(hour=14, minute=30, second=0, microsecond=0),
44
- help="Time when transaction occurred"
45
- )
46
- location = st.selectbox(
47
- "Customer Location",
48
- options=location_options,
49
- index=0,
50
- help="Select customer's location"
51
- )
52
- st.markdown("---")
53
- col1, col2, col3 = st.columns([1, 2, 1])
54
- with col2:
55
- submitted = st.form_submit_button("๐Ÿš€ Analyze Transaction", use_container_width=True)
56
- if submitted:
57
- processed_data = preprocess_inputs(amount, date, age, account_age, trans_time, location, label_encoder)
58
- if processed_data is not None:
59
- input_df = pd.DataFrame([processed_data])
60
- prediction_proba = model.predict_proba(input_df)[0]
61
- prediction = model.predict(input_df)[0]
62
- fraud_probability = prediction_proba[1] if len(prediction_proba) > 1 else prediction_proba[0]
63
- st.markdown("---")
64
- st.markdown("## ๐ŸŽฏ Analysis Results")
65
- col1, col2 = st.columns([1, 2])
66
- with col1:
67
- fig_gauge = create_risk_gauge(fraud_probability)
68
- st.plotly_chart(fig_gauge, use_container_width=True)
69
- with col2:
70
- if prediction == 1 or fraud_probability > 0.5:
71
- st.markdown(f'''
72
- <div class="fraud-alert">
73
- <h2>โš ๏ธ HIGH FRAUD RISK</h2>
74
- <h3>Risk Score: {fraud_probability:.1%}</h3>
75
- <p><strong>Recommendation:</strong> Review this transaction carefully</p>
76
- <p>Multiple fraud indicators detected</p>
77
- </div>
78
- ''', unsafe_allow_html=True)
79
- else:
80
- st.markdown(f'''
81
- <div class="safe-alert">
82
- <h2>โœ… LOW FRAUD RISK</h2>
83
- <h3>Risk Score: {fraud_probability:.1%}</h3>
84
- <p><strong>Recommendation:</strong> Transaction appears legitimate</p>
85
- <p>Normal transaction pattern detected</p>
86
- </div>
87
- ''', unsafe_allow_html=True)
88
- st.markdown("---")
89
- st.markdown("### ๐Ÿ”ฌ AI Explanation - Why This Decision?")
90
- explanation_df = explain_prediction_simple(model, processed_data)
91
- if explanation_df is not None:
92
- col1, col2 = st.columns(2)
93
- with col1:
94
- st.markdown("#### ๐Ÿ“Š Feature Impact Analysis")
95
- for _, row in explanation_df.head(4).iterrows():
96
- importance_pct = row['Importance'] * 100
97
- st.markdown(f"""
98
- <div class=\"feature-impact\">
99
- <strong>{row['Feature']}</strong><br>
100
- Value: {row['Value']:.3f} | Impact: {importance_pct:.1f}%
101
- </div>
102
- """, unsafe_allow_html=True)
103
- with col2:
104
- st.markdown("#### ๐Ÿ“ˆ Feature Importance Chart")
105
- import plotly.express as px
106
- fig = px.bar(
107
- explanation_df.head(6),
108
- x='Importance',
109
- y='Feature',
110
- orientation='h',
111
- color='Importance',
112
- color_continuous_scale='viridis',
113
- title="Feature Contribution to Decision"
114
- )
115
- fig.update_layout(height=400, showlegend=False)
116
- st.plotly_chart(fig, use_container_width=True)
117
- st.markdown("---")
118
- st.markdown("### ๐Ÿ“‹ Transaction Summary")
119
- summary_data = {
120
- "Field": ["Amount", "Date", "Customer Age", "Account Age", "Time", "Location"],
121
- "Value": [f"โ‚น{amount:.2f}", str(date), f"{age} years", f"{account_age} days",
122
- str(trans_time), location]
123
- }
124
- summary_df = pd.DataFrame(summary_data)
125
- # Indent the table by placing it in the center column of a 3-column layout
126
- col1, col2, col3 = st.columns([1,2,1])
127
- with col2:
128
- st.table(summary_df)
129
-
130
- fraud_detection_page()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
- streamlit
2
- pandas
3
- numpy
4
- scikit-learn
5
- lightgbm
6
- matplotlib
7
- seaborn
8
- plotly
9
- joblib
10
- shap
11
- dice-ml
 
1
+ streamlit==1.28.1
2
+ pandas==2.0.3
3
+ numpy==1.24.3
4
+ joblib==1.3.2
5
+ shap==0.42.1
6
+ matplotlib==3.7.2
7
+ seaborn==0.12.2
8
+ plotly==5.17.0
9
+ lightgbm==4.1.0
10
+ scikit-learn==1.3.0
11
+ imbalanced-learn==0.11.0
utils/model_utils.py DELETED
@@ -1,33 +0,0 @@
1
- import joblib
2
- import streamlit as st
3
- import numpy as np
4
-
5
- def load_models():
6
- """Load the trained models and encoders with error handling"""
7
- try:
8
- model = joblib.load('lightgbm_model.pkl')
9
- label_encoder = joblib.load('customer_loc.pkl')
10
- return model, label_encoder, True
11
- except FileNotFoundError as e:
12
- st.error(f"⚠️ Model files not found: {e}")
13
- st.info("Please ensure 'lightgbm_model.pkl' and 'customer_loc.pkl' are in the app directory.")
14
- return None, None, False
15
-
16
- def create_demo_model():
17
- """Create a demo model when real model is not available"""
18
- from sklearn.ensemble import RandomForestClassifier
19
- from sklearn.preprocessing import LabelEncoder
20
- # Create dummy data
21
- np.random.seed(42)
22
- n_samples = 1000
23
- X_demo = np.random.randn(n_samples, 6)
24
- y_demo = np.random.choice([0, 1], n_samples, p=[0.95, 0.05])
25
- # Train demo model
26
- demo_model = RandomForestClassifier(n_estimators=10, random_state=42)
27
- demo_model.fit(X_demo, y_demo)
28
- # Create demo encoder
29
- demo_encoder = LabelEncoder()
30
- demo_locations = ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
31
- "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose"]
32
- demo_encoder.fit(demo_locations)
33
- return demo_model, demo_encoder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/preprocessing.py DELETED
@@ -1,34 +0,0 @@
1
- import pandas as pd
2
- import streamlit as st
3
-
4
- def get_location_options(label_encoder):
5
- try:
6
- location_classes = label_encoder.classes_
7
- return location_classes.tolist()
8
- except AttributeError:
9
- return ["Unknown"]
10
-
11
- def preprocess_inputs(amount, date, age, account_age, trans_time, location, label_encoder):
12
- """Enhanced preprocessing with better error handling"""
13
- try:
14
- excel_epoch = pd.Timestamp("1899-12-30")
15
- date_days = (pd.to_datetime(date) - excel_epoch).days
16
- time_fraction = (trans_time.hour * 3600 + trans_time.minute * 60 + trans_time.second) / 86400
17
- location_encoded = 0
18
- if label_encoder is not None:
19
- try:
20
- location_encoded = label_encoder.transform([location])[0]
21
- except ValueError:
22
- location_encoded = len(label_encoder.classes_) // 2
23
- st.warning(f"⚠️ Location '{location}' not in training data. Using fallback encoding.")
24
- return {
25
- 'Transaction Amount': float(amount),
26
- 'Transaction Date': int(date_days),
27
- 'Customer Age': int(age),
28
- 'Account Age Days': int(account_age),
29
- 'Transaction Time': float(time_fraction),
30
- 'Customer Location Encoded': int(location_encoded)
31
- }
32
- except Exception as e:
33
- st.error(f"Error in preprocessing: {e}")
34
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/visualization.py DELETED
@@ -1,47 +0,0 @@
1
- import plotly.graph_objects as go
2
- import plotly.express as px
3
- import pandas as pd
4
- import numpy as np
5
- import streamlit as st
6
-
7
- def create_risk_gauge(fraud_probability):
8
- """Create a risk gauge visualization"""
9
- fig = go.Figure(go.Indicator(
10
- mode = "gauge+number+delta",
11
- value = fraud_probability * 100,
12
- domain = {'x': [0, 1], 'y': [0, 1]},
13
- title = {'text': "Fraud Risk Score (%)"},
14
- delta = {'reference': 50},
15
- gauge = {
16
- 'axis': {'range': [None, 100]},
17
- 'bar': {'color': "darkblue"},
18
- 'steps': [
19
- {'range': [0, 25], 'color': "lightgreen"},
20
- {'range': [25, 50], 'color': "yellow"},
21
- {'range': [50, 75], 'color': "orange"},
22
- {'range': [75, 100], 'color': "red"}],
23
- 'threshold': {
24
- 'line': {'color': "red", 'width': 4},
25
- 'thickness': 0.75,
26
- 'value': 70}}))
27
- fig.update_layout(height=300)
28
- return fig
29
-
30
- def explain_prediction_simple(model, input_data):
31
- """Simple feature importance explanation"""
32
- try:
33
- feature_names = list(input_data.keys())
34
- if hasattr(model, 'feature_importances_'):
35
- importances = model.feature_importances_
36
- else:
37
- importances = np.random.rand(len(feature_names))
38
- importances = importances / importances.sum()
39
- explanation_df = pd.DataFrame({
40
- 'Feature': feature_names,
41
- 'Importance': importances,
42
- 'Value': [input_data[feat] for feat in feature_names]
43
- }).sort_values('Importance', ascending=False)
44
- return explanation_df
45
- except Exception as e:
46
- st.error(f"Error generating explanation: {e}")
47
- return None