LovnishVerma committed on
Commit
86265dd
·
verified ·
1 Parent(s): 92b8aef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -670
app.py CHANGED
@@ -6,258 +6,178 @@ import numpy as np
6
  from datetime import datetime, timedelta
7
 
8
  # ==========================================
9
- # PAGE CONFIGURATION
10
  # ==========================================
11
  st.set_page_config(
12
  page_title="Sentinel | UIDAI Fraud Detection",
13
- page_icon="🛡",
14
  layout="wide",
15
  initial_sidebar_state="expanded"
16
  )
17
 
18
  # ==========================================
19
- # PROFESSIONAL STYLING
20
  # ==========================================
21
  st.markdown("""
22
  <style>
 
23
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
24
 
25
- .main {
26
- background-color: #f5f7fa;
 
 
27
  font-family: 'Inter', sans-serif;
28
  }
29
-
30
- .stMetric {
31
- background-color: white;
32
- padding: 18px;
33
- border-radius: 6px;
34
- border-left: 4px solid #3b82f6;
35
- box-shadow: 0 1px 3px rgba(0,0,0,0.08);
 
 
36
  }
37
-
38
- .stMetric label {
39
- font-weight: 500 !important;
40
- color: #64748b !important;
41
- font-size: 14px !important;
42
  }
43
 
44
- .stMetric [data-testid="stMetricValue"] {
45
- font-size: 28px !important;
46
- font-weight: 600 !important;
47
- color: #1e293b !important;
48
  }
49
-
50
- h1 {
51
- color: #1e293b;
52
- font-weight: 700;
53
- font-size: 32px;
54
  }
55
-
56
- h2 {
57
- color: #334155;
58
- font-weight: 600;
59
- font-size: 24px;
60
  }
61
-
62
- h3 {
63
- color: #475569;
64
- font-weight: 600;
65
- font-size: 18px;
66
  }
67
-
 
68
  [data-testid="stSidebar"] {
69
- background-color: #1e3a5f;
70
  }
71
-
72
  [data-testid="stSidebar"] * {
73
- color: #e2e8f0 !important;
74
  }
75
-
76
- [data-testid="stSidebar"] .stSelectbox label,
77
- [data-testid="stSidebar"] .stMultiSelect label,
78
- [data-testid="stSidebar"] .stCheckbox label {
79
- font-weight: 500 !important;
80
- font-size: 14px !important;
 
 
 
81
  }
82
 
 
83
  .status-badge {
84
- display: inline-block;
 
85
  padding: 4px 12px;
86
- border-radius: 12px;
87
  font-size: 12px;
88
  font-weight: 600;
89
- letter-spacing: 0.3px;
90
- }
91
-
92
- .status-critical {
93
- background-color: #fee2e2;
94
- color: #991b1b;
95
- }
96
-
97
- .status-high {
98
- background-color: #fed7aa;
99
- color: #9a3412;
100
- }
101
-
102
- .status-normal {
103
- background-color: #d1fae5;
104
- color: #065f46;
105
- }
106
-
107
- .info-card {
108
- background-color: white;
109
- padding: 16px;
110
- border-radius: 6px;
111
- border-left: 3px solid #3b82f6;
112
- margin: 12px 0;
113
- box-shadow: 0 1px 3px rgba(0,0,0,0.08);
114
  }
 
 
 
115
 
116
- .info-card-warning {
117
- border-left-color: #f59e0b;
118
- }
119
-
120
- .info-card-danger {
121
- border-left-color: #ef4444;
122
- }
123
-
124
- .info-card-success {
125
- border-left-color: #10b981;
126
- }
127
-
128
- [data-testid="stDataFrame"] {
129
- border: 1px solid #e2e8f0;
130
- border-radius: 6px;
131
- }
132
-
133
- .stDownloadButton button {
134
- background-color: #3b82f6;
135
- color: white;
136
- border: none;
137
- padding: 8px 20px;
138
- border-radius: 6px;
139
- font-weight: 500;
140
- font-size: 14px;
141
- transition: background-color 0.2s;
142
- }
143
-
144
- .stDownloadButton button:hover {
145
- background-color: #2563eb;
146
- }
147
-
148
- .stTabs [data-baseweb="tab-list"] {
149
- gap: 4px;
150
- }
151
-
152
- .stTabs [data-baseweb="tab"] {
153
- background-color: white;
154
- border-radius: 6px 6px 0 0;
155
- padding: 10px 20px;
156
- font-weight: 500;
157
- color: #64748b;
158
- }
159
-
160
- .stTabs [aria-selected="true"] {
161
- background-color: #3b82f6;
162
- color: white;
163
- }
164
-
165
- .metric-delta-positive {
166
- color: #10b981 !important;
167
- }
168
-
169
- .metric-delta-negative {
170
- color: #ef4444 !important;
171
  }
172
  </style>
173
  """, unsafe_allow_html=True)
174
 
175
  # ==========================================
176
- # DATA LOADING
177
  # ==========================================
178
  @st.cache_data
179
  def load_data():
180
  try:
 
181
  df = pd.read_csv('analyzed_aadhaar_data.csv')
182
-
183
- if 'date' in df.columns:
184
- df['date'] = pd.to_datetime(df['date'])
185
- df['month'] = df['date'].dt.month
186
- df['year'] = df['date'].dt.year
187
- df['day_name'] = df['date'].dt.day_name()
188
-
189
- # Geographic coordinates (production: integrate with pincode database)
190
- np.random.seed(42)
191
- df['lat'] = np.random.uniform(20.0, 28.0, size=len(df))
192
- df['lon'] = np.random.uniform(77.0, 85.0, size=len(df))
193
-
194
- # Risk categorization
195
- df['risk_category'] = pd.cut(
196
- df['RISK_SCORE'],
197
- bins=[0, 50, 70, 85, 100],
198
- labels=['Low', 'Medium', 'High', 'Critical']
199
- )
200
-
201
- return df
202
  except FileNotFoundError:
203
- st.error("Data file not found. Please ensure 'analyzed_aadhaar_data.csv' exists.")
204
- return pd.DataFrame()
205
-
206
@st.cache_data
def calculate_insights(df):
    """Summarize risk statistics for the whole dataset.

    Args:
        df: DataFrame with ``RISK_SCORE``, ``is_weekend`` and ``state`` columns.

    Returns:
        A dict with these keys:
            * ``total_cases`` -- number of rows.
            * ``critical_cases`` -- rows with ``RISK_SCORE`` > 85.
            * ``high_risk_cases`` -- rows with ``RISK_SCORE`` > 70 (this count
              includes the critical rows).
            * ``avg_risk`` / ``max_risk`` -- mean and max of ``RISK_SCORE``.
            * ``weekend_fraud_rate`` -- weekend rows with ``RISK_SCORE`` > 70,
              as a percentage of ALL rows.
            * ``top_state`` -- state with the highest mean ``RISK_SCORE``.

        For an empty frame the counts are 0, the averages are NaN and
        ``top_state`` is ``'N/A'``.
    """
    total = len(df)
    if total == 0:
        # One guard for every field: an empty frame may have no columns at
        # all, in which case any df['RISK_SCORE'] lookup would raise KeyError.
        return {
            'total_cases': 0,
            'critical_cases': 0,
            'high_risk_cases': 0,
            'avg_risk': float('nan'),
            'max_risk': float('nan'),
            'weekend_fraud_rate': 0,
            'top_state': 'N/A',
        }

    risk = df['RISK_SCORE']
    is_high = risk > 70
    weekend_high = (df['is_weekend'] == 1) & is_high

    # Summing boolean masks counts matches without building filtered copies.
    return {
        'total_cases': total,
        'critical_cases': int((risk > 85).sum()),
        'high_risk_cases': int(is_high.sum()),
        'avg_risk': risk.mean(),
        'max_risk': risk.max(),
        'weekend_fraud_rate': int(weekend_high.sum()) / total * 100,
        'top_state': df.groupby('state')['RISK_SCORE'].mean().idxmax(),
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
- # ==========================================
220
- # LOAD DATA
221
- # ==========================================
222
  df = load_data()
223
 
224
- if df.empty:
225
- st.error("No data available. Please check the data file.")
226
- st.stop()
227
-
228
- insights = calculate_insights(df)
229
-
230
  # ==========================================
231
- # SIDEBAR FILTERS
232
  # ==========================================
233
  with st.sidebar:
234
- st.image("https://upload.wikimedia.org/wikipedia/en/c/cf/Aadhaar_Logo.svg", width=140)
235
- st.title("Control Panel")
236
  st.markdown("---")
237
 
238
- # Date Range
239
- st.subheader("Date Range")
240
- if 'date' in df.columns and not df['date'].isna().all():
241
- date_range = st.date_input(
242
- "Select Period",
243
- value=(df['date'].min(), df['date'].max()),
244
- min_value=df['date'].min(),
245
- max_value=df['date'].max()
246
- )
247
- if len(date_range) == 2:
248
- filtered_df = df[(df['date'] >= pd.Timestamp(date_range[0])) &
249
- (df['date'] <= pd.Timestamp(date_range[1]))]
250
- else:
251
- filtered_df = df
252
  else:
253
- filtered_df = df
 
 
 
254
 
 
 
 
255
  st.markdown("---")
256
 
257
- # Risk Level
258
- st.subheader("Risk Level")
259
  risk_filter = st.multiselect(
260
- "Categories",
261
  options=['Low', 'Medium', 'High', 'Critical'],
262
  default=['High', 'Critical']
263
  )
@@ -266,523 +186,167 @@ with st.sidebar:
266
  filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
267
 
268
  st.markdown("---")
269
-
270
- # Geographic
271
- st.subheader("Location")
272
- state_list = ['All'] + sorted(filtered_df['state'].unique().tolist())
273
- selected_state = st.selectbox("State", state_list)
274
-
275
- if selected_state != 'All':
276
- filtered_df = filtered_df[filtered_df['state'] == selected_state]
277
- district_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
278
- else:
279
- district_list = ['All']
280
-
281
- selected_district = st.selectbox("District", district_list)
282
-
283
- if selected_district != 'All':
284
- filtered_df = filtered_df[filtered_df['district'] == selected_district]
285
-
286
- st.markdown("---")
287
-
288
- # Weekend Filter
289
- show_weekend_only = st.checkbox("Weekend Activity Only", value=False)
290
- if show_weekend_only:
291
- filtered_df = filtered_df[filtered_df['is_weekend'] == 1]
292
-
293
- st.markdown("---")
294
-
295
- # Session Info
296
- st.markdown(f"""
297
- <div style='background: rgba(255,255,255,0.1); padding: 12px; border-radius: 6px; font-size: 13px;'>
298
- <strong>User:</strong> Vigilance Officer<br>
299
- <strong>Session:</strong> UIDAI_4571<br>
300
- <strong>Time:</strong> {datetime.now().strftime("%H:%M:%S")}<br>
301
- <strong>Filters Active:</strong> {len([f for f in [selected_state, selected_district, risk_filter, show_weekend_only] if f not in ['All', False, []]])}
302
- </div>
303
- """, unsafe_allow_html=True)
304
 
305
  # ==========================================
306
- # HEADER
307
  # ==========================================
308
  col1, col2 = st.columns([3, 1])
309
-
310
  with col1:
311
- st.title("Project Sentinel")
312
- st.markdown("**Context-Aware Fraud Detection for Aadhaar Enrolment Centers**")
313
 
314
  with col2:
315
- if insights['critical_cases'] > 0:
316
- st.markdown(f"""
317
- <div class='status-badge status-critical' style='font-size: 14px; padding: 8px 16px;'>
318
- {insights['critical_cases']} Critical Alerts
319
- </div>
320
- """, unsafe_allow_html=True)
321
- else:
322
- st.markdown("""
323
- <div class='status-badge status-normal' style='font-size: 14px; padding: 8px 16px;'>
324
- ✓ System Normal
325
- </div>
326
- """, unsafe_allow_html=True)
327
 
328
  st.markdown("---")
329
 
330
- # ==========================================
331
  # KPI METRICS
332
- # ==========================================
333
- st.subheader("System Overview")
334
-
335
- kpi1, kpi2, kpi3, kpi4, kpi5, kpi6 = st.columns(6)
336
-
337
  total_centers = len(filtered_df)
338
- critical_alerts = len(filtered_df[filtered_df['RISK_SCORE'] > 85])
339
- high_risk_centers = len(filtered_df[filtered_df['RISK_SCORE'] > 70])
340
- avg_risk = filtered_df['RISK_SCORE'].mean()
341
- weekend_anomalies = len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])
342
- max_deviation = filtered_df['ratio_deviation'].max() if 'ratio_deviation' in filtered_df.columns else 0
343
 
344
- with kpi1:
345
- st.metric("Cases", f"{total_centers:,}", f"+{int(total_centers*0.08)}", delta_color="off")
 
 
346
 
347
- with kpi2:
348
- st.metric("Critical", f"{critical_alerts}", f"+{int(critical_alerts*0.15)}", delta_color="inverse")
349
-
350
- with kpi3:
351
- st.metric("High Risk", f"{high_risk_centers}", f"+{int(high_risk_centers*0.12)}", delta_color="inverse")
352
-
353
- with kpi4:
354
- st.metric("Avg Risk", f"{avg_risk:.1f}", f"{avg_risk - 65:.1f}", delta_color="inverse")
355
-
356
- with kpi5:
357
- st.metric("Weekend", f"{weekend_anomalies}", "Unauthorized", delta_color="off")
358
-
359
- with kpi6:
360
- st.metric("Max Dev", f"{max_deviation:.2f}", "From baseline", delta_color="off")
361
-
362
- st.markdown("---")
363
 
364
  # ==========================================
365
- # TABS
366
  # ==========================================
367
- tab1, tab2, tab3, tab4 = st.tabs(["Geographic Analysis", "Pattern Detection", "Priority Cases", "Analytics"])
368
-
369
- # ==========================================
370
- # TAB 1: GEOGRAPHIC
371
- # ==========================================
372
- with tab1:
373
- col_map1, col_map2 = st.columns([2, 1])
374
-
375
- with col_map1:
376
- st.subheader("Risk Distribution Map")
377
-
378
- map_fig = px.scatter_mapbox(
379
- filtered_df,
380
- lat="lat",
381
- lon="lon",
382
- color="RISK_SCORE",
383
- size="total_activity",
384
- hover_name="pincode",
385
- hover_data={
386
- "district": True,
387
- "enrol_adult": True,
388
- "ratio_deviation": ':.2f',
389
- "lat": False,
390
- "lon": False,
391
- "total_activity": True
392
- },
393
- color_continuous_scale=["#10b981", "#fbbf24", "#f59e0b", "#ef4444"],
394
- zoom=4 if selected_state == 'All' else 6,
395
- height=550,
396
- mapbox_style="carto-positron"
397
- )
398
-
399
- map_fig.update_layout(
400
- margin={"r":0,"t":0,"l":0,"b":0},
401
- coloraxis_colorbar=dict(title="Risk Score", thickness=15, len=200)
402
- )
403
-
404
- st.plotly_chart(map_fig, use_container_width=True)
405
-
406
- with col_map2:
407
- st.subheader("Top Risk Locations")
408
-
409
- if selected_state == 'All':
410
- top_locations = filtered_df.groupby('state')['RISK_SCORE'].agg(['mean', 'count']).sort_values('mean', ascending=False).head(5)
411
- location_type = "States"
412
  else:
413
- top_locations = filtered_df.groupby('district')['RISK_SCORE'].agg(['mean', 'count']).sort_values('mean', ascending=False).head(5)
414
- location_type = "Districts"
415
-
416
- for idx, (location, row) in enumerate(top_locations.iterrows(), 1):
417
- risk_score = row['mean']
418
- count = int(row['count'])
419
-
420
- if risk_score > 85:
421
- badge_class = "status-critical"
422
- indicator = ""
423
- elif risk_score > 70:
424
- badge_class = "status-high"
425
- indicator = "●"
426
- else:
427
- badge_class = "status-normal"
428
- indicator = "●"
429
-
430
- st.markdown(f"""
431
- <div class='info-card'>
432
- <div style='display: flex; justify-content: space-between; align-items: center;'>
433
- <div>
434
- <span style='font-weight: 600; font-size: 16px;'>{idx}. {location}</span><br>
435
- <span style='color: #64748b; font-size: 13px;'>Risk: {risk_score:.1f} | Cases: {count}</span>
436
- </div>
437
- <span class='status-badge {badge_class}'>{indicator}</span>
438
- </div>
439
  </div>
440
- """, unsafe_allow_html=True)
441
-
442
- st.markdown("<br>", unsafe_allow_html=True)
443
-
444
- # Distribution pie
445
- risk_dist = filtered_df['risk_category'].value_counts()
446
-
447
- pie_fig = go.Figure(data=[go.Pie(
448
- labels=risk_dist.index,
449
- values=risk_dist.values,
450
- hole=0.4,
451
- marker_colors=['#10b981', '#fbbf24', '#f59e0b', '#ef4444']
452
- )])
453
-
454
- pie_fig.update_layout(
455
- title="Distribution by Category",
456
- height=280,
457
- showlegend=True,
458
- margin=dict(l=0, r=0, t=40, b=0)
459
- )
460
-
461
- st.plotly_chart(pie_fig, use_container_width=True)
462
 
463
- # ==========================================
464
- # TAB 2: PATTERNS
465
- # ==========================================
466
- with tab2:
467
- col_pattern1, col_pattern2 = st.columns(2)
468
-
469
- with col_pattern1:
470
- st.subheader("Deviation Analysis")
471
-
472
- scatter_fig = px.scatter(
473
- filtered_df,
474
- x="total_activity",
475
- y="ratio_deviation",
476
- color="RISK_SCORE",
477
- size="RISK_SCORE",
478
- hover_data=["pincode", "district", "state"],
479
- labels={
480
- "ratio_deviation": "Deviation from District Baseline",
481
- "total_activity": "Transaction Volume"
482
- },
483
- color_continuous_scale=["#10b981", "#fbbf24", "#f59e0b", "#ef4444"],
484
- height=420
485
- )
486
-
487
- scatter_fig.add_hline(y=0.2, line_dash="dash", line_color="#ef4444",
488
- annotation_text="Critical Threshold", annotation_position="top right")
489
- scatter_fig.add_hline(y=-0.2, line_dash="dash", line_color="#f59e0b",
490
- annotation_text="Negative Anomaly", annotation_position="bottom right")
491
-
492
- scatter_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')
493
- st.plotly_chart(scatter_fig, use_container_width=True)
494
-
495
- high_deviation = len(filtered_df[filtered_df['ratio_deviation'] > 0.2])
496
- st.info(f"**{high_deviation}** centers exceed critical deviation threshold")
497
-
498
- with col_pattern2:
499
- st.subheader("Risk Distribution")
500
-
501
- hist_fig = px.histogram(
502
- filtered_df,
503
- x="RISK_SCORE",
504
- nbins=30,
505
- color="risk_category",
506
- color_discrete_map={
507
- 'Low': '#10b981',
508
- 'Medium': '#fbbf24',
509
- 'High': '#f59e0b',
510
- 'Critical': '#ef4444'
511
- },
512
- height=420
513
- )
514
-
515
- hist_fig.update_layout(
516
- xaxis_title="Risk Score",
517
- yaxis_title="Frequency",
518
- showlegend=True,
519
- plot_bgcolor='white',
520
- paper_bgcolor='white'
521
- )
522
-
523
- st.plotly_chart(hist_fig, use_container_width=True)
524
-
525
- st.markdown(f"""
526
- **Statistical Summary**
527
- - Mean: {filtered_df['RISK_SCORE'].mean():.2f}
528
- - Median: {filtered_df['RISK_SCORE'].median():.2f}
529
- - Std Dev: {filtered_df['RISK_SCORE'].std():.2f}
530
- - 95th %ile: {filtered_df['RISK_SCORE'].quantile(0.95):.2f}
531
- """)
532
-
533
- # Time series
534
- if 'date' in filtered_df.columns and not filtered_df['date'].isna().all():
535
- st.markdown("---")
536
- st.subheader("Temporal Trends")
537
-
538
- daily_risk = filtered_df.groupby(filtered_df['date'].dt.date).agg({
539
- 'RISK_SCORE': 'mean',
540
- 'pincode': 'count'
541
- }).reset_index()
542
- daily_risk.columns = ['date', 'avg_risk', 'case_count']
543
-
544
- time_fig = go.Figure()
545
-
546
- time_fig.add_trace(go.Scatter(
547
- x=daily_risk['date'], y=daily_risk['avg_risk'],
548
- name='Average Risk', line=dict(color='#ef4444', width=2), yaxis='y'
549
- ))
550
-
551
- time_fig.add_trace(go.Bar(
552
- x=daily_risk['date'], y=daily_risk['case_count'],
553
- name='Case Volume', marker_color='#3b82f6', opacity=0.3, yaxis='y2'
554
- ))
555
-
556
- time_fig.update_layout(
557
- xaxis_title="Date",
558
- yaxis=dict(title="Average Risk Score", side='left'),
559
- yaxis2=dict(title="Case Count", overlaying='y', side='right'),
560
- hovermode='x unified',
561
- height=350,
562
- legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
563
- )
564
-
565
- st.plotly_chart(time_fig, use_container_width=True)
566
-
567
- # ==========================================
568
- # TAB 3: PRIORITY CASES
569
- # ==========================================
570
- with tab3:
571
- st.subheader("Priority Investigation List")
572
 
573
- threshold = st.slider("Minimum Risk Score", 0, 100, 75, 5)
574
-
575
- high_risk_df = filtered_df[filtered_df['RISK_SCORE'] > threshold].sort_values('RISK_SCORE', ascending=False)
576
-
577
- st.info(f"Displaying **{len(high_risk_df)}** cases with risk score above {threshold}")
578
-
579
- # Add status
580
- high_risk_df['Status'] = np.random.choice(
581
- ['Pending', 'Under Review', 'Verified', 'New'],
582
- size=len(high_risk_df),
583
- p=[0.5, 0.3, 0.1, 0.1]
584
- )
585
 
 
586
  st.dataframe(
587
- high_risk_df[[
588
- 'date', 'state', 'district', 'pincode',
589
- 'total_activity', 'enrol_adult', 'ratio_deviation',
590
- 'RISK_SCORE', 'Status'
591
- ]],
592
  column_config={
593
- "date": st.column_config.DateColumn("Date", format="DD-MM-YYYY"),
594
  "RISK_SCORE": st.column_config.ProgressColumn(
595
- "Risk Score", format="%d", min_value=0, max_value=100
 
 
 
 
596
  ),
597
- "total_activity": st.column_config.NumberColumn("Activity", format="%d"),
598
- "ratio_deviation": st.column_config.NumberColumn("Deviation", format="%.3f"),
599
- "Status": st.column_config.TextColumn("Status")
600
  },
601
  use_container_width=True,
602
  hide_index=True,
603
  height=400
604
  )
605
 
606
- # Export
607
- col_export1, col_export2, col_export3 = st.columns(3)
608
-
609
- with col_export1:
610
- csv = high_risk_df.to_csv(index=False).encode('utf-8')
611
  st.download_button(
612
- label="Download CSV",
613
- data=csv,
614
- file_name=f'sentinel_cases_{datetime.now().strftime("%Y%m%d")}.csv',
615
- mime='text/csv'
 
616
  )
617
-
618
- with col_export2:
619
- json_data = high_risk_df.to_json(orient='records', date_format='iso')
620
- st.download_button(
621
- label="Download JSON",
622
- data=json_data,
623
- file_name=f'sentinel_cases_{datetime.now().strftime("%Y%m%d")}.json',
624
- mime='application/json'
625
- )
626
-
627
- with col_export3:
628
- report = f"""SENTINEL FRAUD DETECTION REPORT
629
- Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
630
- ========================================
631
-
632
- SUMMARY:
633
- Total High-Risk Cases: {len(high_risk_df)}
634
- Critical Cases (>85): {len(high_risk_df[high_risk_df['RISK_SCORE'] > 85])}
635
- Average Risk Score: {high_risk_df['RISK_SCORE'].mean():.2f}
636
 
637
- TOP 10 PRIORITY TARGETS:
638
- """
639
- for idx, row in high_risk_df.head(10).iterrows():
640
- report += f"\n{row['pincode']} - {row['district']}, {row['state']} | Risk: {row['RISK_SCORE']:.1f}"
641
-
642
- st.download_button(
643
- label="Download Report",
644
- data=report,
645
- file_name=f'sentinel_report_{datetime.now().strftime("%Y%m%d")}.txt',
646
- mime='text/plain'
 
 
 
 
 
 
647
  )
 
 
 
648
 
649
- # ==========================================
650
- # TAB 4: ANALYTICS
651
- # ==========================================
652
- with tab4:
653
- col_adv1, col_adv2 = st.columns(2)
654
-
655
- with col_adv1:
656
- st.subheader("Feature Importance")
657
-
658
- features = ['Ratio Deviation', 'Weekend Activity', 'Mismatch Score', 'Volume']
659
- importance = [0.45, 0.25, 0.20, 0.10]
660
-
661
- importance_fig = go.Figure(go.Bar(
662
- x=importance, y=features, orientation='h',
663
- marker_color=['#ef4444', '#f59e0b', '#fbbf24', '#3b82f6']
664
- ))
665
-
666
- importance_fig.update_layout(
667
- xaxis_title="Importance", yaxis_title="", height=320, showlegend=False
668
  )
669
-
670
- st.plotly_chart(importance_fig, use_container_width=True)
671
-
672
- st.info("Ratio Deviation contributes 45% to fraud detection")
673
-
674
- with col_adv2:
675
- st.subheader("Model Performance")
676
-
677
- metrics_fig = go.Figure(go.Indicator(
678
- mode="gauge+number",
679
- value=87,
680
- domain={'x': [0, 1], 'y': [0, 1]},
681
- title={'text': "Overall Accuracy"},
682
- gauge={
683
- 'axis': {'range': [None, 100]},
684
- 'bar': {'color': "#3b82f6"},
685
- 'steps': [
686
- {'range': [0, 50], 'color': "#fee2e2"},
687
- {'range': [50, 75], 'color': "#fef3c7"},
688
- {'range': [75, 100], 'color': "#d1fae5"}
689
- ],
690
- 'threshold': {'line': {'color': "#ef4444", 'width': 4}, 'thickness': 0.75, 'value': 90}
691
- }
692
- ))
693
-
694
- metrics_fig.update_layout(height=320)
695
- st.plotly_chart(metrics_fig, use_container_width=True)
696
-
697
- st.markdown("---")
698
-
699
- # Correlation
700
- st.subheader("Feature Correlation Matrix")
701
-
702
- numeric_cols = ['RISK_SCORE', 'ratio_deviation', 'weekend_spike_score', 'mismatch_score', 'total_activity']
703
- available_cols = [col for col in numeric_cols if col in filtered_df.columns]
704
-
705
- if len(available_cols) > 1:
706
- corr_matrix = filtered_df[available_cols].corr()
707
-
708
- heatmap_fig = go.Figure(data=go.Heatmap(
709
- z=corr_matrix.values,
710
- x=corr_matrix.columns,
711
- y=corr_matrix.columns,
712
- colorscale='RdBu',
713
- zmid=0,
714
- text=corr_matrix.values,
715
- texttemplate='%{text:.2f}',
716
- textfont={"size": 10},
717
- colorbar=dict(title="Correlation")
718
- ))
719
-
720
- heatmap_fig.update_layout(height=380)
721
- st.plotly_chart(heatmap_fig, use_container_width=True)
722
-
723
- # Insights
724
- st.markdown("---")
725
- st.subheader("Key Findings")
726
-
727
- insight_col1, insight_col2, insight_col3 = st.columns(3)
728
-
729
- with insight_col1:
730
- st.markdown("""
731
- <div class='info-card info-card-warning'>
732
- <strong>Pattern Detected</strong><br>
733
- <span style='font-size: 13px; color: #64748b;'>Weekend fraud attempts increased 23% vs weekdays</span>
734
- </div>
735
- """, unsafe_allow_html=True)
736
-
737
- with insight_col2:
738
- st.markdown(f"""
739
- <div class='info-card info-card-danger'>
740
- <strong>High Risk Alert</strong><br>
741
- <span style='font-size: 13px; color: #64748b;'>{insights['top_state']} shows highest anomaly concentration</span>
742
- </div>
743
- """, unsafe_allow_html=True)
744
-
745
- with insight_col3:
746
- st.markdown(f"""
747
- <div class='info-card info-card-success'>
748
- <strong>System Status</strong><br>
749
- <span style='font-size: 13px; color: #64748b;'>Model confidence: 87% | Updated: {datetime.now().strftime('%H:%M')}</span>
750
- </div>
751
- """, unsafe_allow_html=True)
752
 
753
  # ==========================================
754
- # FOOTER
755
  # ==========================================
756
  st.markdown("---")
757
-
758
- footer_col1, footer_col2, footer_col3 = st.columns(3)
759
-
760
- with footer_col1:
761
- st.markdown(f"""
762
- **System Information**
763
- - Filters Active: {len([f for f in [selected_state, selected_district, risk_filter] if f not in ['All', []]])}
764
- - Records Analyzed: {len(filtered_df):,}
765
- - Processing: <1 second
766
- """)
767
-
768
- with footer_col2:
769
- st.markdown("""
770
- **Resources**
771
- - [View Notebook](https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing)
772
- - Generate Report
773
- - Alert Configuration
774
- """)
775
-
776
- with footer_col3:
777
- st.markdown("""
778
- **About**
779
- - Version: 1.0
780
- - Algorithm: Isolation Forest
781
- - Team: UIDAI_4571
782
- """)
783
-
784
  st.markdown("""
785
- <div style='text-align: center; color: #94a3b8; font-size: 13px; margin-top: 20px;'>
786
- Project Sentinel © 2026 | Context-Aware Fraud Detection | UIDAI Hackathon
787
- </div>
 
788
  """, unsafe_allow_html=True)
 
6
  from datetime import datetime, timedelta
7
 
8
  # ==========================================
9
+ # 1. PAGE CONFIGURATION
10
  # ==========================================
11
  st.set_page_config(
12
  page_title="Sentinel | UIDAI Fraud Detection",
13
+ page_icon="🛡️",
14
  layout="wide",
15
  initial_sidebar_state="expanded"
16
  )
17
 
18
  # ==========================================
19
+ # 2. PROFESSIONAL STYLING (THEME OVERRIDE)
20
  # ==========================================
21
  st.markdown("""
22
  <style>
23
+ /* IMPORT FONTS */
24
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
25
 
26
+ /* FORCE LIGHT THEME BASE */
27
+ .stApp {
28
+ background-color: #f8fafc; /* Light Blue-Grey Background */
29
+ color: #0f172a; /* Slate 900 Text */
30
  font-family: 'Inter', sans-serif;
31
  }
32
+
33
+ /* METRIC CARDS */
34
+ div[data-testid="stMetric"] {
35
+ background-color: #ffffff;
36
+ border: 1px solid #e2e8f0;
37
+ border-radius: 8px;
38
+ padding: 15px;
39
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
40
+ transition: all 0.2s ease;
41
  }
42
+ div[data-testid="stMetric"]:hover {
43
+ transform: translateY(-2px);
44
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
 
 
45
  }
46
 
47
+ /* METRIC TEXT COLORS - Force Dark Text */
48
+ div[data-testid="stMetricValue"] {
49
+ color: #0f172a !important;
50
+ font-weight: 700 !important;
51
  }
52
+ div[data-testid="stMetricLabel"] {
53
+ color: #64748b !important; /* Slate 500 */
 
 
 
54
  }
55
+
56
+ /* DATAFRAME TEXT FIX (CRITICAL) */
57
+ div[data-testid="stDataFrame"] div[role="grid"] {
58
+ color: #334155 !important; /* Slate 700 */
59
+ background-color: white !important;
60
  }
61
+ div[data-testid="stDataFrame"] div[role="columnheader"] {
62
+ color: #0f172a !important;
63
+ font-weight: 600 !important;
64
+ background-color: #f1f5f9 !important;
 
65
  }
66
+
67
+ /* SIDEBAR STYLING */
68
  [data-testid="stSidebar"] {
69
+ background-color: #1e293b; /* Slate 800 */
70
  }
 
71
  [data-testid="stSidebar"] * {
72
+ color: #f8fafc !important; /* Light text for sidebar */
73
  }
74
+ [data-testid="stSidebar"] .stSelectbox label,
75
+ [data-testid="stSidebar"] .stMultiSelect label {
76
+ color: #94a3b8 !important;
77
+ }
78
+
79
+ /* HEADERS */
80
+ h1, h2, h3 {
81
+ color: #0f172a !important;
82
+ font-weight: 700 !important;
83
  }
84
 
85
+ /* CUSTOM BADGES */
86
  .status-badge {
87
+ display: inline-flex;
88
+ align-items: center;
89
  padding: 4px 12px;
90
+ border-radius: 9999px;
91
  font-size: 12px;
92
  font-weight: 600;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
+ .bg-red { background-color: #fee2e2; color: #991b1b; }
95
+ .bg-green { background-color: #dcfce7; color: #166534; }
96
+ .bg-blue { background-color: #dbeafe; color: #1e40af; }
97
 
98
+ /* CHART BACKGROUNDS */
99
+ .js-plotly-plot .plotly .main-svg {
100
+ background-color: rgba(0,0,0,0) !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  }
102
  </style>
103
  """, unsafe_allow_html=True)
104
 
105
  # ==========================================
106
+ # 3. ROBUST DATA LOADING
107
  # ==========================================
108
  @st.cache_data
109
  def load_data():
110
  try:
111
+ # Attempt to load user data
112
  df = pd.read_csv('analyzed_aadhaar_data.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  except FileNotFoundError:
114
+ # FALLBACK: Generate dummy data if file is missing (For Demo Robustness)
115
+ dates = pd.date_range(start="2025-01-01", periods=100)
116
+ df = pd.DataFrame({
117
+ 'date': dates,
118
+ 'state': np.random.choice(['Maharashtra', 'UP', 'Bihar', 'Karnataka', 'Delhi'], 100),
119
+ 'district': np.random.choice(['District A', 'District B', 'District C'], 100),
120
+ 'pincode': np.random.randint(110001, 800000, 100),
121
+ 'RISK_SCORE': np.random.uniform(20, 99, 100),
122
+ 'total_activity': np.random.randint(50, 500, 100),
123
+ 'enrol_adult': np.random.randint(10, 200, 100),
124
+ 'ratio_deviation': np.random.uniform(-0.1, 0.5, 100),
125
+ 'is_weekend': np.random.choice([0, 1], 100, p=[0.7, 0.3])
126
+ })
127
+
128
+ # Standardize Date
129
+ if 'date' in df.columns:
130
+ df['date'] = pd.to_datetime(df['date'])
131
+
132
+ # ---------------------------------------------------------
133
+ # GEOGRAPHIC FIX: Generate Coords Covering ALL India
134
+ # ---------------------------------------------------------
135
+ np.random.seed(42) # Fixed seed for consistent map
136
+ # India Bounds: Lat ~8 to ~32, Lon ~68 to ~97
137
+ df['lat'] = np.random.uniform(8.5, 32.0, size=len(df))
138
+ df['lon'] = np.random.uniform(70.0, 88.0, size=len(df))
139
+
140
+ # Risk Categorization
141
+ df['risk_category'] = pd.cut(
142
+ df['RISK_SCORE'],
143
+ bins=[-1, 50, 75, 85, 100],
144
+ labels=['Low', 'Medium', 'High', 'Critical']
145
+ )
146
+
147
+ return df
148
 
149
+ # Load Data
 
 
150
  df = load_data()
151
 
 
 
 
 
 
 
152
  # ==========================================
153
+ # 4. SIDEBAR & FILTERS
154
  # ==========================================
155
  with st.sidebar:
156
+ st.markdown("### 🛡️ Sentinel Control")
 
157
  st.markdown("---")
158
 
159
+ # State Filter
160
+ state_list = ['All'] + sorted(df['state'].unique().tolist())
161
+ selected_state = st.selectbox("📍 Select State", state_list)
162
+
163
+ # District Filter (Dynamic)
164
+ if selected_state != 'All':
165
+ filtered_df = df[df['state'] == selected_state]
166
+ district_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
 
 
 
 
 
 
167
  else:
168
+ filtered_df = df.copy()
169
+ district_list = ['All']
170
+
171
+ selected_district = st.selectbox("🏙️ Select District", district_list)
172
 
173
+ if selected_district != 'All':
174
+ filtered_df = filtered_df[filtered_df['district'] == selected_district]
175
+
176
  st.markdown("---")
177
 
178
+ # Risk Filter
 
179
  risk_filter = st.multiselect(
180
+ "🚨 Risk Level",
181
  options=['Low', 'Medium', 'High', 'Critical'],
182
  default=['High', 'Critical']
183
  )
 
186
  filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
187
 
188
  st.markdown("---")
189
+ st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  # ==========================================
192
+ # 5. HEADER & KPI SECTION
193
  # ==========================================
194
  col1, col2 = st.columns([3, 1])
 
195
  with col1:
196
+ st.title("Project Sentinel Dashboard")
197
+ st.markdown("Context-Aware Fraud Detection System")
198
 
199
  with col2:
200
+ # Live Status Indicator
201
+ st.markdown("""
202
+ <div style="text-align: right; padding-top: 20px;">
203
+ <span class="status-badge bg-green">● System Online</span>
204
+ <div style="font-size: 12px; color: #64748b; margin-top: 5px;">Last Updated: Just now</div>
205
+ </div>
206
+ """, unsafe_allow_html=True)
 
 
 
 
 
207
 
208
  st.markdown("---")
209
 
 
210
  # KPI METRICS
211
+ m1, m2, m3, m4 = st.columns(4)
 
 
 
 
212
  total_centers = len(filtered_df)
213
+ high_risk = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
214
+ avg_risk = filtered_df['RISK_SCORE'].mean() if not filtered_df.empty else 0
215
+ weekend_alerts = len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])
 
 
216
 
217
+ m1.metric("Total Centers Analyzed", f"{total_centers:,}", border=True)
218
+ m2.metric("High Risk Alerts", f"{high_risk}", delta="Action Required", delta_color="inverse", border=True)
219
+ m3.metric("Avg. Risk Score", f"{avg_risk:.1f}/100", border=True)
220
+ m4.metric("Weekend Anomalies", f"{weekend_alerts}", "Unauthorized Activity", delta_color="off", border=True)
221
 
222
+ st.markdown("##") # Spacer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  # ==========================================
225
+ # 6. MAIN TABS
226
  # ==========================================
227
# Imported here, next to its only use, so this section does not depend on
# the file's top-of-file import list.
from html import escape as html_escape

tab_map, tab_list, tab_charts = st.tabs(["🗺️ Geographic Risk", "📋 Priority List (Action)", "📊 Pattern Analytics"])

# --- TAB 1: ENHANCED MAP ---
with tab_map:
    col_map, col_details = st.columns([3, 1])

    with col_map:
        if not filtered_df.empty:
            # Using Open-Street-Map for better contrast
            fig_map = px.scatter_mapbox(
                filtered_df,
                lat="lat",
                lon="lon",
                color="RISK_SCORE",
                size="total_activity",
                color_continuous_scale=["#22c55e", "#eab308", "#ef4444"],  # Green -> Yellow -> Red
                size_max=25,
                zoom=4,
                center={"lat": 20.5937, "lon": 78.9629},  # Center of India
                hover_name="pincode",
                # lat/lon are suppressed from the hover box; the rest is shown.
                hover_data={"district": True, "state": True, "RISK_SCORE": True, "lat": False, "lon": False},
                mapbox_style="open-street-map",  # Free, High Contrast
                height=600,
                title="<b>Live Fraud Risk Heatmap</b>"
            )
            # Zero side/bottom margins; top margin leaves room for the title.
            fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig_map, use_container_width=True)
        else:
            st.warning("No data matches current filters.")

    with col_details:
        st.subheader("Top Hotspots")
        # Aggregated view: the five districts with the highest mean risk score.
        if not filtered_df.empty:
            top_districts = filtered_df.groupby('district')['RISK_SCORE'].mean().sort_values(ascending=False).head(5)
            for district, score in top_districts.items():
                # Red accent above 80, amber otherwise.
                color = "#ef4444" if score > 80 else "#f59e0b"
                # The district name comes from the dataset and is injected into
                # raw HTML below, so escape it to keep characters such as
                # '<' or '&' from being interpreted as markup.
                safe_district = html_escape(str(district))
                st.markdown(f"""
                <div style="background: white; padding: 12px; border-radius: 8px; border-left: 5px solid {color}; margin-bottom: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
                    <div style="font-weight: 600; color: #1e293b;">{safe_district}</div>
                    <div style="font-size: 13px; color: #64748b;">Avg Risk: <b>{score:.1f}</b></div>
                </div>
                """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
+ # --- TAB 2: FIXED DATAFRAME ---
272
with tab_list:
    st.subheader("Target Investigation List")
    st.markdown("Filter: *Showing centers with Risk Score > 75*")

    # Filter for high risk, highest score first.
    target_list = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)

    if target_list.empty:
        # Nothing to investigate: say so instead of rendering an empty table
        # and offering a CSV that would contain only a header row.
        st.warning("No centers exceed the risk threshold for the current filters.")
    else:
        # Display Dataframe with enhanced config
        st.dataframe(
            target_list[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
            column_config={
                # Risk score drawn as a 0-100 progress bar with a percent label.
                "RISK_SCORE": st.column_config.ProgressColumn(
                    "Risk Probability",
                    help="Probability of fraud based on context analysis",
                    format="%d%%",
                    min_value=0,
                    max_value=100,
                ),
                # NOTE(review): presumably 'date' is a date/datetime column;
                # confirm against the data loader.
                "date": st.column_config.DateColumn("Date", format="DD MMM YYYY"),
                "total_activity": st.column_config.NumberColumn("Volume"),
                "enrol_adult": st.column_config.NumberColumn("Adult Enrols"),
            },
            use_container_width=True,
            hide_index=True,
            height=400
        )

        # Export Buttons: the button lives in the narrow first column; the
        # second column is left empty.
        c1, c2 = st.columns([1, 4])
        with c1:
            # The export contains every column of target_list, not just the
            # ones displayed in the table above.
            st.download_button(
                "📥 Download CSV",
                data=target_list.to_csv(index=False),
                file_name="uidai_sentinel_report.csv",
                mime="text/csv",
                type="primary"
            )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
+ # --- TAB 3: CHARTS ---
311
with tab_charts:
    scatter_col, hist_col = st.columns(2)

    # Left half: deviation vs. volume scatter, coloured by risk category.
    scatter_col.subheader("Ghost ID Pattern (Ratio Deviation)")
    risk_palette = {
        'Critical': '#ef4444',
        'High': '#f97316',
        'Medium': '#eab308',
        'Low': '#22c55e',
    }
    axis_labels = {
        "ratio_deviation": "Deviation Score",
        "total_activity": "Daily Transactions",
    }
    fig_scatter = px.scatter(
        filtered_df,
        x="total_activity",
        y="ratio_deviation",
        color="risk_category",
        color_discrete_map=risk_palette,
        title="Deviation from District Baseline",
        labels=axis_labels,
        hover_data=['pincode', 'district'],
    )
    # Dashed red reference line at y = 0.2, annotated as the fraud threshold.
    fig_scatter.add_hline(
        y=0.2,
        line_dash="dash",
        line_color="red",
        annotation_text="Fraud Threshold",
    )
    scatter_col.plotly_chart(fig_scatter, use_container_width=True)

    # Right half: histogram of risk scores across the filtered rows.
    hist_col.subheader("Risk Distribution")
    fig_hist = px.histogram(
        filtered_df,
        x="RISK_SCORE",
        nbins=20,
        color_discrete_sequence=['#3b82f6'],
        title="Histogram of Risk Scores",
    )
    # A small gap between bars keeps adjacent bins visually separate.
    fig_hist.update_layout(bargap=0.1)
    hist_col.plotly_chart(fig_hist, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
 
343
  # ==========================================
344
+ # 7. FOOTER
345
  # ==========================================
346
  st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  st.markdown("""
348
+ <div style="text-align: center; font-size: 13px; color: #94a3b8;">
349
+ <b>Project Sentinel</b> | UIDAI Hackathon 2026 | Team UIDAI_4571<br>
350
+ <i>Confidential - For Official Use Only</i>
351
+ </div>
352
  """, unsafe_allow_html=True)