LovnishVerma committed on
Commit
7b77dfb
·
verified ·
1 Parent(s): f49bfe4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -138
app.py CHANGED
@@ -59,18 +59,20 @@ st.markdown("""
59
  border-right: 1px solid #334155;
60
  }
61
 
62
- /* Force Sidebar Text Colors */
63
  [data-testid="stSidebar"] h1, [data-testid="stSidebar"] h2, [data-testid="stSidebar"] h3 {
64
  color: #f8fafc !important;
65
  }
66
- [data-testid="stSidebar"] p, [data-testid="stSidebar"] label {
 
 
67
  color: #e2e8f0 !important;
68
  font-weight: 600;
69
  }
70
 
71
  /* Input Boxes (Selectbox, DateInput) Text Color Fix */
72
- /* This targets the text INSIDE the white input box */
73
- [data-testid="stSidebar"] div[data-baseweb="select"] span {
74
  color: #0f172a !important;
75
  }
76
  [data-testid="stSidebar"] div[data-baseweb="input"] input {
@@ -80,6 +82,11 @@ st.markdown("""
80
  color: #0f172a !important;
81
  }
82
 
 
 
 
 
 
83
  /* Link Button Fix - CRITICAL OVERRIDE FOR DARK TEXT */
84
  [data-testid="stSidebar"] a {
85
  background-color: #3b82f6 !important; /* Brighter blue background */
@@ -101,7 +108,7 @@ st.markdown("""
101
  transform: translateY(-1px);
102
  }
103
 
104
- /* Link Button Container Background */
105
  [data-testid="stSidebar"] button[kind="secondary"] {
106
  background-color: #f8fafc !important; /* Light bg for button */
107
  color: #0f172a !important; /* Dark text */
@@ -132,8 +139,6 @@ st.markdown("""
132
  """, unsafe_allow_html=True)
133
 
134
  # 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
135
-
136
-
137
  @st.cache_data(show_spinner=False)
138
  def fetch_coordinates_batch(unique_locations):
139
  """
@@ -156,7 +161,7 @@ def fetch_coordinates_batch(unique_locations):
156
  d, s = k.split("|")
157
  coords_map[(d, s)] = tuple(v)
158
  except json.JSONDecodeError:
159
- pass # File corrupted, start fresh
160
 
161
  # 2. Add Hardcoded Pre-fills (High Priority Redundancy)
162
  # These override if missing, but usually JSON is preferred source if present
@@ -183,77 +188,70 @@ def fetch_coordinates_batch(unique_locations):
183
  for k, v in prefills.items():
184
  if k not in coords_map:
185
  coords_map[k] = v
186
-
187
  # 3. Identify missing locations
188
  missing_locs = [loc for loc in unique_locations if loc not in coords_map]
189
-
190
  if not missing_locs:
191
  return coords_map
192
 
193
  # 4. Dynamic Fetching for missing
194
  progress_text = "πŸ“‘ New locations found. Fetching coordinates..."
195
  my_bar = st.progress(0, text=progress_text)
196
-
197
- headers = {
198
- 'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
199
  updated = False
200
-
201
  for i, (district, state) in enumerate(missing_locs):
202
  try:
203
  # Update Progress
204
- my_bar.progress((i + 1) / len(missing_locs),
205
- text=f"πŸ“ Locating: {district}, {state}")
206
-
207
  # API Call
208
  query = f"{district}, {state}, India"
209
  url = "https://nominatim.openstreetmap.org/search"
210
  params = {'q': query, 'format': 'json', 'limit': 1}
211
-
212
- response = requests.get(
213
- url, params=params, headers=headers, timeout=5)
214
-
215
  if response.status_code == 200 and response.json():
216
  data = response.json()[0]
217
- coords_map[(district, state)] = (
218
- float(data['lat']), float(data['lon']))
219
  updated = True
220
  else:
221
- pass # Fail silently, will fall back to state center logic later
222
-
223
  # Respect Rate Limiting (1 request per second)
224
- time.sleep(1.1)
225
-
226
  except Exception as e:
227
  continue
228
-
229
  my_bar.empty()
230
-
231
  # 5. Save back to JSON if new data fetched
232
  if updated:
233
  # Convert keys to string "District|State" for JSON compatibility
234
  save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
235
  with open(json_file, 'w') as f:
236
  json.dump(save_data, f)
237
-
238
  return coords_map
239
 
240
  # 4. MAIN DATA LOADER
241
-
242
-
243
  @st.cache_data(ttl=300)
244
  def load_data():
245
  try:
246
  df = pd.read_csv('analyzed_aadhaar_data.csv')
247
  except FileNotFoundError:
248
- return pd.DataFrame() # Return empty to trigger external error check
249
-
250
- if 'date' in df.columns:
251
- df['date'] = pd.to_datetime(df['date'])
252
 
 
 
253
  # Clean Data
254
  df['district'] = df['district'].astype(str).str.strip()
255
  df['state'] = df['state'].astype(str).str.strip()
256
-
257
  # --- FIX DUPLICATE STATES ---
258
  # Standardize State Names to remove variations (e.g., J&K)
259
  state_mapping = {
@@ -265,14 +263,13 @@ def load_data():
265
  'Pondicherry': 'Puducherry'
266
  }
267
  df['state'] = df['state'].replace(state_mapping)
268
-
269
  # Get Unique Locations
270
- unique_locs = list(
271
- df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
272
-
273
  # Fetch Coordinates (Cached + Persistent JSON)
274
  coords_db = fetch_coordinates_batch(unique_locs)
275
-
276
  # Fallback Centers (State Capitals)
277
  state_centers = {
278
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
@@ -288,33 +285,31 @@ def load_data():
288
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
289
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
290
  }
291
-
292
  def get_lat_lon(row):
293
  key = (row['district'], row['state'])
294
-
295
  # 1. Check Exact Match from API/Cache
296
  if key in coords_db:
297
  lat, lon = coords_db[key]
298
  # Tiny jitter to separate stacked points
299
  return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
300
-
301
  # 2. Fallback to State Center
302
  center = state_centers.get(row['state'], (20.5937, 78.9629))
303
  np.random.seed(hash(key) % 2**32)
304
  return pd.Series({
305
- 'lat': center[0] + np.random.uniform(-0.5, 0.5),
306
  'lon': center[1] + np.random.uniform(-0.5, 0.5)
307
  })
308
 
309
  coords = df.apply(get_lat_lon, axis=1)
310
  df['lat'] = coords['lat']
311
  df['lon'] = coords['lon']
312
-
313
- df['risk_category'] = pd.cut(
314
- df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
315
  return df
316
 
317
-
318
  with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
319
  df = load_data()
320
 
@@ -322,40 +317,30 @@ with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
322
  with st.sidebar:
323
  st.markdown("### πŸ›‘οΈ S.T.A.R.K AI Control")
324
  st.markdown("---")
325
-
326
  if not df.empty:
327
  if 'date' in df.columns:
328
  min_d, max_d = df['date'].min().date(), df['date'].max().date()
329
- dr = st.date_input("Date Range", value=(
330
- min_d, max_d), min_value=min_d, max_value=max_d)
331
- if len(dr) == 2:
332
- df = df[(df['date'].dt.date >= dr[0]) &
333
- (df['date'].dt.date <= dr[1])]
334
-
335
  state_list = ['All'] + sorted(df['state'].unique().tolist())
336
  sel_state = st.selectbox("State", state_list)
337
- filtered_df = df[df['state'] ==
338
- sel_state] if sel_state != 'All' else df.copy()
339
-
340
  dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
341
  sel_dist = st.selectbox("District", dist_list)
342
- if sel_dist != 'All':
343
- filtered_df = filtered_df[filtered_df['district'] == sel_dist]
344
-
345
  st.markdown("---")
346
- risk_filter = st.multiselect(
347
- "Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
348
- if risk_filter:
349
- filtered_df = filtered_df[filtered_df['risk_category'].isin(
350
- risk_filter)]
351
  else:
352
  filtered_df = pd.DataFrame()
353
-
354
  st.markdown("---")
355
- st.link_button("πŸ““ Open Analysis Notebook",
356
- "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
357
- st.info(
358
- f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
359
 
360
  # 6. HEADER & METRICS
361
  col1, col2 = st.columns([3, 1])
@@ -363,81 +348,62 @@ with col1:
363
  st.title("πŸ›‘οΈ S.T.A.R.K AI Dashboard")
364
  st.markdown("**Context-Aware Fraud Detection & Prevention System**")
365
  with col2:
366
- st.markdown(
367
- f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
368
 
369
  st.markdown("---")
370
 
371
  if not filtered_df.empty:
372
  m1, m2, m3, m4, m5 = st.columns(5)
373
- total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(
374
- filtered_df[filtered_df['RISK_SCORE'] > 85])
375
  m1.metric("Total Centers", f"{total:,}", border=True)
376
- m2.metric("High Risk", f"{high}", delta="Review",
377
- delta_color="inverse", border=True)
378
- m3.metric("Critical", f"{crit}", delta="Urgent",
379
- delta_color="inverse", border=True)
380
- m4.metric(
381
- "Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
382
- m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}",
383
- delta="Suspicious", delta_color="off", border=True)
384
  else:
385
- st.error(
386
- "❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
387
 
388
  st.markdown("##")
389
 
390
  # 7. TABS
391
- tab_map, tab_list, tab_charts, tab_insights = st.tabs(
392
- ["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
393
 
394
  with tab_map:
395
  c_map, c_det = st.columns([3, 1])
396
  with c_map:
397
  if not filtered_df.empty:
398
  # Dynamic Zoom based on selection
399
- if sel_dist != 'All':
400
- zoom_lvl = 10
401
- elif sel_state != 'All':
402
- zoom_lvl = 6
403
- else:
404
- zoom_lvl = 3.8
405
 
406
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
407
- color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
408
- center=None if sel_state == 'All' else {
409
- "lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
410
- hover_name="district", hover_data={"state": True, "pincode": True, "lat": False, "lon": False},
411
- mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
412
-
413
- fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
414
  st.plotly_chart(fig, use_container_width=True)
415
- else:
416
- st.info("Waiting for data...")
417
-
418
  with c_det:
419
  st.subheader("πŸ”₯ Top Hotspots")
420
  if not filtered_df.empty:
421
- top = filtered_df.groupby('district').agg(
422
- {'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
423
  for i, (d, r) in enumerate(top.iterrows(), 1):
424
- clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else (
425
- "#f97316", "HIGH")
426
- st.markdown(
427
- f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
428
 
429
  with tab_list:
430
  st.subheader("🎯 Priority Investigation")
431
  if not filtered_df.empty:
432
- targets = filtered_df[filtered_df['RISK_SCORE'] >
433
- 75].sort_values('RISK_SCORE', ascending=False)
434
  csv = targets.to_csv(index=False).encode('utf-8')
435
- st.download_button("πŸ“₯ Export CSV", data=csv,
436
- file_name="stark_priority.csv", mime="text/csv", type="primary")
437
- st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
438
- column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
439
- else:
440
- st.info("Waiting for data...")
441
 
442
  with tab_charts:
443
  c1, c2 = st.columns(2)
@@ -445,40 +411,32 @@ with tab_charts:
445
  st.markdown("**Ghost ID Detection**")
446
  if not filtered_df.empty:
447
  fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
448
- color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
449
  fig.add_hline(y=0.2, line_dash="dash", line_color="red")
450
  st.plotly_chart(fig, use_container_width=True)
451
  with c2:
452
  st.markdown("**Weekend Activity Analysis**")
453
  if not filtered_df.empty:
454
- wk_counts = filtered_df.groupby(
455
- 'is_weekend')['total_activity'].sum().reset_index()
456
- wk_counts['Type'] = wk_counts['is_weekend'].map(
457
- {0: 'Weekday', 1: 'Weekend'})
458
- fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={
459
- 'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
460
  st.plotly_chart(fig, use_container_width=True)
461
 
462
  with tab_insights:
463
  st.subheader("πŸ” AI Detective Insights")
464
  if not filtered_df.empty:
465
  anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
466
- st.info(
467
- f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
468
-
469
  c_i1, c_i2 = st.columns(2)
470
  with c_i1:
471
  st.markdown("#### 🚨 Primary Risk Factors")
472
- st.markdown(
473
- "- **High Volume on Weekends:** 28% correlation with fraud")
474
- st.markdown(
475
- "- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
476
  with c_i2:
477
  st.markdown("#### πŸ’‘ Recommended Actions")
478
- st.markdown(
479
- f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
480
- st.markdown(
481
- "2. Deploy biometric re-verification for 'Rural A' cluster")
482
 
483
  st.markdown("---")
484
- st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)
 
59
  border-right: 1px solid #334155;
60
  }
61
 
62
+ /* Force Sidebar Header Text Colors */
63
  [data-testid="stSidebar"] h1, [data-testid="stSidebar"] h2, [data-testid="stSidebar"] h3 {
64
  color: #f8fafc !important;
65
  }
66
+
67
+ /* Force Label Colors (Input titles like 'State', 'Date Range') */
68
+ [data-testid="stSidebar"] label {
69
  color: #e2e8f0 !important;
70
  font-weight: 600;
71
  }
72
 
73
  /* Input Boxes (Selectbox, DateInput) Text Color Fix */
74
+ /* This targets the text INSIDE the white input box/dropdown */
75
+ [data-testid="stSidebar"] div[data-baseweb="select"] div {
76
  color: #0f172a !important;
77
  }
78
  [data-testid="stSidebar"] div[data-baseweb="input"] input {
 
82
  color: #0f172a !important;
83
  }
84
 
85
+ /* Markdown Text in Sidebar */
86
+ [data-testid="stSidebar"] .stMarkdown p {
87
+ color: #cbd5e1 !important;
88
+ }
89
+
90
  /* Link Button Fix - CRITICAL OVERRIDE FOR DARK TEXT */
91
  [data-testid="stSidebar"] a {
92
  background-color: #3b82f6 !important; /* Brighter blue background */
 
108
  transform: translateY(-1px);
109
  }
110
 
111
+ /* Link Button Container Background (for st.link_button wrapper) */
112
  [data-testid="stSidebar"] button[kind="secondary"] {
113
  background-color: #f8fafc !important; /* Light bg for button */
114
  color: #0f172a !important; /* Dark text */
 
139
  """, unsafe_allow_html=True)
140
 
141
  # 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
 
 
142
  @st.cache_data(show_spinner=False)
143
  def fetch_coordinates_batch(unique_locations):
144
  """
 
161
  d, s = k.split("|")
162
  coords_map[(d, s)] = tuple(v)
163
  except json.JSONDecodeError:
164
+ pass # File corrupted, start fresh
165
 
166
  # 2. Add Hardcoded Pre-fills (High Priority Redundancy)
167
  # These override if missing, but usually JSON is preferred source if present
 
188
  for k, v in prefills.items():
189
  if k not in coords_map:
190
  coords_map[k] = v
191
+
192
  # 3. Identify missing locations
193
  missing_locs = [loc for loc in unique_locations if loc not in coords_map]
194
+
195
  if not missing_locs:
196
  return coords_map
197
 
198
  # 4. Dynamic Fetching for missing
199
  progress_text = "πŸ“‘ New locations found. Fetching coordinates..."
200
  my_bar = st.progress(0, text=progress_text)
201
+
202
+ headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
 
203
  updated = False
204
+
205
  for i, (district, state) in enumerate(missing_locs):
206
  try:
207
  # Update Progress
208
+ my_bar.progress((i + 1) / len(missing_locs), text=f"πŸ“ Locating: {district}, {state}")
209
+
 
210
  # API Call
211
  query = f"{district}, {state}, India"
212
  url = "https://nominatim.openstreetmap.org/search"
213
  params = {'q': query, 'format': 'json', 'limit': 1}
214
+
215
+ response = requests.get(url, params=params, headers=headers, timeout=5)
216
+
 
217
  if response.status_code == 200 and response.json():
218
  data = response.json()[0]
219
+ coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
 
220
  updated = True
221
  else:
222
+ pass # Fail silently, will fall back to state center logic later
223
+
224
  # Respect Rate Limiting (1 request per second)
225
+ time.sleep(1.1)
226
+
227
  except Exception as e:
228
  continue
229
+
230
  my_bar.empty()
231
+
232
  # 5. Save back to JSON if new data fetched
233
  if updated:
234
  # Convert keys to string "District|State" for JSON compatibility
235
  save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
236
  with open(json_file, 'w') as f:
237
  json.dump(save_data, f)
238
+
239
  return coords_map
240
 
241
  # 4. MAIN DATA LOADER
 
 
242
  @st.cache_data(ttl=300)
243
  def load_data():
244
  try:
245
  df = pd.read_csv('analyzed_aadhaar_data.csv')
246
  except FileNotFoundError:
247
+ return pd.DataFrame() # Return empty to trigger external error check
 
 
 
248
 
249
+ if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
250
+
251
  # Clean Data
252
  df['district'] = df['district'].astype(str).str.strip()
253
  df['state'] = df['state'].astype(str).str.strip()
254
+
255
  # --- FIX DUPLICATE STATES ---
256
  # Standardize State Names to remove variations (e.g., J&K)
257
  state_mapping = {
 
263
  'Pondicherry': 'Puducherry'
264
  }
265
  df['state'] = df['state'].replace(state_mapping)
266
+
267
  # Get Unique Locations
268
+ unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
269
+
 
270
  # Fetch Coordinates (Cached + Persistent JSON)
271
  coords_db = fetch_coordinates_batch(unique_locs)
272
+
273
  # Fallback Centers (State Capitals)
274
  state_centers = {
275
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
 
285
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
286
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
287
  }
288
+
289
  def get_lat_lon(row):
290
  key = (row['district'], row['state'])
291
+
292
  # 1. Check Exact Match from API/Cache
293
  if key in coords_db:
294
  lat, lon = coords_db[key]
295
  # Tiny jitter to separate stacked points
296
  return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
297
+
298
  # 2. Fallback to State Center
299
  center = state_centers.get(row['state'], (20.5937, 78.9629))
300
  np.random.seed(hash(key) % 2**32)
301
  return pd.Series({
302
+ 'lat': center[0] + np.random.uniform(-0.5, 0.5),
303
  'lon': center[1] + np.random.uniform(-0.5, 0.5)
304
  })
305
 
306
  coords = df.apply(get_lat_lon, axis=1)
307
  df['lat'] = coords['lat']
308
  df['lon'] = coords['lon']
309
+
310
+ df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
 
311
  return df
312
 
 
313
  with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
314
  df = load_data()
315
 
 
317
  with st.sidebar:
318
  st.markdown("### πŸ›‘οΈ S.T.A.R.K AI Control")
319
  st.markdown("---")
320
+
321
  if not df.empty:
322
  if 'date' in df.columns:
323
  min_d, max_d = df['date'].min().date(), df['date'].max().date()
324
+ dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
325
+ if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
326
+
 
 
 
327
  state_list = ['All'] + sorted(df['state'].unique().tolist())
328
  sel_state = st.selectbox("State", state_list)
329
+ filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
330
+
 
331
  dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
332
  sel_dist = st.selectbox("District", dist_list)
333
+ if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
334
+
 
335
  st.markdown("---")
336
+ risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
337
+ if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
 
 
 
338
  else:
339
  filtered_df = pd.DataFrame()
340
+
341
  st.markdown("---")
342
+ st.link_button("πŸ““ Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
343
+ st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
 
 
344
 
345
  # 6. HEADER & METRICS
346
  col1, col2 = st.columns([3, 1])
 
348
  st.title("πŸ›‘οΈ S.T.A.R.K AI Dashboard")
349
  st.markdown("**Context-Aware Fraud Detection & Prevention System**")
350
  with col2:
351
+ st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
 
352
 
353
  st.markdown("---")
354
 
355
  if not filtered_df.empty:
356
  m1, m2, m3, m4, m5 = st.columns(5)
357
+ total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
 
358
  m1.metric("Total Centers", f"{total:,}", border=True)
359
+ m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
360
+ m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
361
+ m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
362
+ m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
 
 
 
 
363
  else:
364
+ st.error("❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
 
365
 
366
  st.markdown("##")
367
 
368
  # 7. TABS
369
+ tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
 
370
 
371
  with tab_map:
372
  c_map, c_det = st.columns([3, 1])
373
  with c_map:
374
  if not filtered_df.empty:
375
  # Dynamic Zoom based on selection
376
+ if sel_dist != 'All': zoom_lvl = 10
377
+ elif sel_state != 'All': zoom_lvl = 6
378
+ else: zoom_lvl = 3.8
 
 
 
379
 
380
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
381
+ color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
382
+ center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
383
+ hover_name="district", hover_data={"state":True, "pincode":True, "lat":False, "lon":False},
384
+ mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
385
+
386
+ fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
 
387
  st.plotly_chart(fig, use_container_width=True)
388
+ else: st.info("Waiting for data...")
389
+
 
390
  with c_det:
391
  st.subheader("πŸ”₯ Top Hotspots")
392
  if not filtered_df.empty:
393
+ top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
 
394
  for i, (d, r) in enumerate(top.iterrows(), 1):
395
+ clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else ("#f97316", "HIGH")
396
+ st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
 
 
397
 
398
  with tab_list:
399
  st.subheader("🎯 Priority Investigation")
400
  if not filtered_df.empty:
401
+ targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
 
402
  csv = targets.to_csv(index=False).encode('utf-8')
403
+ st.download_button("πŸ“₯ Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
404
+ st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
405
+ column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
406
+ else: st.info("Waiting for data...")
 
 
407
 
408
  with tab_charts:
409
  c1, c2 = st.columns(2)
 
411
  st.markdown("**Ghost ID Detection**")
412
  if not filtered_df.empty:
413
  fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
414
+ color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
415
  fig.add_hline(y=0.2, line_dash="dash", line_color="red")
416
  st.plotly_chart(fig, use_container_width=True)
417
  with c2:
418
  st.markdown("**Weekend Activity Analysis**")
419
  if not filtered_df.empty:
420
+ wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
421
+ wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
422
+ fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
 
 
 
423
  st.plotly_chart(fig, use_container_width=True)
424
 
425
  with tab_insights:
426
  st.subheader("πŸ” AI Detective Insights")
427
  if not filtered_df.empty:
428
  anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
429
+ st.info(f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
430
+
 
431
  c_i1, c_i2 = st.columns(2)
432
  with c_i1:
433
  st.markdown("#### 🚨 Primary Risk Factors")
434
+ st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
435
+ st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
 
 
436
  with c_i2:
437
  st.markdown("#### πŸ’‘ Recommended Actions")
438
+ st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
439
+ st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
 
 
440
 
441
  st.markdown("---")
442
+ st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)