LovnishVerma committed on
Commit
a009ce9
·
verified ·
1 Parent(s): 06659a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -223
app.py CHANGED
@@ -17,134 +17,117 @@ st.set_page_config(
17
  initial_sidebar_state="expanded"
18
  )
19
 
20
- # 2. ENHANCED PROFESSIONAL STYLING (Optimized for Visibility)
21
  st.markdown("""
22
  <style>
23
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
24
 
25
- /* Global Text Visibility Fix - Main Area */
26
- .stApp {
27
- background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
28
- color: #0f172a !important;
29
- font-family: 'Inter', sans-serif;
 
30
  }
31
 
32
- /* Default text color for Main Area (Dark) */
33
- p, span, div, label, h1, h2, h3, h4, h5, h6 {
34
- color: #0f172a;
35
  }
36
-
37
- /* METRIC CARDS */
38
  div[data-testid="stMetric"] {
39
  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
40
- border: 1px solid #e2e8f0; border-radius: 12px; padding: 20px;
41
- box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1); transition: transform 0.2s;
42
- }
43
- div[data-testid="stMetric"]:hover { transform: translateY(-2px); box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1); }
44
- div[data-testid="stMetricValue"] { color: #0f172a !important; font-weight: 800 !important; font-size: 2rem !important; }
45
- div[data-testid="stMetricLabel"] { color: #334155 !important; font-weight: 600 !important; text-transform: uppercase; font-size: 0.75rem; letter-spacing: 0.05em; }
46
-
47
- /* DATAFRAME VISIBILITY FIX */
48
- div[data-testid="stDataFrame"] { border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
49
- div[data-testid="stDataFrame"] div[role="columnheader"] {
50
- background: linear-gradient(to bottom, #f8fafc, #f1f5f9) !important;
51
- color: #0f172a !important; font-weight: 700 !important; border-bottom: 2px solid #cbd5e1 !important;
52
  }
53
- div[data-testid="stDataFrame"] div[role="grid"] {
54
- color: #334155 !important;
 
 
 
 
 
55
  }
56
-
57
- /* --- SIDEBAR VISIBILITY FIXES (DARK MODE COMPATIBLE) --- */
58
-
59
- /* 1. Force Sidebar Background to Dark Blue */
60
- [data-testid="stSidebar"] {
61
- background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%);
62
- border-right: 1px solid #334155;
63
  }
64
-
65
- /* 2. FORCE ALL TEXT IN SIDEBAR TO BE WHITE/LIGHT */
66
- [data-testid="stSidebar"] h1, [data-testid="stSidebar"] h2, [data-testid="stSidebar"] h3,
67
- [data-testid="stSidebar"] p, [data-testid="stSidebar"] span, [data-testid="stSidebar"] label,
68
- [data-testid="stSidebar"] div {
69
- color: #f8fafc !important;
 
 
70
  }
71
 
72
- /* 3. EXCEPTION: Text INSIDE Input Boxes (Selectbox, DateInput) must be DARK */
73
- /* Because the input box background is usually white */
74
- [data-testid="stSidebar"] input {
75
  color: #0f172a !important;
76
  }
77
- [data-testid="stSidebar"] div[data-baseweb="select"] div {
78
- color: #0f172a !important;
79
- }
80
- /* Fix for Dropdown Options List */
81
- div[role="listbox"] li, div[role="listbox"] div {
82
  color: #0f172a !important;
83
  }
84
 
85
- /* Link Button Fix */
86
- [data-testid="stSidebar"] a {
87
- background-color: #3b82f6 !important;
88
- color: #ffffff !important;
89
- font-weight: 700 !important;
90
  text-decoration: none;
91
- border: 1px solid #2563eb !important;
92
- border-radius: 8px !important;
93
- padding: 10px 16px !important;
94
- display: inline-block !important;
95
- text-align: center !important;
96
- width: 100% !important;
97
- box-shadow: 0 2px 4px rgba(0,0,0,0.2) !important;
98
  }
99
- [data-testid="stSidebar"] a:hover {
100
- background-color: #2563eb !important;
101
- color: #ffffff !important;
102
- border-color: #1d4ed8 !important;
103
- transform: translateY(-1px);
 
 
 
 
104
  }
 
105
 
106
- /* HOTSPOTS */
107
- .hotspot-card { background: white; padding: 16px; border-radius: 10px; border-left: 5px solid; margin-bottom: 12px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); transition: all 0.2s; }
108
- .hotspot-card:hover { transform: translateX(4px); box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
109
- .js-plotly-plot .plotly .main-svg { background-color: rgba(0,0,0,0) !important; }
110
-
111
- /* STATUS BADGES */
112
- .status-badge { display: inline-flex; align-items: center; padding: 6px 14px; border-radius: 9999px; font-size: 12px; font-weight: 700; text-transform: uppercase; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
113
- .bg-red { background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); color: #991b1b !important; }
114
- .bg-green { background: linear-gradient(135deg, #dcfce7 0%, #bbf7d0 100%); color: #166534 !important; }
115
- .bg-amber { background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); color: #92400e !important; }
116
  </style>
117
  """, unsafe_allow_html=True)
118
 
119
  # 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
120
-
121
-
122
  @st.cache_data(show_spinner=False)
123
  def fetch_coordinates_batch(unique_locations):
124
- """
125
- Fetches coordinates from OpenStreetMap Nominatim API.
126
- Uses 'district_coords.json' for persistent storage.
127
- unique_locations: List of tuples (District, State)
128
- Returns: Dictionary {(District, State): (lat, lon)}
129
- """
130
  json_file = 'district_coords.json'
131
  coords_map = {}
132
 
133
- # 1. Load from JSON if exists
134
  if os.path.exists(json_file):
135
  try:
136
  with open(json_file, 'r') as f:
137
- # Convert string keys "District|State" back to tuple
138
  loaded_data = json.load(f)
139
  for k, v in loaded_data.items():
140
  if "|" in k:
141
  d, s = k.split("|")
142
  coords_map[(d, s)] = tuple(v)
143
  except json.JSONDecodeError:
144
- pass # File corrupted, start fresh
145
 
146
- # 2. Add Hardcoded Pre-fills (High Priority Redundancy)
147
- # These override if missing, but usually JSON is preferred source if present
148
  prefills = {
149
  ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
150
  ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
@@ -169,53 +152,34 @@ def fetch_coordinates_batch(unique_locations):
169
  if k not in coords_map:
170
  coords_map[k] = v
171
 
172
- # 3. Identify missing locations
173
  missing_locs = [loc for loc in unique_locations if loc not in coords_map]
174
-
175
  if not missing_locs:
176
  return coords_map
177
 
178
- # 4. Dynamic Fetching for missing
179
  progress_text = "πŸ“‘ New locations found. Fetching coordinates..."
180
  my_bar = st.progress(0, text=progress_text)
181
-
182
- headers = {
183
- 'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
184
  updated = False
185
 
186
  for i, (district, state) in enumerate(missing_locs):
187
  try:
188
- # Update Progress
189
- my_bar.progress((i + 1) / len(missing_locs),
190
- text=f"πŸ“ Locating: {district}, {state}")
191
-
192
- # API Call
193
  query = f"{district}, {state}, India"
194
  url = "https://nominatim.openstreetmap.org/search"
195
  params = {'q': query, 'format': 'json', 'limit': 1}
196
-
197
- response = requests.get(
198
- url, params=params, headers=headers, timeout=5)
199
 
200
  if response.status_code == 200 and response.json():
201
  data = response.json()[0]
202
- coords_map[(district, state)] = (
203
- float(data['lat']), float(data['lon']))
204
  updated = True
205
- else:
206
- pass # Fail silently, will fall back to state center logic later
207
-
208
- # Respect Rate Limiting (1 request per second)
209
  time.sleep(1.1)
210
-
211
- except Exception as e:
212
  continue
213
 
214
  my_bar.empty()
215
 
216
- # 5. Save back to JSON if new data fetched
217
  if updated:
218
- # Convert keys to string "District|State" for JSON compatibility
219
  save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
220
  with open(json_file, 'w') as f:
221
  json.dump(save_data, f)
@@ -223,24 +187,19 @@ def fetch_coordinates_batch(unique_locations):
223
  return coords_map
224
 
225
  # 4. MAIN DATA LOADER
226
-
227
-
228
  @st.cache_data(ttl=300)
229
  def load_data():
230
  try:
231
  df = pd.read_csv('analyzed_aadhaar_data.csv')
232
  except FileNotFoundError:
233
- return pd.DataFrame() # Return empty to trigger external error check
234
 
235
  if 'date' in df.columns:
236
  df['date'] = pd.to_datetime(df['date'])
237
 
238
- # Clean Data
239
  df['district'] = df['district'].astype(str).str.strip()
240
  df['state'] = df['state'].astype(str).str.strip()
241
 
242
- # --- FIX DUPLICATE STATES ---
243
- # Standardize State Names to remove variations (e.g., J&K)
244
  state_mapping = {
245
  'Jammu & Kashmir': 'Jammu and Kashmir',
246
  'J&K': 'Jammu and Kashmir',
@@ -258,56 +217,28 @@ def load_data():
258
  }
259
  df['state'] = df['state'].replace(state_mapping)
260
 
261
- # Get Unique Locations
262
- unique_locs = list(
263
- df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
264
-
265
- # Fetch Coordinates (Cached + Persistent JSON)
266
  coords_db = fetch_coordinates_batch(unique_locs)
267
-
268
- # Fallback Centers (State Capitals)
269
  state_centers = {
270
- 'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
271
- 'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
272
- 'Chandigarh': (30.7333, 76.7794), 'Chhattisgarh': (21.2787, 81.8661), 'Delhi': (28.7041, 77.1025),
273
- 'Goa': (15.2993, 74.1240), 'Gujarat': (22.2587, 71.1924), 'Haryana': (29.0588, 76.0856),
274
- 'Himachal Pradesh': (31.9579, 77.1095), 'Jammu and Kashmir': (33.7782, 76.5762), 'Jharkhand': (23.6102, 85.2799),
275
- 'Karnataka': (15.3173, 75.7139), 'Kerala': (10.8505, 76.2711), 'Ladakh': (34.1526, 77.5770),
276
- 'Madhya Pradesh': (22.9734, 78.6569), 'Maharashtra': (19.7515, 75.7139), 'Manipur': (24.6637, 93.9063),
277
- 'Meghalaya': (25.4670, 91.3662), 'Mizoram': (23.1645, 92.9376), 'Nagaland': (26.1584, 94.5624),
278
- 'Odisha': (20.9517, 85.0985), 'Puducherry': (11.9416, 79.8083), 'Punjab': (31.1471, 75.3412),
279
- 'Rajasthan': (27.0238, 74.2179), 'Sikkim': (27.5330, 88.5122), 'Tamil Nadu': (11.1271, 78.6569),
280
- 'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
281
- 'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
282
  }
283
 
284
  def get_lat_lon(row):
285
  key = (row['district'], row['state'])
286
-
287
- # 1. Check Exact Match from API/Cache
288
  if key in coords_db:
289
  lat, lon = coords_db[key]
290
- # Tiny jitter to separate stacked points
291
  return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
292
-
293
- # 2. Fallback to State Center
294
  center = state_centers.get(row['state'], (20.5937, 78.9629))
295
  np.random.seed(hash(key) % 2**32)
296
- return pd.Series({
297
- 'lat': center[0] + np.random.uniform(-0.5, 0.5),
298
- 'lon': center[1] + np.random.uniform(-0.5, 0.5)
299
- })
300
 
301
  coords = df.apply(get_lat_lon, axis=1)
302
  df['lat'] = coords['lat']
303
  df['lon'] = coords['lon']
304
-
305
- df['risk_category'] = pd.cut(
306
- df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
307
  return df
308
 
309
-
310
- with st.spinner('Initializing S.A.T.A.R.K AI & Geocoding...'):
311
  df = load_data()
312
 
313
  # 5. SIDEBAR & FILTERS
@@ -318,16 +249,13 @@ with st.sidebar:
318
  if not df.empty:
319
  if 'date' in df.columns:
320
  min_d, max_d = df['date'].min().date(), df['date'].max().date()
321
- dr = st.date_input("Date Range", value=(
322
- min_d, max_d), min_value=min_d, max_value=max_d)
323
  if len(dr) == 2:
324
- df = df[(df['date'].dt.date >= dr[0]) &
325
- (df['date'].dt.date <= dr[1])]
326
 
327
  state_list = ['All'] + sorted(df['state'].unique().tolist())
328
  sel_state = st.selectbox("State", state_list)
329
- filtered_df = df[df['state'] ==
330
- sel_state] if sel_state != 'All' else df.copy()
331
 
332
  dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
333
  sel_dist = st.selectbox("District", dist_list)
@@ -335,19 +263,15 @@ with st.sidebar:
335
  filtered_df = filtered_df[filtered_df['district'] == sel_dist]
336
 
337
  st.markdown("---")
338
- risk_filter = st.multiselect(
339
- "Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
340
  if risk_filter:
341
- filtered_df = filtered_df[filtered_df['risk_category'].isin(
342
- risk_filter)]
343
  else:
344
  filtered_df = pd.DataFrame()
345
 
346
  st.markdown("---")
347
- st.link_button("πŸ““ Open Analysis Notebook",
348
- "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
349
- st.info(
350
- f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
351
 
352
  # 6. HEADER & METRICS
353
  col1, col2 = st.columns([3, 1])
@@ -355,81 +279,57 @@ with col1:
355
  st.title("πŸ›‘οΈ S.A.T.A.R.K AI Dashboard")
356
  st.markdown("**Context-Aware Fraud Detection & Prevention System**")
357
  with col2:
358
- st.markdown(
359
- f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
360
 
361
  st.markdown("---")
362
 
363
  if not filtered_df.empty:
364
  m1, m2, m3, m4, m5 = st.columns(5)
365
- total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(
366
- filtered_df[filtered_df['RISK_SCORE'] > 85])
 
 
367
  m1.metric("Total Centers", f"{total:,}", border=True)
368
- m2.metric("High Risk", f"{high}", delta="Review",
369
- delta_color="inverse", border=True)
370
- m3.metric("Critical", f"{crit}", delta="Urgent",
371
- delta_color="inverse", border=True)
372
- m4.metric(
373
- "Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
374
- m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}",
375
- delta="Suspicious", delta_color="off", border=True)
376
  else:
377
- st.error(
378
- "❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
379
 
380
  st.markdown("##")
381
 
382
  # 7. TABS
383
- tab_map, tab_list, tab_charts, tab_insights = st.tabs(
384
- ["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
385
 
386
  with tab_map:
387
  c_map, c_det = st.columns([3, 1])
388
  with c_map:
389
  if not filtered_df.empty:
390
- # Dynamic Zoom based on selection
391
- if sel_dist != 'All':
392
- zoom_lvl = 10
393
- elif sel_state != 'All':
394
- zoom_lvl = 6
395
- else:
396
- zoom_lvl = 3.8
397
-
398
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
399
  color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
400
- center=None if sel_state == 'All' else {
401
- "lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
402
- hover_name="district", hover_data={"state": True, "pincode": True, "lat": False, "lon": False},
403
  mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
404
-
405
  fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
406
  st.plotly_chart(fig, use_container_width=True)
407
- else:
408
- st.info("Waiting for data...")
409
-
410
  with c_det:
411
  st.subheader("πŸ”₯ Top Hotspots")
412
  if not filtered_df.empty:
413
- top = filtered_df.groupby('district').agg(
414
- {'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
415
  for i, (d, r) in enumerate(top.iterrows(), 1):
416
- clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else (
417
- "#f97316", "HIGH")
418
- st.markdown(
419
- f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
420
 
421
  with tab_list:
422
  st.subheader("🎯 Priority Investigation")
423
  if not filtered_df.empty:
424
- targets = filtered_df[filtered_df['RISK_SCORE'] >
425
- 75].sort_values('RISK_SCORE', ascending=False)
426
  csv = targets.to_csv(index=False).encode('utf-8')
427
- st.download_button("πŸ“₯ Export CSV", data=csv,
428
- file_name="stark_priority.csv", mime="text/csv", type="primary")
429
  st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
430
  column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
431
- else:
432
- st.info("Waiting for data...")
433
 
434
  with tab_charts:
435
  c1, c2 = st.columns(2)
@@ -443,34 +343,25 @@ with tab_charts:
443
  with c2:
444
  st.markdown("**Weekend Activity Analysis**")
445
  if not filtered_df.empty:
446
- wk_counts = filtered_df.groupby(
447
- 'is_weekend')['total_activity'].sum().reset_index()
448
- wk_counts['Type'] = wk_counts['is_weekend'].map(
449
- {0: 'Weekday', 1: 'Weekend'})
450
- fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={
451
- 'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
452
  st.plotly_chart(fig, use_container_width=True)
453
 
454
  with tab_insights:
455
  st.subheader("πŸ” AI Detective Insights")
456
  if not filtered_df.empty:
457
  anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
458
- st.info(
459
- f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
460
-
461
  c_i1, c_i2 = st.columns(2)
462
  with c_i1:
463
  st.markdown("#### 🚨 Primary Risk Factors")
464
- st.markdown(
465
- "- **High Volume on Weekends:** 28% correlation with fraud")
466
- st.markdown(
467
- "- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
468
  with c_i2:
469
  st.markdown("#### πŸ’‘ Recommended Actions")
470
- st.markdown(
471
- f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
472
- st.markdown(
473
- "2. Deploy biometric re-verification for 'Rural A' cluster")
474
 
475
  st.markdown("---")
476
  st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.A.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)
 
17
  initial_sidebar_state="expanded"
18
  )
19
 
20
+ # 2. ROBUST CSS STYLING (Dark Mode Proof)
21
  st.markdown("""
22
  <style>
23
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
24
 
25
+ /* --- 1. MAIN CONTENT AREA (Light Theme Enforced) --- */
26
+ /* Target only the main content, NOT the sidebar */
27
+ .stApp > header { background-color: transparent !important; }
28
+
29
+ div[data-testid="stAppViewContainer"] {
30
+ background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
31
  }
32
 
33
+ /* Force Dark Text in Main Area */
34
+ section[data-testid="stMain"] * {
35
+ color: #0f172a; /* Dark Blue Text */
36
  }
37
+
38
+ /* Metric Cards in Main Area */
39
  div[data-testid="stMetric"] {
40
  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
41
+ border: 1px solid #e2e8f0;
42
+ border-radius: 12px;
43
+ box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
 
 
 
 
 
 
 
 
 
44
  }
45
+ div[data-testid="stMetricValue"] { color: #0f172a !important; }
46
+ div[data-testid="stMetricLabel"] { color: #475569 !important; }
47
+
48
+ /* --- 2. SIDEBAR (Dark Theme Enforced) --- */
49
+ section[data-testid="stSidebar"] {
50
+ background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%);
51
+ border-right: 1px solid #334155;
52
  }
53
+
54
+ /* NUCLEAR OPTION: Force ALL text in sidebar to be White */
55
+ section[data-testid="stSidebar"] * {
56
+ color: #f8fafc !important; /* White Text */
 
 
 
57
  }
58
+
59
+ /* EXCEPTION: Inputs inside Sidebar (Selectbox, DateInput) */
60
+ /* These usually have white backgrounds, so we need Dark Text inside them */
61
+ section[data-testid="stSidebar"] input,
62
+ section[data-testid="stSidebar"] textarea,
63
+ section[data-testid="stSidebar"] div[data-baseweb="select"] div {
64
+ color: #0f172a !important; /* Dark Text for Inputs */
65
+ -webkit-text-fill-color: #0f172a !important;
66
  }
67
 
68
+ /* Specific fix for the 'Selected Option' in dropdowns */
69
+ div[role="listbox"] div {
 
70
  color: #0f172a !important;
71
  }
72
+
73
+ /* --- 3. COMMON ELEMENTS --- */
74
+ /* DataFrame Headers */
75
+ div[data-testid="stDataFrame"] div[role="columnheader"] {
76
+ background-color: #f1f5f9;
77
  color: #0f172a !important;
78
  }
79
 
80
+ /* Link Button Style */
81
+ section[data-testid="stSidebar"] a {
82
+ background-color: #3b82f6 !important;
83
+ color: white !important;
 
84
  text-decoration: none;
85
+ padding: 8px 16px;
86
+ border-radius: 8px;
87
+ display: block;
88
+ text-align: center;
89
+ border: 1px solid #2563eb;
 
 
90
  }
91
+
92
+ /* Hotspot Cards */
93
+ .hotspot-card {
94
+ background: white;
95
+ padding: 16px;
96
+ border-radius: 10px;
97
+ border-left: 5px solid;
98
+ margin-bottom: 12px;
99
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05);
100
  }
101
+ /* Since Hotspot Cards are in Main Area, text inherits Dark, which is good. */
102
 
103
+ /* Status Badges */
104
+ .status-badge {
105
+ display: inline-flex; align-items: center;
106
+ padding: 6px 14px; border-radius: 9999px;
107
+ font-size: 12px; font-weight: 700;
108
+ text-transform: uppercase;
109
+ }
110
+ .bg-green { background: #dcfce7; color: #166534 !important; }
 
 
111
  </style>
112
  """, unsafe_allow_html=True)
113
 
114
  # 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
 
 
115
  @st.cache_data(show_spinner=False)
116
  def fetch_coordinates_batch(unique_locations):
 
 
 
 
 
 
117
  json_file = 'district_coords.json'
118
  coords_map = {}
119
 
 
120
  if os.path.exists(json_file):
121
  try:
122
  with open(json_file, 'r') as f:
 
123
  loaded_data = json.load(f)
124
  for k, v in loaded_data.items():
125
  if "|" in k:
126
  d, s = k.split("|")
127
  coords_map[(d, s)] = tuple(v)
128
  except json.JSONDecodeError:
129
+ pass
130
 
 
 
131
  prefills = {
132
  ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
133
  ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
 
152
  if k not in coords_map:
153
  coords_map[k] = v
154
 
 
155
  missing_locs = [loc for loc in unique_locations if loc not in coords_map]
 
156
  if not missing_locs:
157
  return coords_map
158
 
 
159
  progress_text = "πŸ“‘ New locations found. Fetching coordinates..."
160
  my_bar = st.progress(0, text=progress_text)
161
+ headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
 
 
162
  updated = False
163
 
164
  for i, (district, state) in enumerate(missing_locs):
165
  try:
166
+ my_bar.progress((i + 1) / len(missing_locs), text=f"πŸ“ Locating: {district}, {state}")
 
 
 
 
167
  query = f"{district}, {state}, India"
168
  url = "https://nominatim.openstreetmap.org/search"
169
  params = {'q': query, 'format': 'json', 'limit': 1}
170
+ response = requests.get(url, params=params, headers=headers, timeout=5)
 
 
171
 
172
  if response.status_code == 200 and response.json():
173
  data = response.json()[0]
174
+ coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
 
175
  updated = True
 
 
 
 
176
  time.sleep(1.1)
177
+ except Exception:
 
178
  continue
179
 
180
  my_bar.empty()
181
 
 
182
  if updated:
 
183
  save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
184
  with open(json_file, 'w') as f:
185
  json.dump(save_data, f)
 
187
  return coords_map
188
 
189
  # 4. MAIN DATA LOADER
 
 
190
  @st.cache_data(ttl=300)
191
  def load_data():
192
  try:
193
  df = pd.read_csv('analyzed_aadhaar_data.csv')
194
  except FileNotFoundError:
195
+ return pd.DataFrame()
196
 
197
  if 'date' in df.columns:
198
  df['date'] = pd.to_datetime(df['date'])
199
 
 
200
  df['district'] = df['district'].astype(str).str.strip()
201
  df['state'] = df['state'].astype(str).str.strip()
202
 
 
 
203
  state_mapping = {
204
  'Jammu & Kashmir': 'Jammu and Kashmir',
205
  'J&K': 'Jammu and Kashmir',
 
217
  }
218
  df['state'] = df['state'].replace(state_mapping)
219
 
220
+ unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
 
 
 
 
221
  coords_db = fetch_coordinates_batch(unique_locs)
 
 
222
  state_centers = {
223
+ 'Delhi': (28.7041, 77.1025), 'Maharashtra': (19.7515, 75.7139), 'Karnataka': (15.3173, 75.7139)
 
 
 
 
 
 
 
 
 
 
 
224
  }
225
 
226
  def get_lat_lon(row):
227
  key = (row['district'], row['state'])
 
 
228
  if key in coords_db:
229
  lat, lon = coords_db[key]
 
230
  return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
 
 
231
  center = state_centers.get(row['state'], (20.5937, 78.9629))
232
  np.random.seed(hash(key) % 2**32)
233
+ return pd.Series({'lat': center[0] + np.random.uniform(-0.5, 0.5), 'lon': center[1] + np.random.uniform(-0.5, 0.5)})
 
 
 
234
 
235
  coords = df.apply(get_lat_lon, axis=1)
236
  df['lat'] = coords['lat']
237
  df['lon'] = coords['lon']
238
+ df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
 
 
239
  return df
240
 
241
+ with st.spinner('Initializing S.A.T.A.R.K AI...'):
 
242
  df = load_data()
243
 
244
  # 5. SIDEBAR & FILTERS
 
249
  if not df.empty:
250
  if 'date' in df.columns:
251
  min_d, max_d = df['date'].min().date(), df['date'].max().date()
252
+ dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
 
253
  if len(dr) == 2:
254
+ df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
 
255
 
256
  state_list = ['All'] + sorted(df['state'].unique().tolist())
257
  sel_state = st.selectbox("State", state_list)
258
+ filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
 
259
 
260
  dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
261
  sel_dist = st.selectbox("District", dist_list)
 
263
  filtered_df = filtered_df[filtered_df['district'] == sel_dist]
264
 
265
  st.markdown("---")
266
+ risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
 
267
  if risk_filter:
268
+ filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
 
269
  else:
270
  filtered_df = pd.DataFrame()
271
 
272
  st.markdown("---")
273
+ st.link_button("πŸ““ Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
274
+ st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
 
 
275
 
276
  # 6. HEADER & METRICS
277
  col1, col2 = st.columns([3, 1])
 
279
  st.title("πŸ›‘οΈ S.A.T.A.R.K AI Dashboard")
280
  st.markdown("**Context-Aware Fraud Detection & Prevention System**")
281
  with col2:
282
+ st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
 
283
 
284
  st.markdown("---")
285
 
286
  if not filtered_df.empty:
287
  m1, m2, m3, m4, m5 = st.columns(5)
288
+ total = len(filtered_df)
289
+ high = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
290
+ crit = len(filtered_df[filtered_df['RISK_SCORE'] > 85])
291
+
292
  m1.metric("Total Centers", f"{total:,}", border=True)
293
+ m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
294
+ m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
295
+ m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100", border=True)
296
+ m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
 
 
 
 
297
  else:
298
+ st.error("❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
 
299
 
300
  st.markdown("##")
301
 
302
  # 7. TABS
303
+ tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
 
304
 
305
  with tab_map:
306
  c_map, c_det = st.columns([3, 1])
307
  with c_map:
308
  if not filtered_df.empty:
309
+ zoom_lvl = 10 if sel_dist != 'All' else (6 if sel_state != 'All' else 3.8)
 
 
 
 
 
 
 
310
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
311
  color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
312
+ center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
313
+ hover_name="district", hover_data={"state": True, "pincode": True},
 
314
  mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
 
315
  fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
316
  st.plotly_chart(fig, use_container_width=True)
 
 
 
317
  with c_det:
318
  st.subheader("πŸ”₯ Top Hotspots")
319
  if not filtered_df.empty:
320
+ top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
 
321
  for i, (d, r) in enumerate(top.iterrows(), 1):
322
+ clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
323
+ st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
 
 
324
 
325
  with tab_list:
326
  st.subheader("🎯 Priority Investigation")
327
  if not filtered_df.empty:
328
+ targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
 
329
  csv = targets.to_csv(index=False).encode('utf-8')
330
+ st.download_button("πŸ“₯ Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
 
331
  st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
332
  column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
 
 
333
 
334
  with tab_charts:
335
  c1, c2 = st.columns(2)
 
343
  with c2:
344
  st.markdown("**Weekend Activity Analysis**")
345
  if not filtered_df.empty:
346
+ wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
347
+ wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
348
+ fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
 
 
 
349
  st.plotly_chart(fig, use_container_width=True)
350
 
351
  with tab_insights:
352
  st.subheader("πŸ” AI Detective Insights")
353
  if not filtered_df.empty:
354
  anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
355
+ st.info(f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
 
 
356
  c_i1, c_i2 = st.columns(2)
357
  with c_i1:
358
  st.markdown("#### 🚨 Primary Risk Factors")
359
+ st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
360
+ st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
 
 
361
  with c_i2:
362
  st.markdown("#### πŸ’‘ Recommended Actions")
363
+ st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
364
+ st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
 
 
365
 
366
  st.markdown("---")
367
  st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.A.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)