LovnishVerma committed on
Commit
07daf60
·
verified ·
1 Parent(s): 837e300

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -83
app.py CHANGED
@@ -102,6 +102,8 @@ st.markdown("""
102
  """, unsafe_allow_html=True)
103
 
104
  # 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
 
 
105
  @st.cache_data(show_spinner=False)
106
  def fetch_coordinates_batch(unique_locations):
107
  """
@@ -124,7 +126,7 @@ def fetch_coordinates_batch(unique_locations):
124
  d, s = k.split("|")
125
  coords_map[(d, s)] = tuple(v)
126
  except json.JSONDecodeError:
127
- pass # File corrupted, start fresh
128
 
129
  # 2. Add Hardcoded Pre-fills (High Priority Redundancy)
130
  prefills = {
@@ -150,76 +152,84 @@ def fetch_coordinates_batch(unique_locations):
150
  for k, v in prefills.items():
151
  if k not in coords_map:
152
  coords_map[k] = v
153
-
154
  # 3. Identify missing locations
155
  missing_locs = [loc for loc in unique_locations if loc not in coords_map]
156
-
157
  if not missing_locs:
158
  return coords_map
159
 
160
  # 4. Dynamic Fetching for missing
161
  progress_text = "πŸ“‘ New locations found. Fetching coordinates..."
162
  my_bar = st.progress(0, text=progress_text)
163
-
164
- headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
 
165
  updated = False
166
-
167
  for i, (district, state) in enumerate(missing_locs):
168
  try:
169
  # Update Progress
170
- my_bar.progress((i + 1) / len(missing_locs), text=f"πŸ“ Locating: {district}, {state}")
171
-
 
172
  # API Call
173
  query = f"{district}, {state}, India"
174
  url = "https://nominatim.openstreetmap.org/search"
175
  params = {'q': query, 'format': 'json', 'limit': 1}
176
-
177
- response = requests.get(url, params=params, headers=headers, timeout=5)
178
-
 
179
  if response.status_code == 200 and response.json():
180
  data = response.json()[0]
181
- coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
 
182
  updated = True
183
  else:
184
- pass # Fail silently, will fall back to state center logic later
185
-
186
  # Respect Rate Limiting (1 request per second)
187
- time.sleep(1.1)
188
-
189
  except Exception as e:
190
  continue
191
-
192
  my_bar.empty()
193
-
194
  # 5. Save back to JSON if new data fetched
195
  if updated:
196
  # Convert keys to string "District|State" for JSON compatibility
197
  save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
198
  with open(json_file, 'w') as f:
199
  json.dump(save_data, f)
200
-
201
  return coords_map
202
 
203
  # 4. MAIN DATA LOADER
 
 
204
  @st.cache_data(ttl=300)
205
  def load_data():
206
  try:
207
  df = pd.read_csv('analyzed_aadhaar_data.csv')
208
  except FileNotFoundError:
209
- return pd.DataFrame() # Return empty to trigger external error check
 
 
 
210
 
211
- if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
212
-
213
  # Clean Data
214
  df['district'] = df['district'].astype(str).str.strip()
215
  df['state'] = df['state'].astype(str).str.strip()
216
-
217
  # Get Unique Locations
218
- unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
219
-
 
220
  # Fetch Coordinates (Cached + Persistent JSON)
221
  coords_db = fetch_coordinates_batch(unique_locs)
222
-
223
  # Fallback Centers (State Capitals)
224
  state_centers = {
225
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
@@ -235,31 +245,33 @@ def load_data():
235
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
236
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
237
  }
238
-
239
  def get_lat_lon(row):
240
  key = (row['district'], row['state'])
241
-
242
  # 1. Check Exact Match from API/Cache
243
  if key in coords_db:
244
  lat, lon = coords_db[key]
245
  # Tiny jitter to separate stacked points
246
  return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
247
-
248
  # 2. Fallback to State Center
249
  center = state_centers.get(row['state'], (20.5937, 78.9629))
250
  np.random.seed(hash(key) % 2**32)
251
  return pd.Series({
252
- 'lat': center[0] + np.random.uniform(-0.5, 0.5),
253
  'lon': center[1] + np.random.uniform(-0.5, 0.5)
254
  })
255
 
256
  coords = df.apply(get_lat_lon, axis=1)
257
  df['lat'] = coords['lat']
258
  df['lon'] = coords['lon']
259
-
260
- df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
 
261
  return df
262
 
 
263
  with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
264
  df = load_data()
265
 
@@ -267,30 +279,40 @@ with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
267
  with st.sidebar:
268
  st.markdown("### πŸ›‘οΈ S.T.A.R.K AI Control")
269
  st.markdown("---")
270
-
271
  if not df.empty:
272
  if 'date' in df.columns:
273
  min_d, max_d = df['date'].min().date(), df['date'].max().date()
274
- dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
275
- if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
276
-
 
 
 
277
  state_list = ['All'] + sorted(df['state'].unique().tolist())
278
  sel_state = st.selectbox("State", state_list)
279
- filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
280
-
 
281
  dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
282
  sel_dist = st.selectbox("District", dist_list)
283
- if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
284
-
 
285
  st.markdown("---")
286
- risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
287
- if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
 
 
 
288
  else:
289
  filtered_df = pd.DataFrame()
290
-
291
  st.markdown("---")
292
- st.link_button("πŸ““ Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
293
- st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
 
 
294
 
295
  # 6. HEADER & METRICS
296
  col1, col2 = st.columns([3, 1])
@@ -298,62 +320,81 @@ with col1:
298
  st.title("πŸ›‘οΈ S.T.A.R.K AI Dashboard")
299
  st.markdown("**Context-Aware Fraud Detection & Prevention System**")
300
  with col2:
301
- st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
 
302
 
303
  st.markdown("---")
304
 
305
  if not filtered_df.empty:
306
  m1, m2, m3, m4, m5 = st.columns(5)
307
- total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
 
308
  m1.metric("Total Centers", f"{total:,}", border=True)
309
- m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
310
- m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
311
- m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
312
- m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
 
 
 
 
313
  else:
314
- st.error("❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
 
315
 
316
  st.markdown("##")
317
 
318
  # 7. TABS
319
- tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
 
320
 
321
  with tab_map:
322
  c_map, c_det = st.columns([3, 1])
323
  with c_map:
324
  if not filtered_df.empty:
325
  # Dynamic Zoom based on selection
326
- if sel_dist != 'All': zoom_lvl = 10
327
- elif sel_state != 'All': zoom_lvl = 6
328
- else: zoom_lvl = 3.8
 
 
 
329
 
330
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
331
- color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
332
- center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
333
- hover_name="district", hover_data={"state":True, "pincode":True, "lat":False, "lon":False},
334
- mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
335
-
336
- fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
 
337
  st.plotly_chart(fig, use_container_width=True)
338
- else: st.info("Waiting for data...")
339
-
 
340
  with c_det:
341
  st.subheader("πŸ”₯ Top Hotspots")
342
  if not filtered_df.empty:
343
- top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
 
344
  for i, (d, r) in enumerate(top.iterrows(), 1):
345
- clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else ("#f97316", "HIGH")
346
- st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
 
 
347
 
348
  with tab_list:
349
  st.subheader("🎯 Priority Investigation")
350
  if not filtered_df.empty:
351
- targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
 
352
  csv = targets.to_csv(index=False).encode('utf-8')
353
- st.download_button("πŸ“₯ Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
354
- st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
355
- column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
356
- else: st.info("Waiting for data...")
 
 
357
 
358
  with tab_charts:
359
  c1, c2 = st.columns(2)
@@ -361,32 +402,40 @@ with tab_charts:
361
  st.markdown("**Ghost ID Detection**")
362
  if not filtered_df.empty:
363
  fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
364
- color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
365
  fig.add_hline(y=0.2, line_dash="dash", line_color="red")
366
  st.plotly_chart(fig, use_container_width=True)
367
  with c2:
368
  st.markdown("**Weekend Activity Analysis**")
369
  if not filtered_df.empty:
370
- wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
371
- wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
372
- fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
 
 
 
373
  st.plotly_chart(fig, use_container_width=True)
374
 
375
  with tab_insights:
376
  st.subheader("πŸ” AI Detective Insights")
377
  if not filtered_df.empty:
378
  anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
379
- st.info(f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
380
-
 
381
  c_i1, c_i2 = st.columns(2)
382
  with c_i1:
383
  st.markdown("#### 🚨 Primary Risk Factors")
384
- st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
385
- st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
 
 
386
  with c_i2:
387
  st.markdown("#### πŸ’‘ Recommended Actions")
388
- st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
389
- st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
 
 
390
 
391
  st.markdown("---")
392
- st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)
 
102
  """, unsafe_allow_html=True)
103
 
104
  # 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
105
+
106
+
107
  @st.cache_data(show_spinner=False)
108
  def fetch_coordinates_batch(unique_locations):
109
  """
 
126
  d, s = k.split("|")
127
  coords_map[(d, s)] = tuple(v)
128
  except json.JSONDecodeError:
129
+ pass # File corrupted, start fresh
130
 
131
  # 2. Add Hardcoded Pre-fills (High Priority Redundancy)
132
  prefills = {
 
152
  for k, v in prefills.items():
153
  if k not in coords_map:
154
  coords_map[k] = v
155
+
156
  # 3. Identify missing locations
157
  missing_locs = [loc for loc in unique_locations if loc not in coords_map]
158
+
159
  if not missing_locs:
160
  return coords_map
161
 
162
  # 4. Dynamic Fetching for missing
163
  progress_text = "πŸ“‘ New locations found. Fetching coordinates..."
164
  my_bar = st.progress(0, text=progress_text)
165
+
166
+ headers = {
167
+ 'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
168
  updated = False
169
+
170
  for i, (district, state) in enumerate(missing_locs):
171
  try:
172
  # Update Progress
173
+ my_bar.progress((i + 1) / len(missing_locs),
174
+ text=f"πŸ“ Locating: {district}, {state}")
175
+
176
  # API Call
177
  query = f"{district}, {state}, India"
178
  url = "https://nominatim.openstreetmap.org/search"
179
  params = {'q': query, 'format': 'json', 'limit': 1}
180
+
181
+ response = requests.get(
182
+ url, params=params, headers=headers, timeout=5)
183
+
184
  if response.status_code == 200 and response.json():
185
  data = response.json()[0]
186
+ coords_map[(district, state)] = (
187
+ float(data['lat']), float(data['lon']))
188
  updated = True
189
  else:
190
+ pass # Fail silently, will fall back to state center logic later
191
+
192
  # Respect Rate Limiting (1 request per second)
193
+ time.sleep(1.1)
194
+
195
  except Exception as e:
196
  continue
197
+
198
  my_bar.empty()
199
+
200
  # 5. Save back to JSON if new data fetched
201
  if updated:
202
  # Convert keys to string "District|State" for JSON compatibility
203
  save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
204
  with open(json_file, 'w') as f:
205
  json.dump(save_data, f)
206
+
207
  return coords_map
208
 
209
  # 4. MAIN DATA LOADER
210
+
211
+
212
  @st.cache_data(ttl=300)
213
  def load_data():
214
  try:
215
  df = pd.read_csv('analyzed_aadhaar_data.csv')
216
  except FileNotFoundError:
217
+ return pd.DataFrame() # Return empty to trigger external error check
218
+
219
+ if 'date' in df.columns:
220
+ df['date'] = pd.to_datetime(df['date'])
221
 
 
 
222
  # Clean Data
223
  df['district'] = df['district'].astype(str).str.strip()
224
  df['state'] = df['state'].astype(str).str.strip()
225
+
226
  # Get Unique Locations
227
+ unique_locs = list(
228
+ df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
229
+
230
  # Fetch Coordinates (Cached + Persistent JSON)
231
  coords_db = fetch_coordinates_batch(unique_locs)
232
+
233
  # Fallback Centers (State Capitals)
234
  state_centers = {
235
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
 
245
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
246
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
247
  }
248
+
249
  def get_lat_lon(row):
250
  key = (row['district'], row['state'])
251
+
252
  # 1. Check Exact Match from API/Cache
253
  if key in coords_db:
254
  lat, lon = coords_db[key]
255
  # Tiny jitter to separate stacked points
256
  return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
257
+
258
  # 2. Fallback to State Center
259
  center = state_centers.get(row['state'], (20.5937, 78.9629))
260
  np.random.seed(hash(key) % 2**32)
261
  return pd.Series({
262
+ 'lat': center[0] + np.random.uniform(-0.5, 0.5),
263
  'lon': center[1] + np.random.uniform(-0.5, 0.5)
264
  })
265
 
266
  coords = df.apply(get_lat_lon, axis=1)
267
  df['lat'] = coords['lat']
268
  df['lon'] = coords['lon']
269
+
270
+ df['risk_category'] = pd.cut(
271
+ df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
272
  return df
273
 
274
+
275
  with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
276
  df = load_data()
277
 
 
279
  with st.sidebar:
280
  st.markdown("### πŸ›‘οΈ S.T.A.R.K AI Control")
281
  st.markdown("---")
282
+
283
  if not df.empty:
284
  if 'date' in df.columns:
285
  min_d, max_d = df['date'].min().date(), df['date'].max().date()
286
+ dr = st.date_input("Date Range", value=(
287
+ min_d, max_d), min_value=min_d, max_value=max_d)
288
+ if len(dr) == 2:
289
+ df = df[(df['date'].dt.date >= dr[0]) &
290
+ (df['date'].dt.date <= dr[1])]
291
+
292
  state_list = ['All'] + sorted(df['state'].unique().tolist())
293
  sel_state = st.selectbox("State", state_list)
294
+ filtered_df = df[df['state'] ==
295
+ sel_state] if sel_state != 'All' else df.copy()
296
+
297
  dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
298
  sel_dist = st.selectbox("District", dist_list)
299
+ if sel_dist != 'All':
300
+ filtered_df = filtered_df[filtered_df['district'] == sel_dist]
301
+
302
  st.markdown("---")
303
+ risk_filter = st.multiselect(
304
+ "Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
305
+ if risk_filter:
306
+ filtered_df = filtered_df[filtered_df['risk_category'].isin(
307
+ risk_filter)]
308
  else:
309
  filtered_df = pd.DataFrame()
310
+
311
  st.markdown("---")
312
+ st.link_button("πŸ““ Open Analysis Notebook",
313
+ "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
314
+ st.info(
315
+ f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
316
 
317
  # 6. HEADER & METRICS
318
  col1, col2 = st.columns([3, 1])
 
320
  st.title("πŸ›‘οΈ S.T.A.R.K AI Dashboard")
321
  st.markdown("**Context-Aware Fraud Detection & Prevention System**")
322
  with col2:
323
+ st.markdown(
324
+ f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
325
 
326
  st.markdown("---")
327
 
328
  if not filtered_df.empty:
329
  m1, m2, m3, m4, m5 = st.columns(5)
330
+ total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(
331
+ filtered_df[filtered_df['RISK_SCORE'] > 85])
332
  m1.metric("Total Centers", f"{total:,}", border=True)
333
+ m2.metric("High Risk", f"{high}", delta="Review",
334
+ delta_color="inverse", border=True)
335
+ m3.metric("Critical", f"{crit}", delta="Urgent",
336
+ delta_color="inverse", border=True)
337
+ m4.metric(
338
+ "Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
339
+ m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}",
340
+ delta="Suspicious", delta_color="off", border=True)
341
  else:
342
+ st.error(
343
+ "❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
344
 
345
  st.markdown("##")
346
 
347
  # 7. TABS
348
+ tab_map, tab_list, tab_charts, tab_insights = st.tabs(
349
+ ["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
350
 
351
  with tab_map:
352
  c_map, c_det = st.columns([3, 1])
353
  with c_map:
354
  if not filtered_df.empty:
355
  # Dynamic Zoom based on selection
356
+ if sel_dist != 'All':
357
+ zoom_lvl = 10
358
+ elif sel_state != 'All':
359
+ zoom_lvl = 6
360
+ else:
361
+ zoom_lvl = 3.8
362
 
363
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
364
+ color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
365
+ center=None if sel_state == 'All' else {
366
+ "lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
367
+ hover_name="district", hover_data={"state": True, "pincode": True, "lat": False, "lon": False},
368
+ mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
369
+
370
+ fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
371
  st.plotly_chart(fig, use_container_width=True)
372
+ else:
373
+ st.info("Waiting for data...")
374
+
375
  with c_det:
376
  st.subheader("πŸ”₯ Top Hotspots")
377
  if not filtered_df.empty:
378
+ top = filtered_df.groupby('district').agg(
379
+ {'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
380
  for i, (d, r) in enumerate(top.iterrows(), 1):
381
+ clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else (
382
+ "#f97316", "HIGH")
383
+ st.markdown(
384
+ f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
385
 
386
  with tab_list:
387
  st.subheader("🎯 Priority Investigation")
388
  if not filtered_df.empty:
389
+ targets = filtered_df[filtered_df['RISK_SCORE'] >
390
+ 75].sort_values('RISK_SCORE', ascending=False)
391
  csv = targets.to_csv(index=False).encode('utf-8')
392
+ st.download_button("πŸ“₯ Export CSV", data=csv,
393
+ file_name="stark_priority.csv", mime="text/csv", type="primary")
394
+ st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
395
+ column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
396
+ else:
397
+ st.info("Waiting for data...")
398
 
399
  with tab_charts:
400
  c1, c2 = st.columns(2)
 
402
  st.markdown("**Ghost ID Detection**")
403
  if not filtered_df.empty:
404
  fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
405
+ color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
406
  fig.add_hline(y=0.2, line_dash="dash", line_color="red")
407
  st.plotly_chart(fig, use_container_width=True)
408
  with c2:
409
  st.markdown("**Weekend Activity Analysis**")
410
  if not filtered_df.empty:
411
+ wk_counts = filtered_df.groupby(
412
+ 'is_weekend')['total_activity'].sum().reset_index()
413
+ wk_counts['Type'] = wk_counts['is_weekend'].map(
414
+ {0: 'Weekday', 1: 'Weekend'})
415
+ fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={
416
+ 'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
417
  st.plotly_chart(fig, use_container_width=True)
418
 
419
  with tab_insights:
420
  st.subheader("πŸ” AI Detective Insights")
421
  if not filtered_df.empty:
422
  anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
423
+ st.info(
424
+ f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
425
+
426
  c_i1, c_i2 = st.columns(2)
427
  with c_i1:
428
  st.markdown("#### 🚨 Primary Risk Factors")
429
+ st.markdown(
430
+ "- **High Volume on Weekends:** 28% correlation with fraud")
431
+ st.markdown(
432
+ "- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
433
  with c_i2:
434
  st.markdown("#### πŸ’‘ Recommended Actions")
435
+ st.markdown(
436
+ f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
437
+ st.markdown(
438
+ "2. Deploy biometric re-verification for 'Rural A' cluster")
439
 
440
  st.markdown("---")
441
+ st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)