Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,18 +59,20 @@ st.markdown("""
|
|
| 59 |
border-right: 1px solid #334155;
|
| 60 |
}
|
| 61 |
|
| 62 |
-
/* Force Sidebar Text Colors */
|
| 63 |
[data-testid="stSidebar"] h1, [data-testid="stSidebar"] h2, [data-testid="stSidebar"] h3 {
|
| 64 |
color: #f8fafc !important;
|
| 65 |
}
|
| 66 |
-
|
|
|
|
|
|
|
| 67 |
color: #e2e8f0 !important;
|
| 68 |
font-weight: 600;
|
| 69 |
}
|
| 70 |
|
| 71 |
/* Input Boxes (Selectbox, DateInput) Text Color Fix */
|
| 72 |
-
/* This targets the text INSIDE the white input box */
|
| 73 |
-
[data-testid="stSidebar"] div[data-baseweb="select"]
|
| 74 |
color: #0f172a !important;
|
| 75 |
}
|
| 76 |
[data-testid="stSidebar"] div[data-baseweb="input"] input {
|
|
@@ -80,6 +82,11 @@ st.markdown("""
|
|
| 80 |
color: #0f172a !important;
|
| 81 |
}
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
/* Link Button Fix - CRITICAL OVERRIDE FOR DARK TEXT */
|
| 84 |
[data-testid="stSidebar"] a {
|
| 85 |
background-color: #3b82f6 !important; /* Brighter blue background */
|
|
@@ -101,7 +108,7 @@ st.markdown("""
|
|
| 101 |
transform: translateY(-1px);
|
| 102 |
}
|
| 103 |
|
| 104 |
-
/* Link Button Container Background */
|
| 105 |
[data-testid="stSidebar"] button[kind="secondary"] {
|
| 106 |
background-color: #f8fafc !important; /* Light bg for button */
|
| 107 |
color: #0f172a !important; /* Dark text */
|
|
@@ -132,8 +139,6 @@ st.markdown("""
|
|
| 132 |
""", unsafe_allow_html=True)
|
| 133 |
|
| 134 |
# 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
|
| 135 |
-
|
| 136 |
-
|
| 137 |
@st.cache_data(show_spinner=False)
|
| 138 |
def fetch_coordinates_batch(unique_locations):
|
| 139 |
"""
|
|
@@ -156,7 +161,7 @@ def fetch_coordinates_batch(unique_locations):
|
|
| 156 |
d, s = k.split("|")
|
| 157 |
coords_map[(d, s)] = tuple(v)
|
| 158 |
except json.JSONDecodeError:
|
| 159 |
-
pass
|
| 160 |
|
| 161 |
# 2. Add Hardcoded Pre-fills (High Priority Redundancy)
|
| 162 |
# These override if missing, but usually JSON is preferred source if present
|
|
@@ -183,77 +188,70 @@ def fetch_coordinates_batch(unique_locations):
|
|
| 183 |
for k, v in prefills.items():
|
| 184 |
if k not in coords_map:
|
| 185 |
coords_map[k] = v
|
| 186 |
-
|
| 187 |
# 3. Identify missing locations
|
| 188 |
missing_locs = [loc for loc in unique_locations if loc not in coords_map]
|
| 189 |
-
|
| 190 |
if not missing_locs:
|
| 191 |
return coords_map
|
| 192 |
|
| 193 |
# 4. Dynamic Fetching for missing
|
| 194 |
progress_text = "π‘ New locations found. Fetching coordinates..."
|
| 195 |
my_bar = st.progress(0, text=progress_text)
|
| 196 |
-
|
| 197 |
-
headers = {
|
| 198 |
-
'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
|
| 199 |
updated = False
|
| 200 |
-
|
| 201 |
for i, (district, state) in enumerate(missing_locs):
|
| 202 |
try:
|
| 203 |
# Update Progress
|
| 204 |
-
my_bar.progress((i + 1) / len(missing_locs),
|
| 205 |
-
|
| 206 |
-
|
| 207 |
# API Call
|
| 208 |
query = f"{district}, {state}, India"
|
| 209 |
url = "https://nominatim.openstreetmap.org/search"
|
| 210 |
params = {'q': query, 'format': 'json', 'limit': 1}
|
| 211 |
-
|
| 212 |
-
response = requests.get(
|
| 213 |
-
|
| 214 |
-
|
| 215 |
if response.status_code == 200 and response.json():
|
| 216 |
data = response.json()[0]
|
| 217 |
-
coords_map[(district, state)] = (
|
| 218 |
-
float(data['lat']), float(data['lon']))
|
| 219 |
updated = True
|
| 220 |
else:
|
| 221 |
-
pass
|
| 222 |
-
|
| 223 |
# Respect Rate Limiting (1 request per second)
|
| 224 |
-
time.sleep(1.1)
|
| 225 |
-
|
| 226 |
except Exception as e:
|
| 227 |
continue
|
| 228 |
-
|
| 229 |
my_bar.empty()
|
| 230 |
-
|
| 231 |
# 5. Save back to JSON if new data fetched
|
| 232 |
if updated:
|
| 233 |
# Convert keys to string "District|State" for JSON compatibility
|
| 234 |
save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
|
| 235 |
with open(json_file, 'w') as f:
|
| 236 |
json.dump(save_data, f)
|
| 237 |
-
|
| 238 |
return coords_map
|
| 239 |
|
| 240 |
# 4. MAIN DATA LOADER
|
| 241 |
-
|
| 242 |
-
|
| 243 |
@st.cache_data(ttl=300)
|
| 244 |
def load_data():
|
| 245 |
try:
|
| 246 |
df = pd.read_csv('analyzed_aadhaar_data.csv')
|
| 247 |
except FileNotFoundError:
|
| 248 |
-
return pd.DataFrame()
|
| 249 |
-
|
| 250 |
-
if 'date' in df.columns:
|
| 251 |
-
df['date'] = pd.to_datetime(df['date'])
|
| 252 |
|
|
|
|
|
|
|
| 253 |
# Clean Data
|
| 254 |
df['district'] = df['district'].astype(str).str.strip()
|
| 255 |
df['state'] = df['state'].astype(str).str.strip()
|
| 256 |
-
|
| 257 |
# --- FIX DUPLICATE STATES ---
|
| 258 |
# Standardize State Names to remove variations (e.g., J&K)
|
| 259 |
state_mapping = {
|
|
@@ -265,14 +263,13 @@ def load_data():
|
|
| 265 |
'Pondicherry': 'Puducherry'
|
| 266 |
}
|
| 267 |
df['state'] = df['state'].replace(state_mapping)
|
| 268 |
-
|
| 269 |
# Get Unique Locations
|
| 270 |
-
unique_locs = list(
|
| 271 |
-
|
| 272 |
-
|
| 273 |
# Fetch Coordinates (Cached + Persistent JSON)
|
| 274 |
coords_db = fetch_coordinates_batch(unique_locs)
|
| 275 |
-
|
| 276 |
# Fallback Centers (State Capitals)
|
| 277 |
state_centers = {
|
| 278 |
'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
|
|
@@ -288,33 +285,31 @@ def load_data():
|
|
| 288 |
'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
|
| 289 |
'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
|
| 290 |
}
|
| 291 |
-
|
| 292 |
def get_lat_lon(row):
|
| 293 |
key = (row['district'], row['state'])
|
| 294 |
-
|
| 295 |
# 1. Check Exact Match from API/Cache
|
| 296 |
if key in coords_db:
|
| 297 |
lat, lon = coords_db[key]
|
| 298 |
# Tiny jitter to separate stacked points
|
| 299 |
return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
|
| 300 |
-
|
| 301 |
# 2. Fallback to State Center
|
| 302 |
center = state_centers.get(row['state'], (20.5937, 78.9629))
|
| 303 |
np.random.seed(hash(key) % 2**32)
|
| 304 |
return pd.Series({
|
| 305 |
-
'lat': center[0] + np.random.uniform(-0.5, 0.5),
|
| 306 |
'lon': center[1] + np.random.uniform(-0.5, 0.5)
|
| 307 |
})
|
| 308 |
|
| 309 |
coords = df.apply(get_lat_lon, axis=1)
|
| 310 |
df['lat'] = coords['lat']
|
| 311 |
df['lon'] = coords['lon']
|
| 312 |
-
|
| 313 |
-
df['risk_category'] = pd.cut(
|
| 314 |
-
df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
|
| 315 |
return df
|
| 316 |
|
| 317 |
-
|
| 318 |
with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
|
| 319 |
df = load_data()
|
| 320 |
|
|
@@ -322,40 +317,30 @@ with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
|
|
| 322 |
with st.sidebar:
|
| 323 |
st.markdown("### π‘οΈ S.T.A.R.K AI Control")
|
| 324 |
st.markdown("---")
|
| 325 |
-
|
| 326 |
if not df.empty:
|
| 327 |
if 'date' in df.columns:
|
| 328 |
min_d, max_d = df['date'].min().date(), df['date'].max().date()
|
| 329 |
-
dr = st.date_input("Date Range", value=(
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
df = df[(df['date'].dt.date >= dr[0]) &
|
| 333 |
-
(df['date'].dt.date <= dr[1])]
|
| 334 |
-
|
| 335 |
state_list = ['All'] + sorted(df['state'].unique().tolist())
|
| 336 |
sel_state = st.selectbox("State", state_list)
|
| 337 |
-
filtered_df = df[df['state'] ==
|
| 338 |
-
|
| 339 |
-
|
| 340 |
dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
|
| 341 |
sel_dist = st.selectbox("District", dist_list)
|
| 342 |
-
if sel_dist != 'All':
|
| 343 |
-
|
| 344 |
-
|
| 345 |
st.markdown("---")
|
| 346 |
-
risk_filter = st.multiselect(
|
| 347 |
-
|
| 348 |
-
if risk_filter:
|
| 349 |
-
filtered_df = filtered_df[filtered_df['risk_category'].isin(
|
| 350 |
-
risk_filter)]
|
| 351 |
else:
|
| 352 |
filtered_df = pd.DataFrame()
|
| 353 |
-
|
| 354 |
st.markdown("---")
|
| 355 |
-
st.link_button("π Open Analysis Notebook",
|
| 356 |
-
|
| 357 |
-
st.info(
|
| 358 |
-
f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
|
| 359 |
|
| 360 |
# 6. HEADER & METRICS
|
| 361 |
col1, col2 = st.columns([3, 1])
|
|
@@ -363,81 +348,62 @@ with col1:
|
|
| 363 |
st.title("π‘οΈ S.T.A.R.K AI Dashboard")
|
| 364 |
st.markdown("**Context-Aware Fraud Detection & Prevention System**")
|
| 365 |
with col2:
|
| 366 |
-
st.markdown(
|
| 367 |
-
f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">β System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
|
| 368 |
|
| 369 |
st.markdown("---")
|
| 370 |
|
| 371 |
if not filtered_df.empty:
|
| 372 |
m1, m2, m3, m4, m5 = st.columns(5)
|
| 373 |
-
total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(
|
| 374 |
-
filtered_df[filtered_df['RISK_SCORE'] > 85])
|
| 375 |
m1.metric("Total Centers", f"{total:,}", border=True)
|
| 376 |
-
m2.metric("High Risk", f"{high}", delta="Review",
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
m4.metric(
|
| 381 |
-
"Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
|
| 382 |
-
m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}",
|
| 383 |
-
delta="Suspicious", delta_color="off", border=True)
|
| 384 |
else:
|
| 385 |
-
st.error(
|
| 386 |
-
"β Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
|
| 387 |
|
| 388 |
st.markdown("##")
|
| 389 |
|
| 390 |
# 7. TABS
|
| 391 |
-
tab_map, tab_list, tab_charts, tab_insights = st.tabs(
|
| 392 |
-
["πΊοΈ Geographic Risk", "π Priority List", "π Patterns", "π AI Insights"])
|
| 393 |
|
| 394 |
with tab_map:
|
| 395 |
c_map, c_det = st.columns([3, 1])
|
| 396 |
with c_map:
|
| 397 |
if not filtered_df.empty:
|
| 398 |
# Dynamic Zoom based on selection
|
| 399 |
-
if sel_dist != 'All':
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
zoom_lvl = 6
|
| 403 |
-
else:
|
| 404 |
-
zoom_lvl = 3.8
|
| 405 |
|
| 406 |
fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
|
| 414 |
st.plotly_chart(fig, use_container_width=True)
|
| 415 |
-
else:
|
| 416 |
-
|
| 417 |
-
|
| 418 |
with c_det:
|
| 419 |
st.subheader("π₯ Top Hotspots")
|
| 420 |
if not filtered_df.empty:
|
| 421 |
-
top = filtered_df.groupby('district').agg(
|
| 422 |
-
{'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
|
| 423 |
for i, (d, r) in enumerate(top.iterrows(), 1):
|
| 424 |
-
clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else (
|
| 425 |
-
|
| 426 |
-
st.markdown(
|
| 427 |
-
f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
|
| 428 |
|
| 429 |
with tab_list:
|
| 430 |
st.subheader("π― Priority Investigation")
|
| 431 |
if not filtered_df.empty:
|
| 432 |
-
targets = filtered_df[filtered_df['RISK_SCORE'] >
|
| 433 |
-
75].sort_values('RISK_SCORE', ascending=False)
|
| 434 |
csv = targets.to_csv(index=False).encode('utf-8')
|
| 435 |
-
st.download_button("π₯ Export CSV", data=csv,
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
else:
|
| 440 |
-
st.info("Waiting for data...")
|
| 441 |
|
| 442 |
with tab_charts:
|
| 443 |
c1, c2 = st.columns(2)
|
|
@@ -445,40 +411,32 @@ with tab_charts:
|
|
| 445 |
st.markdown("**Ghost ID Detection**")
|
| 446 |
if not filtered_df.empty:
|
| 447 |
fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
|
| 448 |
-
|
| 449 |
fig.add_hline(y=0.2, line_dash="dash", line_color="red")
|
| 450 |
st.plotly_chart(fig, use_container_width=True)
|
| 451 |
with c2:
|
| 452 |
st.markdown("**Weekend Activity Analysis**")
|
| 453 |
if not filtered_df.empty:
|
| 454 |
-
wk_counts = filtered_df.groupby(
|
| 455 |
-
|
| 456 |
-
wk_counts
|
| 457 |
-
{0: 'Weekday', 1: 'Weekend'})
|
| 458 |
-
fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={
|
| 459 |
-
'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
|
| 460 |
st.plotly_chart(fig, use_container_width=True)
|
| 461 |
|
| 462 |
with tab_insights:
|
| 463 |
st.subheader("π AI Detective Insights")
|
| 464 |
if not filtered_df.empty:
|
| 465 |
anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
|
| 466 |
-
st.info(
|
| 467 |
-
|
| 468 |
-
|
| 469 |
c_i1, c_i2 = st.columns(2)
|
| 470 |
with c_i1:
|
| 471 |
st.markdown("#### π¨ Primary Risk Factors")
|
| 472 |
-
st.markdown(
|
| 473 |
-
|
| 474 |
-
st.markdown(
|
| 475 |
-
"- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
|
| 476 |
with c_i2:
|
| 477 |
st.markdown("#### π‘ Recommended Actions")
|
| 478 |
-
st.markdown(
|
| 479 |
-
|
| 480 |
-
st.markdown(
|
| 481 |
-
"2. Deploy biometric re-verification for 'Rural A' cluster")
|
| 482 |
|
| 483 |
st.markdown("---")
|
| 484 |
-
st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)
|
|
|
|
| 59 |
border-right: 1px solid #334155;
|
| 60 |
}
|
| 61 |
|
| 62 |
+
/* Force Sidebar Header Text Colors */
|
| 63 |
[data-testid="stSidebar"] h1, [data-testid="stSidebar"] h2, [data-testid="stSidebar"] h3 {
|
| 64 |
color: #f8fafc !important;
|
| 65 |
}
|
| 66 |
+
|
| 67 |
+
/* Force Label Colors (Input titles like 'State', 'Date Range') */
|
| 68 |
+
[data-testid="stSidebar"] label {
|
| 69 |
color: #e2e8f0 !important;
|
| 70 |
font-weight: 600;
|
| 71 |
}
|
| 72 |
|
| 73 |
/* Input Boxes (Selectbox, DateInput) Text Color Fix */
|
| 74 |
+
/* This targets the text INSIDE the white input box/dropdown */
|
| 75 |
+
[data-testid="stSidebar"] div[data-baseweb="select"] div {
|
| 76 |
color: #0f172a !important;
|
| 77 |
}
|
| 78 |
[data-testid="stSidebar"] div[data-baseweb="input"] input {
|
|
|
|
| 82 |
color: #0f172a !important;
|
| 83 |
}
|
| 84 |
|
| 85 |
+
/* Markdown Text in Sidebar */
|
| 86 |
+
[data-testid="stSidebar"] .stMarkdown p {
|
| 87 |
+
color: #cbd5e1 !important;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
/* Link Button Fix - CRITICAL OVERRIDE FOR DARK TEXT */
|
| 91 |
[data-testid="stSidebar"] a {
|
| 92 |
background-color: #3b82f6 !important; /* Brighter blue background */
|
|
|
|
| 108 |
transform: translateY(-1px);
|
| 109 |
}
|
| 110 |
|
| 111 |
+
/* Link Button Container Background (for st.link_button wrapper) */
|
| 112 |
[data-testid="stSidebar"] button[kind="secondary"] {
|
| 113 |
background-color: #f8fafc !important; /* Light bg for button */
|
| 114 |
color: #0f172a !important; /* Dark text */
|
|
|
|
| 139 |
""", unsafe_allow_html=True)
|
| 140 |
|
| 141 |
# 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
|
|
|
|
|
|
|
| 142 |
@st.cache_data(show_spinner=False)
|
| 143 |
def fetch_coordinates_batch(unique_locations):
|
| 144 |
"""
|
|
|
|
| 161 |
d, s = k.split("|")
|
| 162 |
coords_map[(d, s)] = tuple(v)
|
| 163 |
except json.JSONDecodeError:
|
| 164 |
+
pass # File corrupted, start fresh
|
| 165 |
|
| 166 |
# 2. Add Hardcoded Pre-fills (High Priority Redundancy)
|
| 167 |
# These override if missing, but usually JSON is preferred source if present
|
|
|
|
| 188 |
for k, v in prefills.items():
|
| 189 |
if k not in coords_map:
|
| 190 |
coords_map[k] = v
|
| 191 |
+
|
| 192 |
# 3. Identify missing locations
|
| 193 |
missing_locs = [loc for loc in unique_locations if loc not in coords_map]
|
| 194 |
+
|
| 195 |
if not missing_locs:
|
| 196 |
return coords_map
|
| 197 |
|
| 198 |
# 4. Dynamic Fetching for missing
|
| 199 |
progress_text = "π‘ New locations found. Fetching coordinates..."
|
| 200 |
my_bar = st.progress(0, text=progress_text)
|
| 201 |
+
|
| 202 |
+
headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
|
|
|
|
| 203 |
updated = False
|
| 204 |
+
|
| 205 |
for i, (district, state) in enumerate(missing_locs):
|
| 206 |
try:
|
| 207 |
# Update Progress
|
| 208 |
+
my_bar.progress((i + 1) / len(missing_locs), text=f"π Locating: {district}, {state}")
|
| 209 |
+
|
|
|
|
| 210 |
# API Call
|
| 211 |
query = f"{district}, {state}, India"
|
| 212 |
url = "https://nominatim.openstreetmap.org/search"
|
| 213 |
params = {'q': query, 'format': 'json', 'limit': 1}
|
| 214 |
+
|
| 215 |
+
response = requests.get(url, params=params, headers=headers, timeout=5)
|
| 216 |
+
|
|
|
|
| 217 |
if response.status_code == 200 and response.json():
|
| 218 |
data = response.json()[0]
|
| 219 |
+
coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
|
|
|
|
| 220 |
updated = True
|
| 221 |
else:
|
| 222 |
+
pass # Fail silently, will fall back to state center logic later
|
| 223 |
+
|
| 224 |
# Respect Rate Limiting (1 request per second)
|
| 225 |
+
time.sleep(1.1)
|
| 226 |
+
|
| 227 |
except Exception as e:
|
| 228 |
continue
|
| 229 |
+
|
| 230 |
my_bar.empty()
|
| 231 |
+
|
| 232 |
# 5. Save back to JSON if new data fetched
|
| 233 |
if updated:
|
| 234 |
# Convert keys to string "District|State" for JSON compatibility
|
| 235 |
save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
|
| 236 |
with open(json_file, 'w') as f:
|
| 237 |
json.dump(save_data, f)
|
| 238 |
+
|
| 239 |
return coords_map
|
| 240 |
|
| 241 |
# 4. MAIN DATA LOADER
|
|
|
|
|
|
|
| 242 |
@st.cache_data(ttl=300)
|
| 243 |
def load_data():
|
| 244 |
try:
|
| 245 |
df = pd.read_csv('analyzed_aadhaar_data.csv')
|
| 246 |
except FileNotFoundError:
|
| 247 |
+
return pd.DataFrame() # Return empty to trigger external error check
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
+
if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
|
| 250 |
+
|
| 251 |
# Clean Data
|
| 252 |
df['district'] = df['district'].astype(str).str.strip()
|
| 253 |
df['state'] = df['state'].astype(str).str.strip()
|
| 254 |
+
|
| 255 |
# --- FIX DUPLICATE STATES ---
|
| 256 |
# Standardize State Names to remove variations (e.g., J&K)
|
| 257 |
state_mapping = {
|
|
|
|
| 263 |
'Pondicherry': 'Puducherry'
|
| 264 |
}
|
| 265 |
df['state'] = df['state'].replace(state_mapping)
|
| 266 |
+
|
| 267 |
# Get Unique Locations
|
| 268 |
+
unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
|
| 269 |
+
|
|
|
|
| 270 |
# Fetch Coordinates (Cached + Persistent JSON)
|
| 271 |
coords_db = fetch_coordinates_batch(unique_locs)
|
| 272 |
+
|
| 273 |
# Fallback Centers (State Capitals)
|
| 274 |
state_centers = {
|
| 275 |
'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
|
|
|
|
| 285 |
'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
|
| 286 |
'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
|
| 287 |
}
|
| 288 |
+
|
| 289 |
def get_lat_lon(row):
|
| 290 |
key = (row['district'], row['state'])
|
| 291 |
+
|
| 292 |
# 1. Check Exact Match from API/Cache
|
| 293 |
if key in coords_db:
|
| 294 |
lat, lon = coords_db[key]
|
| 295 |
# Tiny jitter to separate stacked points
|
| 296 |
return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
|
| 297 |
+
|
| 298 |
# 2. Fallback to State Center
|
| 299 |
center = state_centers.get(row['state'], (20.5937, 78.9629))
|
| 300 |
np.random.seed(hash(key) % 2**32)
|
| 301 |
return pd.Series({
|
| 302 |
+
'lat': center[0] + np.random.uniform(-0.5, 0.5),
|
| 303 |
'lon': center[1] + np.random.uniform(-0.5, 0.5)
|
| 304 |
})
|
| 305 |
|
| 306 |
coords = df.apply(get_lat_lon, axis=1)
|
| 307 |
df['lat'] = coords['lat']
|
| 308 |
df['lon'] = coords['lon']
|
| 309 |
+
|
| 310 |
+
df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
|
|
|
|
| 311 |
return df
|
| 312 |
|
|
|
|
| 313 |
with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
|
| 314 |
df = load_data()
|
| 315 |
|
|
|
|
| 317 |
with st.sidebar:
|
| 318 |
st.markdown("### π‘οΈ S.T.A.R.K AI Control")
|
| 319 |
st.markdown("---")
|
| 320 |
+
|
| 321 |
if not df.empty:
|
| 322 |
if 'date' in df.columns:
|
| 323 |
min_d, max_d = df['date'].min().date(), df['date'].max().date()
|
| 324 |
+
dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
|
| 325 |
+
if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
|
| 326 |
+
|
|
|
|
|
|
|
|
|
|
| 327 |
state_list = ['All'] + sorted(df['state'].unique().tolist())
|
| 328 |
sel_state = st.selectbox("State", state_list)
|
| 329 |
+
filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
|
| 330 |
+
|
|
|
|
| 331 |
dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
|
| 332 |
sel_dist = st.selectbox("District", dist_list)
|
| 333 |
+
if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
|
| 334 |
+
|
|
|
|
| 335 |
st.markdown("---")
|
| 336 |
+
risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
|
| 337 |
+
if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
|
|
|
|
|
|
|
|
|
|
| 338 |
else:
|
| 339 |
filtered_df = pd.DataFrame()
|
| 340 |
+
|
| 341 |
st.markdown("---")
|
| 342 |
+
st.link_button("π Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
|
| 343 |
+
st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
|
|
|
|
|
|
|
| 344 |
|
| 345 |
# 6. HEADER & METRICS
|
| 346 |
col1, col2 = st.columns([3, 1])
|
|
|
|
| 348 |
st.title("π‘οΈ S.T.A.R.K AI Dashboard")
|
| 349 |
st.markdown("**Context-Aware Fraud Detection & Prevention System**")
|
| 350 |
with col2:
|
| 351 |
+
st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">β System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
|
|
|
|
| 352 |
|
| 353 |
st.markdown("---")
|
| 354 |
|
| 355 |
if not filtered_df.empty:
|
| 356 |
m1, m2, m3, m4, m5 = st.columns(5)
|
| 357 |
+
total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
|
|
|
|
| 358 |
m1.metric("Total Centers", f"{total:,}", border=True)
|
| 359 |
+
m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
|
| 360 |
+
m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
|
| 361 |
+
m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
|
| 362 |
+
m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
else:
|
| 364 |
+
st.error("β Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
|
|
|
|
| 365 |
|
| 366 |
st.markdown("##")
|
| 367 |
|
| 368 |
# 7. TABS
|
| 369 |
+
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πΊοΈ Geographic Risk", "π Priority List", "π Patterns", "π AI Insights"])
|
|
|
|
| 370 |
|
| 371 |
with tab_map:
|
| 372 |
c_map, c_det = st.columns([3, 1])
|
| 373 |
with c_map:
|
| 374 |
if not filtered_df.empty:
|
| 375 |
# Dynamic Zoom based on selection
|
| 376 |
+
if sel_dist != 'All': zoom_lvl = 10
|
| 377 |
+
elif sel_state != 'All': zoom_lvl = 6
|
| 378 |
+
else: zoom_lvl = 3.8
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
|
| 381 |
+
color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
|
| 382 |
+
center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
|
| 383 |
+
hover_name="district", hover_data={"state":True, "pincode":True, "lat":False, "lon":False},
|
| 384 |
+
mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
|
| 385 |
+
|
| 386 |
+
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
|
|
|
|
| 387 |
st.plotly_chart(fig, use_container_width=True)
|
| 388 |
+
else: st.info("Waiting for data...")
|
| 389 |
+
|
|
|
|
| 390 |
with c_det:
|
| 391 |
st.subheader("π₯ Top Hotspots")
|
| 392 |
if not filtered_df.empty:
|
| 393 |
+
top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
|
|
|
|
| 394 |
for i, (d, r) in enumerate(top.iterrows(), 1):
|
| 395 |
+
clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else ("#f97316", "HIGH")
|
| 396 |
+
st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
|
|
|
|
|
|
|
| 397 |
|
| 398 |
with tab_list:
|
| 399 |
st.subheader("π― Priority Investigation")
|
| 400 |
if not filtered_df.empty:
|
| 401 |
+
targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
|
|
|
|
| 402 |
csv = targets.to_csv(index=False).encode('utf-8')
|
| 403 |
+
st.download_button("π₯ Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
|
| 404 |
+
st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
|
| 405 |
+
column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
|
| 406 |
+
else: st.info("Waiting for data...")
|
|
|
|
|
|
|
| 407 |
|
| 408 |
with tab_charts:
|
| 409 |
c1, c2 = st.columns(2)
|
|
|
|
| 411 |
st.markdown("**Ghost ID Detection**")
|
| 412 |
if not filtered_df.empty:
|
| 413 |
fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
|
| 414 |
+
color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
|
| 415 |
fig.add_hline(y=0.2, line_dash="dash", line_color="red")
|
| 416 |
st.plotly_chart(fig, use_container_width=True)
|
| 417 |
with c2:
|
| 418 |
st.markdown("**Weekend Activity Analysis**")
|
| 419 |
if not filtered_df.empty:
|
| 420 |
+
wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
|
| 421 |
+
wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
|
| 422 |
+
fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
|
|
|
|
|
|
|
|
|
|
| 423 |
st.plotly_chart(fig, use_container_width=True)
|
| 424 |
|
| 425 |
with tab_insights:
|
| 426 |
st.subheader("π AI Detective Insights")
|
| 427 |
if not filtered_df.empty:
|
| 428 |
anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
|
| 429 |
+
st.info(f"π€ **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Ο from mean).")
|
| 430 |
+
|
|
|
|
| 431 |
c_i1, c_i2 = st.columns(2)
|
| 432 |
with c_i1:
|
| 433 |
st.markdown("#### π¨ Primary Risk Factors")
|
| 434 |
+
st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
|
| 435 |
+
st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
|
|
|
|
|
|
|
| 436 |
with c_i2:
|
| 437 |
st.markdown("#### π‘ Recommended Actions")
|
| 438 |
+
st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
|
| 439 |
+
st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
|
|
|
|
|
|
|
| 440 |
|
| 441 |
st.markdown("---")
|
| 442 |
+
st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)
|