LovnishVerma committed on
Commit
f5f7959
·
verified ·
1 Parent(s): 538e361

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -141
app.py CHANGED
@@ -16,42 +16,33 @@ st.set_page_config(
16
  # 2. PROFESSIONAL STYLING (THEME OVERRIDE)
17
  st.markdown("""
18
  <style>
19
- /* IMPORT FONTS */
20
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
21
 
22
- /* FORCE LIGHT THEME BACKGROUNDS & TEXT */
23
  .stApp {
24
- background-color: #f8fafc; /* Light Blue-Grey */
25
- color: #0f172a; /* Slate 900 */
26
  font-family: 'Inter', sans-serif;
27
  }
28
 
29
- /* METRIC CARDS - GLASSMORPHISM */
30
  div[data-testid="stMetric"] {
31
  background-color: #ffffff;
32
  border: 1px solid #e2e8f0;
33
  border-radius: 8px;
34
  padding: 15px;
35
  box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
36
- transition: transform 0.2s;
37
  }
38
- div[data-testid="stMetric"]:hover {
39
- transform: translateY(-2px);
40
- box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
41
- }
42
-
43
- /* FORCE DARK TEXT FOR METRICS (Fixes White-on-White) */
44
  div[data-testid="stMetricValue"] {
45
  color: #0f172a !important;
46
  font-weight: 700 !important;
47
  }
48
  div[data-testid="stMetricLabel"] {
49
- color: #64748b !important; /* Slate 500 */
50
  }
51
 
52
- /* DATAFRAME STYLING (Fixes White-on-White) */
53
  div[data-testid="stDataFrame"] div[role="grid"] {
54
- color: #334155 !important; /* Slate 700 */
55
  background-color: white !important;
56
  }
57
  div[data-testid="stDataFrame"] div[role="columnheader"] {
@@ -60,25 +51,20 @@ st.markdown("""
60
  background-color: #f1f5f9 !important;
61
  }
62
 
63
- /* SIDEBAR STYLING */
64
  [data-testid="stSidebar"] {
65
- background-color: #1e293b; /* Slate 800 */
66
  }
67
  [data-testid="stSidebar"] * {
68
- color: #f8fafc !important; /* Light text for sidebar */
69
  }
70
  [data-testid="stSidebar"] .stSelectbox label,
71
  [data-testid="stSidebar"] .stMultiSelect label {
72
  color: #94a3b8 !important;
73
  }
74
 
75
- /* HEADERS */
76
- h1, h2, h3 {
77
- color: #0f172a !important;
78
- font-weight: 700 !important;
79
- }
80
 
81
- /* CUSTOM BADGES */
82
  .status-badge {
83
  display: inline-flex;
84
  align-items: center;
@@ -90,26 +76,24 @@ st.markdown("""
90
  .bg-red { background-color: #fee2e2; color: #991b1b; }
91
  .bg-green { background-color: #dcfce7; color: #166534; }
92
 
93
- /* MAP CANVAS FIX */
94
- .js-plotly-plot .plotly .main-svg {
95
- background-color: rgba(0,0,0,0) !important;
96
- }
97
  </style>
98
  """, unsafe_allow_html=True)
99
 
100
  # 3. SMART DATA LOADING (MAPPING)
101
  @st.cache_data
102
  def load_data():
103
- # 1. Load or Generate Data
104
  try:
105
  df = pd.read_csv('analyzed_aadhaar_data.csv')
106
  except FileNotFoundError:
107
- # Dummy Data Generator if file missing
108
  dates = pd.date_range(start="2025-01-01", periods=200)
 
 
 
109
  df = pd.DataFrame({
110
  'date': dates,
111
- 'state': np.random.choice(['Maharashtra', 'Uttar Pradesh', 'Bihar', 'Karnataka', 'Delhi', 'West Bengal', 'Tamil Nadu', 'Gujarat', 'Rajasthan', 'Kerala'], 200),
112
- 'district': np.random.choice(['North', 'South', 'East', 'West', 'Central', 'Rural A', 'Urban B'], 200),
113
  'pincode': np.random.randint(110001, 800000, 200),
114
  'RISK_SCORE': np.random.uniform(15, 99, 200),
115
  'total_activity': np.random.randint(50, 800, 200),
@@ -118,12 +102,10 @@ def load_data():
118
  'is_weekend': np.random.choice([0, 1], 200, p=[0.7, 0.3])
119
  })
120
 
121
- # Standardize Date
122
  if 'date' in df.columns:
123
  df['date'] = pd.to_datetime(df['date'])
124
 
125
- # SMART GEO-CLUSTERING LOGIC
126
- # Comprehensive Center Points for Indian States & UTs
127
  state_centers = {
128
  'Andaman and Nicobar Islands': (11.7401, 92.6586),
129
  'Andhra Pradesh': (15.9129, 79.7400),
@@ -137,7 +119,7 @@ def load_data():
137
  'Goa': (15.2993, 74.1240),
138
  'Gujarat': (22.2587, 71.1924),
139
  'Haryana': (29.0588, 76.0856),
140
- 'Himachal Pradesh': (31.9579, 77.1095),
141
  'Jammu and Kashmir': (33.7782, 76.5762),
142
  'Jharkhand': (23.6102, 85.2799),
143
  'Karnataka': (15.3173, 75.7139),
@@ -163,70 +145,78 @@ def load_data():
163
  'West Bengal': (22.9868, 87.8550)
164
  }
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  def get_coords(row):
167
  state = row.get('state', 'Delhi')
168
  district = str(row.get('district', 'Unknown'))
169
 
170
- # 1. Get State Base Coordinates (Use your updated list)
171
  base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))
 
172
 
173
- # 2. DEFINE STATE RADIUS SCALER (In Degrees)
174
- # Default is 0.5 (~55km) which is safer than 1.5
175
- default_radius = 0.5
 
176
 
177
- # Tighter constraints for small States/UTs
178
- radius_map = {
179
- 'Chandigarh': 0.04,
180
- 'Delhi': 0.15,
181
- 'Goa': 0.15,
182
- 'Puducherry': 0.1,
183
- 'Lakshadweep': 0.05,
184
- 'Daman and Diu': 0.05,
185
- 'Dadra and Nagar Haveli': 0.05,
186
- 'Kerala': 0.3, # Narrow state
187
- 'Haryana': 0.4,
188
- 'Punjab': 0.4,
189
- 'Tripura': 0.3,
190
- 'Sikkim': 0.15,
191
- 'Andaman and Nicobar Islands': 1.0 # Long archipelago
192
- }
193
 
194
- # Get the specific radius for this state
195
- radius = radius_map.get(state, default_radius)
 
 
196
 
197
- # 3. DETERMINISTIC HASHING
198
  district_hash = hash(state + district)
199
  np.random.seed(district_hash % 2**32)
200
 
201
- # Offset using the specific radius
202
- dist_lat_offset = np.random.uniform(-radius, radius)
203
- dist_lon_offset = np.random.uniform(-radius, radius)
204
 
205
- # 4. MICRO JITTER (Random noise for individual points)
206
- np.random.seed(None)
207
- noise_lat = np.random.normal(0, 0.02 * radius) # Scale noise relative to state size
208
- noise_lon = np.random.normal(0, 0.02 * radius)
 
 
209
 
210
  return pd.Series({
211
- 'lat': base_lat + dist_lat_offset + noise_lat,
212
- 'lon': base_lon + dist_lon_offset + noise_lon
213
  })
214
 
215
- # Apply coordinates
216
  coords = df.apply(get_coords, axis=1)
217
  df['lat'] = coords['lat']
218
  df['lon'] = coords['lon']
219
 
220
- # Risk Categories
221
  df['risk_category'] = pd.cut(
222
  df['RISK_SCORE'],
223
  bins=[-1, 50, 75, 85, 100],
224
  labels=['Low', 'Medium', 'High', 'Critical']
225
  )
226
-
227
  return df
228
 
229
- # Load Data
230
  df = load_data()
231
 
232
  # 4. SIDEBAR & FILTERS
@@ -234,11 +224,9 @@ with st.sidebar:
234
  st.markdown("### S.T.A.R.K AI Control")
235
  st.markdown("---")
236
 
237
- # State Filter
238
  state_list = ['All'] + sorted(df['state'].unique().tolist())
239
  selected_state = st.selectbox("Select State", state_list)
240
 
241
- # District Filter
242
  if selected_state != 'All':
243
  filtered_df = df[df['state'] == selected_state]
244
  district_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
@@ -252,32 +240,24 @@ with st.sidebar:
252
  filtered_df = filtered_df[filtered_df['district'] == selected_district]
253
 
254
  st.markdown("---")
255
-
256
- # Risk Filter
257
  risk_filter = st.multiselect(
258
  "Risk Level",
259
  options=['Low', 'Medium', 'High', 'Critical'],
260
  default=['High', 'Critical']
261
  )
262
-
263
  if risk_filter:
264
  filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
265
 
266
  st.markdown("---")
267
-
268
- # Links
269
- st.markdown("**Resources**")
270
  st.link_button("Open Notebook in Colab", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing")
271
-
272
  st.markdown("---")
273
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571")
274
 
275
- # 5. HEADER & KPI METRICS
276
  col1, col2 = st.columns([3, 1])
277
  with col1:
278
  st.title("Project S.T.A.R.K AI Dashboard")
279
  st.markdown("Context-Aware Fraud Detection System")
280
-
281
  with col2:
282
  st.markdown("""
283
  <div style="text-align: right; padding-top: 20px;">
@@ -288,7 +268,7 @@ with col2:
288
 
289
  st.markdown("---")
290
 
291
- # METRICS ROW
292
  m1, m2, m3, m4 = st.columns(4)
293
  total_centers = len(filtered_df)
294
  high_risk = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
@@ -300,29 +280,24 @@ m2.metric("High Risk Alerts", f"{high_risk}", delta="Action Required", delta_col
300
  m3.metric("Avg. Risk Score", f"{avg_risk:.1f}/100", border=True)
301
  m4.metric("Weekend Spikes", f"{weekend_alerts}", "Unauthorized", delta_color="off", border=True)
302
 
303
- st.markdown("##") # Spacer
304
 
305
- # 6. MAIN TABS
306
  tab_map, tab_list, tab_charts = st.tabs(["Geographic Risk", "Priority List", "Pattern Analytics"])
307
 
308
- # TAB 1: GEOGRAPHIC RISK (MAP)
309
  with tab_map:
310
  col_map, col_details = st.columns([3, 1])
311
-
312
  with col_map:
313
  if not filtered_df.empty:
314
- # Using Open-Street-Map for better contrast and no-token requirement
315
  fig_map = px.scatter_mapbox(
316
  filtered_df,
317
- lat="lat",
318
- lon="lon",
319
  color="RISK_SCORE",
320
  size="total_activity",
321
- # Traffic Light Colors: Green -> Yellow -> Red
322
  color_continuous_scale=["#22c55e", "#eab308", "#ef4444"],
323
  size_max=20,
324
  zoom=4.5 if selected_state != 'All' else 3.5,
325
- center={"lat": 22.0, "lon": 80.0}, # Center of India
326
  hover_name="pincode",
327
  hover_data={"district": True, "state": True, "RISK_SCORE": True, "lat": False, "lon": False},
328
  mapbox_style="open-street-map",
@@ -339,7 +314,6 @@ with tab_map:
339
  if not filtered_df.empty:
340
  top_districts = filtered_df.groupby('district')['RISK_SCORE'].mean().sort_values(ascending=False).head(5)
341
  for district, score in top_districts.items():
342
- # Color code the side bar
343
  color = "#ef4444" if score > 80 else "#f59e0b"
344
  st.markdown(f"""
345
  <div style="background: white; padding: 12px; border-radius: 8px; border-left: 5px solid {color}; margin-bottom: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
@@ -348,80 +322,41 @@ with tab_map:
348
  </div>
349
  """, unsafe_allow_html=True)
350
 
351
- # TAB 2: PRIORITY LIST (DATAFRAME)
352
  with tab_list:
353
  st.subheader("Target Investigation List")
354
  st.markdown("Filter: *Showing centers with Risk Score > 75*")
355
-
356
  target_list = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
357
-
358
  st.dataframe(
359
  target_list[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
360
  column_config={
361
- "RISK_SCORE": st.column_config.ProgressColumn(
362
- "Risk Probability",
363
- help="Probability of fraud based on context analysis",
364
- format="%d%%",
365
- min_value=0,
366
- max_value=100,
367
- ),
368
  "date": st.column_config.DateColumn("Date", format="DD MMM YYYY"),
369
  "total_activity": st.column_config.NumberColumn("Volume"),
370
  "enrol_adult": st.column_config.NumberColumn("Adult Enrols"),
371
  },
372
- use_container_width=True,
373
- hide_index=True,
374
- height=400
375
  )
376
-
377
- # Export Button
378
  csv = target_list.to_csv(index=False).encode('utf-8')
379
- st.download_button(
380
- "Download CSV",
381
- data=csv,
382
- file_name="uidai_stark_ai_priority_list.csv",
383
- mime="text/csv",
384
- type="primary"
385
- )
386
 
387
- # --- TAB 3: CHARTS ---
388
  with tab_charts:
389
  c1, c2 = st.columns(2)
390
-
391
  with c1:
392
- st.subheader("Ghost ID Pattern (Ratio Deviation)")
393
- # Scatter Plot
394
  fig_scatter = px.scatter(
395
- filtered_df,
396
- x="total_activity",
397
- y="ratio_deviation",
398
  color="risk_category",
399
  color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'},
400
  title="Deviation from District Baseline",
401
- labels={"ratio_deviation": "Deviation Score", "total_activity": "Daily Transactions"},
402
- hover_data=['pincode', 'district']
403
  )
404
  fig_scatter.add_hline(y=0.2, line_dash="dash", line_color="red", annotation_text="Fraud Threshold")
405
  st.plotly_chart(fig_scatter, use_container_width=True)
406
-
407
  with c2:
408
  st.subheader("Risk Distribution")
409
- # Histogram
410
- fig_hist = px.histogram(
411
- filtered_df,
412
- x="RISK_SCORE",
413
- nbins=20,
414
- color_discrete_sequence=['#3b82f6'],
415
- title="Frequency of Risk Scores"
416
- )
417
  fig_hist.update_layout(bargap=0.1)
418
  st.plotly_chart(fig_hist, use_container_width=True)
419
 
420
- # 7. FOOTER
421
  st.markdown("---")
422
- st.markdown("""
423
- <div style="text-align: center; font-size: 13px; color: #94a3b8;">
424
- <b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026 | Team UIDAI_4571<br>
425
- <i>Confidential - For Official Use Only</i>
426
- </div>
427
- """, unsafe_allow_html=True)
 
16
  # 2. PROFESSIONAL STYLING (THEME OVERRIDE)
17
  st.markdown("""
18
  <style>
 
19
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
20
 
 
21
  .stApp {
22
+ background-color: #f8fafc;
23
+ color: #0f172a;
24
  font-family: 'Inter', sans-serif;
25
  }
26
 
27
+ /* METRIC CARDS */
28
  div[data-testid="stMetric"] {
29
  background-color: #ffffff;
30
  border: 1px solid #e2e8f0;
31
  border-radius: 8px;
32
  padding: 15px;
33
  box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
 
34
  }
 
 
 
 
 
 
35
  div[data-testid="stMetricValue"] {
36
  color: #0f172a !important;
37
  font-weight: 700 !important;
38
  }
39
  div[data-testid="stMetricLabel"] {
40
+ color: #64748b !important;
41
  }
42
 
43
+ /* DATAFRAME */
44
  div[data-testid="stDataFrame"] div[role="grid"] {
45
+ color: #334155 !important;
46
  background-color: white !important;
47
  }
48
  div[data-testid="stDataFrame"] div[role="columnheader"] {
 
51
  background-color: #f1f5f9 !important;
52
  }
53
 
54
+ /* SIDEBAR */
55
  [data-testid="stSidebar"] {
56
+ background-color: #1e293b;
57
  }
58
  [data-testid="stSidebar"] * {
59
+ color: #f8fafc !important;
60
  }
61
  [data-testid="stSidebar"] .stSelectbox label,
62
  [data-testid="stSidebar"] .stMultiSelect label {
63
  color: #94a3b8 !important;
64
  }
65
 
66
+ h1, h2, h3 { color: #0f172a !important; font-weight: 700 !important; }
 
 
 
 
67
 
 
68
  .status-badge {
69
  display: inline-flex;
70
  align-items: center;
 
76
  .bg-red { background-color: #fee2e2; color: #991b1b; }
77
  .bg-green { background-color: #dcfce7; color: #166534; }
78
 
79
+ .js-plotly-plot .plotly .main-svg { background-color: rgba(0,0,0,0) !important; }
 
 
 
80
  </style>
81
  """, unsafe_allow_html=True)
82
 
83
  # 3. SMART DATA LOADING (MAPPING)
84
  @st.cache_data
85
  def load_data():
 
86
  try:
87
  df = pd.read_csv('analyzed_aadhaar_data.csv')
88
  except FileNotFoundError:
 
89
  dates = pd.date_range(start="2025-01-01", periods=200)
90
+ # Using realistic district names that imply direction for testing semantic logic
91
+ districts = ['North District', 'South Region', 'East Zone', 'West End', 'Central Hub', 'Rural A', 'Urban B']
92
+
93
  df = pd.DataFrame({
94
  'date': dates,
95
+ 'state': np.random.choice(['Maharashtra', 'Uttar Pradesh', 'Bihar', 'Karnataka', 'Delhi', 'West Bengal', 'Kerala', 'Assam', 'Rajasthan', 'Gujarat'], 200),
96
+ 'district': np.random.choice(districts, 200),
97
  'pincode': np.random.randint(110001, 800000, 200),
98
  'RISK_SCORE': np.random.uniform(15, 99, 200),
99
  'total_activity': np.random.randint(50, 800, 200),
 
102
  'is_weekend': np.random.choice([0, 1], 200, p=[0.7, 0.3])
103
  })
104
 
 
105
  if 'date' in df.columns:
106
  df['date'] = pd.to_datetime(df['date'])
107
 
108
+ # --- 1. PRECISE GEOMETRIC CENTERS ---
 
109
  state_centers = {
110
  'Andaman and Nicobar Islands': (11.7401, 92.6586),
111
  'Andhra Pradesh': (15.9129, 79.7400),
 
119
  'Goa': (15.2993, 74.1240),
120
  'Gujarat': (22.2587, 71.1924),
121
  'Haryana': (29.0588, 76.0856),
122
+ 'Himachal Pradesh': (31.9579, 77.1095), # Corrected
123
  'Jammu and Kashmir': (33.7782, 76.5762),
124
  'Jharkhand': (23.6102, 85.2799),
125
  'Karnataka': (15.3173, 75.7139),
 
145
  'West Bengal': (22.9868, 87.8550)
146
  }
147
 
148
+ # --- 2. ANISOTROPIC SPREADS (Shape of the State) ---
149
+ # format: (lat_spread, lon_spread) in degrees
150
+ # This prevents "Thin" states from spilling into the ocean/neighbors
151
+ state_spreads = {
152
+ 'Kerala': (1.2, 0.25), # Tall and Thin
153
+ 'West Bengal': (1.4, 0.4), # Tall and Thin
154
+ 'Assam': (0.4, 1.8), # Wide
155
+ 'Maharashtra': (1.2, 2.0), # Wide
156
+ 'Uttar Pradesh': (1.0, 2.2),# Wide
157
+ 'Bihar': (0.8, 1.5), # Wide
158
+ 'Delhi': (0.08, 0.1), # Tiny
159
+ 'Goa': (0.15, 0.15), # Tiny
160
+ 'Chandigarh': (0.03, 0.03), # City
161
+ 'Gujarat': (1.0, 1.3),
162
+ 'Rajasthan': (1.8, 1.8),
163
+ 'Madhya Pradesh': (1.5, 2.0),
164
+ 'Andaman and Nicobar Islands': (1.5, 0.2), # Archipelago (Tall)
165
+ 'Himachal Pradesh': (0.5, 0.6)
166
+ }
167
+
168
+ default_spread = (0.6, 0.6)
169
+
170
  def get_coords(row):
171
  state = row.get('state', 'Delhi')
172
  district = str(row.get('district', 'Unknown'))
173
 
 
174
  base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))
175
+ lat_scale, lon_scale = state_spreads.get(state, default_spread)
176
 
177
+ # --- 3. SEMANTIC OFFSETTING ---
178
+ # If district name contains direction, bias the jitter
179
+ lat_bias, lon_bias = 0, 0
180
+ d_lower = district.lower()
181
 
182
+ # Bias factor (percent of scale)
183
+ bias_factor = 0.7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
+ if 'north' in d_lower: lat_bias += lat_scale * bias_factor
186
+ if 'south' in d_lower: lat_bias -= lat_scale * bias_factor
187
+ if 'east' in d_lower: lon_bias += lon_scale * bias_factor
188
+ if 'west' in d_lower: lon_bias -= lon_scale * bias_factor
189
 
190
+ # --- 4. DETERMINISTIC RANDOMNESS ---
191
  district_hash = hash(state + district)
192
  np.random.seed(district_hash % 2**32)
193
 
194
+ # Random component (reduced if bias is present to keep it focused)
195
+ random_factor = 0.5 if (lat_bias != 0 or lon_bias != 0) else 1.0
 
196
 
197
+ dist_lat = np.random.uniform(-lat_scale * random_factor, lat_scale * random_factor)
198
+ dist_lon = np.random.uniform(-lon_scale * random_factor, lon_scale * random_factor)
199
+
200
+ # --- 5. MICRO NOISE (Avoid overlapping dots) ---
201
+ np.random.seed(None)
202
+ noise = 0.03
203
 
204
  return pd.Series({
205
+ 'lat': base_lat + lat_bias + dist_lat + np.random.normal(0, noise),
206
+ 'lon': base_lon + lon_bias + dist_lon + np.random.normal(0, noise)
207
  })
208
 
 
209
  coords = df.apply(get_coords, axis=1)
210
  df['lat'] = coords['lat']
211
  df['lon'] = coords['lon']
212
 
 
213
  df['risk_category'] = pd.cut(
214
  df['RISK_SCORE'],
215
  bins=[-1, 50, 75, 85, 100],
216
  labels=['Low', 'Medium', 'High', 'Critical']
217
  )
 
218
  return df
219
 
 
220
  df = load_data()
221
 
222
  # 4. SIDEBAR & FILTERS
 
224
  st.markdown("### S.T.A.R.K AI Control")
225
  st.markdown("---")
226
 
 
227
  state_list = ['All'] + sorted(df['state'].unique().tolist())
228
  selected_state = st.selectbox("Select State", state_list)
229
 
 
230
  if selected_state != 'All':
231
  filtered_df = df[df['state'] == selected_state]
232
  district_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
 
240
  filtered_df = filtered_df[filtered_df['district'] == selected_district]
241
 
242
  st.markdown("---")
 
 
243
  risk_filter = st.multiselect(
244
  "Risk Level",
245
  options=['Low', 'Medium', 'High', 'Critical'],
246
  default=['High', 'Critical']
247
  )
 
248
  if risk_filter:
249
  filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
250
 
251
  st.markdown("---")
 
 
 
252
  st.link_button("Open Notebook in Colab", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing")
 
253
  st.markdown("---")
254
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571")
255
 
256
+ # 5. HEADER
257
  col1, col2 = st.columns([3, 1])
258
  with col1:
259
  st.title("Project S.T.A.R.K AI Dashboard")
260
  st.markdown("Context-Aware Fraud Detection System")
 
261
  with col2:
262
  st.markdown("""
263
  <div style="text-align: right; padding-top: 20px;">
 
268
 
269
  st.markdown("---")
270
 
271
+ # METRICS
272
  m1, m2, m3, m4 = st.columns(4)
273
  total_centers = len(filtered_df)
274
  high_risk = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
 
280
  m3.metric("Avg. Risk Score", f"{avg_risk:.1f}/100", border=True)
281
  m4.metric("Weekend Spikes", f"{weekend_alerts}", "Unauthorized", delta_color="off", border=True)
282
 
283
+ st.markdown("##")
284
 
285
+ # 6. TABS
286
  tab_map, tab_list, tab_charts = st.tabs(["Geographic Risk", "Priority List", "Pattern Analytics"])
287
 
 
288
  with tab_map:
289
  col_map, col_details = st.columns([3, 1])
 
290
  with col_map:
291
  if not filtered_df.empty:
 
292
  fig_map = px.scatter_mapbox(
293
  filtered_df,
294
+ lat="lat", lon="lon",
 
295
  color="RISK_SCORE",
296
  size="total_activity",
 
297
  color_continuous_scale=["#22c55e", "#eab308", "#ef4444"],
298
  size_max=20,
299
  zoom=4.5 if selected_state != 'All' else 3.5,
300
+ center={"lat": 22.0, "lon": 80.0},
301
  hover_name="pincode",
302
  hover_data={"district": True, "state": True, "RISK_SCORE": True, "lat": False, "lon": False},
303
  mapbox_style="open-street-map",
 
314
  if not filtered_df.empty:
315
  top_districts = filtered_df.groupby('district')['RISK_SCORE'].mean().sort_values(ascending=False).head(5)
316
  for district, score in top_districts.items():
 
317
  color = "#ef4444" if score > 80 else "#f59e0b"
318
  st.markdown(f"""
319
  <div style="background: white; padding: 12px; border-radius: 8px; border-left: 5px solid {color}; margin-bottom: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
 
322
  </div>
323
  """, unsafe_allow_html=True)
324
 
 
325
  with tab_list:
326
  st.subheader("Target Investigation List")
327
  st.markdown("Filter: *Showing centers with Risk Score > 75*")
 
328
  target_list = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
 
329
  st.dataframe(
330
  target_list[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
331
  column_config={
332
+ "RISK_SCORE": st.column_config.ProgressColumn("Risk Probability", format="%d%%", min_value=0, max_value=100),
 
 
 
 
 
 
333
  "date": st.column_config.DateColumn("Date", format="DD MMM YYYY"),
334
  "total_activity": st.column_config.NumberColumn("Volume"),
335
  "enrol_adult": st.column_config.NumberColumn("Adult Enrols"),
336
  },
337
+ use_container_width=True, hide_index=True, height=400
 
 
338
  )
 
 
339
  csv = target_list.to_csv(index=False).encode('utf-8')
340
+ st.download_button("Download CSV", data=csv, file_name="uidai_stark_priority_list.csv", mime="text/csv", type="primary")
 
 
 
 
 
 
341
 
 
342
  with tab_charts:
343
  c1, c2 = st.columns(2)
 
344
  with c1:
345
+ st.subheader("Ghost ID Pattern")
 
346
  fig_scatter = px.scatter(
347
+ filtered_df, x="total_activity", y="ratio_deviation",
 
 
348
  color="risk_category",
349
  color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'},
350
  title="Deviation from District Baseline",
351
+ labels={"ratio_deviation": "Deviation Score", "total_activity": "Daily Transactions"}
 
352
  )
353
  fig_scatter.add_hline(y=0.2, line_dash="dash", line_color="red", annotation_text="Fraud Threshold")
354
  st.plotly_chart(fig_scatter, use_container_width=True)
 
355
  with c2:
356
  st.subheader("Risk Distribution")
357
+ fig_hist = px.histogram(filtered_df, x="RISK_SCORE", nbins=20, color_discrete_sequence=['#3b82f6'], title="Frequency of Risk Scores")
 
 
 
 
 
 
 
358
  fig_hist.update_layout(bargap=0.1)
359
  st.plotly_chart(fig_hist, use_container_width=True)
360
 
 
361
  st.markdown("---")
362
+ st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)