LovnishVerma committed on
Commit
79c14a3
·
verified ·
1 Parent(s): 3cb671d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -76
app.py CHANGED
@@ -63,13 +63,76 @@ st.markdown("""
63
  # 3. ENHANCED DATA LOADING
64
  @st.cache_data(ttl=300)
65
  def load_data():
66
- # Strictly load data from CSV
67
- df = pd.read_csv('analyzed_aadhaar_data.csv')
68
- # Removed st.toast from inside cached function to prevent CacheReplayClosureError
 
 
 
69
 
70
  if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
71
 
72
- # Precise Geometric Centers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  state_centers = {
74
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
75
  'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
@@ -84,75 +147,60 @@ def load_data():
84
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
85
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
86
  }
87
-
88
- # EXPANDED Aspect Ratio Definitions (Lat spread, Lon spread)
89
- state_spreads = {
90
- 'Kerala': (1.2, 0.25), 'West Bengal': (1.4, 0.4), 'Assam': (0.4, 1.8),
91
- 'Maharashtra': (1.8, 2.2), 'Uttar Pradesh': (1.2, 2.5), 'Bihar': (0.8, 1.5),
92
- 'Delhi': (0.1, 0.12), 'Goa': (0.15, 0.15), 'Chandigarh': (0.04, 0.04),
93
- 'Gujarat': (1.5, 1.8), 'Rajasthan': (2.0, 2.0), 'Madhya Pradesh': (1.8, 2.5),
94
- 'Himachal Pradesh': (0.6, 0.8), 'Punjab': (0.8, 0.9), 'Haryana': (0.9, 0.8),
95
- 'Tamil Nadu': (1.2, 1.0), 'Karnataka': (1.5, 1.2), 'Telangana': (1.0, 1.0),
96
- 'Andhra Pradesh': (1.5, 1.5), 'Odisha': (1.2, 1.2), 'Chhattisgarh': (1.5, 0.9),
97
- 'Jharkhand': (0.8, 1.0), 'Jammu and Kashmir': (1.0, 1.5), 'Ladakh': (1.0, 1.5),
98
- 'Uttarakhand': (0.7, 0.8)
99
- }
100
 
101
  def get_coords(row):
102
- state = row.get('state', 'Delhi')
103
- district = str(row.get('district', 'Unknown')).lower()
104
- base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))
105
-
106
- # Safer Default if state not found
107
- lat_scale, lon_scale = state_spreads.get(state, (0.7, 0.7))
108
-
109
- lat_bias, lon_bias = 0, 0
110
- bias = 0.6
111
-
112
- if 'north' in district: lat_bias += lat_scale * bias
113
- if 'south' in district: lat_bias -= lat_scale * bias
114
- if 'east' in district: lon_bias += lon_scale * bias
115
- if 'west' in district: lon_bias -= lon_scale * bias
116
 
 
 
 
 
 
 
 
 
117
  np.random.seed(hash(state + district) % 2**32)
118
- rf = 0.5 if (lat_bias or lon_bias) else 1.0
119
-
120
  return pd.Series({
121
- 'lat': base_lat + lat_bias + np.random.uniform(-lat_scale*rf, lat_scale*rf) + np.random.normal(0, 0.04),
122
- 'lon': base_lon + lon_bias + np.random.uniform(-lon_scale*rf, lon_scale*rf) + np.random.normal(0, 0.04)
123
  })
124
 
125
  coords = df.apply(get_coords, axis=1)
126
  df['lat'], df['lon'] = coords['lat'], coords['lon']
 
 
127
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
128
  return df
129
 
130
  with st.spinner('Loading S.T.A.R.K AI System...'):
131
  df = load_data()
132
- # Toast moved outside cached function
133
- # st.toast("✅ Data loaded successfully", icon="✅")
134
 
135
  # 4. SIDEBAR & FILTERS
136
  with st.sidebar:
137
  st.markdown("### 🛡️ S.T.A.R.K AI Control")
138
  st.markdown("---")
139
- if 'date' in df.columns:
140
- min_d, max_d = df['date'].min().date(), df['date'].max().date()
141
- dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
142
- if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
143
-
144
- state_list = ['All'] + sorted(df['state'].unique().tolist())
145
- sel_state = st.selectbox("State", state_list)
146
- filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
147
-
148
- dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
149
- sel_dist = st.selectbox("District", dist_list)
150
- if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
151
-
152
- st.markdown("---")
153
- risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
154
- if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  st.markdown("---")
157
  st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
158
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
@@ -166,13 +214,18 @@ with col2:
166
  st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
167
 
168
  st.markdown("---")
169
- m1, m2, m3, m4, m5 = st.columns(5)
170
- total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
171
- m1.metric("Total Centers", f"{total:,}", border=True)
172
- m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
173
- m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
174
- m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
175
- m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
 
 
 
 
 
176
  st.markdown("##")
177
 
178
  # 6. TABS
@@ -183,11 +236,11 @@ with tab_map:
183
  with c_map:
184
  if not filtered_df.empty:
185
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
186
- color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=4.8 if sel_state != 'All' else 3.8,
187
- center={"lat": 22.0, "lon": 80.0}, hover_name="district", mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
188
  fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
189
  st.plotly_chart(fig, use_container_width=True)
190
- else: st.warning("No data found.")
191
 
192
  with c_det:
193
  st.subheader("🔥 Top Hotspots")
@@ -199,26 +252,30 @@ with tab_map:
199
 
200
  with tab_list:
201
  st.subheader("🎯 Priority Investigation")
202
- targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
203
- csv = targets.to_csv(index=False).encode('utf-8')
204
- st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
205
- st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
206
- column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
 
 
207
 
208
  with tab_charts:
209
  c1, c2 = st.columns(2)
210
  with c1:
211
  st.markdown("**Ghost ID Detection**")
212
- fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
213
- color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
214
- fig.add_hline(y=0.2, line_dash="dash", line_color="red")
215
- st.plotly_chart(fig, use_container_width=True)
 
216
  with c2:
217
  st.markdown("**Weekend Activity Analysis**")
218
- wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
219
- wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
220
- fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
221
- st.plotly_chart(fig, use_container_width=True)
 
222
 
223
  with tab_insights:
224
  st.subheader("🔍 AI Detective Insights")
 
63
  # 3. ENHANCED DATA LOADING
64
  @st.cache_data(ttl=300)
65
  def load_data():
66
+ # Strictly load data from CSV - NO RANDOM GENERATION
67
+ try:
68
+ df = pd.read_csv('analyzed_aadhaar_data.csv')
69
+ except FileNotFoundError:
70
+ st.error("❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the file.")
71
+ return pd.DataFrame()
72
 
73
  if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
74
 
75
+ # --- PRECISE DISTRICT GEOLOCATION DATABASE ---
76
+ # Manually curated high-precision coordinates for known districts in the dataset
77
+ district_coords = {
78
+ # High Priority Districts from Snippet
79
+ 'Gautam Buddha Nagar': (28.39, 77.65), # Uttar Pradesh
80
+ 'West Jaintia Hills': (25.55, 92.38), # Meghalaya
81
+ 'West Khasi Hills': (25.56, 91.29), # Meghalaya
82
+ 'Bijapur': (18.80, 80.82), # Chhattisgarh
83
+ 'Dhule': (20.90, 74.77), # Maharashtra
84
+ 'Dhamtari': (20.71, 81.55), # Chhattisgarh
85
+ 'Udupi': (13.34, 74.75), # Karnataka
86
+ 'Supaul': (26.29, 86.82), # Bihar
87
+ 'Puruliya': (23.25, 86.50), # West Bengal
88
+
89
+ # Major Metros & Hubs (Commonly appear)
90
+ 'Mumbai': (19.0760, 72.8777),
91
+ 'Pune': (18.5204, 73.8567),
92
+ 'Nagpur': (21.1458, 79.0882),
93
+ 'Thane': (19.2183, 72.9781),
94
+ 'Nashik': (19.9975, 73.7898),
95
+ 'Lucknow': (26.8467, 80.9462),
96
+ 'Kanpur': (26.4499, 80.3319),
97
+ 'Ghaziabad': (28.6692, 77.4538),
98
+ 'Agra': (27.1767, 78.0081),
99
+ 'Varanasi': (25.3176, 82.9739),
100
+ 'Patna': (25.5941, 85.1376),
101
+ 'Gaya': (24.7914, 85.0002),
102
+ 'Muzaffarpur': (26.1197, 85.3910),
103
+ 'Bangalore': (12.9716, 77.5946), 'Bengaluru': (12.9716, 77.5946),
104
+ 'Mysore': (12.2958, 76.6394),
105
+ 'Hubli': (15.3647, 75.1240),
106
+ 'Mangalore': (12.9141, 74.8560),
107
+ 'Belgaum': (15.8497, 74.4977),
108
+ 'Chennai': (13.0827, 80.2707),
109
+ 'Coimbatore': (11.0168, 76.9558),
110
+ 'Madurai': (9.9252, 78.1198),
111
+ 'Kolkata': (22.5726, 88.3639),
112
+ 'Howrah': (22.5958, 88.2636),
113
+ 'Darjeeling': (27.0410, 88.2663),
114
+ 'Ahmedabad': (23.0225, 72.5714),
115
+ 'Surat': (21.1702, 72.8311),
116
+ 'Vadodara': (22.3072, 73.1812),
117
+ 'Rajkot': (22.3039, 70.8022),
118
+ 'Jaipur': (26.9124, 75.7873),
119
+ 'Jodhpur': (26.2389, 73.0243),
120
+ 'Udaipur': (24.5854, 73.7125),
121
+ 'Hyderabad': (17.3850, 78.4867),
122
+ 'Warangal': (17.9689, 79.5941),
123
+ 'Bhopal': (23.2599, 77.4126),
124
+ 'Indore': (22.7196, 75.8577),
125
+ 'Raipur': (21.2514, 81.6296),
126
+ 'Bilaspur': (22.0797, 82.1409),
127
+ 'Guwahati': (26.1445, 91.7362),
128
+ 'Visakhapatnam': (17.6868, 83.2185),
129
+ 'Vijayawada': (16.5062, 80.6480),
130
+ 'Thiruvananthapuram': (8.5241, 76.9366),
131
+ 'Kochi': (9.9312, 76.2673),
132
+ 'Kozhikode': (11.2588, 75.7804)
133
+ }
134
+
135
+ # Fallback State Centers (Only used if District is NOT in above list)
136
  state_centers = {
137
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
138
  'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
 
147
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
148
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
149
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
  def get_coords(row):
152
+ district = str(row.get('district', '')).strip()
153
+ state = row.get('state', '')
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
+ # 1. Try Exact District Match
156
+ if district in district_coords:
157
+ base_lat, base_lon = district_coords[district]
158
+ # Tiny jitter just to separate overlapping dots from same district
159
+ return pd.Series({'lat': base_lat + np.random.normal(0, 0.005), 'lon': base_lon + np.random.normal(0, 0.005)})
160
+
161
+ # 2. Fallback to State Center with Randomized Jitter (Only if district unknown)
162
+ center = state_centers.get(state, (20.5937, 78.9629))
163
  np.random.seed(hash(state + district) % 2**32)
 
 
164
  return pd.Series({
165
+ 'lat': center[0] + np.random.uniform(-0.5, 0.5),
166
+ 'lon': center[1] + np.random.uniform(-0.5, 0.5)
167
  })
168
 
169
  coords = df.apply(get_coords, axis=1)
170
  df['lat'], df['lon'] = coords['lat'], coords['lon']
171
+
172
+ # Recalculate Risk Category based on real data
173
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
174
  return df
175
 
176
  with st.spinner('Loading S.T.A.R.K AI System...'):
177
  df = load_data()
 
 
178
 
179
  # 4. SIDEBAR & FILTERS
180
  with st.sidebar:
181
  st.markdown("### 🛡️ S.T.A.R.K AI Control")
182
  st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
+ if not df.empty:
185
+ if 'date' in df.columns:
186
+ min_d, max_d = df['date'].min().date(), df['date'].max().date()
187
+ dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
188
+ if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
189
+
190
+ state_list = ['All'] + sorted(df['state'].unique().tolist())
191
+ sel_state = st.selectbox("State", state_list)
192
+ filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
193
+
194
+ dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
195
+ sel_dist = st.selectbox("District", dist_list)
196
+ if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
197
+
198
+ st.markdown("---")
199
+ risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
200
+ if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
201
+ else:
202
+ filtered_df = pd.DataFrame()
203
+
204
  st.markdown("---")
205
  st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
206
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
 
214
  st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
215
 
216
  st.markdown("---")
217
+
218
+ if not filtered_df.empty:
219
+ m1, m2, m3, m4, m5 = st.columns(5)
220
+ total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
221
+ m1.metric("Total Centers", f"{total:,}", border=True)
222
+ m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
223
+ m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
224
+ m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
225
+ m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
226
+ else:
227
+ st.warning("No data available to calculate metrics.")
228
+
229
  st.markdown("##")
230
 
231
  # 6. TABS
 
236
  with c_map:
237
  if not filtered_df.empty:
238
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
239
+ color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=3.8 if sel_state == 'All' else 5.5,
240
+ center={"lat": 22.0, "lon": 80.0}, hover_name="district", hover_data={"state":True, "pincode":True}, mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
241
  fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
242
  st.plotly_chart(fig, use_container_width=True)
243
+ else: st.warning("No data found to map.")
244
 
245
  with c_det:
246
  st.subheader("🔥 Top Hotspots")
 
252
 
253
  with tab_list:
254
  st.subheader("🎯 Priority Investigation")
255
+ if not filtered_df.empty:
256
+ targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
257
+ csv = targets.to_csv(index=False).encode('utf-8')
258
+ st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
259
+ st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
260
+ column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
261
+ else: st.warning("No data available.")
262
 
263
  with tab_charts:
264
  c1, c2 = st.columns(2)
265
  with c1:
266
  st.markdown("**Ghost ID Detection**")
267
+ if not filtered_df.empty:
268
+ fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
269
+ color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
270
+ fig.add_hline(y=0.2, line_dash="dash", line_color="red")
271
+ st.plotly_chart(fig, use_container_width=True)
272
  with c2:
273
  st.markdown("**Weekend Activity Analysis**")
274
+ if not filtered_df.empty:
275
+ wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
276
+ wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
277
+ fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
278
+ st.plotly_chart(fig, use_container_width=True)
279
 
280
  with tab_insights:
281
  st.subheader("πŸ” AI Detective Insights")