LovnishVerma committed on
Commit
d89ad44
·
verified ·
1 Parent(s): c81a2cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -105
app.py CHANGED
@@ -3,6 +3,8 @@ import pandas as pd
3
  import plotly.express as px
4
  import plotly.graph_objects as go
5
  import numpy as np
 
 
6
  from datetime import datetime, timedelta
7
 
8
  # 1. PAGE CONFIGURATION
@@ -60,10 +62,79 @@ st.markdown("""
60
  </style>
61
  """, unsafe_allow_html=True)
62
 
63
- # 3. ENHANCED DATA LOADING
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  @st.cache_data(ttl=300)
65
  def load_data():
66
- # Strictly load data from CSV - NO RANDOM GENERATION
67
  try:
68
  df = pd.read_csv('analyzed_aadhaar_data.csv')
69
  except FileNotFoundError:
@@ -72,87 +143,17 @@ def load_data():
72
 
73
  if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
74
 
75
- # --- PRECISE DISTRICT GEOLOCATION DATABASE ---
76
- # Manually curated high-precision coordinates for known districts in the dataset
77
- district_coords = {
78
- # High Priority Districts from Snippet
79
- 'Gautam Buddha Nagar': (28.39, 77.65), # Uttar Pradesh
80
- 'West Jaintia Hills': (25.55, 92.38), # Meghalaya
81
- 'West Khasi Hills': (25.56, 91.29), # Meghalaya
82
- 'Bijapur': (18.80, 80.82), # Chhattisgarh
83
- 'Dhule': (20.90, 74.77), # Maharashtra
84
- 'Dhamtari': (20.71, 81.55), # Chhattisgarh
85
- 'Udupi': (13.34, 74.75), # Karnataka
86
- 'Supaul': (26.29, 86.82), # Bihar
87
- 'Puruliya': (23.25, 86.50), # West Bengal
88
-
89
- # Major Metros & Hubs (Commonly appear)
90
- 'Mumbai': (19.0760, 72.8777),
91
- 'Pune': (18.5204, 73.8567),
92
- 'Nagpur': (21.1458, 79.0882),
93
- 'Thane': (19.2183, 72.9781),
94
- 'Nashik': (19.9975, 73.7898),
95
- 'Lucknow': (26.8467, 80.9462),
96
- 'Kanpur': (26.4499, 80.3319),
97
- 'Ghaziabad': (28.6692, 77.4538),
98
- 'Agra': (27.1767, 78.0081),
99
- 'Varanasi': (25.3176, 82.9739),
100
- 'Patna': (25.5941, 85.1376),
101
- 'Gaya': (24.7914, 85.0002),
102
- 'Muzaffarpur': (26.1197, 85.3910),
103
- 'Bangalore': (12.9716, 77.5946), 'Bengaluru': (12.9716, 77.5946),
104
- 'Mysore': (12.2958, 76.6394),
105
- 'Hubli': (15.3647, 75.1240),
106
- 'Mangalore': (12.9141, 74.8560),
107
- 'Belgaum': (15.8497, 74.4977),
108
- 'Chennai': (13.0827, 80.2707),
109
- 'Coimbatore': (11.0168, 76.9558),
110
- 'Madurai': (9.9252, 78.1198),
111
- 'Kolkata': (22.5726, 88.3639),
112
- 'Howrah': (22.5958, 88.2636),
113
- 'Darjeeling': (27.0410, 88.2663),
114
- 'Ahmedabad': (23.0225, 72.5714),
115
- 'Surat': (21.1702, 72.8311),
116
- 'Vadodara': (22.3072, 73.1812),
117
- 'Rajkot': (22.3039, 70.8022),
118
- 'Jaipur': (26.9124, 75.7873),
119
- 'Jodhpur': (26.2389, 73.0243),
120
- 'Udaipur': (24.5854, 73.7125),
121
- 'Hyderabad': (17.3850, 78.4867),
122
- 'Warangal': (17.9689, 79.5941),
123
- 'Bhopal': (23.2599, 77.4126),
124
- 'Indore': (22.7196, 75.8577),
125
- 'Raipur': (21.2514, 81.6296),
126
- 'Bilaspur': (22.0797, 82.1409),
127
- 'Guwahati': (26.1445, 91.7362),
128
- 'Visakhapatnam': (17.6868, 83.2185),
129
- 'Vijayawada': (16.5062, 80.6480),
130
- 'Thiruvananthapuram': (8.5241, 76.9366),
131
- 'Kochi': (9.9312, 76.2673),
132
- 'Kozhikode': (11.2588, 75.7804),
133
- 'Shimla': (31.1048, 77.1734),
134
- 'Dehradun': (30.3165, 78.0322),
135
- 'Ranchi': (23.3441, 85.3096),
136
- 'Bhubaneswar': (20.2961, 85.8245),
137
- 'Chandigarh': (30.7333, 76.7794),
138
- 'Gandhinagar': (23.2156, 72.6369),
139
- 'Panaji': (15.4909, 73.8278),
140
- 'Srinagar': (34.0837, 74.7973),
141
- 'Jammu': (32.7266, 74.8570),
142
- 'Imphal': (24.8170, 93.9368),
143
- 'Shillong': (25.5788, 91.8933),
144
- 'Aizawl': (23.7271, 92.7176),
145
- 'Kohima': (25.6751, 94.1086),
146
- 'Gangtok': (27.3389, 88.6065),
147
- 'Agartala': (23.8315, 91.2868),
148
- 'Port Blair': (11.6234, 92.7265),
149
- 'Kavaratti': (10.5667, 72.6417),
150
- 'Puducherry': (11.9416, 79.8083),
151
- 'Silvassa': (20.2763, 73.0083),
152
- 'Daman': (20.3974, 72.8328)
153
- }
154
-
155
- # Fallback State Centers (Only used if District is NOT in above list)
156
  state_centers = {
157
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
158
  'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
@@ -168,35 +169,34 @@ def load_data():
168
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
169
  }
170
 
171
- def get_coords(row):
172
- district = str(row.get('district', '')).strip()
173
- state = row.get('state', '')
174
 
175
- # 1. Try Exact District Match
176
- if district in district_coords:
177
- base_lat, base_lon = district_coords[district]
178
- # Tiny jitter just to separate overlapping dots from same district
179
- return pd.Series({'lat': base_lat + np.random.normal(0, 0.005), 'lon': base_lon + np.random.normal(0, 0.005)})
180
 
181
- # 2. Fallback to State Center with Randomized Jitter (Only if district unknown)
182
- center = state_centers.get(state, (20.5937, 78.9629))
183
- np.random.seed(hash(state + district) % 2**32)
184
  return pd.Series({
185
- 'lat': center[0] + np.random.uniform(-0.5, 0.5),
186
  'lon': center[1] + np.random.uniform(-0.5, 0.5)
187
  })
 
 
 
 
188
 
189
- coords = df.apply(get_coords, axis=1)
190
- df['lat'], df['lon'] = coords['lat'], coords['lon']
191
-
192
- # Recalculate Risk Category based on real data
193
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
194
  return df
195
 
196
- with st.spinner('Loading S.T.A.R.K AI System...'):
197
  df = load_data()
198
 
199
- # 4. SIDEBAR & FILTERS
200
  with st.sidebar:
201
  st.markdown("### 🛡️ S.T.A.R.K AI Control")
202
  st.markdown("---")
@@ -225,7 +225,7 @@ with st.sidebar:
225
  st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
226
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
227
 
228
- # 5. HEADER & METRICS
229
  col1, col2 = st.columns([3, 1])
230
  with col1:
231
  st.title("🛡️ S.T.A.R.K AI Dashboard")
@@ -248,16 +248,24 @@ else:
248
 
249
  st.markdown("##")
250
 
251
- # 6. TABS
252
  tab_map, tab_list, tab_charts, tab_insights = st.tabs(["🗺️ Geographic Risk", "📋 Priority List", "📊 Patterns", "🔍 AI Insights"])
253
 
254
  with tab_map:
255
  c_map, c_det = st.columns([3, 1])
256
  with c_map:
257
  if not filtered_df.empty:
 
 
 
 
 
258
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
259
- color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=3.8 if sel_state == 'All' else 5.5,
260
- center={"lat": 22.0, "lon": 80.0}, hover_name="district", hover_data={"state":True, "pincode":True}, mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
 
 
 
261
  fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
262
  st.plotly_chart(fig, use_container_width=True)
263
  else: st.warning("No data found to map.")
 
3
  import plotly.express as px
4
  import plotly.graph_objects as go
5
  import numpy as np
6
+ import requests
7
+ import time
8
  from datetime import datetime, timedelta
9
 
10
  # 1. PAGE CONFIGURATION
 
62
  </style>
63
  """, unsafe_allow_html=True)
64
 
65
+ # 3. DYNAMIC GEOCODING ENGINE
66
+ @st.cache_data(show_spinner=False)
67
+ def fetch_coordinates_batch(unique_locations):
68
+ """
69
+ Fetches coordinates from OpenStreetMap Nominatim API.
70
+ unique_locations: List of tuples (District, State)
71
+ Returns: Dictionary {(District, State): (lat, lon)}
72
+ """
73
+ # 1. Pre-filled Cache (For speed & redundancy)
74
+ coords_map = {
75
+ ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
76
+ ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
77
+ ('West Khasi Hills', 'Meghalaya'): (25.56, 91.29),
78
+ ('Bijapur', 'Chhattisgarh'): (18.80, 80.82),
79
+ ('Dhule', 'Maharashtra'): (20.90, 74.77),
80
+ ('Dhamtari', 'Chhattisgarh'): (20.71, 81.55),
81
+ ('Udupi', 'Karnataka'): (13.34, 74.75),
82
+ ('Supaul', 'Bihar'): (26.29, 86.82),
83
+ ('Puruliya', 'West Bengal'): (23.25, 86.50),
84
+ ('Mumbai', 'Maharashtra'): (19.0760, 72.8777),
85
+ ('Pune', 'Maharashtra'): (18.5204, 73.8567),
86
+ ('Bangalore', 'Karnataka'): (12.9716, 77.5946),
87
+ ('Bengaluru', 'Karnataka'): (12.9716, 77.5946),
88
+ ('Chennai', 'Tamil Nadu'): (13.0827, 80.2707),
89
+ ('Hyderabad', 'Telangana'): (17.3850, 78.4867),
90
+ ('Kolkata', 'West Bengal'): (22.5726, 88.3639),
91
+ ('Delhi', 'Delhi'): (28.7041, 77.1025),
92
+ ('Shimla', 'Himachal Pradesh'): (31.1048, 77.1734)
93
+ }
94
+
95
+ # 2. Identify missing locations
96
+ missing_locs = [loc for loc in unique_locations if loc not in coords_map]
97
+
98
+ if not missing_locs:
99
+ return coords_map
100
+
101
+ # 3. Dynamic Fetching for missing
102
+ progress_text = "📡 Connecting to Satellite Geocoding API..."
103
+ my_bar = st.progress(0, text=progress_text)
104
+
105
+ headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
106
+
107
+ for i, (district, state) in enumerate(missing_locs):
108
+ try:
109
+ # Update Progress
110
+ my_bar.progress((i + 1) / len(missing_locs), text=f"📍 Locating: {district}, {state}")
111
+
112
+ # API Call
113
+ query = f"{district}, {state}, India"
114
+ url = "https://nominatim.openstreetmap.org/search"
115
+ params = {'q': query, 'format': 'json', 'limit': 1}
116
+
117
+ response = requests.get(url, params=params, headers=headers, timeout=5)
118
+
119
+ if response.status_code == 200 and response.json():
120
+ data = response.json()[0]
121
+ coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
122
+ else:
123
+ # Fallback if API fails: Keep existing State Centers logic inside main loop later
124
+ pass
125
+
126
+ # Respect Rate Limiting (1 request per second)
127
+ time.sleep(1.1)
128
+
129
+ except Exception as e:
130
+ continue
131
+
132
+ my_bar.empty()
133
+ return coords_map
134
+
135
+ # 4. MAIN DATA LOADER
136
  @st.cache_data(ttl=300)
137
  def load_data():
 
138
  try:
139
  df = pd.read_csv('analyzed_aadhaar_data.csv')
140
  except FileNotFoundError:
 
143
 
144
  if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
145
 
146
+ # Clean Data
147
+ df['district'] = df['district'].astype(str).str.strip()
148
+ df['state'] = df['state'].astype(str).str.strip()
149
+
150
+ # Get Unique Locations
151
+ unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
152
+
153
+ # Fetch Coordinates (Cached)
154
+ coords_db = fetch_coordinates_batch(unique_locs)
155
+
156
+ # Fallback Centers (State Capitals)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  state_centers = {
158
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
159
  'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
 
169
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
170
  }
171
 
172
+ def get_lat_lon(row):
173
+ key = (row['district'], row['state'])
 
174
 
175
+ # 1. Check Exact Match from API/Cache
176
+ if key in coords_db:
177
+ lat, lon = coords_db[key]
178
+ # Tiny jitter to separate stacked points
179
+ return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
180
 
181
+ # 2. Fallback to State Center
182
+ center = state_centers.get(row['state'], (20.5937, 78.9629))
183
+ np.random.seed(hash(key) % 2**32)
184
  return pd.Series({
185
+ 'lat': center[0] + np.random.uniform(-0.5, 0.5),
186
  'lon': center[1] + np.random.uniform(-0.5, 0.5)
187
  })
188
+
189
+ coords = df.apply(get_lat_lon, axis=1)
190
+ df['lat'] = coords['lat']
191
+ df['lon'] = coords['lon']
192
 
 
 
 
 
193
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
194
  return df
195
 
196
+ with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
197
  df = load_data()
198
 
199
+ # 5. SIDEBAR & FILTERS
200
  with st.sidebar:
201
  st.markdown("### 🛡️ S.T.A.R.K AI Control")
202
  st.markdown("---")
 
225
  st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
226
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
227
 
228
+ # 6. HEADER & METRICS
229
  col1, col2 = st.columns([3, 1])
230
  with col1:
231
  st.title("🛡️ S.T.A.R.K AI Dashboard")
 
248
 
249
  st.markdown("##")
250
 
251
+ # 7. TABS
252
  tab_map, tab_list, tab_charts, tab_insights = st.tabs(["🗺️ Geographic Risk", "📋 Priority List", "📊 Patterns", "🔍 AI Insights"])
253
 
254
  with tab_map:
255
  c_map, c_det = st.columns([3, 1])
256
  with c_map:
257
  if not filtered_df.empty:
258
+ # Dynamic Zoom based on selection
259
+ if sel_dist != 'All': zoom_lvl = 10
260
+ elif sel_state != 'All': zoom_lvl = 6
261
+ else: zoom_lvl = 3.8
262
+
263
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
264
+ color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
265
+ center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
266
+ hover_name="district", hover_data={"state":True, "pincode":True, "lat":False, "lon":False},
267
+ mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
268
+
269
  fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
270
  st.plotly_chart(fig, use_container_width=True)
271
  else: st.warning("No data found to map.")