LovnishVerma committed on
Commit
1ef7e77
·
verified ·
1 Parent(s): a009ce9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -73
app.py CHANGED
@@ -9,7 +9,16 @@ import json
9
  import os
10
  from datetime import datetime, timedelta
11
 
 
 
 
 
 
 
 
 
12
  # 1. PAGE CONFIGURATION
 
13
  st.set_page_config(
14
  page_title="S.A.T.A.R.K AI | UIDAI Fraud Detection",
15
  page_icon="πŸ›‘οΈ",
@@ -17,13 +26,14 @@ st.set_page_config(
17
  initial_sidebar_state="expanded"
18
  )
19
 
 
20
  # 2. ROBUST CSS STYLING (Dark Mode Proof)
 
21
  st.markdown("""
22
  <style>
23
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
24
 
25
  /* --- 1. MAIN CONTENT AREA (Light Theme Enforced) --- */
26
- /* Target only the main content, NOT the sidebar */
27
  .stApp > header { background-color: transparent !important; }
28
 
29
  div[data-testid="stAppViewContainer"] {
@@ -51,17 +61,16 @@ st.markdown("""
51
  border-right: 1px solid #334155;
52
  }
53
 
54
- /* NUCLEAR OPTION: Force ALL text in sidebar to be White */
55
  section[data-testid="stSidebar"] * {
56
- color: #f8fafc !important; /* White Text */
57
  }
58
 
59
- /* EXCEPTION: Inputs inside Sidebar (Selectbox, DateInput) */
60
- /* These usually have white backgrounds, so we need Dark Text inside them */
61
  section[data-testid="stSidebar"] input,
62
  section[data-testid="stSidebar"] textarea,
63
  section[data-testid="stSidebar"] div[data-baseweb="select"] div {
64
- color: #0f172a !important; /* Dark Text for Inputs */
65
  -webkit-text-fill-color: #0f172a !important;
66
  }
67
 
@@ -98,7 +107,6 @@ st.markdown("""
98
  margin-bottom: 12px;
99
  box-shadow: 0 2px 4px rgba(0,0,0,0.05);
100
  }
101
- /* Since Hotspot Cards are in Main Area, text inherits Dark, which is good. */
102
 
103
  /* Status Badges */
104
  .status-badge {
@@ -111,82 +119,93 @@ st.markdown("""
111
  </style>
112
  """, unsafe_allow_html=True)
113
 
114
- # 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
 
 
115
  @st.cache_data(show_spinner=False)
116
- def fetch_coordinates_batch(unique_locations):
117
- json_file = 'district_coords.json'
118
- coords_map = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
 
120
  if os.path.exists(json_file):
121
  try:
122
- with open(json_file, 'r') as f:
123
- loaded_data = json.load(f)
124
- for k, v in loaded_data.items():
125
- if "|" in k:
126
- d, s = k.split("|")
127
- coords_map[(d, s)] = tuple(v)
128
- except json.JSONDecodeError:
129
  pass
130
 
131
- prefills = {
132
- ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
133
- ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
134
- ('West Khasi Hills', 'Meghalaya'): (25.56, 91.29),
135
- ('Bijapur', 'Chhattisgarh'): (18.80, 80.82),
136
- ('Dhule', 'Maharashtra'): (20.90, 74.77),
137
- ('Dhamtari', 'Chhattisgarh'): (20.71, 81.55),
138
- ('Udupi', 'Karnataka'): (13.34, 74.75),
139
- ('Supaul', 'Bihar'): (26.29, 86.82),
140
- ('Puruliya', 'West Bengal'): (23.25, 86.50),
141
- ('Mumbai', 'Maharashtra'): (19.0760, 72.8777),
142
- ('Pune', 'Maharashtra'): (18.5204, 73.8567),
143
- ('Bangalore', 'Karnataka'): (12.9716, 77.5946),
144
- ('Bengaluru', 'Karnataka'): (12.9716, 77.5946),
145
- ('Chennai', 'Tamil Nadu'): (13.0827, 80.2707),
146
- ('Hyderabad', 'Telangana'): (17.3850, 78.4867),
147
- ('Kolkata', 'West Bengal'): (22.5726, 88.3639),
148
- ('Delhi', 'Delhi'): (28.7041, 77.1025),
149
- ('Shimla', 'Himachal Pradesh'): (31.1048, 77.1734)
150
- }
151
- for k, v in prefills.items():
152
- if k not in coords_map:
153
- coords_map[k] = v
154
 
155
- missing_locs = [loc for loc in unique_locations if loc not in coords_map]
156
- if not missing_locs:
157
  return coords_map
158
 
159
- progress_text = "πŸ“‘ New locations found. Fetching coordinates..."
160
- my_bar = st.progress(0, text=progress_text)
161
- headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
162
- updated = False
 
 
163
 
164
- for i, (district, state) in enumerate(missing_locs):
 
165
  try:
166
- my_bar.progress((i + 1) / len(missing_locs), text=f"πŸ“ Locating: {district}, {state}")
167
- query = f"{district}, {state}, India"
168
  url = "https://nominatim.openstreetmap.org/search"
169
- params = {'q': query, 'format': 'json', 'limit': 1}
170
- response = requests.get(url, params=params, headers=headers, timeout=5)
171
-
172
- if response.status_code == 200 and response.json():
173
- data = response.json()[0]
174
- coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
 
 
 
175
  updated = True
176
- time.sleep(1.1)
 
 
 
 
 
 
177
  except Exception:
178
- continue
 
 
179
 
180
  my_bar.empty()
 
181
 
 
182
  if updated:
183
- save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
184
- with open(json_file, 'w') as f:
185
- json.dump(save_data, f)
186
 
187
  return coords_map
188
 
 
189
  # 4. MAIN DATA LOADER
 
190
  @st.cache_data(ttl=300)
191
  def load_data():
192
  try:
@@ -200,6 +219,11 @@ def load_data():
200
  df['district'] = df['district'].astype(str).str.strip()
201
  df['state'] = df['state'].astype(str).str.strip()
202
 
 
 
 
 
 
203
  state_mapping = {
204
  'Jammu & Kashmir': 'Jammu and Kashmir',
205
  'J&K': 'Jammu and Kashmir',
@@ -217,31 +241,37 @@ def load_data():
217
  }
218
  df['state'] = df['state'].replace(state_mapping)
219
 
220
- unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
221
- coords_db = fetch_coordinates_batch(unique_locs)
222
- state_centers = {
223
- 'Delhi': (28.7041, 77.1025), 'Maharashtra': (19.7515, 75.7139), 'Karnataka': (15.3173, 75.7139)
224
- }
225
 
226
  def get_lat_lon(row):
227
- key = (row['district'], row['state'])
228
- if key in coords_db:
229
- lat, lon = coords_db[key]
230
- return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
231
- center = state_centers.get(row['state'], (20.5937, 78.9629))
232
- np.random.seed(hash(key) % 2**32)
233
- return pd.Series({'lat': center[0] + np.random.uniform(-0.5, 0.5), 'lon': center[1] + np.random.uniform(-0.5, 0.5)})
 
 
 
234
 
235
  coords = df.apply(get_lat_lon, axis=1)
236
  df['lat'] = coords['lat']
237
  df['lon'] = coords['lon']
 
 
 
238
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
239
  return df
240
 
241
  with st.spinner('Initializing S.A.T.A.R.K AI...'):
242
  df = load_data()
243
 
 
244
  # 5. SIDEBAR & FILTERS
 
245
  with st.sidebar:
246
  st.markdown("### πŸ›‘οΈ S.A.T.A.R.K AI Control")
247
  st.markdown("---")
@@ -273,7 +303,9 @@ with st.sidebar:
273
  st.link_button("πŸ““ Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
274
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
275
 
 
276
  # 6. HEADER & METRICS
 
277
  col1, col2 = st.columns([3, 1])
278
  with col1:
279
  st.title("πŸ›‘οΈ S.A.T.A.R.K AI Dashboard")
@@ -299,7 +331,9 @@ else:
299
 
300
  st.markdown("##")
301
 
 
302
  # 7. TABS
 
303
  tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
304
 
305
  with tab_map:
@@ -307,6 +341,7 @@ with tab_map:
307
  with c_map:
308
  if not filtered_df.empty:
309
  zoom_lvl = 10 if sel_dist != 'All' else (6 if sel_state != 'All' else 3.8)
 
310
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
311
  color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
312
  center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
 
9
  import os
10
  from datetime import datetime, timedelta
11
 
12
# ==========================================
# 0. MAPBOX CONFIGURATION (FIX 1)
# ==========================================
# Required for stable map rendering.
# Supply your own token through the MAPBOX_ACCESS_TOKEN environment variable
# so it never has to be committed to source control. When the variable is
# unset or empty, the original demo placeholder is used, so existing
# deployments keep behaving exactly as before.
px.set_mapbox_access_token(
    os.environ.get("MAPBOX_ACCESS_TOKEN")
    or "pk.eyJ1IjoiZGVtbyIsImEiOiJja2E0Z2QwZjgwMnZ5MnFwdWg1dm80YmQ1In0.SAMPLE"
)
18
+
19
+ # ==========================================
20
  # 1. PAGE CONFIGURATION
21
+ # ==========================================
22
  st.set_page_config(
23
  page_title="S.A.T.A.R.K AI | UIDAI Fraud Detection",
24
  page_icon="πŸ›‘οΈ",
 
26
  initial_sidebar_state="expanded"
27
  )
28
 
29
+ # ==========================================
30
  # 2. ROBUST CSS STYLING (Dark Mode Proof)
31
+ # ==========================================
32
  st.markdown("""
33
  <style>
34
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
35
 
36
  /* --- 1. MAIN CONTENT AREA (Light Theme Enforced) --- */
 
37
  .stApp > header { background-color: transparent !important; }
38
 
39
  div[data-testid="stAppViewContainer"] {
 
61
  border-right: 1px solid #334155;
62
  }
63
 
64
+ /* Force ALL text in sidebar to be White */
65
  section[data-testid="stSidebar"] * {
66
+ color: #f8fafc !important;
67
  }
68
 
69
+ /* EXCEPTION: Inputs inside Sidebar */
 
70
  section[data-testid="stSidebar"] input,
71
  section[data-testid="stSidebar"] textarea,
72
  section[data-testid="stSidebar"] div[data-baseweb="select"] div {
73
+ color: #0f172a !important;
74
  -webkit-text-fill-color: #0f172a !important;
75
  }
76
 
 
107
  margin-bottom: 12px;
108
  box-shadow: 0 2px 4px rgba(0,0,0,0.05);
109
  }
 
110
 
111
  /* Status Badges */
112
  .status-badge {
 
119
  </style>
120
  """, unsafe_allow_html=True)
121
 
122
+ # ==========================================
123
+ # 3. GEOCODING ENGINE (MASTER DB)
124
+ # ==========================================
125
  @st.cache_data(show_spinner=False)
126
def get_seed_data():
    """Return the built-in pincode lookup table.

    The table is a pre-loaded set of critical pincodes from the user data
    plus major cities, so known high-priority targets resolve instantly.

    Returns:
        dict[str, list[float]]: six-digit pincode string mapped to a
        two-element ``[latitude, longitude]`` list.
    """
    # One row per seed location: (pincode, latitude, longitude).
    seed_rows = (
        ("783348", 26.13, 90.12),
        ("785601", 26.51, 93.97),
        ("783384", 26.48, 90.56),
        ("782124", 26.33, 92.75),
        ("793150", 25.55, 92.38),
        ("494444", 18.80, 80.82),
        ("201301", 28.58, 77.31),
        ("424306", 20.90, 74.77),
        ("793119", 25.56, 91.29),
        ("784115", 26.44, 92.00),
        ("781123", 26.15, 91.22),
        ("784505", 26.77, 92.70),
        ("493770", 20.71, 81.55),
        ("576101", 13.34, 74.75),
        ("854338", 26.29, 86.82),
        ("201305", 28.39, 77.65),
        ("723146", 23.25, 86.50),
        ("110001", 28.61, 77.21),
        ("560001", 12.97, 77.59),
        ("400001", 18.93, 72.83),
        ("700001", 22.57, 88.36),
        ("600001", 13.08, 80.27),
        ("500001", 17.38, 78.48),
    )
    return {pincode: [lat, lon] for pincode, lat, lon in seed_rows}
139
+
140
@st.cache_data(show_spinner=False)
def fetch_coordinates(unique_pincodes):
    """Resolve pincodes to ``[lat, lon]`` pairs, geocoding unknown ones.

    Lookup order:
      1. the built-in seed table from ``get_seed_data()``;
      2. the on-disk master DB (``india_pincode_master.json``), if present;
      3. the Nominatim search API for anything still missing.

    Args:
        unique_pincodes: iterable of pincodes; each is converted with
            ``str()`` so keys are always strings.

    Returns:
        dict mapping pincode string to ``[lat, lon]``. Every requested
        pincode is present. Ones that could not be resolved map to the
        India centroid ``[20.5937, 78.9629]``.

    Notes:
        * A definitive "no match" answer (HTTP 200 with an empty result)
          is stored as the centroid, as before.
        * Transient failures (network errors, timeouts, non-200 responses,
          malformed payloads) get the centroid only in the returned map.
          They are NOT written to the master DB, so a temporary outage
          cannot pin a pincode to the centroid permanently; they are
          retried on the next uncached call.
        * The master DB is rewritten only when at least one new pincode
          was successfully geocoded.
    """
    json_file = "india_pincode_master.json"
    india_centroid = (20.5937, 78.9629)
    coords_map = get_seed_data()  # Initialize with Seed Data

    # 1. Load Master DB if it exists. A missing, unreadable or corrupt file
    #    is not fatal: we simply continue with the seed data.
    if os.path.exists(json_file):
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                saved_map = json.load(f)
        except (OSError, ValueError):  # ValueError covers JSONDecodeError
            saved_map = None
        if isinstance(saved_map, dict):
            coords_map.update(saved_map)  # Merge saved with seed

    # 2. Identify missing pincodes (as strings, de-duplicated, order kept).
    missing = list(dict.fromkeys(
        pin for pin in map(str, unique_pincodes) if pin not in coords_map
    ))
    if not missing:
        return coords_map

    # 3. Fetch missing pincodes from the API.
    headers = {"User-Agent": "UIDAI-Sentinel-Dashboard/2.0 (Govt Hackathon)"}
    url = "https://nominatim.openstreetmap.org/search"
    total = len(missing)

    progress_placeholder = st.empty()
    my_bar = progress_placeholder.progress(
        0, text=f"📡 Updating Master DB: 0/{total} pincodes..."
    )

    updated = False
    transient_failures = []  # pincodes to fall back on without persisting
    for i, pin in enumerate(missing):
        try:
            # Query format: "110001 India"
            params = {"q": f"{pin} India", "format": "json", "limit": 1}
            r = requests.get(url, params=params, headers=headers, timeout=5)

            if r.status_code == 200:
                results = r.json()  # parse the body once
                if results:
                    best = results[0]
                    coords_map[pin] = [float(best["lat"]), float(best["lon"])]
                    updated = True
                else:
                    # Definitive "not found": fall back to India centroid.
                    coords_map[pin] = list(india_centroid)
            else:
                transient_failures.append(pin)
        except (requests.RequestException, ValueError, KeyError,
                IndexError, TypeError):
            # Network problem or malformed payload: treat as transient.
            transient_failures.append(pin)
        finally:
            # Respect API rate limits on every path, including failures,
            # so an error never causes back-to-back requests.
            time.sleep(1.0)

        my_bar.progress(
            (i + 1) / total, text=f"📡 Updating Master DB: {i+1}/{total}..."
        )

    my_bar.empty()
    progress_placeholder.empty()

    # 4. Save updates to the Master DB *before* adding transient fallbacks,
    #    so placeholder coordinates from failed requests are never persisted.
    if updated:
        with open(json_file, "w", encoding="utf-8") as f:
            json.dump(coords_map, f)

    for pin in transient_failures:
        coords_map[pin] = list(india_centroid)

    return coords_map
205
 
206
+ # ==========================================
207
  # 4. MAIN DATA LOADER
208
+ # ==========================================
209
  @st.cache_data(ttl=300)
210
  def load_data():
211
  try:
 
219
  df['district'] = df['district'].astype(str).str.strip()
220
  df['state'] = df['state'].astype(str).str.strip()
221
 
222
+ # FIX 3: PINCODE TYPE NORMALIZATION
223
+ # Ensures all pincodes are clean strings, removing junk/letters
224
+ df["pincode"] = df["pincode"].astype(str).str.extract(r"(\d{6})")[0]
225
+
226
+ # Normalize State Names
227
  state_mapping = {
228
  'Jammu & Kashmir': 'Jammu and Kashmir',
229
  'J&K': 'Jammu and Kashmir',
 
241
  }
242
  df['state'] = df['state'].replace(state_mapping)
243
 
244
+ # --- GEOCODING INTEGRATION START ---
245
+ unique_pincodes = df["pincode"].dropna().unique().tolist()
246
+ coords_db = fetch_coordinates(unique_pincodes)
 
 
247
 
248
  def get_lat_lon(row):
249
+ pin = str(row["pincode"])
250
+ # Default to India Centroid if somehow missing
251
+ lat, lon = coords_db.get(pin, [20.5937, 78.9629])
252
+
253
+ # FIX 2: SCALED JITTER (90m separation)
254
+ # Prevents overlap without distorting district view
255
+ jitter_lat = np.random.uniform(-0.0008, 0.0008)
256
+ jitter_lon = np.random.uniform(-0.0008, 0.0008)
257
+
258
+ return pd.Series({'lat': lat + jitter_lat, 'lon': lon + jitter_lon})
259
 
260
  coords = df.apply(get_lat_lon, axis=1)
261
  df['lat'] = coords['lat']
262
  df['lon'] = coords['lon']
263
+ # --- GEOCODING INTEGRATION END ---
264
+
265
+ # Categorize Risk
266
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
267
  return df
268
 
269
  with st.spinner('Initializing S.A.T.A.R.K AI...'):
270
  df = load_data()
271
 
272
+ # ==========================================
273
  # 5. SIDEBAR & FILTERS
274
+ # ==========================================
275
  with st.sidebar:
276
  st.markdown("### πŸ›‘οΈ S.A.T.A.R.K AI Control")
277
  st.markdown("---")
 
303
  st.link_button("πŸ““ Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
304
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
305
 
306
+ # ==========================================
307
  # 6. HEADER & METRICS
308
+ # ==========================================
309
  col1, col2 = st.columns([3, 1])
310
  with col1:
311
  st.title("πŸ›‘οΈ S.A.T.A.R.K AI Dashboard")
 
331
 
332
  st.markdown("##")
333
 
334
+ # ==========================================
335
  # 7. TABS
336
+ # ==========================================
337
  tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])
338
 
339
  with tab_map:
 
341
  with c_map:
342
  if not filtered_df.empty:
343
  zoom_lvl = 10 if sel_dist != 'All' else (6 if sel_state != 'All' else 3.8)
344
+ # Use Lat/Lon for plotting
345
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
346
  color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
347
  center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},