Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,8 @@ import pandas as pd
|
|
| 3 |
import plotly.express as px
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import numpy as np
|
|
|
|
|
|
|
| 6 |
from datetime import datetime, timedelta
|
| 7 |
|
| 8 |
# 1. PAGE CONFIGURATION
|
|
@@ -60,10 +62,79 @@ st.markdown("""
|
|
| 60 |
</style>
|
| 61 |
""", unsafe_allow_html=True)
|
| 62 |
|
| 63 |
-
# 3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
@st.cache_data(ttl=300)
|
| 65 |
def load_data():
|
| 66 |
-
# Strictly load data from CSV - NO RANDOM GENERATION
|
| 67 |
try:
|
| 68 |
df = pd.read_csv('analyzed_aadhaar_data.csv')
|
| 69 |
except FileNotFoundError:
|
|
@@ -72,87 +143,17 @@ def load_data():
|
|
| 72 |
|
| 73 |
if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
|
| 74 |
|
| 75 |
-
#
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
'Supaul': (26.29, 86.82), # Bihar
|
| 87 |
-
'Puruliya': (23.25, 86.50), # West Bengal
|
| 88 |
-
|
| 89 |
-
# Major Metros & Hubs (Commonly appear)
|
| 90 |
-
'Mumbai': (19.0760, 72.8777),
|
| 91 |
-
'Pune': (18.5204, 73.8567),
|
| 92 |
-
'Nagpur': (21.1458, 79.0882),
|
| 93 |
-
'Thane': (19.2183, 72.9781),
|
| 94 |
-
'Nashik': (19.9975, 73.7898),
|
| 95 |
-
'Lucknow': (26.8467, 80.9462),
|
| 96 |
-
'Kanpur': (26.4499, 80.3319),
|
| 97 |
-
'Ghaziabad': (28.6692, 77.4538),
|
| 98 |
-
'Agra': (27.1767, 78.0081),
|
| 99 |
-
'Varanasi': (25.3176, 82.9739),
|
| 100 |
-
'Patna': (25.5941, 85.1376),
|
| 101 |
-
'Gaya': (24.7914, 85.0002),
|
| 102 |
-
'Muzaffarpur': (26.1197, 85.3910),
|
| 103 |
-
'Bangalore': (12.9716, 77.5946), 'Bengaluru': (12.9716, 77.5946),
|
| 104 |
-
'Mysore': (12.2958, 76.6394),
|
| 105 |
-
'Hubli': (15.3647, 75.1240),
|
| 106 |
-
'Mangalore': (12.9141, 74.8560),
|
| 107 |
-
'Belgaum': (15.8497, 74.4977),
|
| 108 |
-
'Chennai': (13.0827, 80.2707),
|
| 109 |
-
'Coimbatore': (11.0168, 76.9558),
|
| 110 |
-
'Madurai': (9.9252, 78.1198),
|
| 111 |
-
'Kolkata': (22.5726, 88.3639),
|
| 112 |
-
'Howrah': (22.5958, 88.2636),
|
| 113 |
-
'Darjeeling': (27.0410, 88.2663),
|
| 114 |
-
'Ahmedabad': (23.0225, 72.5714),
|
| 115 |
-
'Surat': (21.1702, 72.8311),
|
| 116 |
-
'Vadodara': (22.3072, 73.1812),
|
| 117 |
-
'Rajkot': (22.3039, 70.8022),
|
| 118 |
-
'Jaipur': (26.9124, 75.7873),
|
| 119 |
-
'Jodhpur': (26.2389, 73.0243),
|
| 120 |
-
'Udaipur': (24.5854, 73.7125),
|
| 121 |
-
'Hyderabad': (17.3850, 78.4867),
|
| 122 |
-
'Warangal': (17.9689, 79.5941),
|
| 123 |
-
'Bhopal': (23.2599, 77.4126),
|
| 124 |
-
'Indore': (22.7196, 75.8577),
|
| 125 |
-
'Raipur': (21.2514, 81.6296),
|
| 126 |
-
'Bilaspur': (22.0797, 82.1409),
|
| 127 |
-
'Guwahati': (26.1445, 91.7362),
|
| 128 |
-
'Visakhapatnam': (17.6868, 83.2185),
|
| 129 |
-
'Vijayawada': (16.5062, 80.6480),
|
| 130 |
-
'Thiruvananthapuram': (8.5241, 76.9366),
|
| 131 |
-
'Kochi': (9.9312, 76.2673),
|
| 132 |
-
'Kozhikode': (11.2588, 75.7804),
|
| 133 |
-
'Shimla': (31.1048, 77.1734),
|
| 134 |
-
'Dehradun': (30.3165, 78.0322),
|
| 135 |
-
'Ranchi': (23.3441, 85.3096),
|
| 136 |
-
'Bhubaneswar': (20.2961, 85.8245),
|
| 137 |
-
'Chandigarh': (30.7333, 76.7794),
|
| 138 |
-
'Gandhinagar': (23.2156, 72.6369),
|
| 139 |
-
'Panaji': (15.4909, 73.8278),
|
| 140 |
-
'Srinagar': (34.0837, 74.7973),
|
| 141 |
-
'Jammu': (32.7266, 74.8570),
|
| 142 |
-
'Imphal': (24.8170, 93.9368),
|
| 143 |
-
'Shillong': (25.5788, 91.8933),
|
| 144 |
-
'Aizawl': (23.7271, 92.7176),
|
| 145 |
-
'Kohima': (25.6751, 94.1086),
|
| 146 |
-
'Gangtok': (27.3389, 88.6065),
|
| 147 |
-
'Agartala': (23.8315, 91.2868),
|
| 148 |
-
'Port Blair': (11.6234, 92.7265),
|
| 149 |
-
'Kavaratti': (10.5667, 72.6417),
|
| 150 |
-
'Puducherry': (11.9416, 79.8083),
|
| 151 |
-
'Silvassa': (20.2763, 73.0083),
|
| 152 |
-
'Daman': (20.3974, 72.8328)
|
| 153 |
-
}
|
| 154 |
-
|
| 155 |
-
# Fallback State Centers (Only used if District is NOT in above list)
|
| 156 |
state_centers = {
|
| 157 |
'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
|
| 158 |
'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
|
|
@@ -168,35 +169,34 @@ def load_data():
|
|
| 168 |
'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
|
| 169 |
}
|
| 170 |
|
| 171 |
-
def
|
| 172 |
-
|
| 173 |
-
state = row.get('state', '')
|
| 174 |
|
| 175 |
-
# 1.
|
| 176 |
-
if
|
| 177 |
-
|
| 178 |
-
# Tiny jitter
|
| 179 |
-
return pd.Series({'lat':
|
| 180 |
|
| 181 |
-
# 2. Fallback to State Center
|
| 182 |
-
center = state_centers.get(state, (20.5937, 78.9629))
|
| 183 |
-
np.random.seed(hash(
|
| 184 |
return pd.Series({
|
| 185 |
-
'lat': center[0] + np.random.uniform(-0.5, 0.5),
|
| 186 |
'lon': center[1] + np.random.uniform(-0.5, 0.5)
|
| 187 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
-
coords = df.apply(get_coords, axis=1)
|
| 190 |
-
df['lat'], df['lon'] = coords['lat'], coords['lon']
|
| 191 |
-
|
| 192 |
-
# Recalculate Risk Category based on real data
|
| 193 |
df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
|
| 194 |
return df
|
| 195 |
|
| 196 |
-
with st.spinner('
|
| 197 |
df = load_data()
|
| 198 |
|
| 199 |
-
#
|
| 200 |
with st.sidebar:
|
| 201 |
st.markdown("### π‘οΈ S.T.A.R.K AI Control")
|
| 202 |
st.markdown("---")
|
|
@@ -225,7 +225,7 @@ with st.sidebar:
|
|
| 225 |
st.link_button("π Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
|
| 226 |
st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
|
| 227 |
|
| 228 |
-
#
|
| 229 |
col1, col2 = st.columns([3, 1])
|
| 230 |
with col1:
|
| 231 |
st.title("π‘οΈ S.T.A.R.K AI Dashboard")
|
|
@@ -248,16 +248,24 @@ else:
|
|
| 248 |
|
| 249 |
st.markdown("##")
|
| 250 |
|
| 251 |
-
#
|
| 252 |
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πΊοΈ Geographic Risk", "π Priority List", "π Patterns", "π AI Insights"])
|
| 253 |
|
| 254 |
with tab_map:
|
| 255 |
c_map, c_det = st.columns([3, 1])
|
| 256 |
with c_map:
|
| 257 |
if not filtered_df.empty:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
|
| 259 |
-
color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=
|
| 260 |
-
center={"lat":
|
|
|
|
|
|
|
|
|
|
| 261 |
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
|
| 262 |
st.plotly_chart(fig, use_container_width=True)
|
| 263 |
else: st.warning("No data found to map.")
|
|
|
|
| 3 |
import plotly.express as px
|
| 4 |
import plotly.graph_objects as go
|
| 5 |
import numpy as np
|
| 6 |
+
import requests
|
| 7 |
+
import time
|
| 8 |
from datetime import datetime, timedelta
|
| 9 |
|
| 10 |
# 1. PAGE CONFIGURATION
|
|
|
|
| 62 |
</style>
|
| 63 |
""", unsafe_allow_html=True)
|
| 64 |
|
| 65 |
+
# 3. DYNAMIC GEOCODING ENGINE
|
| 66 |
+
@st.cache_data(show_spinner=False)
def fetch_coordinates_batch(unique_locations):
    """
    Fetches coordinates from OpenStreetMap Nominatim API.

    Well-known locations are answered from a small built-in table without any
    network traffic; every other pair costs one HTTP request, and requests are
    paced so that at most one is sent per second.

    unique_locations: Iterable of tuples (District, State)
    Returns: Dictionary {(District, State): (lat, lon)}. The built-in table is
        always included. Pairs that could not be resolved are left out, so the
        caller is expected to apply its own fallback for missing keys.
    """
    # 1. Pre-filled Cache (For speed & redundancy)
    coords_map = {
        ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
        ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
        ('West Khasi Hills', 'Meghalaya'): (25.56, 91.29),
        ('Bijapur', 'Chhattisgarh'): (18.80, 80.82),
        ('Dhule', 'Maharashtra'): (20.90, 74.77),
        ('Dhamtari', 'Chhattisgarh'): (20.71, 81.55),
        ('Udupi', 'Karnataka'): (13.34, 74.75),
        ('Supaul', 'Bihar'): (26.29, 86.82),
        ('Puruliya', 'West Bengal'): (23.25, 86.50),
        ('Mumbai', 'Maharashtra'): (19.0760, 72.8777),
        ('Pune', 'Maharashtra'): (18.5204, 73.8567),
        ('Bangalore', 'Karnataka'): (12.9716, 77.5946),
        ('Bengaluru', 'Karnataka'): (12.9716, 77.5946),
        ('Chennai', 'Tamil Nadu'): (13.0827, 80.2707),
        ('Hyderabad', 'Telangana'): (17.3850, 78.4867),
        ('Kolkata', 'West Bengal'): (22.5726, 88.3639),
        ('Delhi', 'Delhi'): (28.7041, 77.1025),
        ('Shimla', 'Himachal Pradesh'): (31.1048, 77.1734),
    }

    # 2. Identify missing locations.
    # dict.fromkeys drops repeated pairs (keeping first-seen order) so the same
    # place is never requested twice in one batch.
    missing_locs = [loc for loc in dict.fromkeys(unique_locations) if loc not in coords_map]

    if not missing_locs:
        return coords_map

    # 3. Dynamic Fetching for missing
    progress_text = "π‘ Connecting to Satellite Geocoding API..."
    my_bar = st.progress(0, text=progress_text)

    url = "https://nominatim.openstreetmap.org/search"
    headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
    total = len(missing_locs)

    try:
        for i, (district, state) in enumerate(missing_locs):
            # Update Progress
            my_bar.progress((i + 1) / total, text=f"π Locating: {district}, {state}")

            params = {'q': f"{district}, {state}, India", 'format': 'json', 'limit': 1}

            try:
                # API Call
                response = requests.get(url, params=params, headers=headers, timeout=5)

                if response.status_code == 200:
                    results = response.json()  # parse the body once
                    if results:
                        hit = results[0]
                        coords_map[(district, state)] = (float(hit['lat']), float(hit['lon']))
                # Any other status / empty result: leave the key absent so the
                # caller's fallback is used for this location.
            except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
                # Best-effort lookup: a network failure or an unexpected payload
                # for one location must not abort the remaining lookups.
                pass
            finally:
                # Respect Rate Limiting (1 request per second). This sits in
                # `finally` so that failed or timed-out requests are paced too.
                time.sleep(1.1)
    finally:
        # Always remove the progress bar, even if the loop aborts unexpectedly.
        my_bar.empty()

    return coords_map
|
| 134 |
+
|
| 135 |
+
# 4. MAIN DATA LOADER
|
| 136 |
@st.cache_data(ttl=300)
|
| 137 |
def load_data():
|
|
|
|
| 138 |
try:
|
| 139 |
df = pd.read_csv('analyzed_aadhaar_data.csv')
|
| 140 |
except FileNotFoundError:
|
|
|
|
| 143 |
|
| 144 |
if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
|
| 145 |
|
| 146 |
+
# Clean Data
|
| 147 |
+
df['district'] = df['district'].astype(str).str.strip()
|
| 148 |
+
df['state'] = df['state'].astype(str).str.strip()
|
| 149 |
+
|
| 150 |
+
# Get Unique Locations
|
| 151 |
+
unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
|
| 152 |
+
|
| 153 |
+
# Fetch Coordinates (Cached)
|
| 154 |
+
coords_db = fetch_coordinates_batch(unique_locs)
|
| 155 |
+
|
| 156 |
+
# Fallback Centers (State Capitals)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
state_centers = {
|
| 158 |
'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
|
| 159 |
'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
|
|
|
|
| 169 |
'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
|
| 170 |
}
|
| 171 |
|
| 172 |
+
def get_lat_lon(row):
    """
    Return a pd.Series with 'lat' and 'lon' for one DataFrame row.

    row: mapping-like row providing 'district' and 'state'.

    Lookup order:
      1. Exact (district, state) match in coords_db, plus a tiny random jitter
         so rows that share one location do not stack on a single point.
      2. Otherwise the state's entry in state_centers (or the hard-coded
         default point when the state is unknown), offset by up to 0.5 degrees
         in each axis. The offset is derived from the key itself, so the same
         (district, state) pair always lands on the same spot.
    """
    import zlib  # local import: only needed to derive the stable fallback seed

    key = (row['district'], row['state'])

    # 1. Check Exact Match from API/Cache
    if key in coords_db:
        lat, lon = coords_db[key]
        # Tiny jitter to separate stacked points
        return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})

    # 2. Fallback to State Center
    center = state_centers.get(row['state'], (20.5937, 78.9629))

    # Built-in hash() of strings is salted per interpreter process, so it cannot
    # give a placement that survives a restart. CRC32 of the key text is stable
    # and already fits the 32-bit seed range. A private generator is used so
    # NumPy's global random state is not reseeded as a side effect.
    seed = zlib.crc32(f"{key[0]}|{key[1]}".encode('utf-8'))
    rng = np.random.RandomState(seed)
    return pd.Series({
        'lat': center[0] + rng.uniform(-0.5, 0.5),
        'lon': center[1] + rng.uniform(-0.5, 0.5)
    })
|
| 188 |
+
|
| 189 |
+
coords = df.apply(get_lat_lon, axis=1)
|
| 190 |
+
df['lat'] = coords['lat']
|
| 191 |
+
df['lon'] = coords['lon']
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
|
| 194 |
return df
|
| 195 |
|
| 196 |
+
with st.spinner('Initializing S.T.A.R.K AI & Geocoding...'):
|
| 197 |
df = load_data()
|
| 198 |
|
| 199 |
+
# 5. SIDEBAR & FILTERS
|
| 200 |
with st.sidebar:
|
| 201 |
st.markdown("### π‘οΈ S.T.A.R.K AI Control")
|
| 202 |
st.markdown("---")
|
|
|
|
| 225 |
st.link_button("π Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
|
| 226 |
st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
|
| 227 |
|
| 228 |
+
# 6. HEADER & METRICS
|
| 229 |
col1, col2 = st.columns([3, 1])
|
| 230 |
with col1:
|
| 231 |
st.title("π‘οΈ S.T.A.R.K AI Dashboard")
|
|
|
|
| 248 |
|
| 249 |
st.markdown("##")
|
| 250 |
|
| 251 |
+
# 7. TABS
|
| 252 |
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πΊοΈ Geographic Risk", "π Priority List", "π Patterns", "π AI Insights"])
|
| 253 |
|
| 254 |
with tab_map:
|
| 255 |
c_map, c_det = st.columns([3, 1])
|
| 256 |
with c_map:
|
| 257 |
if not filtered_df.empty:
|
| 258 |
+
# Dynamic Zoom based on selection
|
| 259 |
+
if sel_dist != 'All': zoom_lvl = 10
|
| 260 |
+
elif sel_state != 'All': zoom_lvl = 6
|
| 261 |
+
else: zoom_lvl = 3.8
|
| 262 |
+
|
| 263 |
fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
|
| 264 |
+
color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
|
| 265 |
+
center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
|
| 266 |
+
hover_name="district", hover_data={"state":True, "pincode":True, "lat":False, "lon":False},
|
| 267 |
+
mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
|
| 268 |
+
|
| 269 |
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
|
| 270 |
st.plotly_chart(fig, use_container_width=True)
|
| 271 |
else: st.warning("No data found to map.")
|