LovnishVerma committed on
Commit
79c14a3
·
verified ·
1 Parent(s): 3cb671d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -76
app.py CHANGED
@@ -63,13 +63,76 @@ st.markdown("""
63
  # 3. ENHANCED DATA LOADING
64
  @st.cache_data(ttl=300)
65
  def load_data():
66
- # Strictly load data from CSV
67
- df = pd.read_csv('analyzed_aadhaar_data.csv')
68
- # Removed st.toast from inside cached function to prevent CacheReplayClosureError
 
 
 
69
 
70
  if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
71
 
72
- # Precise Geometric Centers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  state_centers = {
74
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
75
  'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
@@ -84,75 +147,60 @@ def load_data():
84
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
85
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
86
  }
87
-
88
- # EXPANDED Aspect Ratio Definitions (Lat spread, Lon spread)
89
- state_spreads = {
90
- 'Kerala': (1.2, 0.25), 'West Bengal': (1.4, 0.4), 'Assam': (0.4, 1.8),
91
- 'Maharashtra': (1.8, 2.2), 'Uttar Pradesh': (1.2, 2.5), 'Bihar': (0.8, 1.5),
92
- 'Delhi': (0.1, 0.12), 'Goa': (0.15, 0.15), 'Chandigarh': (0.04, 0.04),
93
- 'Gujarat': (1.5, 1.8), 'Rajasthan': (2.0, 2.0), 'Madhya Pradesh': (1.8, 2.5),
94
- 'Himachal Pradesh': (0.6, 0.8), 'Punjab': (0.8, 0.9), 'Haryana': (0.9, 0.8),
95
- 'Tamil Nadu': (1.2, 1.0), 'Karnataka': (1.5, 1.2), 'Telangana': (1.0, 1.0),
96
- 'Andhra Pradesh': (1.5, 1.5), 'Odisha': (1.2, 1.2), 'Chhattisgarh': (1.5, 0.9),
97
- 'Jharkhand': (0.8, 1.0), 'Jammu and Kashmir': (1.0, 1.5), 'Ladakh': (1.0, 1.5),
98
- 'Uttarakhand': (0.7, 0.8)
99
- }
100
 
101
  def get_coords(row):
102
- state = row.get('state', 'Delhi')
103
- district = str(row.get('district', 'Unknown')).lower()
104
- base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))
105
-
106
- # Safer Default if state not found
107
- lat_scale, lon_scale = state_spreads.get(state, (0.7, 0.7))
108
-
109
- lat_bias, lon_bias = 0, 0
110
- bias = 0.6
111
-
112
- if 'north' in district: lat_bias += lat_scale * bias
113
- if 'south' in district: lat_bias -= lat_scale * bias
114
- if 'east' in district: lon_bias += lon_scale * bias
115
- if 'west' in district: lon_bias -= lon_scale * bias
116
 
 
 
 
 
 
 
 
 
117
  np.random.seed(hash(state + district) % 2**32)
118
- rf = 0.5 if (lat_bias or lon_bias) else 1.0
119
-
120
  return pd.Series({
121
- 'lat': base_lat + lat_bias + np.random.uniform(-lat_scale*rf, lat_scale*rf) + np.random.normal(0, 0.04),
122
- 'lon': base_lon + lon_bias + np.random.uniform(-lon_scale*rf, lon_scale*rf) + np.random.normal(0, 0.04)
123
  })
124
 
125
  coords = df.apply(get_coords, axis=1)
126
  df['lat'], df['lon'] = coords['lat'], coords['lon']
 
 
127
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
128
  return df
129
 
130
  with st.spinner('Loading S.T.A.R.K AI System...'):
131
  df = load_data()
132
- # Toast moved outside cached function
133
- # st.toast("✅ Data loaded successfully", icon="✅")
134
 
135
  # 4. SIDEBAR & FILTERS
136
  with st.sidebar:
137
  st.markdown("### 🛡️ S.T.A.R.K AI Control")
138
  st.markdown("---")
139
- if 'date' in df.columns:
140
- min_d, max_d = df['date'].min().date(), df['date'].max().date()
141
- dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
142
- if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
143
-
144
- state_list = ['All'] + sorted(df['state'].unique().tolist())
145
- sel_state = st.selectbox("State", state_list)
146
- filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
147
-
148
- dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
149
- sel_dist = st.selectbox("District", dist_list)
150
- if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
151
-
152
- st.markdown("---")
153
- risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
154
- if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  st.markdown("---")
157
  st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
158
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
@@ -166,13 +214,18 @@ with col2:
166
  st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
167
 
168
  st.markdown("---")
169
- m1, m2, m3, m4, m5 = st.columns(5)
170
- total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
171
- m1.metric("Total Centers", f"{total:,}", border=True)
172
- m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
173
- m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
174
- m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
175
- m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
 
 
 
 
 
176
  st.markdown("##")
177
 
178
  # 6. TABS
@@ -183,11 +236,11 @@ with tab_map:
183
  with c_map:
184
  if not filtered_df.empty:
185
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
186
- color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=4.8 if sel_state != 'All' else 3.8,
187
- center={"lat": 22.0, "lon": 80.0}, hover_name="district", mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
188
  fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
189
  st.plotly_chart(fig, use_container_width=True)
190
- else: st.warning("No data found.")
191
 
192
  with c_det:
193
  st.subheader("🔥 Top Hotspots")
@@ -199,26 +252,30 @@ with tab_map:
199
 
200
  with tab_list:
201
  st.subheader("🎯 Priority Investigation")
202
- targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
203
- csv = targets.to_csv(index=False).encode('utf-8')
204
- st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
205
- st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
206
- column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
 
 
207
 
208
  with tab_charts:
209
  c1, c2 = st.columns(2)
210
  with c1:
211
  st.markdown("**Ghost ID Detection**")
212
- fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
213
- color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
214
- fig.add_hline(y=0.2, line_dash="dash", line_color="red")
215
- st.plotly_chart(fig, use_container_width=True)
 
216
  with c2:
217
  st.markdown("**Weekend Activity Analysis**")
218
- wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
219
- wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
220
- fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
221
- st.plotly_chart(fig, use_container_width=True)
 
222
 
223
  with tab_insights:
224
  st.subheader("🔍 AI Detective Insights")
 
63
  # 3. ENHANCED DATA LOADING
64
  @st.cache_data(ttl=300)
65
  def load_data():
66
+ # Strictly load data from CSV - NO RANDOM GENERATION
67
+ try:
68
+ df = pd.read_csv('analyzed_aadhaar_data.csv')
69
+ except FileNotFoundError:
70
+ st.error("❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the file.")
71
+ return pd.DataFrame()
72
 
73
  if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
74
 
75
+ # --- PRECISE DISTRICT GEOLOCATION DATABASE ---
76
+ # Manually curated high-precision coordinates for known districts in the dataset
77
+ district_coords = {
78
+ # High Priority Districts from Snippet
79
+ 'Gautam Buddha Nagar': (28.39, 77.65), # Uttar Pradesh
80
+ 'West Jaintia Hills': (25.55, 92.38), # Meghalaya
81
+ 'West Khasi Hills': (25.56, 91.29), # Meghalaya
82
+ 'Bijapur': (18.80, 80.82), # Chhattisgarh
83
+ 'Dhule': (20.90, 74.77), # Maharashtra
84
+ 'Dhamtari': (20.71, 81.55), # Chhattisgarh
85
+ 'Udupi': (13.34, 74.75), # Karnataka
86
+ 'Supaul': (26.29, 86.82), # Bihar
87
+ 'Puruliya': (23.25, 86.50), # West Bengal
88
+
89
+ # Major Metros & Hubs (Commonly appear)
90
+ 'Mumbai': (19.0760, 72.8777),
91
+ 'Pune': (18.5204, 73.8567),
92
+ 'Nagpur': (21.1458, 79.0882),
93
+ 'Thane': (19.2183, 72.9781),
94
+ 'Nashik': (19.9975, 73.7898),
95
+ 'Lucknow': (26.8467, 80.9462),
96
+ 'Kanpur': (26.4499, 80.3319),
97
+ 'Ghaziabad': (28.6692, 77.4538),
98
+ 'Agra': (27.1767, 78.0081),
99
+ 'Varanasi': (25.3176, 82.9739),
100
+ 'Patna': (25.5941, 85.1376),
101
+ 'Gaya': (24.7914, 85.0002),
102
+ 'Muzaffarpur': (26.1197, 85.3910),
103
+ 'Bangalore': (12.9716, 77.5946), 'Bengaluru': (12.9716, 77.5946),
104
+ 'Mysore': (12.2958, 76.6394),
105
+ 'Hubli': (15.3647, 75.1240),
106
+ 'Mangalore': (12.9141, 74.8560),
107
+ 'Belgaum': (15.8497, 74.4977),
108
+ 'Chennai': (13.0827, 80.2707),
109
+ 'Coimbatore': (11.0168, 76.9558),
110
+ 'Madurai': (9.9252, 78.1198),
111
+ 'Kolkata': (22.5726, 88.3639),
112
+ 'Howrah': (22.5958, 88.2636),
113
+ 'Darjeeling': (27.0410, 88.2663),
114
+ 'Ahmedabad': (23.0225, 72.5714),
115
+ 'Surat': (21.1702, 72.8311),
116
+ 'Vadodara': (22.3072, 73.1812),
117
+ 'Rajkot': (22.3039, 70.8022),
118
+ 'Jaipur': (26.9124, 75.7873),
119
+ 'Jodhpur': (26.2389, 73.0243),
120
+ 'Udaipur': (24.5854, 73.7125),
121
+ 'Hyderabad': (17.3850, 78.4867),
122
+ 'Warangal': (17.9689, 79.5941),
123
+ 'Bhopal': (23.2599, 77.4126),
124
+ 'Indore': (22.7196, 75.8577),
125
+ 'Raipur': (21.2514, 81.6296),
126
+ 'Bilaspur': (22.0797, 82.1409),
127
+ 'Guwahati': (26.1445, 91.7362),
128
+ 'Visakhapatnam': (17.6868, 83.2185),
129
+ 'Vijayawada': (16.5062, 80.6480),
130
+ 'Thiruvananthapuram': (8.5241, 76.9366),
131
+ 'Kochi': (9.9312, 76.2673),
132
+ 'Kozhikode': (11.2588, 75.7804)
133
+ }
134
+
135
+ # Fallback State Centers (Only used if District is NOT in above list)
136
  state_centers = {
137
  'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
138
  'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
 
147
  'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
148
  'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
149
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
  def get_coords(row):
152
+ district = str(row.get('district', '')).strip()
153
+ state = row.get('state', '')
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
+ # 1. Try Exact District Match
156
+ if district in district_coords:
157
+ base_lat, base_lon = district_coords[district]
158
+ # Tiny jitter just to separate overlapping dots from same district
159
+ return pd.Series({'lat': base_lat + np.random.normal(0, 0.005), 'lon': base_lon + np.random.normal(0, 0.005)})
160
+
161
+ # 2. Fallback to State Center with Randomized Jitter (Only if district unknown)
162
+ center = state_centers.get(state, (20.5937, 78.9629))
163
  np.random.seed(hash(state + district) % 2**32)
 
 
164
  return pd.Series({
165
+ 'lat': center[0] + np.random.uniform(-0.5, 0.5),
166
+ 'lon': center[1] + np.random.uniform(-0.5, 0.5)
167
  })
168
 
169
  coords = df.apply(get_coords, axis=1)
170
  df['lat'], df['lon'] = coords['lat'], coords['lon']
171
+
172
+ # Recalculate Risk Category based on real data
173
  df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
174
  return df
175
 
176
  with st.spinner('Loading S.T.A.R.K AI System...'):
177
  df = load_data()
 
 
178
 
179
  # 4. SIDEBAR & FILTERS
180
  with st.sidebar:
181
  st.markdown("### 🛡️ S.T.A.R.K AI Control")
182
  st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
+ if not df.empty:
185
+ if 'date' in df.columns:
186
+ min_d, max_d = df['date'].min().date(), df['date'].max().date()
187
+ dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
188
+ if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
189
+
190
+ state_list = ['All'] + sorted(df['state'].unique().tolist())
191
+ sel_state = st.selectbox("State", state_list)
192
+ filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
193
+
194
+ dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
195
+ sel_dist = st.selectbox("District", dist_list)
196
+ if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
197
+
198
+ st.markdown("---")
199
+ risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
200
+ if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
201
+ else:
202
+ filtered_df = pd.DataFrame()
203
+
204
  st.markdown("---")
205
  st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
206
  st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
 
214
  st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
215
 
216
  st.markdown("---")
217
+
218
+ if not filtered_df.empty:
219
+ m1, m2, m3, m4, m5 = st.columns(5)
220
+ total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
221
+ m1.metric("Total Centers", f"{total:,}", border=True)
222
+ m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
223
+ m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
224
+ m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
225
+ m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
226
+ else:
227
+ st.warning("No data available to calculate metrics.")
228
+
229
  st.markdown("##")
230
 
231
  # 6. TABS
 
236
  with c_map:
237
  if not filtered_df.empty:
238
  fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
239
+ color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=3.8 if sel_state == 'All' else 5.5,
240
+ center={"lat": 22.0, "lon": 80.0}, hover_name="district", hover_data={"state":True, "pincode":True}, mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
241
  fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
242
  st.plotly_chart(fig, use_container_width=True)
243
+ else: st.warning("No data found to map.")
244
 
245
  with c_det:
246
  st.subheader("🔥 Top Hotspots")
 
252
 
253
  with tab_list:
254
  st.subheader("🎯 Priority Investigation")
255
+ if not filtered_df.empty:
256
+ targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
257
+ csv = targets.to_csv(index=False).encode('utf-8')
258
+ st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
259
+ st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
260
+ column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
261
+ else: st.warning("No data available.")
262
 
263
  with tab_charts:
264
  c1, c2 = st.columns(2)
265
  with c1:
266
  st.markdown("**Ghost ID Detection**")
267
+ if not filtered_df.empty:
268
+ fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
269
+ color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
270
+ fig.add_hline(y=0.2, line_dash="dash", line_color="red")
271
+ st.plotly_chart(fig, use_container_width=True)
272
  with c2:
273
  st.markdown("**Weekend Activity Analysis**")
274
+ if not filtered_df.empty:
275
+ wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
276
+ wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
277
+ fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
278
+ st.plotly_chart(fig, use_container_width=True)
279
 
280
  with tab_insights:
281
  st.subheader("πŸ” AI Detective Insights")