# S.A.T.A.R.K AI — Streamlit dashboard (deployed as a Hugging Face Space)
import json
import os
import time
import zlib
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
# 1. PAGE CONFIGURATION
# Must run before any other st.* call; configures the browser tab and layout.
_PAGE_SETTINGS = {
    "page_title": "S.A.T.A.R.K AI | UIDAI Fraud Detection",
    "page_icon": "π‘οΈ",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# 2. ROBUST CSS STYLING (Dark Mode Proof)
# Injected once at startup. Strategy: force a LIGHT theme on the main content
# area and a DARK theme on the sidebar regardless of the user's Streamlit
# theme, by targeting Streamlit's data-testid DOM hooks with !important rules.
# NOTE(review): data-testid selectors are not a stable public API — they can
# break across Streamlit upgrades; re-verify after version bumps.
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
/* --- 1. MAIN CONTENT AREA (Light Theme Enforced) --- */
/* Target only the main content, NOT the sidebar */
.stApp > header { background-color: transparent !important; }
div[data-testid="stAppViewContainer"] {
    background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
}
/* Force Dark Text in Main Area */
section[data-testid="stMain"] * {
    color: #0f172a; /* Dark Blue Text */
}
/* Metric Cards in Main Area */
div[data-testid="stMetric"] {
    background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
    border: 1px solid #e2e8f0;
    border-radius: 12px;
    box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
}
div[data-testid="stMetricValue"] { color: #0f172a !important; }
div[data-testid="stMetricLabel"] { color: #475569 !important; }
/* --- 2. SIDEBAR (Dark Theme Enforced) --- */
section[data-testid="stSidebar"] {
    background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%);
    border-right: 1px solid #334155;
}
/* NUCLEAR OPTION: Force ALL text in sidebar to be White */
section[data-testid="stSidebar"] * {
    color: #f8fafc !important; /* White Text */
}
/* EXCEPTION: Inputs inside Sidebar (Selectbox, DateInput) */
/* These usually have white backgrounds, so we need Dark Text inside them */
section[data-testid="stSidebar"] input,
section[data-testid="stSidebar"] textarea,
section[data-testid="stSidebar"] div[data-baseweb="select"] div {
    color: #0f172a !important; /* Dark Text for Inputs */
    -webkit-text-fill-color: #0f172a !important;
}
/* Specific fix for the 'Selected Option' in dropdowns */
div[role="listbox"] div {
    color: #0f172a !important;
}
/* --- 3. COMMON ELEMENTS --- */
/* DataFrame Headers */
div[data-testid="stDataFrame"] div[role="columnheader"] {
    background-color: #f1f5f9;
    color: #0f172a !important;
}
/* Link Button Style */
section[data-testid="stSidebar"] a {
    background-color: #3b82f6 !important;
    color: white !important;
    text-decoration: none;
    padding: 8px 16px;
    border-radius: 8px;
    display: block;
    text-align: center;
    border: 1px solid #2563eb;
}
/* Hotspot Cards */
.hotspot-card {
    background: white;
    padding: 16px;
    border-radius: 10px;
    border-left: 5px solid;
    margin-bottom: 12px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
/* Since Hotspot Cards are in Main Area, text inherits Dark, which is good. */
/* Status Badges */
.status-badge {
    display: inline-flex; align-items: center;
    padding: 6px 14px; border-radius: 9999px;
    font-size: 12px; font-weight: 700;
    text-transform: uppercase;
}
.bg-green { background: #dcfce7; color: #166534 !important; }
</style>
""", unsafe_allow_html=True)
# 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
def fetch_coordinates_batch(unique_locations):
    """Resolve (district, state) pairs to (lat, lon) coordinates.

    Resolution order:
      1. Persistent JSON cache ('district_coords.json') from previous runs.
      2. Hard-coded prefill table for common/ambiguous districts.
      3. OpenStreetMap Nominatim API for anything still missing, throttled
         to ~1 request/second per the Nominatim usage policy.

    Newly fetched coordinates are written back to the JSON cache.

    Args:
        unique_locations: iterable of (district, state) string tuples.

    Returns:
        dict mapping (district, state) -> (lat, lon). Locations that could
        not be geocoded are simply absent from the result.
    """
    json_file = 'district_coords.json'
    coords_map = {}
    # 1) Load the persistent cache; keys are stored flattened as "district|state".
    if os.path.exists(json_file):
        try:
            with open(json_file, 'r') as f:
                loaded_data = json.load(f)
            for k, v in loaded_data.items():
                if "|" in k:
                    # partition() tolerates stray '|' characters in names;
                    # the original split('|') raised ValueError on 2+ pipes.
                    d, _, s = k.partition("|")
                    coords_map[(d, s)] = tuple(v)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable cache: ignore it and rebuild below.
            pass
    # 2) Known-good coordinates for districts geocoders often misresolve.
    prefills = {
        ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
        ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
        ('West Khasi Hills', 'Meghalaya'): (25.56, 91.29),
        ('Bijapur', 'Chhattisgarh'): (18.80, 80.82),
        ('Dhule', 'Maharashtra'): (20.90, 74.77),
        ('Dhamtari', 'Chhattisgarh'): (20.71, 81.55),
        ('Udupi', 'Karnataka'): (13.34, 74.75),
        ('Supaul', 'Bihar'): (26.29, 86.82),
        ('Puruliya', 'West Bengal'): (23.25, 86.50),
        ('Mumbai', 'Maharashtra'): (19.0760, 72.8777),
        ('Pune', 'Maharashtra'): (18.5204, 73.8567),
        ('Bangalore', 'Karnataka'): (12.9716, 77.5946),
        ('Bengaluru', 'Karnataka'): (12.9716, 77.5946),
        ('Chennai', 'Tamil Nadu'): (13.0827, 80.2707),
        ('Hyderabad', 'Telangana'): (17.3850, 78.4867),
        ('Kolkata', 'West Bengal'): (22.5726, 88.3639),
        ('Delhi', 'Delhi'): (28.7041, 77.1025),
        ('Shimla', 'Himachal Pradesh'): (31.1048, 77.1734)
    }
    for k, v in prefills.items():
        # Cached (real) results take precedence over prefills.
        coords_map.setdefault(k, v)
    missing_locs = [loc for loc in unique_locations if loc not in coords_map]
    if not missing_locs:
        return coords_map
    # 3) Geocode the remainder via Nominatim, with a visible progress bar.
    progress_text = "π‘ New locations found. Fetching coordinates..."
    my_bar = st.progress(0, text=progress_text)
    headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
    updated = False
    for i, (district, state) in enumerate(missing_locs):
        my_bar.progress((i + 1) / len(missing_locs), text=f"π Locating: {district}, {state}")
        try:
            url = "https://nominatim.openstreetmap.org/search"
            params = {'q': f"{district}, {state}, India", 'format': 'json', 'limit': 1}
            response = requests.get(url, params=params, headers=headers, timeout=5)
            if response.status_code == 200:
                results = response.json()  # parse once (original parsed the body twice)
                if results:
                    coords_map[(district, state)] = (float(results[0]['lat']), float(results[0]['lon']))
                    updated = True
        except Exception:
            # Best-effort: a failed lookup just leaves this location unmapped.
            pass
        finally:
            # Throttle even after failures so errors cannot exceed the
            # Nominatim rate limit (the original skipped the sleep on error).
            time.sleep(1.1)
    my_bar.empty()
    # Persist any new results, flattening tuple keys back to "district|state".
    if updated:
        save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
        with open(json_file, 'w') as f:
            json.dump(save_data, f)
    return coords_map
# 4. MAIN DATA LOADER
def load_data():
    """Load and enrich the analyzed Aadhaar dataset.

    Steps: read 'analyzed_aadhaar_data.csv', normalize district/state names,
    attach (lat, lon) per row via the geocoding cache, and bucket RISK_SCORE
    into Low/Medium/High/Critical categories.

    Returns:
        Enriched DataFrame, or an empty DataFrame if the CSV is missing.
    """
    try:
        df = pd.read_csv('analyzed_aadhaar_data.csv')
    except FileNotFoundError:
        # Caller (the dashboard) detects the empty frame and shows an error.
        return pd.DataFrame()
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])
    df['district'] = df['district'].astype(str).str.strip()
    df['state'] = df['state'].astype(str).str.strip()
    # Canonicalize state spellings so filters and geocoding keys line up.
    state_mapping = {
        'Jammu & Kashmir': 'Jammu and Kashmir',
        'J&K': 'Jammu and Kashmir',
        'Jammu And Kashmir': 'Jammu and Kashmir',
        'Andaman & Nicobar Islands': 'Andaman and Nicobar Islands',
        'Dadra and Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Dadra & Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman and Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman & Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'The Dadra And Nagar Haveli And The Daman And Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'Orissa': 'Odisha',
        'Chattisgarh': 'Chhattisgarh',
        'Telengana': 'Telangana',
        'Pondicherry': 'Puducherry'
    }
    df['state'] = df['state'].replace(state_mapping)
    unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
    coords_db = fetch_coordinates_batch(unique_locs)
    # Fallback anchors for districts the geocoder could not resolve.
    state_centers = {
        'Delhi': (28.7041, 77.1025), 'Maharashtra': (19.7515, 75.7139), 'Karnataka': (15.3173, 75.7139)
    }
    def get_lat_lon(row):
        # Known location: small random jitter so co-located rows don't
        # stack into a single indistinguishable map marker.
        key = (row['district'], row['state'])
        if key in coords_db:
            lat, lon = coords_db[key]
            return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
        # Unknown location: deterministic pseudo-random placement near the
        # state center (or the India centroid). zlib.crc32 is stable across
        # processes, unlike built-in hash(), which is randomized per run
        # (PYTHONHASHSEED) — the original seeding was NOT reproducible.
        # A local Generator also avoids clobbering the global NumPy RNG.
        center = state_centers.get(row['state'], (20.5937, 78.9629))
        rng = np.random.default_rng(zlib.crc32(f"{key[0]}|{key[1]}".encode('utf-8')))
        return pd.Series({'lat': center[0] + rng.uniform(-0.5, 0.5), 'lon': center[1] + rng.uniform(-0.5, 0.5)})
    coords = df.apply(get_lat_lon, axis=1)
    df['lat'] = coords['lat']
    df['lon'] = coords['lon']
    # Bucket scores; bins start at -1 so a score of exactly 0 lands in 'Low'.
    df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
    return df
# Load (and, if needed, geocode) the dataset once per script run,
# behind a spinner so the UI isn't blank during the work.
with st.spinner('Initializing S.A.T.A.R.K AI...'):
    df = load_data()
# 5. SIDEBAR & FILTERS
# The widgets below progressively narrow the dataset into `filtered_df`,
# which every downstream section renders from. Streamlit re-runs the whole
# script on each interaction, so statement order here is significant.
with st.sidebar:
    st.markdown("### π‘οΈ S.A.T.A.R.K AI Control")
    st.markdown("---")
    if not df.empty:
        if 'date' in df.columns:
            # Default the date picker to the full span of the data.
            min_d, max_d = df['date'].min().date(), df['date'].max().date()
            dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
            # date_input returns a single date mid-selection; only filter
            # once the user has picked a complete (start, end) range.
            if len(dr) == 2:
                df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
        state_list = ['All'] + sorted(df['state'].unique().tolist())
        sel_state = st.selectbox("State", state_list)
        filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
        # District choices depend on the state selected above.
        dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
        sel_dist = st.selectbox("District", dist_list)
        if sel_dist != 'All':
            filtered_df = filtered_df[filtered_df['district'] == sel_dist]
        st.markdown("---")
        # Defaults to the actionable categories; an empty selection means
        # "no risk filter" (every category passes through).
        risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
        if risk_filter:
            filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
    else:
        # No data loaded: downstream sections detect the empty frame.
        filtered_df = pd.DataFrame()
    st.markdown("---")
    st.link_button("π Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
    st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
# 6. HEADER & METRICS
# Title row with a live "system online" badge on the right.
col1, col2 = st.columns([3, 1])
with col1:
    st.title("π‘οΈ S.A.T.A.R.K AI Dashboard")
    st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
    st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">β System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
st.markdown("---")
if not filtered_df.empty:
    # Headline KPIs over the currently filtered slice.
    m1, m2, m3, m4, m5 = st.columns(5)
    total = len(filtered_df)
    high = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
    crit = len(filtered_df[filtered_df['RISK_SCORE'] > 85])
    m1.metric("Total Centers", f"{total:,}", border=True)
    m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
    m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
    m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100", border=True)
    m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
elif df.empty:
    # Source CSV never loaded — the genuine "data file missing" case.
    st.error("β Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
else:
    # Data loaded fine; the active filters simply matched zero rows.
    # (The original showed the file-not-found error here too, which was
    # misleading whenever a narrow filter emptied the view.)
    st.warning("No centers match the current filters. Adjust the sidebar filters to see data.")
st.markdown("##")
# 7. TABS
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πΊοΈ Geographic Risk", "π Priority List", "π Patterns", "π AI Insights"])

# --- Tab 1: interactive risk map with a top-hotspots side panel ---
with tab_map:
    c_map, c_det = st.columns([3, 1])
    with c_map:
        if not filtered_df.empty:
            # Zoom in progressively as the geographic filter narrows:
            # district -> 10, single state -> 6, all-India -> 3.8.
            zoom_lvl = 10 if sel_dist != 'All' else (6 if sel_state != 'All' else 3.8)
            fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
                color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
                # Recenter on the filtered data when a state is selected.
                center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
                hover_name="district", hover_data={"state": True, "pincode": True},
                mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
            fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig, use_container_width=True)
    with c_det:
        st.subheader("π₯ Top Hotspots")
        if not filtered_df.empty:
            # Rank districts by mean risk score; render the worst five as cards.
            top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
            for i, (d, r) in enumerate(top.iterrows(), 1):
                # Red accent for critical (>85), orange otherwise.
                clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
                st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
# --- Tab 2: exportable priority list of high-risk centers ---
with tab_list:
    st.subheader("π― Priority Investigation")
    if not filtered_df.empty:
        # "Priority" = risk score above 75, worst first.
        targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
        csv = targets.to_csv(index=False).encode('utf-8')
        st.download_button("π₯ Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
        # Risk rendered as an inline progress bar per row.
        st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
            column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
# --- Tab 3: pattern charts (ghost-ID scatter + weekend activity) ---
with tab_charts:
    c1, c2 = st.columns(2)
    with c1:
        st.markdown("**Ghost ID Detection**")
        if not filtered_df.empty:
            fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
                color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
            # Reference line: points above this deviation are flagged visually.
            fig.add_hline(y=0.2, line_dash="dash", line_color="red")
            st.plotly_chart(fig, use_container_width=True)
    with c2:
        st.markdown("**Weekend Activity Analysis**")
        if not filtered_df.empty:
            # Total activity split by weekday vs weekend (is_weekend is 0/1).
            wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
            wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
            fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
            st.plotly_chart(fig, use_container_width=True)
# --- Tab 4: narrative insights + static recommendations ---
with tab_insights:
    st.subheader("π AI Detective Insights")
    if not filtered_df.empty:
        # NOTE(review): the message claims "> 2Ο from mean", but the filter is
        # a fixed ratio_deviation > 0.4 threshold — confirm they correspond.
        anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
        st.info(f"π€ **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Ο from mean).")
        c_i1, c_i2 = st.columns(2)
        with c_i1:
            st.markdown("#### π¨ Primary Risk Factors")
            # Static copy; these percentages are not computed from the data here.
            st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
            st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
        with c_i2:
            st.markdown("#### π‘ Recommended Actions")
            st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
            st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
# Page footer.
st.markdown("---")
st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.A.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)