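# NOTE(review): the captured page began with "Spaces: Sleeping / Sleeping" —
# presumably the hosting page's status banner, not part of the program.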
import html
import zlib
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
# 1. PAGE CONFIGURATION
# Browser-tab title/icon and overall layout for the dashboard.
st.set_page_config(
    page_title="S.T.A.R.K AI | UIDAI Fraud Detection",
    # The icon had been stored as mis-decoded bytes ("π‘οΈ"); restored to the
    # shield emoji so the browser tab shows an icon instead of stray text.
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)
# 2. ENHANCED PROFESSIONAL STYLING (Optimized)
# The stylesheet is kept in a named constant and injected once via
# st.markdown; unsafe_allow_html is required for the <style> tag to be emitted.
_APP_STYLESHEET = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
.stApp { background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%); color: #0f172a; font-family: 'Inter', sans-serif; }
/* METRIC CARDS */
div[data-testid="stMetric"] {
background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
border: 1px solid #e2e8f0; border-radius: 12px; padding: 20px;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1); transition: transform 0.2s;
}
div[data-testid="stMetric"]:hover { transform: translateY(-2px); box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1); }
div[data-testid="stMetricValue"] { color: #0f172a !important; font-weight: 800 !important; font-size: 2rem !important; }
div[data-testid="stMetricLabel"] { color: #64748b !important; font-weight: 600 !important; text-transform: uppercase; font-size: 0.75rem; letter-spacing: 0.05em; }
/* DATAFRAME */
div[data-testid="stDataFrame"] { border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
div[data-testid="stDataFrame"] div[role="columnheader"] {
background: linear-gradient(to bottom, #f8fafc, #f1f5f9) !important;
color: #0f172a !important; font-weight: 700 !important; border-bottom: 2px solid #cbd5e1 !important;
}
/* SIDEBAR */
[data-testid="stSidebar"] { background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%); border-right: 1px solid #334155; }
[data-testid="stSidebar"] * { color: #f8fafc !important; }
[data-testid="stSidebar"] .stSelectbox label { color: #cbd5e1 !important; }
/* UI ELEMENTS */
h1 { background: linear-gradient(135deg, #0f172a 0%, #334155 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800 !important; }
.status-badge { display: inline-flex; align-items: center; padding: 6px 14px; border-radius: 9999px; font-size: 12px; font-weight: 700; text-transform: uppercase; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
.bg-red { background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); color: #991b1b; }
.bg-green { background: linear-gradient(135deg, #dcfce7 0%, #bbf7d0 100%); color: #166534; }
.bg-amber { background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); color: #92400e; }
/* TABS & BUTTONS */
.stTabs [data-baseweb="tab-list"] { gap: 8px; }
.stTabs [aria-selected="true"] { background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%); color: white !important; }
.stButton button { border-radius: 8px; font-weight: 600; }
/* HOTSPOTS */
.hotspot-card { background: white; padding: 16px; border-radius: 10px; border-left: 5px solid; margin-bottom: 12px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); transition: all 0.2s; }
.hotspot-card:hover { transform: translateX(4px); box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.js-plotly-plot .plotly .main-svg { background-color: rgba(0,0,0,0) !important; }
</style>
"""
st.markdown(_APP_STYLESHEET, unsafe_allow_html=True)
# 3. ENHANCED DATA LOADING
def load_data(path='analyzed_aadhaar_data.csv'):
    """Load the analysed enrolment CSV and derive map coordinates and risk bands.

    The function deliberately makes no Streamlit calls (e.g. ``st.toast``) so
    that it stays safe to wrap in a Streamlit cache.

    Args:
        path: CSV file to read. Defaults to the file name the dashboard has
            always used, so existing ``load_data()`` calls are unaffected.

    Returns:
        The DataFrame read from ``path`` with these additions:

        * ``date`` parsed with ``pd.to_datetime`` when that column exists;
        * ``lat`` / ``lon``: synthetic plotting coordinates -- the state's
          centre, nudged by compass words found in the district name, plus a
          pseudo-random jitter that is identical for every row of the same
          (state, district) pair and stable across runs;
        * ``risk_category``: ``RISK_SCORE`` binned into Low (<=50),
          Medium (<=75), High (<=85) and Critical (<=100).

    Raises:
        FileNotFoundError: if ``path`` does not exist (from ``pd.read_csv``).
        KeyError: if the CSV has no ``RISK_SCORE`` column.
    """
    # Strictly load data from CSV
    df = pd.read_csv(path)
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    # Centre point (lat, lon) used for each state / union territory.
    state_centers = {
        'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
        'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
        'Chandigarh': (30.7333, 76.7794), 'Chhattisgarh': (21.2787, 81.8661), 'Delhi': (28.7041, 77.1025),
        'Goa': (15.2993, 74.1240), 'Gujarat': (22.2587, 71.1924), 'Haryana': (29.0588, 76.0856),
        'Himachal Pradesh': (31.9579, 77.1095), 'Jammu and Kashmir': (33.7782, 76.5762), 'Jharkhand': (23.6102, 85.2799),
        'Karnataka': (15.3173, 75.7139), 'Kerala': (10.8505, 76.2711), 'Ladakh': (34.1526, 77.5770),
        'Madhya Pradesh': (22.9734, 78.6569), 'Maharashtra': (19.7515, 75.7139), 'Manipur': (24.6637, 93.9063),
        'Meghalaya': (25.4670, 91.3662), 'Mizoram': (23.1645, 92.9376), 'Nagaland': (26.1584, 94.5624),
        'Odisha': (20.9517, 85.0985), 'Puducherry': (11.9416, 79.8083), 'Punjab': (31.1471, 75.3412),
        'Rajasthan': (27.0238, 74.2179), 'Sikkim': (27.5330, 88.5122), 'Tamil Nadu': (11.1271, 78.6569),
        'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
        'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550),
    }
    # Half-extent (lat spread, lon spread) over which a state's points scatter.
    state_spreads = {
        'Kerala': (1.2, 0.25), 'West Bengal': (1.4, 0.4), 'Assam': (0.4, 1.8),
        'Maharashtra': (1.8, 2.2), 'Uttar Pradesh': (1.2, 2.5), 'Bihar': (0.8, 1.5),
        'Delhi': (0.1, 0.12), 'Goa': (0.15, 0.15), 'Chandigarh': (0.04, 0.04),
        'Gujarat': (1.5, 1.8), 'Rajasthan': (2.0, 2.0), 'Madhya Pradesh': (1.8, 2.5),
        'Himachal Pradesh': (0.6, 0.8), 'Punjab': (0.8, 0.9), 'Haryana': (0.9, 0.8),
        'Tamil Nadu': (1.2, 1.0), 'Karnataka': (1.5, 1.2), 'Telangana': (1.0, 1.0),
        'Andhra Pradesh': (1.5, 1.5), 'Odisha': (1.2, 1.2), 'Chhattisgarh': (1.5, 0.9),
        'Jharkhand': (0.8, 1.0), 'Jammu and Kashmir': (1.0, 1.5), 'Ladakh': (1.0, 1.5),
        'Uttarakhand': (0.7, 0.8),
    }

    def district_coords(state, district):
        """Return the (lat, lon) plotting point for one (state, district) pair."""
        # Fallback centre and spread for states missing from the tables above.
        base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))
        lat_scale, lon_scale = state_spreads.get(state, (0.7, 0.7))

        # Compass words in the district name push the point towards that side.
        bias = 0.6
        lat_bias = lon_bias = 0.0
        if 'north' in district:
            lat_bias += lat_scale * bias
        if 'south' in district:
            lat_bias -= lat_scale * bias
        if 'east' in district:
            lon_bias += lon_scale * bias
        if 'west' in district:
            lon_bias -= lon_scale * bias

        # Seed from a CRC rather than hash(): str hashes are salted per
        # process, which made the "fixed" positions move on every restart.
        # A private generator also leaves NumPy's global RNG state untouched.
        seed = zlib.crc32(f"{state}{district}".encode('utf-8'))
        rng = np.random.default_rng(seed)

        # Directionally biased districts get half the scatter.
        rf = 0.5 if (lat_bias or lon_bias) else 1.0
        lat = base_lat + lat_bias + rng.uniform(-lat_scale * rf, lat_scale * rf) + rng.normal(0, 0.04)
        lon = base_lon + lon_bias + rng.uniform(-lon_scale * rf, lon_scale * rf) + rng.normal(0, 0.04)
        return lat, lon

    # Same defaults as before when a column is absent.
    states = df['state'] if 'state' in df.columns else pd.Series('Delhi', index=df.index)
    districts = df['district'] if 'district' in df.columns else pd.Series('Unknown', index=df.index)
    districts = districts.astype(str).str.lower()

    # Every row of a district maps to the same point, so compute each pair once.
    memo = {}
    lats, lons = [], []
    for state, district in zip(states, districts):
        key = (str(state), district)
        if key not in memo:
            memo[key] = district_coords(state, district)
        lat, lon = memo[key]
        lats.append(lat)
        lons.append(lon)
    df['lat'] = pd.Series(lats, index=df.index, dtype=float)
    df['lon'] = pd.Series(lons, index=df.index, dtype=float)

    df['risk_category'] = pd.cut(
        df['RISK_SCORE'],
        bins=[-1, 50, 75, 85, 100],
        labels=['Low', 'Medium', 'High', 'Critical'],
    )
    return df
# Streamlit re-runs this script on every widget interaction, so the loader is
# wrapped in st.cache_data: the CSV is read and processed once and later runs
# receive a copy of the cached frame. show_spinner=False avoids a second
# spinner inside the explicit one below.
# Any success toast belongs out here, not inside the cached loader.
with st.spinner('Loading S.T.A.R.K AI System...'):
    df = st.cache_data(load_data, show_spinner=False)()
# 4. SIDEBAR & FILTERS
# Builds the filter widgets and narrows `df` step by step into `filtered_df`,
# which every section further down reads.
with st.sidebar:
    st.markdown("### 🛡️ S.T.A.R.K AI Control")
    st.markdown("---")

    # Date range: only offered when the column exists and holds at least one
    # real date (an all-NaT column has no usable min/max for the widget).
    if 'date' in df.columns and df['date'].notna().any():
        min_d, max_d = df['date'].min().date(), df['date'].max().date()
        dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
        # While the user is mid-selection only one date is returned; wait for both.
        if len(dr) == 2:
            row_dates = df['date'].dt.date
            df = df[(row_dates >= dr[0]) & (row_dates <= dr[1])]

    # dropna(): sorted() cannot compare NaN with strings.
    state_list = ['All'] + sorted(df['state'].dropna().unique().tolist())
    sel_state = st.selectbox("State", state_list)
    filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()

    # District choices depend on the state chosen above.
    dist_list = ['All'] + sorted(filtered_df['district'].dropna().unique().tolist())
    sel_dist = st.selectbox("District", dist_list)
    if sel_dist != 'All':
        filtered_df = filtered_df[filtered_df['district'] == sel_dist]

    st.markdown("---")
    risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
    # An empty selection means "no risk filter", not "show nothing".
    if risk_filter:
        filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]

    st.markdown("---")
    st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
    st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
# 5. HEADER & METRICS
# Title row with a status badge, then five headline metrics computed from
# the currently filtered rows.
col1, col2 = st.columns([3, 1])
with col1:
    st.title("🛡️ S.T.A.R.K AI Dashboard")
    st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
    st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
st.markdown("---")

m1, m2, m3, m4, m5 = st.columns(5)

# Counts come straight from boolean masks; "High Risk" (>75) includes the
# "Critical" (>85) rows, as before.
risk_scores = filtered_df['RISK_SCORE']
total = len(filtered_df)
high = int((risk_scores > 75).sum())
crit = int((risk_scores > 85).sum())
weekend_spikes = int(((filtered_df['is_weekend'] == 1) & (risk_scores > 70)).sum())

m1.metric("Total Centers", f"{total:,}", border=True)
m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
# mean() of an empty selection is NaN, so show a plain "0" instead.
m4.metric("Avg Risk", f"{risk_scores.mean():.1f}/100" if not filtered_df.empty else "0", border=True)
m5.metric("Weekend Spikes", f"{weekend_spikes}", delta="Suspicious", delta_color="off", border=True)
st.markdown("##")
# 6. TABS
# Tab labels had been stored as mis-decoded bytes; the emoji are restored
# (the map icon exactly, the other three chosen to match their captions).
tab_map, tab_list, tab_charts, tab_insights = st.tabs(
    ["🗺️ Geographic Risk", "📋 Priority List", "📊 Patterns", "🔍 AI Insights"]
)
# Geographic tab: risk map on the left, top-5 district hotspots on the right.
with tab_map:
    c_map, c_det = st.columns([3, 1])
    with c_map:
        if not filtered_df.empty:
            # When a single state is selected the view zooms in, so it must
            # also re-centre on that state's points; a fixed all-India centre
            # leaves states far from it outside the zoomed viewport.
            if sel_state != 'All':
                map_zoom = 4.8
                map_center = {
                    "lat": float(filtered_df['lat'].mean()),
                    "lon": float(filtered_df['lon'].mean()),
                }
            else:
                map_zoom = 3.8
                map_center = {"lat": 22.0, "lon": 80.0}

            fig = px.scatter_mapbox(
                filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
                color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"],
                size_max=25, zoom=map_zoom, center=map_center, hover_name="district",
                mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>",
            )
            fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No data found.")
    with c_det:
        st.subheader("🔥 Top Hotspots")
        if not filtered_df.empty:
            # Rank districts by mean risk; activity is summed for display only.
            top = (
                filtered_df.groupby('district')
                .agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'})
                .sort_values('RISK_SCORE', ascending=False)
                .head(5)
            )
            for i, (d, r) in enumerate(top.iterrows(), 1):
                # Red accent above 85, orange otherwise.
                clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
                # District names come from the CSV and are placed into raw
                # HTML, so escape them before interpolation.
                st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {html.escape(str(d))}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
# Priority tab: every row with RISK_SCORE above 75, highest first, with a CSV export.
with tab_list:
    st.subheader("🎯 Priority Investigation")
    targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
    csv = targets.to_csv(index=False).encode('utf-8')
    st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")

    # The rest of the script treats 'date' as optional, so show only the
    # preferred columns that actually exist instead of raising KeyError.
    preferred_cols = ['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']
    display_cols = [c for c in preferred_cols if c in targets.columns]
    st.dataframe(
        targets[display_cols],
        column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)},
        use_container_width=True,
        hide_index=True,
    )
# Patterns tab: two side-by-side charts built from the filtered rows.
with tab_charts:
    left_col, right_col = st.columns(2)

    # Left: activity volume against ratio deviation, one bubble per row.
    with left_col:
        st.markdown("**Ghost ID Detection**")
        risk_palette = {
            'Critical': '#ef4444',
            'High': '#f97316',
            'Medium': '#eab308',
            'Low': '#22c55e',
        }
        ghost_fig = px.scatter(
            filtered_df,
            x="total_activity",
            y="ratio_deviation",
            color="risk_category",
            size="RISK_SCORE",
            color_discrete_map=risk_palette,
            height=350,
        )
        # Dashed red reference line at a ratio deviation of 0.2.
        ghost_fig.add_hline(y=0.2, line_dash="dash", line_color="red")
        st.plotly_chart(ghost_fig, use_container_width=True)

    # Right: total activity split by the is_weekend flag.
    with right_col:
        st.markdown("**Weekend Activity Analysis**")
        day_labels = {0: 'Weekday', 1: 'Weekend'}
        day_palette = {'Weekday': '#3b82f6', 'Weekend': '#ef4444'}
        activity_by_day = filtered_df.groupby('is_weekend', as_index=False)['total_activity'].sum()
        activity_by_day['Type'] = activity_by_day['is_weekend'].map(day_labels)
        weekend_fig = px.bar(
            activity_by_day,
            x='Type',
            y='total_activity',
            color='Type',
            color_discrete_map=day_palette,
            height=350,
        )
        st.plotly_chart(weekend_fig, use_container_width=True)
# Insights tab: anomaly count plus summary bullet points.
with tab_insights:
    st.subheader("🔍 AI Detective Insights")
    if not filtered_df.empty:
        # Rows whose ratio_deviation exceeds 0.4 are the ones counted as anomalies.
        anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
        st.info(f"🤖 **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2σ from mean).")
        c_i1, c_i2 = st.columns(2)
        with c_i1:
            # NOTE(review): the percentages below are fixed text, not computed
            # from filtered_df — confirm they are still accurate.
            st.markdown("#### 🚨 Primary Risk Factors")
            st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
            st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
        with c_i2:
            st.markdown("#### 💡 Recommended Actions")
            st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
            st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
    else:
        # Same empty-state message the map tab uses.
        st.warning("No data found.")
# Page footer: a divider followed by a centred credit line.
_FOOTER_HTML = """<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)