Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import numpy as np | |
| from datetime import datetime | |
# 1. PAGE CONFIGURATION
# Page-level settings are gathered in one mapping so they read as data and
# are handed to Streamlit in a single call.
_page_settings = {
    "page_title": "S.T.A.R.K AI | UIDAI Fraud Detection",
    "page_icon": "",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
# 2. PROFESSIONAL STYLING (THEME OVERRIDE)
# The stylesheet lives in a named constant so the injection call below stays
# a one-liner. The CSS text itself is unchanged.
_THEME_CSS = """
<style>
/* IMPORT FONTS */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
/* FORCE LIGHT THEME BACKGROUNDS & TEXT */
.stApp {
background-color: #f8fafc; /* Light Blue-Grey */
color: #0f172a; /* Slate 900 */
font-family: 'Inter', sans-serif;
}
/* METRIC CARDS - GLASSMORPHISM */
div[data-testid="stMetric"] {
background-color: #ffffff;
border: 1px solid #e2e8f0;
border-radius: 8px;
padding: 15px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
transition: transform 0.2s;
}
div[data-testid="stMetric"]:hover {
transform: translateY(-2px);
box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
}
/* FORCE DARK TEXT FOR METRICS (Fixes White-on-White) */
div[data-testid="stMetricValue"] {
color: #0f172a !important;
font-weight: 700 !important;
}
div[data-testid="stMetricLabel"] {
color: #64748b !important; /* Slate 500 */
}
/* DATAFRAME STYLING (Fixes White-on-White) */
div[data-testid="stDataFrame"] div[role="grid"] {
color: #334155 !important; /* Slate 700 */
background-color: white !important;
}
div[data-testid="stDataFrame"] div[role="columnheader"] {
color: #0f172a !important;
font-weight: 600 !important;
background-color: #f1f5f9 !important;
}
/* SIDEBAR STYLING */
[data-testid="stSidebar"] {
background-color: #1e293b; /* Slate 800 */
}
[data-testid="stSidebar"] * {
color: #f8fafc !important; /* Light text for sidebar */
}
[data-testid="stSidebar"] .stSelectbox label,
[data-testid="stSidebar"] .stMultiSelect label {
color: #94a3b8 !important;
}
/* HEADERS */
h1, h2, h3 {
color: #0f172a !important;
font-weight: 700 !important;
}
/* CUSTOM BADGES */
.status-badge {
display: inline-flex;
align-items: center;
padding: 4px 12px;
border-radius: 9999px;
font-size: 12px;
font-weight: 600;
}
.bg-red { background-color: #fee2e2; color: #991b1b; }
.bg-green { background-color: #dcfce7; color: #166534; }
/* MAP CANVAS FIX */
.js-plotly-plot .plotly .main-svg {
background-color: rgba(0,0,0,0) !important;
}
</style>
"""

# unsafe_allow_html is required for the <style> tag to reach the page.
st.markdown(_THEME_CSS, unsafe_allow_html=True)
# 3. SMART DATA LOADING (MAPPING)
# Approximate centre point (lat, lon) for each Indian state / union territory.
_STATE_CENTERS = {
    'Andaman and Nicobar Islands': (11.7401, 92.6586),
    'Andhra Pradesh': (15.9129, 79.7400),
    'Arunachal Pradesh': (28.2180, 94.7278),
    'Assam': (26.2006, 92.9376),
    'Bihar': (25.0961, 85.3131),
    'Chandigarh': (30.7333, 76.7794),
    'Chhattisgarh': (21.2787, 81.8661),
    'Dadra and Nagar Haveli and Daman and Diu': (20.4283, 72.8397),
    'Delhi': (28.7041, 77.1025),
    'Goa': (15.2993, 74.1240),
    'Gujarat': (22.2587, 71.1924),
    'Haryana': (29.0588, 76.0856),
    'Himachal Pradesh': (31.9579, 77.1095),
    'Jammu and Kashmir': (33.7782, 76.5762),
    'Jharkhand': (23.6102, 85.2799),
    'Karnataka': (15.3173, 75.7139),
    'Kerala': (10.8505, 76.2711),
    'Ladakh': (34.1526, 77.5770),
    'Lakshadweep': (10.5667, 72.6417),
    'Madhya Pradesh': (22.9734, 78.6569),
    'Maharashtra': (19.7515, 75.7139),
    'Manipur': (24.6637, 93.9063),
    'Meghalaya': (25.4670, 91.3662),
    'Mizoram': (23.1645, 92.9376),
    'Nagaland': (26.1584, 94.5624),
    'Odisha': (20.9517, 85.0985),
    'Puducherry': (11.9416, 79.8083),
    'Punjab': (31.1471, 75.3412),
    'Rajasthan': (27.0238, 74.2179),
    'Sikkim': (27.5330, 88.5122),
    'Tamil Nadu': (11.1271, 78.6569),
    'Telangana': (18.1124, 79.0193),
    'Tripura': (23.9408, 91.9882),
    'Uttar Pradesh': (26.8467, 80.9462),
    'Uttarakhand': (30.0668, 79.0193),
    'West Bengal': (22.9868, 87.8550),
}

# Fallback (centre of India) for states that are missing or not in the table.
_INDIA_CENTER = (20.5937, 78.9629)


def _district_offset(state, district):
    """Return a stable (lat, lon) offset of up to 1.5 degrees for a district.

    The offset is derived from a CRC32 of the state/district names, so the
    same district lands on the same spot in every process. The built-in
    ``hash()`` is salted per interpreter run for strings and would move the
    clusters on each restart.
    """
    import zlib  # local import keeps this block self-contained

    seed = zlib.crc32(f"{state}|{district}".encode("utf-8"))
    # A private Generator avoids re-seeding NumPy's global random state.
    rng = np.random.default_rng(seed)
    return rng.uniform(-1.5, 1.5), rng.uniform(-1.5, 1.5)


def _synthetic_records(n_rows=200):
    """Build a random stand-in dataset with the columns the dashboard reads."""
    return pd.DataFrame({
        'date': pd.date_range(start="2025-01-01", periods=n_rows),
        'state': np.random.choice(['Maharashtra', 'Uttar Pradesh', 'Bihar', 'Karnataka', 'Delhi', 'West Bengal', 'Tamil Nadu', 'Gujarat', 'Rajasthan', 'Kerala'], n_rows),
        'district': np.random.choice(['North', 'South', 'East', 'West', 'Central', 'Rural A', 'Urban B'], n_rows),
        'pincode': np.random.randint(110001, 800000, n_rows),
        'RISK_SCORE': np.random.uniform(15, 99, n_rows),
        'total_activity': np.random.randint(50, 800, n_rows),
        'enrol_adult': np.random.randint(10, 400, n_rows),
        'ratio_deviation': np.random.uniform(-0.15, 0.6, n_rows),
        'is_weekend': np.random.choice([0, 1], n_rows, p=[0.7, 0.3]),
    })


def load_data(path='analyzed_aadhaar_data.csv'):
    """Load the analysed centre data and attach map coordinates and risk bands.

    Args:
        path: CSV file to read. If the file does not exist, a 200-row
            synthetic dataset is generated instead.

    Returns:
        A DataFrame with the source columns plus:
        ``lat`` / ``lon`` - state centre + per-district offset + small jitter;
        ``risk_category`` - ``RISK_SCORE`` binned into Low (<=50),
        Medium (<=75), High (<=85) and Critical (<=100).

    Raises:
        KeyError: if the data has no ``RISK_SCORE`` column.
    """
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        df = _synthetic_records()

    # Standardize Date
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    # SMART GEO-CLUSTERING LOGIC
    n_rows = len(df)
    # Missing columns fall back to the same defaults the per-row lookup used.
    states = df['state'].tolist() if 'state' in df.columns else ['Delhi'] * n_rows
    if 'district' in df.columns:
        districts = [str(name) for name in df['district']]
    else:
        districts = ['Unknown'] * n_rows

    # Each distinct (state, district) pair is hashed once and then reused.
    offsets = {}
    cluster_lat = np.empty(n_rows)
    cluster_lon = np.empty(n_rows)
    for i, (state, district) in enumerate(zip(states, districts)):
        # A NaN / unknown state simply misses the table and uses the fallback.
        base_lat, base_lon = _STATE_CENTERS.get(state, _INDIA_CENTER)
        key = (str(state), district)
        if key not in offsets:
            offsets[key] = _district_offset(*key)
        off_lat, off_lon = offsets[key]
        cluster_lat[i] = base_lat + off_lat
        cluster_lon[i] = base_lon + off_lon

    # INDIVIDUAL CENTER JITTER: tiny unseeded noise (sigma 0.04 degrees) so
    # points in the same district do not stack perfectly.
    jitter = np.random.default_rng().normal(0, 0.04, size=(n_rows, 2))
    df['lat'] = cluster_lat + jitter[:, 0]
    df['lon'] = cluster_lon + jitter[:, 1]

    # Risk Categories
    df['risk_category'] = pd.cut(
        df['RISK_SCORE'],
        bins=[-1, 50, 75, 85, 100],
        labels=['Low', 'Medium', 'High', 'Critical'],
    )
    return df
# Load Data
# Streamlit re-executes this whole script on every widget interaction.
# Without caching, each rerun would regenerate the random fallback data and
# the coordinate jitter, so filters would act on a different dataset each
# click. Caching keeps one stable frame per server process.
# NOTE: an updated CSV is only picked up after the cache is cleared.
@st.cache_data
def _load_data_cached():
    """Run ``load_data`` once and reuse the result across script reruns."""
    return load_data()


df = _load_data_cached()
# 4. SIDEBAR & FILTERS
# Builds `filtered_df` (plus `selected_state` / `selected_district`) from the
# sidebar widgets; everything below the sidebar renders from `filtered_df`.
with st.sidebar:
    st.markdown("### S.T.A.R.K AI Control")
    st.markdown("---")
    # State Filter
    # dropna(): a missing state would otherwise make sorted() compare a float
    # NaN with strings and raise TypeError.
    state_list = ['All'] + sorted(df['state'].dropna().unique().tolist())
    selected_state = st.selectbox("Select State", state_list)
    # District Filter (only offered once a single state is chosen)
    if selected_state != 'All':
        filtered_df = df[df['state'] == selected_state]
        district_list = ['All'] + sorted(filtered_df['district'].dropna().unique().tolist())
    else:
        filtered_df = df.copy()
        district_list = ['All']
    selected_district = st.selectbox("Select District", district_list)
    if selected_district != 'All':
        filtered_df = filtered_df[filtered_df['district'] == selected_district]
    st.markdown("---")
    # Risk Filter
    risk_filter = st.multiselect(
        "Risk Level",
        options=['Low', 'Medium', 'High', 'Critical'],
        default=['High', 'Critical']
    )
    # An empty selection means "no risk filtering", not "show nothing".
    if risk_filter:
        filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
    st.markdown("---")
    # Links
    st.markdown("**Resources**")
    st.link_button("Open Notebook in Colab", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing")
    st.markdown("---")
    st.info("**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571")
# 5. HEADER & KPI METRICS
# Status badge markup, kept at module level so the layout code stays short.
_STATUS_BADGE_HTML = """
<div style="text-align: right; padding-top: 20px;">
<span class="status-badge bg-green">System Online</span>
<div style="font-size: 12px; color: #64748b; margin-top: 5px;">Live Monitor</div>
</div>
"""

col1, col2 = st.columns([3, 1])
with col1:
    st.title("Project S.T.A.R.K AI Dashboard")
    st.markdown("Context-Aware Fraud Detection System")
with col2:
    st.markdown(_STATUS_BADGE_HTML, unsafe_allow_html=True)
st.markdown("---")

# METRICS ROW
# Counts are taken by summing boolean masks over the filtered frame.
_risk_scores = filtered_df['RISK_SCORE']
_weekend_mask = (filtered_df['is_weekend'] == 1) & (_risk_scores > 70)

total_centers = len(filtered_df)
high_risk = int((_risk_scores > 75).sum())
if filtered_df.empty:
    avg_risk = 0
else:
    avg_risk = _risk_scores.mean()
weekend_alerts = int(_weekend_mask.sum())

m1, m2, m3, m4 = st.columns(4)
m1.metric("Total Centers", f"{total_centers:,}", border=True)
m2.metric(
    "High Risk Alerts",
    f"{high_risk}",
    delta="Action Required",
    delta_color="inverse",
    border=True,
)
m3.metric("Avg. Risk Score", f"{avg_risk:.1f}/100", border=True)
m4.metric(
    "Weekend Spikes",
    f"{weekend_alerts}",
    delta="Unauthorized",
    delta_color="off",
    border=True,
)
st.markdown("##")  # Spacer
# 6. MAIN TABS
# Card template for one entry in the "Top Hotspots" column.
_HOTSPOT_CARD_HTML = """
<div style="background: white; padding: 12px; border-radius: 8px; border-left: 5px solid {color}; margin-bottom: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
<div style="font-weight: 600; color: #1e293b;">{district}</div>
<div style="font-size: 13px; color: #64748b;">Avg Risk: <b>{score:.1f}</b></div>
</div>
"""

tab_map, tab_list, tab_charts = st.tabs(["Geographic Risk", "Priority List", "Pattern Analytics"])

# TAB 1: GEOGRAPHIC RISK (MAP)
with tab_map:
    col_map, col_details = st.columns([3, 1])
    with col_map:
        if not filtered_df.empty:
            if selected_state != 'All':
                # Zoomed into one state: centre on the points being shown.
                # A fixed all-India centre could leave the selected state at
                # the edge of the view or off-screen.
                map_zoom = 4.5
                map_center = {
                    "lat": float(filtered_df["lat"].mean()),
                    "lon": float(filtered_df["lon"].mean()),
                }
            else:
                map_zoom = 3.5
                map_center = {"lat": 22.0, "lon": 80.0}  # Center of India
            # Using Open-Street-Map for better contrast and no-token requirement
            fig_map = px.scatter_mapbox(
                filtered_df,
                lat="lat",
                lon="lon",
                color="RISK_SCORE",
                size="total_activity",
                # Traffic Light Colors: Green -> Yellow -> Red
                color_continuous_scale=["#22c55e", "#eab308", "#ef4444"],
                size_max=20,
                zoom=map_zoom,
                center=map_center,
                hover_name="pincode",
                hover_data={"district": True, "state": True, "RISK_SCORE": True, "lat": False, "lon": False},
                mapbox_style="open-street-map",
                height=600,
                title="<b>Live Fraud Risk Heatmap</b>"
            )
            fig_map.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig_map, use_container_width=True)
        else:
            st.warning("No data matches current filters.")
    with col_details:
        st.subheader("Top Hotspots")
        if not filtered_df.empty:
            # Five districts with the highest mean risk in the current selection.
            top_districts = filtered_df.groupby('district')['RISK_SCORE'].mean().sort_values(ascending=False).head(5)
            for district, score in top_districts.items():
                # Color code the side bar: red above 80, amber otherwise
                color = "#ef4444" if score > 80 else "#f59e0b"
                st.markdown(
                    _HOTSPOT_CARD_HTML.format(color=color, district=district, score=score),
                    unsafe_allow_html=True,
                )
# TAB 2: PRIORITY LIST (DATAFRAME)
with tab_list:
    st.subheader("Target Investigation List")
    st.markdown("Filter: *Showing centers with Risk Score > 75*")

    # Rows above the investigation threshold, riskiest first.
    above_threshold = filtered_df['RISK_SCORE'] > 75
    target_list = filtered_df[above_threshold].sort_values('RISK_SCORE', ascending=False)

    # Columns shown in the table, and how each one is rendered.
    shown_columns = ['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']
    risk_bar = st.column_config.ProgressColumn(
        "Risk Probability",
        help="Probability of fraud based on context analysis",
        format="%d%%",
        min_value=0,
        max_value=100,
    )
    table_config = {
        "RISK_SCORE": risk_bar,
        "date": st.column_config.DateColumn("Date", format="DD MMM YYYY"),
        "total_activity": st.column_config.NumberColumn("Volume"),
        "enrol_adult": st.column_config.NumberColumn("Adult Enrols"),
    }
    st.dataframe(
        target_list[shown_columns],
        column_config=table_config,
        use_container_width=True,
        hide_index=True,
        height=400,
    )

    # Export Button: the download contains every column of the target rows,
    # not just the ones displayed above.
    csv = target_list.to_csv(index=False).encode('utf-8')
    st.download_button(
        "Download CSV",
        data=csv,
        file_name="uidai_S.T.A.R.K AI_priority_list.csv",
        mime="text/csv",
        type="primary",
    )
# TAB 3: PATTERN ANALYTICS (CHARTS)
with tab_charts:
    if filtered_df.empty:
        # Same empty-selection handling as the map tab, instead of drawing
        # two blank charts.
        st.warning("No data matches current filters.")
    else:
        c1, c2 = st.columns(2)
        with c1:
            st.subheader("Ghost ID Pattern (Ratio Deviation)")
            # Scatter Plot: activity volume against deviation, coloured by risk band
            fig_scatter = px.scatter(
                filtered_df,
                x="total_activity",
                y="ratio_deviation",
                color="risk_category",
                color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'},
                title="Deviation from District Baseline",
                labels={"ratio_deviation": "Deviation Score", "total_activity": "Daily Transactions"},
                hover_data=['pincode', 'district']
            )
            # Horizontal reference line at a deviation of 0.2.
            fig_scatter.add_hline(y=0.2, line_dash="dash", line_color="red", annotation_text="Fraud Threshold")
            st.plotly_chart(fig_scatter, use_container_width=True)
        with c2:
            st.subheader("Risk Distribution")
            # Histogram of risk scores in the current selection
            fig_hist = px.histogram(
                filtered_df,
                x="RISK_SCORE",
                nbins=20,
                color_discrete_sequence=['#3b82f6'],
                title="Frequency of Risk Scores"
            )
            fig_hist.update_layout(bargap=0.1)
            st.plotly_chart(fig_hist, use_container_width=True)
# 7. FOOTER
# Footer markup is kept in a constant; the rule above it separates it from
# the tab content.
_FOOTER_HTML = """
<div style="text-align: center; font-size: 13px; color: #94a3b8;">
<b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026 | Team UIDAI_4571<br>
<i>Confidential - For Official Use Only</i>
</div>
"""

st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)