import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go import numpy as np from datetime import datetime # 1. PAGE CONFIGURATION st.set_page_config( page_title="S.T.A.R.K AI | UIDAI Fraud Detection", page_icon="", layout="wide", initial_sidebar_state="expanded" ) # 2. PROFESSIONAL STYLING (THEME OVERRIDE) st.markdown(""" """, unsafe_allow_html=True) # 3. SMART DATA LOADING (MAPPING) @st.cache_data def load_data(): # 1. Load or Generate Data try: df = pd.read_csv('analyzed_aadhaar_data.csv') except FileNotFoundError: # Dummy Data Generator if file missing dates = pd.date_range(start="2025-01-01", periods=200) df = pd.DataFrame({ 'date': dates, 'state': np.random.choice(['Maharashtra', 'Uttar Pradesh', 'Bihar', 'Karnataka', 'Delhi', 'West Bengal', 'Tamil Nadu', 'Gujarat', 'Rajasthan', 'Kerala'], 200), 'district': np.random.choice(['North', 'South', 'East', 'West', 'Central', 'Rural A', 'Urban B'], 200), 'pincode': np.random.randint(110001, 800000, 200), 'RISK_SCORE': np.random.uniform(15, 99, 200), 'total_activity': np.random.randint(50, 800, 200), 'enrol_adult': np.random.randint(10, 400, 200), 'ratio_deviation': np.random.uniform(-0.15, 0.6, 200), 'is_weekend': np.random.choice([0, 1], 200, p=[0.7, 0.3]) }) # Standardize Date if 'date' in df.columns: df['date'] = pd.to_datetime(df['date']) # SMART GEO-CLUSTERING LOGIC # Comprehensive Center Points for Indian States & UTs state_centers = { 'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400), 'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131), 'Chandigarh': (30.7333, 76.7794), 'Chhattisgarh': (21.2787, 81.8661), 'Dadra and Nagar Haveli and Daman and Diu': (20.4283, 72.8397), 'Delhi': (28.7041, 77.1025), 'Goa': (15.2993, 74.1240), 'Gujarat': (22.2587, 71.1924), 'Haryana': (29.0588, 76.0856), 'Himachal Pradesh': (31.9579, 77.1095), 'Jammu and Kashmir': (33.7782, 76.5762), 'Jharkhand': (23.6102, 85.2799), 'Karnataka': (15.3173, 75.7139), 'Kerala': (10.8505, 76.2711), 'Ladakh': (34.1526, 77.5770), 'Lakshadweep': (10.5667, 72.6417), 'Madhya Pradesh': (22.9734, 78.6569), 'Maharashtra': (19.7515, 75.7139), 'Manipur': (24.6637, 93.9063), 'Meghalaya': (25.4670, 91.3662), 'Mizoram': (23.1645, 92.9376), 'Nagaland': (26.1584, 94.5624), 'Odisha': (20.9517, 85.0985), 'Puducherry': (11.9416, 79.8083), 'Punjab': (31.1471, 75.3412), 'Rajasthan': (27.0238, 74.2179), 'Sikkim': (27.5330, 88.5122), 'Tamil Nadu': (11.1271, 78.6569), 'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462), 'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550) } def get_coords(row): state = row.get('state', 'Delhi') district = str(row.get('district', 'Unknown')) # 1. Get State Base Coordinates base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629)) # Default to India Center # 2. DETERMINISTIC HASHING FOR DISTRICT # This ensures "District A" is ALWAYS in the same spot relative to the State Center # Creates distinct clusters instead of random noise district_hash = hash(state + district) np.random.seed(district_hash % 2**32) # Offset the district center by up to 1.5 degrees (~150km) from state center dist_lat_offset = np.random.uniform(-1.5, 1.5) dist_lon_offset = np.random.uniform(-1.5, 1.5) # 3. INDIVIDUAL CENTER JITTER # Add tiny random noise (~4km) so points don't stack perfectly # We re-seed with None to get true randomness for the jitter np.random.seed(None) noise_lat = np.random.normal(0, 0.04) noise_lon = np.random.normal(0, 0.04) return pd.Series({ 'lat': base_lat + dist_lat_offset + noise_lat, 'lon': base_lon + dist_lon_offset + noise_lon }) # Apply coordinates coords = df.apply(get_coords, axis=1) df['lat'] = coords['lat'] df['lon'] = coords['lon'] # Risk Categories df['risk_category'] = pd.cut( df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'] ) return df # Load Data df = load_data() # 4. SIDEBAR & FILTERS with st.sidebar: st.markdown("### S.T.A.R.K AI Control") st.markdown("---") # State Filter state_list = ['All'] + sorted(df['state'].unique().tolist()) selected_state = st.selectbox("Select State", state_list) # District Filter if selected_state != 'All': filtered_df = df[df['state'] == selected_state] district_list = ['All'] + sorted(filtered_df['district'].unique().tolist()) else: filtered_df = df.copy() district_list = ['All'] selected_district = st.selectbox("Select District", district_list) if selected_district != 'All': filtered_df = filtered_df[filtered_df['district'] == selected_district] st.markdown("---") # Risk Filter risk_filter = st.multiselect( "Risk Level", options=['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'] ) if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)] st.markdown("---") # Links st.markdown("**Resources**") st.link_button("Open Notebook in Colab", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing") st.markdown("---") st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571") # 5. HEADER & KPI METRICS col1, col2 = st.columns([3, 1]) with col1: st.title("Project S.T.A.R.K AI Dashboard") st.markdown("Context-Aware Fraud Detection System") with col2: st.markdown("""