File size: 16,564 Bytes
db4a594
 
 
 
 
d89ad44
 
55e7233
 
30fc09f
db4a594
86265dd
db4a594
a53bb64
538e361
db4a594
 
 
 
a009ce9
db4a594
 
30fc09f
ec41653
a009ce9
6d804cc
a009ce9
 
 
 
837e300
 
a009ce9
 
 
837e300
a009ce9
 
86265dd
30fc09f
a009ce9
 
 
f2075fc
a009ce9
 
 
 
 
 
 
1489a05
a009ce9
6d804cc
a009ce9
6d804cc
837e300
a009ce9
6d804cc
 
a009ce9
 
 
6d804cc
a009ce9
837e300
7b77dfb
a009ce9
 
06659a7
837e300
a009ce9
 
 
 
 
837e300
 
7b77dfb
a009ce9
 
 
 
e179851
a009ce9
 
 
 
 
e179851
a009ce9
 
 
 
 
 
 
 
 
837e300
6d804cc
ec41653
a009ce9
 
 
 
 
 
 
 
db4a594
 
 
6d804cc
d89ad44
6d804cc
 
 
55e7233
 
 
6d804cc
 
 
 
 
 
 
a009ce9
55e7233
6d804cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31800ef
6d804cc
 
d89ad44
 
6d804cc
 
 
1ef7e77
6d804cc
 
d89ad44
6d804cc
 
d89ad44
6d804cc
 
 
 
 
 
55e7233
6d804cc
a009ce9
6d804cc
31800ef
d89ad44
31800ef
55e7233
6d804cc
 
 
31800ef
d89ad44
 
 
30fc09f
db4a594
79c14a3
 
 
a009ce9
31800ef
 
 
86265dd
d89ad44
 
31800ef
f49bfe4
 
dde2d7e
e0fab4e
 
 
 
 
 
 
f49bfe4
 
 
 
 
 
31800ef
6d804cc
 
 
 
 
31800ef
d89ad44
6d804cc
 
 
 
 
 
 
d89ad44
 
 
 
a009ce9
86265dd
ec41653
a009ce9
3cb671d
db4a594
d89ad44
db4a594
a53bb64
ec41653
31800ef
79c14a3
 
 
a009ce9
31800ef
a009ce9
31800ef
79c14a3
 
a009ce9
31800ef
79c14a3
 
31800ef
 
 
79c14a3
a009ce9
31800ef
a009ce9
79c14a3
 
31800ef
1b754da
a009ce9
 
db4a594
d89ad44
14bb62b
 
a53bb64
30fc09f
14bb62b
a009ce9
db4a594
14bb62b
79c14a3
 
 
a009ce9
 
 
 
79c14a3
a009ce9
 
 
 
79c14a3
a009ce9
79c14a3
f5f7959
db4a594
d89ad44
a009ce9
86265dd
14bb62b
30fc09f
 
86265dd
a009ce9
30fc09f
31800ef
a009ce9
 
31800ef
 
30fc09f
 
 
86265dd
a009ce9
30fc09f
a009ce9
 
db4a594
14bb62b
30fc09f
79c14a3
a009ce9
79c14a3
a009ce9
31800ef
 
ec41653
14bb62b
 
 
30fc09f
79c14a3
 
31800ef
79c14a3
 
14bb62b
30fc09f
79c14a3
a009ce9
 
 
79c14a3
30fc09f
 
 
 
 
a009ce9
30fc09f
 
 
a009ce9
 
30fc09f
 
a009ce9
 
ec41653
5a24c85
06659a7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import requests
import time
import json
import os
from datetime import datetime, timedelta

# 1. PAGE CONFIGURATION
# Streamlit requires set_page_config to be the first st.* call in the script.
_PAGE_CONFIG = dict(
    page_title="S.A.T.A.R.K AI | UIDAI Fraud Detection",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_CONFIG)

# 2. ROBUST CSS STYLING (Dark Mode Proof)
# One injected <style> block that pins the theme regardless of the user's
# Streamlit light/dark preference: main content is forced light with dark
# text, the sidebar is forced dark with white text (with an exception so
# text typed INSIDE sidebar inputs stays dark on their white backgrounds).
# Also defines the .hotspot-card and .status-badge classes used by the
# raw-HTML snippets rendered further down.
st.markdown("""
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
        
        /* --- 1. MAIN CONTENT AREA (Light Theme Enforced) --- */
        /* Target only the main content, NOT the sidebar */
        .stApp > header { background-color: transparent !important; }
        
        div[data-testid="stAppViewContainer"] {
            background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
        }
        
        /* Force Dark Text in Main Area */
        section[data-testid="stMain"] * {
            color: #0f172a; /* Dark Blue Text */
        }
        
        /* Metric Cards in Main Area */
        div[data-testid="stMetric"] {
            background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
            border: 1px solid #e2e8f0; 
            border-radius: 12px; 
            box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
        }
        div[data-testid="stMetricValue"] { color: #0f172a !important; }
        div[data-testid="stMetricLabel"] { color: #475569 !important; }

        /* --- 2. SIDEBAR (Dark Theme Enforced) --- */
        section[data-testid="stSidebar"] {
            background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%);
            border-right: 1px solid #334155;
        }

        /* NUCLEAR OPTION: Force ALL text in sidebar to be White */
        section[data-testid="stSidebar"] * {
            color: #f8fafc !important; /* White Text */
        }

        /* EXCEPTION: Inputs inside Sidebar (Selectbox, DateInput) */
        /* These usually have white backgrounds, so we need Dark Text inside them */
        section[data-testid="stSidebar"] input, 
        section[data-testid="stSidebar"] textarea, 
        section[data-testid="stSidebar"] div[data-baseweb="select"] div {
            color: #0f172a !important; /* Dark Text for Inputs */
            -webkit-text-fill-color: #0f172a !important;
        }
        
        /* Specific fix for the 'Selected Option' in dropdowns */
        div[role="listbox"] div {
            color: #0f172a !important;
        }

        /* --- 3. COMMON ELEMENTS --- */
        /* DataFrame Headers */
        div[data-testid="stDataFrame"] div[role="columnheader"] {
            background-color: #f1f5f9;
            color: #0f172a !important;
        }
        
        /* Link Button Style */
        section[data-testid="stSidebar"] a {
            background-color: #3b82f6 !important;
            color: white !important;
            text-decoration: none;
            padding: 8px 16px;
            border-radius: 8px;
            display: block;
            text-align: center;
            border: 1px solid #2563eb;
        }

        /* Hotspot Cards */
        .hotspot-card { 
            background: white; 
            padding: 16px; 
            border-radius: 10px; 
            border-left: 5px solid; 
            margin-bottom: 12px; 
            box-shadow: 0 2px 4px rgba(0,0,0,0.05); 
        }
        /* Since Hotspot Cards are in Main Area, text inherits Dark, which is good. */
        
        /* Status Badges */
        .status-badge { 
            display: inline-flex; align-items: center; 
            padding: 6px 14px; border-radius: 9999px; 
            font-size: 12px; font-weight: 700; 
            text-transform: uppercase; 
        }
        .bg-green { background: #dcfce7; color: #166534 !important; }
    </style>
""", unsafe_allow_html=True)

# 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
@st.cache_data(show_spinner=False)
def fetch_coordinates_batch(unique_locations):
    """Resolve (district, state) pairs to (lat, lon) coordinates.

    Lookup order: persisted JSON cache -> hard-coded prefills -> Nominatim
    (OpenStreetMap) geocoding for anything still missing. Freshly geocoded
    results are written back to the JSON file so later runs skip the network.

    Args:
        unique_locations: iterable of (district, state) string tuples.

    Returns:
        dict mapping (district, state) -> (lat, lon) float tuple.
    """
    json_file = 'district_coords.json'
    coords_map = {}

    # Load the persisted cache; keys are stored as "district|state" strings
    # because JSON object keys cannot be tuples.
    if os.path.exists(json_file):
        try:
            with open(json_file, 'r') as f:
                loaded_data = json.load(f)
            for k, v in loaded_data.items():
                if "|" in k:
                    # maxsplit=1: a key containing more than one '|' no longer
                    # raises ValueError ("too many values to unpack").
                    d, s = k.split("|", 1)
                    coords_map[(d, s)] = tuple(v)
        except (json.JSONDecodeError, OSError):
            pass  # unreadable/corrupt cache -> treat as empty and rebuild

    # Districts Nominatim resolves poorly, plus major metros (lat, lon).
    prefills = {
        ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
        ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
        ('West Khasi Hills', 'Meghalaya'): (25.56, 91.29),
        ('Bijapur', 'Chhattisgarh'): (18.80, 80.82),
        ('Dhule', 'Maharashtra'): (20.90, 74.77),
        ('Dhamtari', 'Chhattisgarh'): (20.71, 81.55),
        ('Udupi', 'Karnataka'): (13.34, 74.75),
        ('Supaul', 'Bihar'): (26.29, 86.82),
        ('Puruliya', 'West Bengal'): (23.25, 86.50),
        ('Mumbai', 'Maharashtra'): (19.0760, 72.8777),
        ('Pune', 'Maharashtra'): (18.5204, 73.8567),
        ('Bangalore', 'Karnataka'): (12.9716, 77.5946),
        ('Bengaluru', 'Karnataka'): (12.9716, 77.5946),
        ('Chennai', 'Tamil Nadu'): (13.0827, 80.2707),
        ('Hyderabad', 'Telangana'): (17.3850, 78.4867),
        ('Kolkata', 'West Bengal'): (22.5726, 88.3639),
        ('Delhi', 'Delhi'): (28.7041, 77.1025),
        ('Shimla', 'Himachal Pradesh'): (31.1048, 77.1734)
    }
    for k, v in prefills.items():
        if k not in coords_map:
            coords_map[k] = v

    missing_locs = [loc for loc in unique_locations if loc not in coords_map]
    if not missing_locs:
        return coords_map

    progress_text = "📡 New locations found. Fetching coordinates..."
    my_bar = st.progress(0, text=progress_text)
    # Nominatim's usage policy requires an identifying User-Agent.
    headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
    updated = False

    for i, (district, state) in enumerate(missing_locs):
        my_bar.progress((i + 1) / len(missing_locs), text=f"📍 Locating: {district}, {state}")
        try:
            query = f"{district}, {state}, India"
            url = "https://nominatim.openstreetmap.org/search"
            params = {'q': query, 'format': 'json', 'limit': 1}
            response = requests.get(url, params=params, headers=headers, timeout=5)

            if response.status_code == 200 and response.json():
                data = response.json()[0]
                coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
                updated = True
        except (requests.RequestException, ValueError, KeyError):
            # Best-effort: an unresolvable location simply stays missing and
            # will fall back to its state center in load_data.
            pass
        finally:
            # Rate-limit even after failures (the original skipped the sleep
            # on exceptions): Nominatim allows roughly 1 request per second.
            time.sleep(1.1)

    my_bar.empty()

    if updated:
        # Flatten tuple keys back to "district|state" strings for JSON.
        save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
        with open(json_file, 'w') as f:
            json.dump(save_data, f)

    return coords_map

# 4. MAIN DATA LOADER
@st.cache_data(ttl=300)
def load_data():
    """Load and enrich the analyzed Aadhaar dataset.

    Pipeline: read CSV -> parse dates -> normalize state-name variants ->
    attach (lat, lon) via the geocoding cache (with deterministic jitter) ->
    bucket RISK_SCORE into Low/Medium/High/Critical categories.

    Returns:
        Enriched DataFrame, or an empty DataFrame when the CSV is absent.
    """
    import zlib  # local import: only needed for the stable jitter seed below

    try:
        df = pd.read_csv('analyzed_aadhaar_data.csv')
    except FileNotFoundError:
        return pd.DataFrame()

    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    df['district'] = df['district'].astype(str).str.strip()
    df['state'] = df['state'].astype(str).str.strip()

    # Canonicalize state-name variants so filters and geocoding agree.
    state_mapping = {
        'Jammu & Kashmir': 'Jammu and Kashmir',
        'J&K': 'Jammu and Kashmir',
        'Jammu And Kashmir': 'Jammu and Kashmir',
        'Andaman & Nicobar Islands': 'Andaman and Nicobar Islands',
        'Dadra and Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Dadra & Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman and Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman & Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'The Dadra And Nagar Haveli And The Daman And Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'Orissa': 'Odisha',
        'Chattisgarh': 'Chhattisgarh',
        'Telengana': 'Telangana',
        'Pondicherry': 'Puducherry'
    }
    df['state'] = df['state'].replace(state_mapping)

    unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
    coords_db = fetch_coordinates_batch(unique_locs)
    # Fallback anchors for rows whose district never geocoded.
    state_centers = {
        'Delhi': (28.7041, 77.1025), 'Maharashtra': (19.7515, 75.7139), 'Karnataka': (15.3173, 75.7139)
    }

    def get_lat_lon(row):
        # Compute a jittered (lat, lon) for one row.
        # BUGFIX: the original seeded with hash(key) % 2**32, but str hashing
        # is salted per process (PYTHONHASHSEED), so marker positions changed
        # on every restart; it also reseeded NumPy's GLOBAL random state.
        # zlib.crc32 is stable across runs, and a local Generator leaves the
        # global state untouched. Including row.name keeps per-row spread so
        # multiple rows of the same district do not collapse onto one point.
        key = (row['district'], row['state'])
        seed = zlib.crc32(f"{key[0]}|{key[1]}|{row.name}".encode('utf-8'))
        rng = np.random.default_rng(seed)
        if key in coords_db:
            lat, lon = coords_db[key]
            # Small jitter so co-located centers remain individually visible.
            return pd.Series({'lat': lat + rng.normal(0, 0.002), 'lon': lon + rng.normal(0, 0.002)})
        # Unknown district: scatter around the state center (or India's
        # centroid as the final fallback).
        center = state_centers.get(row['state'], (20.5937, 78.9629))
        return pd.Series({'lat': center[0] + rng.uniform(-0.5, 0.5), 'lon': center[1] + rng.uniform(-0.5, 0.5)})

    coords = df.apply(get_lat_lon, axis=1)
    df['lat'] = coords['lat']
    df['lon'] = coords['lon']
    # bins start at -1 so a score of exactly 0 lands in 'Low'.
    df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
    return df

# Cached load (ttl=300s); the spinner only shows work on a cache miss.
with st.spinner('Initializing S.A.T.A.R.K AI...'):
    df = load_data()

# 5. SIDEBAR & FILTERS
with st.sidebar:
    st.markdown("### πŸ›‘οΈ S.A.T.A.R.K AI Control")
    st.markdown("---")

    if not df.empty:
        if 'date' in df.columns:
            min_d, max_d = df['date'].min().date(), df['date'].max().date()
            dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
            if len(dr) == 2:
                df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]

        state_list = ['All'] + sorted(df['state'].unique().tolist())
        sel_state = st.selectbox("State", state_list)
        filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()

        dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
        sel_dist = st.selectbox("District", dist_list)
        if sel_dist != 'All':
            filtered_df = filtered_df[filtered_df['district'] == sel_dist]

        st.markdown("---")
        risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
        if risk_filter:
            filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
    else:
        filtered_df = pd.DataFrame()

    st.markdown("---")
    st.link_button("πŸ““ Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
    st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")

# 6. HEADER & METRICS
col1, col2 = st.columns([3, 1])
with col1:
    st.title("πŸ›‘οΈ S.A.T.A.R.K AI Dashboard")
    st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
    st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)

st.markdown("---")

if not filtered_df.empty:
    # Headline KPIs computed over the currently filtered slice.
    m1, m2, m3, m4, m5 = st.columns(5)
    total = len(filtered_df)
    high = len(filtered_df[filtered_df['RISK_SCORE'] > 75])   # high-risk threshold
    crit = len(filtered_df[filtered_df['RISK_SCORE'] > 85])   # critical threshold

    m1.metric("Total Centers", f"{total:,}", border=True)
    m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
    m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
    m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100", border=True)
    m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
elif df.empty:
    # The source CSV never loaded (load_data returned an empty frame).
    st.error("❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
else:
    # BUGFIX: the data loaded fine but the sidebar filters matched nothing;
    # the original incorrectly showed the "file not found" error here.
    st.warning("⚠️ No records match the current filters. Adjust the sidebar filters to see data.")

st.markdown("##")

# 7. TABS
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])

with tab_map:
    c_map, c_det = st.columns([3, 1])
    with c_map:
        if not filtered_df.empty:
            zoom_lvl = 10 if sel_dist != 'All' else (6 if sel_state != 'All' else 3.8)
            fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
                                    color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
                                    center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
                                    hover_name="district", hover_data={"state": True, "pincode": True},
                                    mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
            fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig, use_container_width=True)
    with c_det:
        st.subheader("πŸ”₯ Top Hotspots")
        if not filtered_df.empty:
            top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
            for i, (d, r) in enumerate(top.iterrows(), 1):
                clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
                st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)

with tab_list:
    st.subheader("🎯 Priority Investigation")
    if not filtered_df.empty:
        targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
        csv = targets.to_csv(index=False).encode('utf-8')
        st.download_button("πŸ“₯ Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
        st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
                     column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)

with tab_charts:
    c1, c2 = st.columns(2)
    with c1:
        st.markdown("**Ghost ID Detection**")
        if not filtered_df.empty:
            # Activity volume vs. deviation from expected enrolment ratios;
            # point size encodes risk score, color encodes risk category.
            fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
                             color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
            # Dashed reference line marking the 0.2 deviation level.
            fig.add_hline(y=0.2, line_dash="dash", line_color="red")
            st.plotly_chart(fig, use_container_width=True)
    with c2:
        st.markdown("**Weekend Activity Analysis**")
        if not filtered_df.empty:
            # Total activity split by the is_weekend flag (0/1).
            wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
            wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
            fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
            st.plotly_chart(fig, use_container_width=True)

with tab_insights:
    st.subheader("πŸ” AI Detective Insights")
    if not filtered_df.empty:
        anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
        st.info(f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
        c_i1, c_i2 = st.columns(2)
        with c_i1:
            st.markdown("#### 🚨 Primary Risk Factors")
            st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
            st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
        with c_i2:
            st.markdown("#### πŸ’‘ Recommended Actions")
            st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
            st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")

st.markdown("---")
# Static footer, rendered as raw HTML.
st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.A.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)