Spaces:

LovnishVerma
/

UIDAI

Sleeping

File size: 14,209 Bytes

db4a594
 
 
 
 
30fc09f
db4a594
86265dd
db4a594
6bd048f
538e361
db4a594
 
 
 
30fc09f
db4a594
 
30fc09f
 
ec41653
f5f7959
86265dd
30fc09f
 
 
db4a594
30fc09f
 
 
 
f5f7959
30fc09f
 
 
 
f2075fc
30fc09f
f5f7959
30fc09f
 
 
ec41653
30fc09f
 
 
 
 
 
5a24c85
30fc09f
 
 
 
 
 
 
 
f5f7959
db4a594
 
 
30fc09f
 
db4a594
3cb671d
 
 
86265dd
30fc09f
86265dd
30fc09f
14bb62b
30fc09f
 
 
 
 
 
 
 
 
 
 
 
f2075fc
14bb62b
30fc09f
f5f7959
30fc09f
 
 
 
 
 
 
 
 
f5f7959
 
538e361
 
30fc09f
538e361
 
30fc09f
 
538e361
30fc09f
 
538e361
30fc09f
 
 
 
f5f7959
30fc09f
 
538e361
 
30fc09f
 
538e361
f2075fc
14bb62b
30fc09f
 
86265dd
ec41653
3cb671d
 
 
 
db4a594
14bb62b
db4a594
30fc09f
ec41653
30fc09f
 
 
 
ec41653
86265dd
30fc09f
 
86265dd
30fc09f
 
 
ec41653
db4a594
30fc09f
 
ec41653
1b754da
30fc09f
 
db4a594
30fc09f
14bb62b
 
30fc09f
 
14bb62b
30fc09f
db4a594
14bb62b
30fc09f
 
 
 
 
 
 
f5f7959
db4a594
f5f7959
30fc09f
86265dd
14bb62b
30fc09f
 
86265dd
30fc09f
 
 
 
 
 
 
 
 
86265dd
30fc09f
 
 
 
db4a594
14bb62b
30fc09f
 
 
 
 
 
ec41653
14bb62b
 
 
30fc09f
 
 
 
 
14bb62b
30fc09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec41653
5a24c85
f5f7959

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from datetime import datetime, timedelta

# 1. PAGE CONFIGURATION
st.set_page_config(
    page_title="S.T.A.R.K AI | UIDAI Fraud Detection",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded"
)

# 2. ENHANCED PROFESSIONAL STYLING (Optimized)
st.markdown("""
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
        .stApp { background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%); color: #0f172a; font-family: 'Inter', sans-serif; }
        
        /* METRIC CARDS */
        div[data-testid="stMetric"] {
            background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
            border: 1px solid #e2e8f0; border-radius: 12px; padding: 20px;
            box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1); transition: transform 0.2s;
        }
        div[data-testid="stMetric"]:hover { transform: translateY(-2px); box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1); }
        div[data-testid="stMetricValue"] { color: #0f172a !important; font-weight: 800 !important; font-size: 2rem !important; }
        div[data-testid="stMetricLabel"] { color: #64748b !important; font-weight: 600 !important; text-transform: uppercase; font-size: 0.75rem; letter-spacing: 0.05em; }
        
        /* DATAFRAME */
        div[data-testid="stDataFrame"] { border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
        div[data-testid="stDataFrame"] div[role="columnheader"] { 
            background: linear-gradient(to bottom, #f8fafc, #f1f5f9) !important; 
            color: #0f172a !important; font-weight: 700 !important; border-bottom: 2px solid #cbd5e1 !important; 
        }
        
        /* SIDEBAR */
        [data-testid="stSidebar"] { background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%); border-right: 1px solid #334155; }
        [data-testid="stSidebar"] * { color: #f8fafc !important; }
        [data-testid="stSidebar"] .stSelectbox label { color: #cbd5e1 !important; }
        
        /* UI ELEMENTS */
        h1 { background: linear-gradient(135deg, #0f172a 0%, #334155 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800 !important; }
        .status-badge { display: inline-flex; align-items: center; padding: 6px 14px; border-radius: 9999px; font-size: 12px; font-weight: 700; text-transform: uppercase; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
        .bg-red { background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); color: #991b1b; }
        .bg-green { background: linear-gradient(135deg, #dcfce7 0%, #bbf7d0 100%); color: #166534; }
        .bg-amber { background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); color: #92400e; }
        
        /* TABS & BUTTONS */
        .stTabs [data-baseweb="tab-list"] { gap: 8px; }
        .stTabs [aria-selected="true"] { background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%); color: white !important; }
        .stButton button { border-radius: 8px; font-weight: 600; }
        
        /* HOTSPOTS */
        .hotspot-card { background: white; padding: 16px; border-radius: 10px; border-left: 5px solid; margin-bottom: 12px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); transition: all 0.2s; }
        .hotspot-card:hover { transform: translateX(4px); box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
        .js-plotly-plot .plotly .main-svg { background-color: rgba(0,0,0,0) !important; }
    </style>
""", unsafe_allow_html=True)

# 3. ENHANCED DATA LOADING
@st.cache_data(ttl=300)
def load_data():
    # Strictly load data from CSV
    df = pd.read_csv('analyzed_aadhaar_data.csv')
    # Removed st.toast from inside cached function to prevent CacheReplayClosureError

    if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])
    
    # Precise Geometric Centers
    state_centers = {
        'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
        'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
        'Chandigarh': (30.7333, 76.7794), 'Chhattisgarh': (21.2787, 81.8661), 'Delhi': (28.7041, 77.1025),
        'Goa': (15.2993, 74.1240), 'Gujarat': (22.2587, 71.1924), 'Haryana': (29.0588, 76.0856),
        'Himachal Pradesh': (31.9579, 77.1095), 'Jammu and Kashmir': (33.7782, 76.5762), 'Jharkhand': (23.6102, 85.2799),
        'Karnataka': (15.3173, 75.7139), 'Kerala': (10.8505, 76.2711), 'Ladakh': (34.1526, 77.5770),
        'Madhya Pradesh': (22.9734, 78.6569), 'Maharashtra': (19.7515, 75.7139), 'Manipur': (24.6637, 93.9063),
        'Meghalaya': (25.4670, 91.3662), 'Mizoram': (23.1645, 92.9376), 'Nagaland': (26.1584, 94.5624),
        'Odisha': (20.9517, 85.0985), 'Puducherry': (11.9416, 79.8083), 'Punjab': (31.1471, 75.3412),
        'Rajasthan': (27.0238, 74.2179), 'Sikkim': (27.5330, 88.5122), 'Tamil Nadu': (11.1271, 78.6569),
        'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
        'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
    }

    # EXPANDED Aspect Ratio Definitions (Lat spread, Lon spread)
    state_spreads = {
        'Kerala': (1.2, 0.25), 'West Bengal': (1.4, 0.4), 'Assam': (0.4, 1.8), 
        'Maharashtra': (1.8, 2.2), 'Uttar Pradesh': (1.2, 2.5), 'Bihar': (0.8, 1.5), 
        'Delhi': (0.1, 0.12), 'Goa': (0.15, 0.15), 'Chandigarh': (0.04, 0.04),
        'Gujarat': (1.5, 1.8), 'Rajasthan': (2.0, 2.0), 'Madhya Pradesh': (1.8, 2.5), 
        'Himachal Pradesh': (0.6, 0.8), 'Punjab': (0.8, 0.9), 'Haryana': (0.9, 0.8),
        'Tamil Nadu': (1.2, 1.0), 'Karnataka': (1.5, 1.2), 'Telangana': (1.0, 1.0),
        'Andhra Pradesh': (1.5, 1.5), 'Odisha': (1.2, 1.2), 'Chhattisgarh': (1.5, 0.9),
        'Jharkhand': (0.8, 1.0), 'Jammu and Kashmir': (1.0, 1.5), 'Ladakh': (1.0, 1.5),
        'Uttarakhand': (0.7, 0.8)
    }
    
    def get_coords(row):
        state = row.get('state', 'Delhi')
        district = str(row.get('district', 'Unknown')).lower()
        base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))
        
        # Safer Default if state not found
        lat_scale, lon_scale = state_spreads.get(state, (0.7, 0.7))
        
        lat_bias, lon_bias = 0, 0
        bias = 0.6  
        
        if 'north' in district: lat_bias += lat_scale * bias
        if 'south' in district: lat_bias -= lat_scale * bias
        if 'east' in district: lon_bias += lon_scale * bias
        if 'west' in district: lon_bias -= lon_scale * bias
        
        np.random.seed(hash(state + district) % 2**32) 
        rf = 0.5 if (lat_bias or lon_bias) else 1.0
        
        return pd.Series({
            'lat': base_lat + lat_bias + np.random.uniform(-lat_scale*rf, lat_scale*rf) + np.random.normal(0, 0.04),
            'lon': base_lon + lon_bias + np.random.uniform(-lon_scale*rf, lon_scale*rf) + np.random.normal(0, 0.04)
        })
    
    coords = df.apply(get_coords, axis=1)
    df['lat'], df['lon'] = coords['lat'], coords['lon']
    df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
    return df

with st.spinner('Loading S.T.A.R.K AI System...'): 
    df = load_data()
    # Toast moved outside cached function
    # st.toast("✅ Data loaded successfully", icon="✅") 

# 4. SIDEBAR & FILTERS
with st.sidebar:
    st.markdown("### 🛡️ S.T.A.R.K AI Control")
    st.markdown("---")
    if 'date' in df.columns:
        min_d, max_d = df['date'].min().date(), df['date'].max().date()
        dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
        if len(dr) == 2: df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
    
    state_list = ['All'] + sorted(df['state'].unique().tolist())
    sel_state = st.selectbox("State", state_list)
    filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
    
    dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
    sel_dist = st.selectbox("District", dist_list)
    if sel_dist != 'All': filtered_df = filtered_df[filtered_df['district'] == sel_dist]
    
    st.markdown("---")
    risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
    if risk_filter: filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
    
    st.markdown("---")
    st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
    st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")

# 5. HEADER & METRICS
col1, col2 = st.columns([3, 1])
with col1:
    st.title("🛡️ S.T.A.R.K AI Dashboard")
    st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
    st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)

st.markdown("---")
m1, m2, m3, m4, m5 = st.columns(5)
total, high, crit = len(filtered_df), len(filtered_df[filtered_df['RISK_SCORE'] > 75]), len(filtered_df[filtered_df['RISK_SCORE'] > 85])
m1.metric("Total Centers", f"{total:,}", border=True)
m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
st.markdown("##")

# 6. TABS
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["🗺️ Geographic Risk", "📋 Priority List", "📊 Patterns", "🔍 AI Insights"])

with tab_map:
    c_map, c_det = st.columns([3, 1])
    with c_map:
        if not filtered_df.empty:
            fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
                color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=4.8 if sel_state != 'All' else 3.8,
                center={"lat": 22.0, "lon": 80.0}, hover_name="district", mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
            fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
            st.plotly_chart(fig, use_container_width=True)
        else: st.warning("No data found.")
    
    with c_det:
        st.subheader("🔥 Top Hotspots")
        if not filtered_df.empty:
            top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
            for i, (d, r) in enumerate(top.iterrows(), 1):
                clr, bdg = ("#ef4444", "CRITICAL") if r['RISK_SCORE'] > 85 else ("#f97316", "HIGH")
                st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)

with tab_list:
    st.subheader("🎯 Priority Investigation")
    targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
    csv = targets.to_csv(index=False).encode('utf-8')
    st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
    st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']], 
        column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)

with tab_charts:
    c1, c2 = st.columns(2)
    with c1:
        st.markdown("**Ghost ID Detection**")
        fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
            color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
        fig.add_hline(y=0.2, line_dash="dash", line_color="red")
        st.plotly_chart(fig, use_container_width=True)
    with c2:
        st.markdown("**Weekend Activity Analysis**")
        wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
        wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
        fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
        st.plotly_chart(fig, use_container_width=True)

with tab_insights:
    st.subheader("🔍 AI Detective Insights")
    if not filtered_df.empty:
        anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
        st.info(f"🤖 **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2σ from mean).")
        
        c_i1, c_i2 = st.columns(2)
        with c_i1:
            st.markdown("#### 🚨 Primary Risk Factors")
            st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
            st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
        with c_i2:
            st.markdown("#### 💡 Recommended Actions")
            st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
            st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")

st.markdown("---")
st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)