File size: 14,209 Bytes
db4a594
 
 
 
 
30fc09f
db4a594
86265dd
db4a594
6bd048f
538e361
db4a594
 
 
 
30fc09f
db4a594
 
30fc09f
 
ec41653
f5f7959
86265dd
30fc09f
 
 
db4a594
30fc09f
 
 
 
f5f7959
30fc09f
 
 
 
f2075fc
30fc09f
f5f7959
30fc09f
 
 
ec41653
30fc09f
 
 
 
 
 
5a24c85
30fc09f
 
 
 
 
 
 
 
f5f7959
db4a594
 
 
30fc09f
 
db4a594
3cb671d
 
 
86265dd
30fc09f
86265dd
30fc09f
14bb62b
30fc09f
 
 
 
 
 
 
 
 
 
 
 
f2075fc
14bb62b
30fc09f
f5f7959
30fc09f
 
 
 
 
 
 
 
 
f5f7959
 
538e361
 
30fc09f
538e361
 
30fc09f
 
538e361
30fc09f
 
538e361
30fc09f
 
 
 
f5f7959
30fc09f
 
538e361
 
30fc09f
 
538e361
f2075fc
14bb62b
30fc09f
 
86265dd
ec41653
3cb671d
 
 
 
db4a594
14bb62b
db4a594
30fc09f
ec41653
30fc09f
 
 
 
ec41653
86265dd
30fc09f
 
86265dd
30fc09f
 
 
ec41653
db4a594
30fc09f
 
ec41653
1b754da
30fc09f
 
db4a594
30fc09f
14bb62b
 
30fc09f
 
14bb62b
30fc09f
db4a594
14bb62b
30fc09f
 
 
 
 
 
 
f5f7959
db4a594
f5f7959
30fc09f
86265dd
14bb62b
30fc09f
 
86265dd
30fc09f
 
 
 
 
 
 
 
 
86265dd
30fc09f
 
 
 
db4a594
14bb62b
30fc09f
 
 
 
 
 
ec41653
14bb62b
 
 
30fc09f
 
 
 
 
14bb62b
30fc09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec41653
5a24c85
f5f7959
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import html
import zlib
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# 1. PAGE CONFIGURATION
# Browser-tab title/icon plus a wide layout with the sidebar open on first load.
# The icon is the shield emoji (U+1F6E1 U+FE0F); it had been stored as mojibake.
st.set_page_config(
    page_title="S.T.A.R.K AI | UIDAI Fraud Detection",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# 2. ENHANCED PROFESSIONAL STYLING (Optimized)
# One <style> element carrying every visual override used by the dashboard:
# metric cards, dataframe header, dark sidebar, gradient title, status badges,
# tab/button tweaks and the "hotspot" cards rendered in the map tab.
_THEME_CSS = """
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
        .stApp { background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%); color: #0f172a; font-family: 'Inter', sans-serif; }
        
        /* METRIC CARDS */
        div[data-testid="stMetric"] {
            background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
            border: 1px solid #e2e8f0; border-radius: 12px; padding: 20px;
            box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1); transition: transform 0.2s;
        }
        div[data-testid="stMetric"]:hover { transform: translateY(-2px); box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1); }
        div[data-testid="stMetricValue"] { color: #0f172a !important; font-weight: 800 !important; font-size: 2rem !important; }
        div[data-testid="stMetricLabel"] { color: #64748b !important; font-weight: 600 !important; text-transform: uppercase; font-size: 0.75rem; letter-spacing: 0.05em; }
        
        /* DATAFRAME */
        div[data-testid="stDataFrame"] { border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
        div[data-testid="stDataFrame"] div[role="columnheader"] { 
            background: linear-gradient(to bottom, #f8fafc, #f1f5f9) !important; 
            color: #0f172a !important; font-weight: 700 !important; border-bottom: 2px solid #cbd5e1 !important; 
        }
        
        /* SIDEBAR */
        [data-testid="stSidebar"] { background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%); border-right: 1px solid #334155; }
        [data-testid="stSidebar"] * { color: #f8fafc !important; }
        [data-testid="stSidebar"] .stSelectbox label { color: #cbd5e1 !important; }
        
        /* UI ELEMENTS */
        h1 { background: linear-gradient(135deg, #0f172a 0%, #334155 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800 !important; }
        .status-badge { display: inline-flex; align-items: center; padding: 6px 14px; border-radius: 9999px; font-size: 12px; font-weight: 700; text-transform: uppercase; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
        .bg-red { background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); color: #991b1b; }
        .bg-green { background: linear-gradient(135deg, #dcfce7 0%, #bbf7d0 100%); color: #166534; }
        .bg-amber { background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); color: #92400e; }
        
        /* TABS & BUTTONS */
        .stTabs [data-baseweb="tab-list"] { gap: 8px; }
        .stTabs [aria-selected="true"] { background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%); color: white !important; }
        .stButton button { border-radius: 8px; font-weight: 600; }
        
        /* HOTSPOTS */
        .hotspot-card { background: white; padding: 16px; border-radius: 10px; border-left: 5px solid; margin-bottom: 12px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); transition: all 0.2s; }
        .hotspot-card:hover { transform: translateX(4px); box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
        .js-plotly-plot .plotly .main-svg { background-color: rgba(0,0,0,0) !important; }
    </style>
"""
# unsafe_allow_html is needed so the <style> tag is emitted as markup, not text.
st.markdown(_THEME_CSS, unsafe_allow_html=True)

# 3. ENHANCED DATA LOADING
@st.cache_data(ttl=300)
def load_data():
    """Load the analysed CSV and add plotting coordinates and a risk band.

    Reads ``analyzed_aadhaar_data.csv`` from the current working directory,
    converts the ``date`` column to datetimes when it is present, and adds:

    * ``lat`` / ``lon`` -- a synthetic map position derived only from the
      row's state and district names.  Each district is placed around its
      state's anchor point, pushed north/south/east/west when the district
      name contains that word, then jittered.  The jitter is seeded from a
      CRC-32 of the names, so a district always lands on the same spot, in
      every row and across process restarts.
    * ``risk_category`` -- ``RISK_SCORE`` binned into ``Low`` (-1, 50],
      ``Medium`` (50, 75], ``High`` (75, 85] and ``Critical`` (85, 100].

    The result is cached by ``st.cache_data`` with ``ttl=300``.  No Streamlit
    element is created in here (an earlier ``st.toast`` was removed to prevent
    ``CacheReplayClosureError``).

    Returns:
        pandas.DataFrame: the CSV contents plus ``lat``, ``lon`` and
        ``risk_category``.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        KeyError: if the CSV has no ``RISK_SCORE`` column.
    """
    # Strictly load data from CSV
    df = pd.read_csv('analyzed_aadhaar_data.csv')

    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    # (lat, lon) anchor point for each state / union territory.
    state_centers = {
        'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
        'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
        'Chandigarh': (30.7333, 76.7794), 'Chhattisgarh': (21.2787, 81.8661), 'Delhi': (28.7041, 77.1025),
        'Goa': (15.2993, 74.1240), 'Gujarat': (22.2587, 71.1924), 'Haryana': (29.0588, 76.0856),
        'Himachal Pradesh': (31.9579, 77.1095), 'Jammu and Kashmir': (33.7782, 76.5762), 'Jharkhand': (23.6102, 85.2799),
        'Karnataka': (15.3173, 75.7139), 'Kerala': (10.8505, 76.2711), 'Ladakh': (34.1526, 77.5770),
        'Madhya Pradesh': (22.9734, 78.6569), 'Maharashtra': (19.7515, 75.7139), 'Manipur': (24.6637, 93.9063),
        'Meghalaya': (25.4670, 91.3662), 'Mizoram': (23.1645, 92.9376), 'Nagaland': (26.1584, 94.5624),
        'Odisha': (20.9517, 85.0985), 'Puducherry': (11.9416, 79.8083), 'Punjab': (31.1471, 75.3412),
        'Rajasthan': (27.0238, 74.2179), 'Sikkim': (27.5330, 88.5122), 'Tamil Nadu': (11.1271, 78.6569),
        'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
        'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550),
    }

    # (lat spread, lon spread) in degrees: how far points may scatter from the
    # anchor, so the cloud of points follows each state's aspect ratio.
    state_spreads = {
        'Kerala': (1.2, 0.25), 'West Bengal': (1.4, 0.4), 'Assam': (0.4, 1.8),
        'Maharashtra': (1.8, 2.2), 'Uttar Pradesh': (1.2, 2.5), 'Bihar': (0.8, 1.5),
        'Delhi': (0.1, 0.12), 'Goa': (0.15, 0.15), 'Chandigarh': (0.04, 0.04),
        'Gujarat': (1.5, 1.8), 'Rajasthan': (2.0, 2.0), 'Madhya Pradesh': (1.8, 2.5),
        'Himachal Pradesh': (0.6, 0.8), 'Punjab': (0.8, 0.9), 'Haryana': (0.9, 0.8),
        'Tamil Nadu': (1.2, 1.0), 'Karnataka': (1.5, 1.2), 'Telangana': (1.0, 1.0),
        'Andhra Pradesh': (1.5, 1.5), 'Odisha': (1.2, 1.2), 'Chhattisgarh': (1.5, 0.9),
        'Jharkhand': (0.8, 1.0), 'Jammu and Kashmir': (1.0, 1.5), 'Ladakh': (1.0, 1.5),
        'Uttarakhand': (0.7, 0.8),
    }

    fallback_center = (20.5937, 78.9629)  # used when the state is not listed above
    fallback_spread = (0.7, 0.7)          # safer default if state not found
    direction_bias = 0.6                  # fraction of the spread applied per compass word

    def place_district(state, district):
        """Return the (lat, lon) for one state and lower-cased district name."""
        base_lat, base_lon = state_centers.get(state, fallback_center)
        lat_scale, lon_scale = state_spreads.get(state, fallback_spread)

        lat_bias = lon_bias = 0.0
        if 'north' in district:
            lat_bias += lat_scale * direction_bias
        if 'south' in district:
            lat_bias -= lat_scale * direction_bias
        if 'east' in district:
            lon_bias += lon_scale * direction_bias
        if 'west' in district:
            lon_bias -= lon_scale * direction_bias

        # A district already pushed in some direction gets half the scatter.
        jitter = 0.5 if (lat_bias or lon_bias) else 1.0

        # hash() on str is salted per interpreter run, so it cannot give a
        # reproducible seed; CRC-32 is stable and already fits in 32 bits.
        # A private generator also leaves NumPy's global random state alone.
        seed = zlib.crc32(f"{state}|{district}".encode("utf-8"))
        rng = np.random.default_rng(seed)

        lat = base_lat + lat_bias + rng.uniform(-lat_scale * jitter, lat_scale * jitter) + rng.normal(0, 0.04)
        lon = base_lon + lon_bias + rng.uniform(-lon_scale * jitter, lon_scale * jitter) + rng.normal(0, 0.04)
        return lat, lon

    # Missing columns fall back to the same defaults the per-row lookup used.
    row_count = len(df)
    states = df['state'].tolist() if 'state' in df.columns else ['Delhi'] * row_count
    districts = df['district'].tolist() if 'district' in df.columns else ['Unknown'] * row_count

    # The position depends only on (state, district), so compute it once per
    # distinct pair instead of once per row.
    placed = {}
    lats, lons = [], []
    for state, district in zip(states, districts):
        district_key = str(district).lower()
        cache_key = (str(state), district_key)  # str(): NaN is not equal to itself
        if cache_key not in placed:
            placed[cache_key] = place_district(state, district_key)
        lat, lon = placed[cache_key]
        lats.append(lat)
        lons.append(lon)

    # Explicit float arrays keep the columns numeric even for an empty file.
    df['lat'] = np.asarray(lats, dtype=float)
    df['lon'] = np.asarray(lons, dtype=float)
    df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
    return df

# Load the dataset (served from the Streamlit cache when available).  If the
# CSV is missing or unreadable, show a readable message and halt this run
# instead of surfacing a raw traceback.
with st.spinner('Loading S.T.A.R.K AI System...'):
    try:
        df = load_data()
    except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Unable to load 'analyzed_aadhaar_data.csv': {exc}")
        st.stop()

# 4. SIDEBAR & FILTERS
# Builds the sidebar controls and narrows the data step by step:
#   df          -> restricted to the chosen date range (when dates are available)
#   filtered_df -> df further restricted by state, district and risk level
# Also defines sel_state / sel_dist / risk_filter for the sections below.
with st.sidebar:
    st.markdown("### 🛡️ S.T.A.R.K AI Control")
    st.markdown("---")
    # Offer the date picker only when at least one date parsed; otherwise
    # min()/max() are NaT and the widget cannot be given valid bounds.
    if 'date' in df.columns and df['date'].notna().any():
        min_d, max_d = df['date'].min().date(), df['date'].max().date()
        dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
        # Filter only once both ends of the range have been chosen.
        if len(dr) == 2:
            row_days = df['date'].dt.date
            df = df[(row_days >= dr[0]) & (row_days <= dr[1])]

    # dropna(): a missing name is a float NaN, which sorted() cannot order
    # against strings.
    state_list = ['All'] + sorted(df['state'].dropna().unique().tolist())
    sel_state = st.selectbox("State", state_list)
    filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()

    # District choices are limited to the state selected above.
    dist_list = ['All'] + sorted(filtered_df['district'].dropna().unique().tolist())
    sel_dist = st.selectbox("District", dist_list)
    if sel_dist != 'All':
        filtered_df = filtered_df[filtered_df['district'] == sel_dist]

    st.markdown("---")
    risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
    # An empty selection means "no risk filter", not "show nothing".
    if risk_filter:
        filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]

    st.markdown("---")
    st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
    st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")

# 5. HEADER & METRICS
# Title row: dashboard name on the left, status badge and today's date on the right.
col1, col2 = st.columns([3, 1])
with col1:
    st.title("🛡️ S.T.A.R.K AI Dashboard")
    st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
    st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)

st.markdown("---")
# Five headline numbers computed over the currently filtered rows.
m1, m2, m3, m4, m5 = st.columns(5)
risk_scores = filtered_df['RISK_SCORE']
total = len(filtered_df)
# Summing a boolean mask counts matching rows without building a filtered frame.
high = int((risk_scores > 75).sum())   # includes the critical rows counted below
crit = int((risk_scores > 85).sum())
weekend_spikes = int(((filtered_df['is_weekend'] == 1) & (risk_scores > 70)).sum())
# mean() is NaN for an empty selection and when every score is missing; show
# "0" in both cases rather than "nan/100".
avg_risk = risk_scores.mean()
avg_label = f"{avg_risk:.1f}/100" if pd.notna(avg_risk) else "0"

m1.metric("Total Centers", f"{total:,}", border=True)
m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
m4.metric("Avg Risk", avg_label, border=True)
m5.metric("Weekend Spikes", f"{weekend_spikes}", delta="Suspicious", delta_color="off", border=True)
st.markdown("##")

# 6. TABS
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["🗺️ Geographic Risk", "📋 Priority List", "📊 Patterns", "🔍 AI Insights"])

# Map tab: bubble map of the filtered rows on the left, and on the right the
# five districts with the highest mean risk score.
with tab_map:
    c_map, c_det = st.columns([3, 1])
    with c_map:
        if not filtered_df.empty:
            # With a state selected the map zooms in, so it must also be centred
            # on that state's points; a fixed all-India centre would leave most
            # states off-screen at the closer zoom.
            if sel_state != 'All':
                map_zoom = 4.8
                map_center = {"lat": float(filtered_df['lat'].mean()), "lon": float(filtered_df['lon'].mean())}
            else:
                map_zoom = 3.8
                map_center = {"lat": 22.0, "lon": 80.0}
            fig = px.scatter_mapbox(
                filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
                color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25,
                zoom=map_zoom, center=map_center, hover_name="district",
                mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>",
            )
            fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No data found.")

    with c_det:
        st.subheader("🔥 Top Hotspots")
        if not filtered_df.empty:
            top = (
                filtered_df.groupby('district')
                .agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'})
                .sort_values('RISK_SCORE', ascending=False)
                .head(5)
            )
            for i, (d, r) in enumerate(top.iterrows(), 1):
                # Red accent above 85, orange otherwise.
                clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
                # The district name comes from the data file and is rendered as
                # raw HTML, so escape it before interpolation.
                district_label = html.escape(str(d))
                st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {district_label}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)

# Priority list tab: every filtered row scoring above 75, highest first, as a
# CSV download and as an on-screen table.
with tab_list:
    st.subheader("🎯 Priority Investigation")
    targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
    csv = targets.to_csv(index=False).encode('utf-8')
    st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
    # The rest of the script treats 'date' as optional, so show only the
    # columns that actually exist instead of failing with a KeyError.
    wanted_cols = ['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']
    shown_cols = [c for c in wanted_cols if c in targets.columns]
    st.dataframe(
        targets[shown_cols],
        column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)},
        use_container_width=True,
        hide_index=True,
    )

# Patterns tab: activity vs. ratio deviation scatter, and total activity split
# by weekday/weekend.
with tab_charts:
    # Same empty-selection handling as the map tab: say so instead of drawing
    # two blank charts.
    if filtered_df.empty:
        st.warning("No data found.")
    else:
        c1, c2 = st.columns(2)
        with c1:
            st.markdown("**Ghost ID Detection**")
            fig = px.scatter(
                filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
                color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'},
                height=350,
            )
            # Dashed red reference line at ratio_deviation = 0.2.
            fig.add_hline(y=0.2, line_dash="dash", line_color="red")
            st.plotly_chart(fig, use_container_width=True)
        with c2:
            st.markdown("**Weekend Activity Analysis**")
            wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
            wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
            fig = px.bar(
                wk_counts, x='Type', y='total_activity', color='Type',
                color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350,
            )
            st.plotly_chart(fig, use_container_width=True)

# Insights tab: a count of rows whose ratio_deviation exceeds 0.4, followed by
# fixed risk-factor text and recommended actions.
with tab_insights:
    st.subheader("🔍 AI Detective Insights")
    if not filtered_df.empty:
        # Boolean-mask sums count rows without materialising filtered frames.
        anomaly_count = int((filtered_df['ratio_deviation'] > 0.4).sum())
        audit_count = int((filtered_df['RISK_SCORE'] > 90).sum())
        # NOTE(review): the message says "> 2σ from mean" but the test above is
        # a fixed 0.4 cut-off -- confirm the two agree for this dataset.
        st.info(f"🤖 **AI Analysis:** Detected {anomaly_count} centers with statistically significant enrollment deviations (> 2σ from mean).")

        c_i1, c_i2 = st.columns(2)
        with c_i1:
            st.markdown("#### 🚨 Primary Risk Factors")
            st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
            st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
        with c_i2:
            st.markdown("#### 💡 Recommended Actions")
            st.markdown(f"1. Immediate audit of {audit_count} centers with >90 Risk Score")
            st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
    else:
        # Same message the map tab shows for an empty selection.
        st.warning("No data found.")

# Page footer: a horizontal rule followed by a centred project credit line.
st.markdown("---")
_FOOTER_HTML = """<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)