File size: 16,564 Bytes
db4a594
 
 
 
 
d89ad44
 
55e7233
 
30fc09f
db4a594
86265dd
db4a594
a53bb64
538e361
db4a594
 
 
 
a009ce9
db4a594
 
30fc09f
ec41653
a009ce9
6d804cc
a009ce9
 
 
 
837e300
 
a009ce9
 
 
837e300
a009ce9
 
86265dd
30fc09f
a009ce9
 
 
f2075fc
a009ce9
 
 
 
 
 
 
1489a05
a009ce9
6d804cc
a009ce9
6d804cc
837e300
a009ce9
6d804cc
 
a009ce9
 
 
6d804cc
a009ce9
837e300
7b77dfb
a009ce9
 
06659a7
837e300
a009ce9
 
 
 
 
837e300
 
7b77dfb
a009ce9
 
 
 
e179851
a009ce9
 
 
 
 
e179851
a009ce9
 
 
 
 
 
 
 
 
837e300
6d804cc
ec41653
a009ce9
 
 
 
 
 
 
 
db4a594
 
 
6d804cc
d89ad44
6d804cc
 
 
55e7233
 
 
6d804cc
 
 
 
 
 
 
a009ce9
55e7233
6d804cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31800ef
6d804cc
 
d89ad44
 
6d804cc
 
 
1ef7e77
6d804cc
 
d89ad44
6d804cc
 
d89ad44
6d804cc
 
 
 
 
 
55e7233
6d804cc
a009ce9
6d804cc
31800ef
d89ad44
31800ef
55e7233
6d804cc
 
 
31800ef
d89ad44
 
 
30fc09f
db4a594
79c14a3
 
 
a009ce9
31800ef
 
 
86265dd
d89ad44
 
31800ef
f49bfe4
 
dde2d7e
e0fab4e
 
 
 
 
 
 
f49bfe4
 
 
 
 
 
31800ef
6d804cc
 
 
 
 
31800ef
d89ad44
6d804cc
 
 
 
 
 
 
d89ad44
 
 
 
a009ce9
86265dd
ec41653
a009ce9
3cb671d
db4a594
d89ad44
db4a594
a53bb64
ec41653
31800ef
79c14a3
 
 
a009ce9
31800ef
a009ce9
31800ef
79c14a3
 
a009ce9
31800ef
79c14a3
 
31800ef
 
 
79c14a3
a009ce9
31800ef
a009ce9
79c14a3
 
31800ef
1b754da
a009ce9
 
db4a594
d89ad44
14bb62b
 
a53bb64
30fc09f
14bb62b
a009ce9
db4a594
14bb62b
79c14a3
 
 
a009ce9
 
 
 
79c14a3
a009ce9
 
 
 
79c14a3
a009ce9
79c14a3
f5f7959
db4a594
d89ad44
a009ce9
86265dd
14bb62b
30fc09f
 
86265dd
a009ce9
30fc09f
31800ef
a009ce9
 
31800ef
 
30fc09f
 
 
86265dd
a009ce9
30fc09f
a009ce9
 
db4a594
14bb62b
30fc09f
79c14a3
a009ce9
79c14a3
a009ce9
31800ef
 
ec41653
14bb62b
 
 
30fc09f
79c14a3
 
31800ef
79c14a3
 
14bb62b
30fc09f
79c14a3
a009ce9
 
 
79c14a3
30fc09f
 
 
 
 
a009ce9
30fc09f
 
 
a009ce9
 
30fc09f
 
a009ce9
 
ec41653
5a24c85
06659a7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import requests
import time
import json
import os
from datetime import datetime, timedelta

# 1. PAGE CONFIGURATION
# Streamlit requires set_page_config to be the first st.* call in the script.
_PAGE_CONFIG = dict(
    page_title="S.A.T.A.R.K AI | UIDAI Fraud Detection",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_CONFIG)

# 2. ROBUST CSS STYLING (Dark Mode Proof)
# One injected <style> block that pins the theme regardless of the user's
# Streamlit light/dark preference: main content is forced light with dark
# text, the sidebar is forced dark with white text (with an exception so
# text typed INSIDE sidebar inputs stays dark on their white backgrounds).
# Also defines the .hotspot-card and .status-badge classes used by the
# raw-HTML snippets rendered further down.
st.markdown("""
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
        
        /* --- 1. MAIN CONTENT AREA (Light Theme Enforced) --- */
        /* Target only the main content, NOT the sidebar */
        .stApp > header { background-color: transparent !important; }
        
        div[data-testid="stAppViewContainer"] {
            background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
        }
        
        /* Force Dark Text in Main Area */
        section[data-testid="stMain"] * {
            color: #0f172a; /* Dark Blue Text */
        }
        
        /* Metric Cards in Main Area */
        div[data-testid="stMetric"] {
            background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
            border: 1px solid #e2e8f0; 
            border-radius: 12px; 
            box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
        }
        div[data-testid="stMetricValue"] { color: #0f172a !important; }
        div[data-testid="stMetricLabel"] { color: #475569 !important; }

        /* --- 2. SIDEBAR (Dark Theme Enforced) --- */
        section[data-testid="stSidebar"] {
            background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%);
            border-right: 1px solid #334155;
        }

        /* NUCLEAR OPTION: Force ALL text in sidebar to be White */
        section[data-testid="stSidebar"] * {
            color: #f8fafc !important; /* White Text */
        }

        /* EXCEPTION: Inputs inside Sidebar (Selectbox, DateInput) */
        /* These usually have white backgrounds, so we need Dark Text inside them */
        section[data-testid="stSidebar"] input, 
        section[data-testid="stSidebar"] textarea, 
        section[data-testid="stSidebar"] div[data-baseweb="select"] div {
            color: #0f172a !important; /* Dark Text for Inputs */
            -webkit-text-fill-color: #0f172a !important;
        }
        
        /* Specific fix for the 'Selected Option' in dropdowns */
        div[role="listbox"] div {
            color: #0f172a !important;
        }

        /* --- 3. COMMON ELEMENTS --- */
        /* DataFrame Headers */
        div[data-testid="stDataFrame"] div[role="columnheader"] {
            background-color: #f1f5f9;
            color: #0f172a !important;
        }
        
        /* Link Button Style */
        section[data-testid="stSidebar"] a {
            background-color: #3b82f6 !important;
            color: white !important;
            text-decoration: none;
            padding: 8px 16px;
            border-radius: 8px;
            display: block;
            text-align: center;
            border: 1px solid #2563eb;
        }

        /* Hotspot Cards */
        .hotspot-card { 
            background: white; 
            padding: 16px; 
            border-radius: 10px; 
            border-left: 5px solid; 
            margin-bottom: 12px; 
            box-shadow: 0 2px 4px rgba(0,0,0,0.05); 
        }
        /* Since Hotspot Cards are in Main Area, text inherits Dark, which is good. */
        
        /* Status Badges */
        .status-badge { 
            display: inline-flex; align-items: center; 
            padding: 6px 14px; border-radius: 9999px; 
            font-size: 12px; font-weight: 700; 
            text-transform: uppercase; 
        }
        .bg-green { background: #dcfce7; color: #166534 !important; }
    </style>
""", unsafe_allow_html=True)

# 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
@st.cache_data(show_spinner=False)
def fetch_coordinates_batch(unique_locations):
    """Resolve (district, state) pairs to (lat, lon) coordinates.

    Lookup order: persisted JSON cache -> hard-coded prefills -> Nominatim
    (OpenStreetMap) geocoding for anything still missing. Freshly geocoded
    results are written back to the JSON file so later runs skip the network.

    Args:
        unique_locations: iterable of (district, state) string tuples.

    Returns:
        dict mapping (district, state) -> (lat, lon) float tuple.
    """
    json_file = 'district_coords.json'
    coords_map = {}

    # Load the persisted cache; keys are stored as "district|state" strings
    # because JSON object keys cannot be tuples.
    if os.path.exists(json_file):
        try:
            with open(json_file, 'r') as f:
                loaded_data = json.load(f)
            for k, v in loaded_data.items():
                if "|" in k:
                    # maxsplit=1: a key containing more than one '|' no longer
                    # raises ValueError ("too many values to unpack").
                    d, s = k.split("|", 1)
                    coords_map[(d, s)] = tuple(v)
        except (json.JSONDecodeError, OSError):
            pass  # unreadable/corrupt cache -> treat as empty and rebuild

    # Districts Nominatim resolves poorly, plus major metros (lat, lon).
    prefills = {
        ('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
        ('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
        ('West Khasi Hills', 'Meghalaya'): (25.56, 91.29),
        ('Bijapur', 'Chhattisgarh'): (18.80, 80.82),
        ('Dhule', 'Maharashtra'): (20.90, 74.77),
        ('Dhamtari', 'Chhattisgarh'): (20.71, 81.55),
        ('Udupi', 'Karnataka'): (13.34, 74.75),
        ('Supaul', 'Bihar'): (26.29, 86.82),
        ('Puruliya', 'West Bengal'): (23.25, 86.50),
        ('Mumbai', 'Maharashtra'): (19.0760, 72.8777),
        ('Pune', 'Maharashtra'): (18.5204, 73.8567),
        ('Bangalore', 'Karnataka'): (12.9716, 77.5946),
        ('Bengaluru', 'Karnataka'): (12.9716, 77.5946),
        ('Chennai', 'Tamil Nadu'): (13.0827, 80.2707),
        ('Hyderabad', 'Telangana'): (17.3850, 78.4867),
        ('Kolkata', 'West Bengal'): (22.5726, 88.3639),
        ('Delhi', 'Delhi'): (28.7041, 77.1025),
        ('Shimla', 'Himachal Pradesh'): (31.1048, 77.1734)
    }
    for k, v in prefills.items():
        if k not in coords_map:
            coords_map[k] = v

    missing_locs = [loc for loc in unique_locations if loc not in coords_map]
    if not missing_locs:
        return coords_map

    progress_text = "📡 New locations found. Fetching coordinates..."
    my_bar = st.progress(0, text=progress_text)
    # Nominatim's usage policy requires an identifying User-Agent.
    headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
    updated = False

    for i, (district, state) in enumerate(missing_locs):
        my_bar.progress((i + 1) / len(missing_locs), text=f"📍 Locating: {district}, {state}")
        try:
            query = f"{district}, {state}, India"
            url = "https://nominatim.openstreetmap.org/search"
            params = {'q': query, 'format': 'json', 'limit': 1}
            response = requests.get(url, params=params, headers=headers, timeout=5)

            if response.status_code == 200 and response.json():
                data = response.json()[0]
                coords_map[(district, state)] = (float(data['lat']), float(data['lon']))
                updated = True
        except (requests.RequestException, ValueError, KeyError):
            # Best-effort: an unresolvable location simply stays missing and
            # will fall back to its state center in load_data.
            pass
        finally:
            # Rate-limit even after failures (the original skipped the sleep
            # on exceptions): Nominatim allows roughly 1 request per second.
            time.sleep(1.1)

    my_bar.empty()

    if updated:
        # Flatten tuple keys back to "district|state" strings for JSON.
        save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
        with open(json_file, 'w') as f:
            json.dump(save_data, f)

    return coords_map

# 4. MAIN DATA LOADER
@st.cache_data(ttl=300)
def load_data():
    """Load and enrich the analyzed Aadhaar dataset.

    Pipeline: read CSV -> parse dates -> normalize state-name variants ->
    attach (lat, lon) via the geocoding cache (with deterministic jitter) ->
    bucket RISK_SCORE into Low/Medium/High/Critical categories.

    Returns:
        Enriched DataFrame, or an empty DataFrame when the CSV is absent.
    """
    import zlib  # local import: only needed for the stable jitter seed below

    try:
        df = pd.read_csv('analyzed_aadhaar_data.csv')
    except FileNotFoundError:
        return pd.DataFrame()

    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    df['district'] = df['district'].astype(str).str.strip()
    df['state'] = df['state'].astype(str).str.strip()

    # Canonicalize state-name variants so filters and geocoding agree.
    state_mapping = {
        'Jammu & Kashmir': 'Jammu and Kashmir',
        'J&K': 'Jammu and Kashmir',
        'Jammu And Kashmir': 'Jammu and Kashmir',
        'Andaman & Nicobar Islands': 'Andaman and Nicobar Islands',
        'Dadra and Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Dadra & Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman and Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'Daman & Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'The Dadra And Nagar Haveli And The Daman And Diu': 'Dadra and Nagar Haveli and Daman and Diu',
        'Orissa': 'Odisha',
        'Chattisgarh': 'Chhattisgarh',
        'Telengana': 'Telangana',
        'Pondicherry': 'Puducherry'
    }
    df['state'] = df['state'].replace(state_mapping)

    unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
    coords_db = fetch_coordinates_batch(unique_locs)
    # Fallback anchors for rows whose district never geocoded.
    state_centers = {
        'Delhi': (28.7041, 77.1025), 'Maharashtra': (19.7515, 75.7139), 'Karnataka': (15.3173, 75.7139)
    }

    def get_lat_lon(row):
        # Compute a jittered (lat, lon) for one row.
        # BUGFIX: the original seeded with hash(key) % 2**32, but str hashing
        # is salted per process (PYTHONHASHSEED), so marker positions changed
        # on every restart; it also reseeded NumPy's GLOBAL random state.
        # zlib.crc32 is stable across runs, and a local Generator leaves the
        # global state untouched. Including row.name keeps per-row spread so
        # multiple rows of the same district do not collapse onto one point.
        key = (row['district'], row['state'])
        seed = zlib.crc32(f"{key[0]}|{key[1]}|{row.name}".encode('utf-8'))
        rng = np.random.default_rng(seed)
        if key in coords_db:
            lat, lon = coords_db[key]
            # Small jitter so co-located centers remain individually visible.
            return pd.Series({'lat': lat + rng.normal(0, 0.002), 'lon': lon + rng.normal(0, 0.002)})
        # Unknown district: scatter around the state center (or India's
        # centroid as the final fallback).
        center = state_centers.get(row['state'], (20.5937, 78.9629))
        return pd.Series({'lat': center[0] + rng.uniform(-0.5, 0.5), 'lon': center[1] + rng.uniform(-0.5, 0.5)})

    coords = df.apply(get_lat_lon, axis=1)
    df['lat'] = coords['lat']
    df['lon'] = coords['lon']
    # bins start at -1 so a score of exactly 0 lands in 'Low'.
    df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
    return df

# Cached load (ttl=300s); the spinner only shows work on a cache miss.
with st.spinner('Initializing S.A.T.A.R.K AI...'):
    df = load_data()

# 5. SIDEBAR & FILTERS
with st.sidebar:
    st.markdown("### πŸ›‘οΈ S.A.T.A.R.K AI Control")
    st.markdown("---")

    if not df.empty:
        if 'date' in df.columns:
            min_d, max_d = df['date'].min().date(), df['date'].max().date()
            dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
            if len(dr) == 2:
                df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]

        state_list = ['All'] + sorted(df['state'].unique().tolist())
        sel_state = st.selectbox("State", state_list)
        filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()

        dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
        sel_dist = st.selectbox("District", dist_list)
        if sel_dist != 'All':
            filtered_df = filtered_df[filtered_df['district'] == sel_dist]

        st.markdown("---")
        risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
        if risk_filter:
            filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
    else:
        filtered_df = pd.DataFrame()

    st.markdown("---")
    st.link_button("πŸ““ Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
    st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")

# 6. HEADER & METRICS
col1, col2 = st.columns([3, 1])
with col1:
    st.title("πŸ›‘οΈ S.A.T.A.R.K AI Dashboard")
    st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
    st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)

st.markdown("---")

if not filtered_df.empty:
    # Headline KPIs computed over the currently filtered slice.
    m1, m2, m3, m4, m5 = st.columns(5)
    total = len(filtered_df)
    high = len(filtered_df[filtered_df['RISK_SCORE'] > 75])   # high-risk threshold
    crit = len(filtered_df[filtered_df['RISK_SCORE'] > 85])   # critical threshold

    m1.metric("Total Centers", f"{total:,}", border=True)
    m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
    m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
    m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100", border=True)
    m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
elif df.empty:
    # The source CSV never loaded (load_data returned an empty frame).
    st.error("❌ Critical Error: 'analyzed_aadhaar_data.csv' not found. Please upload the data file.")
else:
    # BUGFIX: the data loaded fine but the sidebar filters matched nothing;
    # the original incorrectly showed the "file not found" error here.
    st.warning("⚠️ No records match the current filters. Adjust the sidebar filters to see data.")

st.markdown("##")

# 7. TABS
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["πŸ—ΊοΈ Geographic Risk", "πŸ“‹ Priority List", "πŸ“Š Patterns", "πŸ” AI Insights"])

with tab_map:
    c_map, c_det = st.columns([3, 1])
    with c_map:
        if not filtered_df.empty:
            zoom_lvl = 10 if sel_dist != 'All' else (6 if sel_state != 'All' else 3.8)
            fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
                                    color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
                                    center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
                                    hover_name="district", hover_data={"state": True, "pincode": True},
                                    mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
            fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig, use_container_width=True)
    with c_det:
        st.subheader("πŸ”₯ Top Hotspots")
        if not filtered_df.empty:
            top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
            for i, (d, r) in enumerate(top.iterrows(), 1):
                clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
                st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)

with tab_list:
    st.subheader("🎯 Priority Investigation")
    if not filtered_df.empty:
        targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
        csv = targets.to_csv(index=False).encode('utf-8')
        st.download_button("πŸ“₯ Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
        st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
                     column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)

with tab_charts:
    c1, c2 = st.columns(2)
    with c1:
        st.markdown("**Ghost ID Detection**")
        if not filtered_df.empty:
            # Activity volume vs. deviation from expected enrolment ratios;
            # point size encodes risk score, color encodes risk category.
            fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
                             color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
            # Dashed reference line marking the 0.2 deviation level.
            fig.add_hline(y=0.2, line_dash="dash", line_color="red")
            st.plotly_chart(fig, use_container_width=True)
    with c2:
        st.markdown("**Weekend Activity Analysis**")
        if not filtered_df.empty:
            # Total activity split by the is_weekend flag (0/1).
            wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
            wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
            fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
            st.plotly_chart(fig, use_container_width=True)

with tab_insights:
    st.subheader("πŸ” AI Detective Insights")
    if not filtered_df.empty:
        anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
        st.info(f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
        c_i1, c_i2 = st.columns(2)
        with c_i1:
            st.markdown("#### 🚨 Primary Risk Factors")
            st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
            st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
        with c_i2:
            st.markdown("#### πŸ’‘ Recommended Actions")
            st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
            st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")

st.markdown("---")
# Static footer, rendered as raw HTML.
st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.A.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)