import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from datetime import datetime, timedelta
# 1. PAGE CONFIGURATION
# Mojibake fix: the icon was an ISO-8859-7 mis-decode ("π‘οΈ") of the
# UTF-8 shield emoji; restored to the intended glyph.
st.set_page_config(
    page_title="S.T.A.R.K AI | UIDAI Fraud Detection",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)
# 2. ENHANCED PROFESSIONAL STYLING (Optimized)
# Global CSS injected once; selectors hook Streamlit's data-testid markers.
_STARK_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
.stApp { background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%); color: #0f172a; font-family: 'Inter', sans-serif; }
/* METRIC CARDS */
div[data-testid="stMetric"] {
background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
border: 1px solid #e2e8f0; border-radius: 12px; padding: 20px;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1); transition: transform 0.2s;
}
div[data-testid="stMetric"]:hover { transform: translateY(-2px); box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1); }
div[data-testid="stMetricValue"] { color: #0f172a !important; font-weight: 800 !important; font-size: 2rem !important; }
div[data-testid="stMetricLabel"] { color: #64748b !important; font-weight: 600 !important; text-transform: uppercase; font-size: 0.75rem; letter-spacing: 0.05em; }
/* DATAFRAME */
div[data-testid="stDataFrame"] { border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
div[data-testid="stDataFrame"] div[role="columnheader"] {
background: linear-gradient(to bottom, #f8fafc, #f1f5f9) !important;
color: #0f172a !important; font-weight: 700 !important; border-bottom: 2px solid #cbd5e1 !important;
}
/* SIDEBAR */
[data-testid="stSidebar"] { background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%); border-right: 1px solid #334155; }
[data-testid="stSidebar"] * { color: #f8fafc !important; }
[data-testid="stSidebar"] .stSelectbox label { color: #cbd5e1 !important; }
/* UI ELEMENTS */
h1 { background: linear-gradient(135deg, #0f172a 0%, #334155 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-weight: 800 !important; }
.status-badge { display: inline-flex; align-items: center; padding: 6px 14px; border-radius: 9999px; font-size: 12px; font-weight: 700; text-transform: uppercase; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
.bg-red { background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); color: #991b1b; }
.bg-green { background: linear-gradient(135deg, #dcfce7 0%, #bbf7d0 100%); color: #166534; }
.bg-amber { background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); color: #92400e; }
/* TABS & BUTTONS */
.stTabs [data-baseweb="tab-list"] { gap: 8px; }
.stTabs [aria-selected="true"] { background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%); color: white !important; }
.stButton button { border-radius: 8px; font-weight: 600; }
/* HOTSPOTS */
.hotspot-card { background: white; padding: 16px; border-radius: 10px; border-left: 5px solid; margin-bottom: 12px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); transition: all 0.2s; }
.hotspot-card:hover { transform: translateX(4px); box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.js-plotly-plot .plotly .main-svg { background-color: rgba(0,0,0,0) !important; }
</style>
"""
st.markdown(_STARK_CSS, unsafe_allow_html=True)
# 3. ENHANCED DATA LOADING
@st.cache_data(ttl=300)
def load_data():
    """Load the analyzed Aadhaar dataset and derive plotting columns.

    Reads ``analyzed_aadhaar_data.csv``, parses the ``date`` column when
    present, synthesizes stable pseudo-coordinates (``lat``/``lon``) for
    each row from its state/district names, and bins ``RISK_SCORE`` into
    a categorical ``risk_category`` column.

    Returns:
        pandas.DataFrame: the enriched dataframe.
    """
    import zlib  # local import: stable string checksum for reproducible jitter

    # Strictly load data from CSV.
    df = pd.read_csv('analyzed_aadhaar_data.csv')
    # st.toast must stay OUT of this cached function (CacheReplayClosureError).
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'])

    # Precise geometric centers (lat, lon) of Indian states/UTs.
    state_centers = {
        'Andaman and Nicobar Islands': (11.7401, 92.6586), 'Andhra Pradesh': (15.9129, 79.7400),
        'Arunachal Pradesh': (28.2180, 94.7278), 'Assam': (26.2006, 92.9376), 'Bihar': (25.0961, 85.3131),
        'Chandigarh': (30.7333, 76.7794), 'Chhattisgarh': (21.2787, 81.8661), 'Delhi': (28.7041, 77.1025),
        'Goa': (15.2993, 74.1240), 'Gujarat': (22.2587, 71.1924), 'Haryana': (29.0588, 76.0856),
        'Himachal Pradesh': (31.9579, 77.1095), 'Jammu and Kashmir': (33.7782, 76.5762), 'Jharkhand': (23.6102, 85.2799),
        'Karnataka': (15.3173, 75.7139), 'Kerala': (10.8505, 76.2711), 'Ladakh': (34.1526, 77.5770),
        'Madhya Pradesh': (22.9734, 78.6569), 'Maharashtra': (19.7515, 75.7139), 'Manipur': (24.6637, 93.9063),
        'Meghalaya': (25.4670, 91.3662), 'Mizoram': (23.1645, 92.9376), 'Nagaland': (26.1584, 94.5624),
        'Odisha': (20.9517, 85.0985), 'Puducherry': (11.9416, 79.8083), 'Punjab': (31.1471, 75.3412),
        'Rajasthan': (27.0238, 74.2179), 'Sikkim': (27.5330, 88.5122), 'Tamil Nadu': (11.1271, 78.6569),
        'Telangana': (18.1124, 79.0193), 'Tripura': (23.9408, 91.9882), 'Uttar Pradesh': (26.8467, 80.9462),
        'Uttarakhand': (30.0668, 79.0193), 'West Bengal': (22.9868, 87.8550)
    }
    # Expanded aspect-ratio definitions: (lat spread, lon spread) per state,
    # so scattered points roughly match each state's shape.
    state_spreads = {
        'Kerala': (1.2, 0.25), 'West Bengal': (1.4, 0.4), 'Assam': (0.4, 1.8),
        'Maharashtra': (1.8, 2.2), 'Uttar Pradesh': (1.2, 2.5), 'Bihar': (0.8, 1.5),
        'Delhi': (0.1, 0.12), 'Goa': (0.15, 0.15), 'Chandigarh': (0.04, 0.04),
        'Gujarat': (1.5, 1.8), 'Rajasthan': (2.0, 2.0), 'Madhya Pradesh': (1.8, 2.5),
        'Himachal Pradesh': (0.6, 0.8), 'Punjab': (0.8, 0.9), 'Haryana': (0.9, 0.8),
        'Tamil Nadu': (1.2, 1.0), 'Karnataka': (1.5, 1.2), 'Telangana': (1.0, 1.0),
        'Andhra Pradesh': (1.5, 1.5), 'Odisha': (1.2, 1.2), 'Chhattisgarh': (1.5, 0.9),
        'Jharkhand': (0.8, 1.0), 'Jammu and Kashmir': (1.0, 1.5), 'Ladakh': (1.0, 1.5),
        'Uttarakhand': (0.7, 0.8)
    }

    def get_coords(row):
        # Jitter a point around the state center, biased toward the compass
        # direction embedded in the district name ("north", "east", ...).
        state = row.get('state', 'Delhi')
        district = str(row.get('district', 'Unknown')).lower()
        base_lat, base_lon = state_centers.get(state, (20.5937, 78.9629))
        # Safer default spread if the state is not in the table.
        lat_scale, lon_scale = state_spreads.get(state, (0.7, 0.7))
        lat_bias, lon_bias = 0, 0
        bias = 0.6
        if 'north' in district: lat_bias += lat_scale * bias
        if 'south' in district: lat_bias -= lat_scale * bias
        if 'east' in district: lon_bias += lon_scale * bias
        if 'west' in district: lon_bias -= lon_scale * bias
        # BUG FIX: the original seeded with hash(state + district), but str
        # hashing is salted per process (PYTHONHASHSEED), so the "stable"
        # coordinates jumped on every app restart / cache expiry. zlib.crc32
        # is deterministic across processes.
        np.random.seed(zlib.crc32((state + district).encode('utf-8')))
        # Directionally-biased points get a tighter spread.
        rf = 0.5 if (lat_bias or lon_bias) else 1.0
        return pd.Series({
            'lat': base_lat + lat_bias + np.random.uniform(-lat_scale*rf, lat_scale*rf) + np.random.normal(0, 0.04),
            'lon': base_lon + lon_bias + np.random.uniform(-lon_scale*rf, lon_scale*rf) + np.random.normal(0, 0.04)
        })

    coords = df.apply(get_coords, axis=1)
    df['lat'], df['lon'] = coords['lat'], coords['lon']
    df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100],
                                 labels=['Low', 'Medium', 'High', 'Critical'])
    return df
# Load once at startup; st.cache_data makes reruns cheap.
with st.spinner('Loading S.T.A.R.K AI System...'):
    df = load_data()
# SYNTAX FIX: the following comment had been split across three lines by an
# encoding mangle (the check-mark emoji's bytes became line breaks), leaving
# two uncommented fragments that broke the file. Restored as one comment.
# Toast intentionally lives outside the cached function (calling st.toast
# inside an st.cache_data function raises CacheReplayClosureError):
# st.toast("✅ Data loaded successfully", icon="✅")
# 4. SIDEBAR & FILTERS
# Builds `filtered_df` (used by every tab) by chaining date, state, district
# and risk-level filters. Mojibake icons restored (🛡️ confirmed from the
# surviving UTF-8 bytes; the link icon was unrecoverable — 📘 is a best
# guess, confirm against the deployed app).
with st.sidebar:
    st.markdown("### 🛡️ S.T.A.R.K AI Control")
    st.markdown("---")
    if 'date' in df.columns:
        min_d, max_d = df['date'].min().date(), df['date'].max().date()
        dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
        # date_input returns a 1-tuple mid-selection; only filter when complete.
        if len(dr) == 2:
            df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
    state_list = ['All'] + sorted(df['state'].unique().tolist())
    sel_state = st.selectbox("State", state_list)
    filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
    # District choices depend on the state already selected.
    dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
    sel_dist = st.selectbox("District", dist_list)
    if sel_dist != 'All':
        filtered_df = filtered_df[filtered_df['district'] == sel_dist]
    st.markdown("---")
    risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
    # An empty multiselect means "no risk filter", not "no rows".
    if risk_filter:
        filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
    st.markdown("---")
    st.link_button("📘 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
    st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
# 5. HEADER & METRICS
col1, col2 = st.columns([3, 1])
with col1:
    # Mojibake fix: title icon restored to the shield emoji.
    st.title("🛡️ S.T.A.R.K AI Dashboard")
    st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
    # NOTE(review): the badge glyph was mojibake ("β", a lost 3-byte UTF-8
    # char); restored as ✅ to match the green badge — confirm intent.
    st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">✅ System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
st.markdown("---")

m1, m2, m3, m4, m5 = st.columns(5)
# Headline counts; note "Critical" (>85) is a subset of "High Risk" (>75).
total = len(filtered_df)
high = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
crit = len(filtered_df[filtered_df['RISK_SCORE'] > 85])
m1.metric("Total Centers", f"{total:,}", border=True)
m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
# Guard mean() against an empty selection (would render "nan").
m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100" if not filtered_df.empty else "0", border=True)
m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
st.markdown("##")
# 6. TABS
# Mojibake fix: tab icons restored from ISO-8859-7 mis-decodes. 🗺️ is
# confirmed by the surviving bytes; 📋/📊/🔍 are best guesses for glyphs
# whose trailing bytes were lost — confirm against the deployed app.
tab_map, tab_list, tab_charts, tab_insights = st.tabs(
    ["🗺️ Geographic Risk", "📋 Priority List", "📊 Patterns", "🔍 AI Insights"]
)
with tab_map:
    # Left: interactive risk map. Right: top-5 district hotspot cards.
    c_map, c_det = st.columns([3, 1])
    with c_map:
        if not filtered_df.empty:
            # Bubble map — color encodes risk, size encodes activity volume.
            fig = px.scatter_mapbox(
                filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
                color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"],
                size_max=25, zoom=4.8 if sel_state != 'All' else 3.8,
                center={"lat": 22.0, "lon": 80.0}, hover_name="district",
                mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
            fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No data found.")
    with c_det:
        # Mojibake fix: header icon restored to 🔥 (confirmed from bytes).
        st.subheader("🔥 Top Hotspots")
        if not filtered_df.empty:
            top = filtered_df.groupby('district').agg(
                {'RISK_SCORE': 'mean', 'total_activity': 'sum'}
            ).sort_values('RISK_SCORE', ascending=False).head(5)
            for i, (d, r) in enumerate(top.iterrows(), 1):
                # Card accent color by mean risk (removed unused badge label).
                clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
                st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
with tab_list:
    # Mojibake fixes: 🎯 confirmed from surviving bytes; the export glyph
    # shared bytes with 🔥 but context says download, so 📥.
    st.subheader("🎯 Priority Investigation")
    # Everything above the "High" threshold, worst first.
    targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
    csv = targets.to_csv(index=False).encode('utf-8')
    st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
    st.dataframe(
        targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
        column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)},
        use_container_width=True, hide_index=True)
with tab_charts:
    # Two side-by-side diagnostics: deviation scatter and weekend bar chart.
    left_col, right_col = st.columns(2)
    risk_palette = {'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}
    with left_col:
        st.markdown("**Ghost ID Detection**")
        scatter_fig = px.scatter(
            filtered_df, x="total_activity", y="ratio_deviation",
            color="risk_category", size="RISK_SCORE",
            color_discrete_map=risk_palette, height=350,
        )
        # Dashed red line marks the deviation threshold.
        scatter_fig.add_hline(y=0.2, line_dash="dash", line_color="red")
        st.plotly_chart(scatter_fig, use_container_width=True)
    with right_col:
        st.markdown("**Weekend Activity Analysis**")
        # Total activity split by the is_weekend flag (0/1 → label).
        weekend_totals = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
        weekend_totals['Type'] = weekend_totals['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
        bar_fig = px.bar(
            weekend_totals, x='Type', y='total_activity', color='Type',
            color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350,
        )
        st.plotly_chart(bar_fig, use_container_width=True)
with tab_insights:
    # Mojibake fixes: 🤖/🚨/💡 and "2σ" confirmed from surviving UTF-8
    # bytes; the header glyph was fully lost — 🔍 is a best guess.
    st.subheader("🔍 AI Detective Insights")
    if not filtered_df.empty:
        # NOTE(review): the copy claims "> 2σ" but the code uses a fixed
        # 0.4 deviation cut-off — confirm the two actually correspond.
        anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
        st.info(f"🤖 **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2σ from mean).")
        c_i1, c_i2 = st.columns(2)
        with c_i1:
            st.markdown("#### 🚨 Primary Risk Factors")
            st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
            st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
        with c_i2:
            st.markdown("#### 💡 Recommended Actions")
            st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
            st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
# Page footer.
st.markdown("---")
_footer_html = (
    '<div style="text-align: center; font-size: 13px; color: #94a3b8;">'
    '<b>Project S.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>'
)
st.markdown(_footer_html, unsafe_allow_html=True)