import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import requests
import time
import json
import os
from datetime import datetime
# 1. PAGE CONFIGURATION
st.set_page_config(
page_title="S.A.T.A.R.K AI | UIDAI Fraud Detection",
page_icon="πŸ›‘οΈ",
layout="wide",
initial_sidebar_state="expanded"
)
# 2. ROBUST CSS STYLING (Dark Mode Proof)
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
/* --- 1. MAIN CONTENT AREA (Light Theme Enforced) --- */
/* Target only the main content, NOT the sidebar */
.stApp > header { background-color: transparent !important; }
div[data-testid="stAppViewContainer"] {
background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
}
/* Force Dark Text in Main Area */
section[data-testid="stMain"] * {
color: #0f172a; /* Dark Blue Text */
}
/* Metric Cards in Main Area */
div[data-testid="stMetric"] {
background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
border: 1px solid #e2e8f0;
border-radius: 12px;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
}
div[data-testid="stMetricValue"] { color: #0f172a !important; }
div[data-testid="stMetricLabel"] { color: #475569 !important; }
/* --- 2. SIDEBAR (Dark Theme Enforced) --- */
section[data-testid="stSidebar"] {
background: linear-gradient(180deg, #1e293b 0%, #0f172a 100%);
border-right: 1px solid #334155;
}
/* NUCLEAR OPTION: Force ALL text in sidebar to be White */
section[data-testid="stSidebar"] * {
color: #f8fafc !important; /* White Text */
}
/* EXCEPTION: Inputs inside Sidebar (Selectbox, DateInput) */
/* These usually have white backgrounds, so we need Dark Text inside them */
section[data-testid="stSidebar"] input,
section[data-testid="stSidebar"] textarea,
section[data-testid="stSidebar"] div[data-baseweb="select"] div {
color: #0f172a !important; /* Dark Text for Inputs */
-webkit-text-fill-color: #0f172a !important;
}
/* Specific fix for the 'Selected Option' in dropdowns */
div[role="listbox"] div {
color: #0f172a !important;
}
/* --- 3. COMMON ELEMENTS --- */
/* DataFrame Headers */
div[data-testid="stDataFrame"] div[role="columnheader"] {
background-color: #f1f5f9;
color: #0f172a !important;
}
/* Link Button Style */
section[data-testid="stSidebar"] a {
background-color: #3b82f6 !important;
color: white !important;
text-decoration: none;
padding: 8px 16px;
border-radius: 8px;
display: block;
text-align: center;
border: 1px solid #2563eb;
}
/* Hotspot Cards */
.hotspot-card {
background: white;
padding: 16px;
border-radius: 10px;
border-left: 5px solid;
margin-bottom: 12px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
/* Since Hotspot Cards are in Main Area, text inherits Dark, which is good. */
/* Status Badges */
.status-badge {
display: inline-flex; align-items: center;
padding: 6px 14px; border-radius: 9999px;
font-size: 12px; font-weight: 700;
text-transform: uppercase;
}
.bg-green { background: #dcfce7; color: #166534 !important; }
</style>
""", unsafe_allow_html=True)
# 3. DYNAMIC GEOCODING ENGINE WITH PERSISTENT JSON
@st.cache_data(show_spinner=False)
def fetch_coordinates_batch(unique_locations):
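    """Resolve (district, state) pairs to (lat, lon) coordinates.

    Resolution order: persistent JSON cache ('district_coords.json', keyed
    "District|State") -> hard-coded prefills for common districts -> live
    Nominatim lookups for anything still missing. Newly fetched coordinates
    are written back to the JSON file, and @st.cache_data memoizes the whole
    result per unique set of locations, so the progress bar only appears on
    genuine cache misses.
    """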
json_file = 'district_coords.json'
coords_map = {}
if os.path.exists(json_file):
try:
with open(json_file, 'r') as f:
loaded_data = json.load(f)
for k, v in loaded_data.items():
if "|" in k:
d, s = k.split("|")
coords_map[(d, s)] = tuple(v)
except json.JSONDecodeError:
pass
prefills = {
('Gautam Buddha Nagar', 'Uttar Pradesh'): (28.39, 77.65),
('West Jaintia Hills', 'Meghalaya'): (25.55, 92.38),
('West Khasi Hills', 'Meghalaya'): (25.56, 91.29),
('Bijapur', 'Chhattisgarh'): (18.80, 80.82),
('Dhule', 'Maharashtra'): (20.90, 74.77),
('Dhamtari', 'Chhattisgarh'): (20.71, 81.55),
('Udupi', 'Karnataka'): (13.34, 74.75),
('Supaul', 'Bihar'): (26.29, 86.82),
('Puruliya', 'West Bengal'): (23.25, 86.50),
('Mumbai', 'Maharashtra'): (19.0760, 72.8777),
('Pune', 'Maharashtra'): (18.5204, 73.8567),
('Bangalore', 'Karnataka'): (12.9716, 77.5946),
('Bengaluru', 'Karnataka'): (12.9716, 77.5946),
('Chennai', 'Tamil Nadu'): (13.0827, 80.2707),
('Hyderabad', 'Telangana'): (17.3850, 78.4867),
('Kolkata', 'West Bengal'): (22.5726, 88.3639),
('Delhi', 'Delhi'): (28.7041, 77.1025),
('Shimla', 'Himachal Pradesh'): (31.1048, 77.1734)
}
for k, v in prefills.items():
if k not in coords_map:
coords_map[k] = v
missing_locs = [loc for loc in unique_locations if loc not in coords_map]
if not missing_locs:
return coords_map
    progress_text = "📡 New locations found. Fetching coordinates..."
my_bar = st.progress(0, text=progress_text)
headers = {'User-Agent': 'StarkDashboard/1.0 (Government Research Project)'}
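    # Nominatim's usage policy requires an identifying User-Agent and caps
    # clients at roughly one request per second (hence time.sleep(1.1) below).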
updated = False
    for i, (district, state) in enumerate(missing_locs):
        try:
            my_bar.progress((i + 1) / len(missing_locs), text=f"📍 Locating: {district}, {state}")
            query = f"{district}, {state}, India"
            url = "https://nominatim.openstreetmap.org/search"
            params = {'q': query, 'format': 'json', 'limit': 1}
            response = requests.get(url, params=params, headers=headers, timeout=5)
            results = response.json() if response.status_code == 200 else []
            if results:
                coords_map[(district, state)] = (float(results[0]['lat']), float(results[0]['lon']))
                updated = True
        except Exception:
            continue
        finally:
            # Sleep on every attempt, not just successes, so failed or rejected
            # requests still respect the one-request-per-second limit.
            time.sleep(1.1)
my_bar.empty()
if updated:
save_data = {f"{k[0]}|{k[1]}": v for k, v in coords_map.items()}
with open(json_file, 'w') as f:
json.dump(save_data, f)
return coords_map
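# The persistent cache is a flat JSON map, e.g.
#   {"Shimla|Himachal Pradesh": [31.1048, 77.1734], ...}
# Deleting 'district_coords.json' forces a full re-geocode on the next run.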
# 4. MAIN DATA LOADER
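# Expected columns in 'analyzed_aadhaar_data.csv', inferred from usage below:
# date, state, district, pincode, enrol_adult, total_activity, is_weekend,
# ratio_deviation, RISK_SCORE.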
@st.cache_data(ttl=300)
def load_data():
try:
df = pd.read_csv('analyzed_aadhaar_data.csv')
except FileNotFoundError:
return pd.DataFrame()
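    # Minimal schema guard. Assumption: only the columns this script depends on
    # unconditionally are checked here; see the full expected schema above and
    # extend the set if the upstream analysis pipeline changes.
    required_cols = {'district', 'state', 'total_activity', 'RISK_SCORE'}
    missing_cols = required_cols - set(df.columns)
    if missing_cols:
        st.error(f"CSV is missing required columns: {', '.join(sorted(missing_cols))}")
        return pd.DataFrame()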
if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'], errors='coerce')  # malformed dates become NaT instead of raising
df['district'] = df['district'].astype(str).str.strip()
df['state'] = df['state'].astype(str).str.strip()
state_mapping = {
'Jammu & Kashmir': 'Jammu and Kashmir',
'J&K': 'Jammu and Kashmir',
'Jammu And Kashmir': 'Jammu and Kashmir',
'Andaman & Nicobar Islands': 'Andaman and Nicobar Islands',
'Dadra and Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
'Dadra & Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
'Daman and Diu': 'Dadra and Nagar Haveli and Daman and Diu',
'Daman & Diu': 'Dadra and Nagar Haveli and Daman and Diu',
'The Dadra And Nagar Haveli And The Daman And Diu': 'Dadra and Nagar Haveli and Daman and Diu',
'Orissa': 'Odisha',
'Chattisgarh': 'Chhattisgarh',
'Telengana': 'Telangana',
'Pondicherry': 'Puducherry'
}
df['state'] = df['state'].replace(state_mapping)
unique_locs = list(df[['district', 'state']].drop_duplicates().itertuples(index=False, name=None))
coords_db = fetch_coordinates_batch(unique_locs)
state_centers = {
'Delhi': (28.7041, 77.1025), 'Maharashtra': (19.7515, 75.7139), 'Karnataka': (15.3173, 75.7139)
}
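    # get_lat_lon: known districts get their cached coordinates plus a small
    # Gaussian jitter (sigma = 0.002 deg, roughly 200 m) so co-located markers
    # don't stack exactly. Unknown districts are scattered around their state
    # centre (or India's centroid, 20.59 N / 78.96 E) with a key-seeded offset;
    # note that Python's str hash is salted per process, so these fallback
    # positions are stable within a session but can shift after a restart.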
def get_lat_lon(row):
key = (row['district'], row['state'])
if key in coords_db:
lat, lon = coords_db[key]
return pd.Series({'lat': lat + np.random.normal(0, 0.002), 'lon': lon + np.random.normal(0, 0.002)})
center = state_centers.get(row['state'], (20.5937, 78.9629))
np.random.seed(hash(key) % 2**32)
return pd.Series({'lat': center[0] + np.random.uniform(-0.5, 0.5), 'lon': center[1] + np.random.uniform(-0.5, 0.5)})
coords = df.apply(get_lat_lon, axis=1)
df['lat'] = coords['lat']
df['lon'] = coords['lon']
df['risk_category'] = pd.cut(df['RISK_SCORE'], bins=[-1, 50, 75, 85, 100], labels=['Low', 'Medium', 'High', 'Critical'])
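    # Risk bins: Low (0-50], Medium (50-75], High (75-85], Critical (85-100].
    # These cut-offs match the > 75 and > 85 thresholds used by the header
    # metrics and hotspot cards further down.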
return df
with st.spinner('Initializing S.A.T.A.R.K AI...'):
df = load_data()
# 5. SIDEBAR & FILTERS
with st.sidebar:
st.markdown("### πŸ›‘οΈ S.A.T.A.R.K AI Control")
st.markdown("---")
if not df.empty:
if 'date' in df.columns:
min_d, max_d = df['date'].min().date(), df['date'].max().date()
dr = st.date_input("Date Range", value=(min_d, max_d), min_value=min_d, max_value=max_d)
if len(dr) == 2:
df = df[(df['date'].dt.date >= dr[0]) & (df['date'].dt.date <= dr[1])]
state_list = ['All'] + sorted(df['state'].unique().tolist())
sel_state = st.selectbox("State", state_list)
filtered_df = df[df['state'] == sel_state] if sel_state != 'All' else df.copy()
dist_list = ['All'] + sorted(filtered_df['district'].unique().tolist())
sel_dist = st.selectbox("District", dist_list)
if sel_dist != 'All':
filtered_df = filtered_df[filtered_df['district'] == sel_dist]
st.markdown("---")
risk_filter = st.multiselect("Risk Level", ['Low', 'Medium', 'High', 'Critical'], default=['High', 'Critical'])
if risk_filter:
filtered_df = filtered_df[filtered_df['risk_category'].isin(risk_filter)]
else:
filtered_df = pd.DataFrame()
st.markdown("---")
    st.link_button("📓 Open Analysis Notebook", "https://colab.research.google.com/drive/1YAQ4nfxltvG_cts3fmGc_zi2JQc4oPOT?usp=sharing", use_container_width=True)
st.info(f"**User:** UIDAI_Officer\n\n**Team:** UIDAI_4571\n\n**Update:** {datetime.now().strftime('%H:%M:%S')}")
# 6. HEADER & METRICS
col1, col2 = st.columns([3, 1])
with col1:
    st.title("🛡️ S.A.T.A.R.K AI Dashboard")
st.markdown("**Context-Aware Fraud Detection & Prevention System**")
with col2:
st.markdown(f"""<div style="text-align: right; padding-top: 20px;"><span class="status-badge bg-green">● System Online</span><div style="font-size: 12px; color: #64748b; margin-top: 8px;">{datetime.now().strftime('%d %b %Y')}</div></div>""", unsafe_allow_html=True)
st.markdown("---")
if not filtered_df.empty:
m1, m2, m3, m4, m5 = st.columns(5)
total = len(filtered_df)
high = len(filtered_df[filtered_df['RISK_SCORE'] > 75])
crit = len(filtered_df[filtered_df['RISK_SCORE'] > 85])
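    # "Weekend Spikes" counts centers that combine weekend operation with an
    # elevated risk score (> 70); unusual weekend volume is one of the fraud
    # signals explored in the Patterns tab.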
m1.metric("Total Centers", f"{total:,}", border=True)
m2.metric("High Risk", f"{high}", delta="Review", delta_color="inverse", border=True)
m3.metric("Critical", f"{crit}", delta="Urgent", delta_color="inverse", border=True)
m4.metric("Avg Risk", f"{filtered_df['RISK_SCORE'].mean():.1f}/100", border=True)
m5.metric("Weekend Spikes", f"{len(filtered_df[(filtered_df['is_weekend'] == 1) & (filtered_df['RISK_SCORE'] > 70)])}", delta="Suspicious", delta_color="off", border=True)
else:
    st.error("❌ No data to display. Check that 'analyzed_aadhaar_data.csv' has been uploaded and that the current filters match at least one record.")
st.markdown("##")
# 7. TABS
tab_map, tab_list, tab_charts, tab_insights = st.tabs(["🗺️ Geographic Risk", "📋 Priority List", "📊 Patterns", "🔍 AI Insights"])
with tab_map:
c_map, c_det = st.columns([3, 1])
with c_map:
if not filtered_df.empty:
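            # Zoom tightens with filter specificity: district (10) > state (6) > all-India (3.8).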
zoom_lvl = 10 if sel_dist != 'All' else (6 if sel_state != 'All' else 3.8)
fig = px.scatter_mapbox(filtered_df, lat="lat", lon="lon", color="RISK_SCORE", size="total_activity",
color_continuous_scale=["#22c55e", "#fbbf24", "#f97316", "#ef4444"], size_max=25, zoom=zoom_lvl,
center=None if sel_state == 'All' else {"lat": filtered_df['lat'].mean(), "lon": filtered_df['lon'].mean()},
hover_name="district", hover_data={"state": True, "pincode": True},
mapbox_style="carto-positron", height=650, title="<b>Live Fraud Risk Heatmap</b>")
fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
st.plotly_chart(fig, use_container_width=True)
with c_det:
st.subheader("πŸ”₯ Top Hotspots")
if not filtered_df.empty:
top = filtered_df.groupby('district').agg({'RISK_SCORE': 'mean', 'total_activity': 'sum'}).sort_values('RISK_SCORE', ascending=False).head(5)
for i, (d, r) in enumerate(top.iterrows(), 1):
clr = "#ef4444" if r['RISK_SCORE'] > 85 else "#f97316"
st.markdown(f"""<div class="hotspot-card" style="border-left-color: {clr};"><b>#{i} {d}</b><br><span style="font-size:12px;color:#64748b">Risk: <b style="color:{clr}">{r['RISK_SCORE']:.1f}</b> | Act: {int(r['total_activity'])}</span></div>""", unsafe_allow_html=True)
with tab_list:
st.subheader("🎯 Priority Investigation")
if not filtered_df.empty:
targets = filtered_df[filtered_df['RISK_SCORE'] > 75].sort_values('RISK_SCORE', ascending=False)
csv = targets.to_csv(index=False).encode('utf-8')
        st.download_button("📥 Export CSV", data=csv, file_name="stark_priority.csv", mime="text/csv", type="primary")
st.dataframe(targets[['date', 'state', 'district', 'pincode', 'enrol_adult', 'total_activity', 'RISK_SCORE']],
column_config={"RISK_SCORE": st.column_config.ProgressColumn("Risk", format="%.1f%%", min_value=0, max_value=100)}, use_container_width=True, hide_index=True)
with tab_charts:
c1, c2 = st.columns(2)
with c1:
st.markdown("**Ghost ID Detection**")
if not filtered_df.empty:
fig = px.scatter(filtered_df, x="total_activity", y="ratio_deviation", color="risk_category", size="RISK_SCORE",
color_discrete_map={'Critical': '#ef4444', 'High': '#f97316', 'Medium': '#eab308', 'Low': '#22c55e'}, height=350)
fig.add_hline(y=0.2, line_dash="dash", line_color="red")
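            # The red dashed line marks the 0.2 ratio-deviation cut-off used as
            # the visual threshold for suspected ghost-ID activity.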
st.plotly_chart(fig, use_container_width=True)
with c2:
st.markdown("**Weekend Activity Analysis**")
if not filtered_df.empty:
wk_counts = filtered_df.groupby('is_weekend')['total_activity'].sum().reset_index()
wk_counts['Type'] = wk_counts['is_weekend'].map({0: 'Weekday', 1: 'Weekend'})
fig = px.bar(wk_counts, x='Type', y='total_activity', color='Type', color_discrete_map={'Weekday': '#3b82f6', 'Weekend': '#ef4444'}, height=350)
st.plotly_chart(fig, use_container_width=True)
with tab_insights:
st.subheader("πŸ” AI Detective Insights")
if not filtered_df.empty:
anom = filtered_df[filtered_df['ratio_deviation'] > 0.4]
st.info(f"πŸ€– **AI Analysis:** Detected {len(anom)} centers with statistically significant enrollment deviations (> 2Οƒ from mean).")
c_i1, c_i2 = st.columns(2)
with c_i1:
st.markdown("#### 🚨 Primary Risk Factors")
st.markdown("- **High Volume on Weekends:** 28% correlation with fraud")
st.markdown("- **Adult Enrollment Spikes:** 45% correlation with ghost IDs")
with c_i2:
st.markdown("#### πŸ’‘ Recommended Actions")
st.markdown(f"1. Immediate audit of {len(filtered_df[filtered_df['RISK_SCORE']>90])} centers with >90 Risk Score")
st.markdown("2. Deploy biometric re-verification for 'Rural A' cluster")
st.markdown("---")
st.markdown("""<div style="text-align: center; font-size: 13px; color: #94a3b8;"><b>Project S.A.T.A.R.K AI</b> | UIDAI Hackathon 2026</div>""", unsafe_allow_html=True)