Spaces:

rockerritesh
/

electionnepal

Paused

App Files Files Community

electionnepal / app.py

rockerritesh

feat: add constituency winning threshold analysis page

fa2b444 unverified about 2 months ago

raw

history blame contribute delete

180 kB

	"""
	Nepal Election Analysis - GIS Visualization & Comparative Voter Study
	=====================================================================
	A comprehensive application for analyzing voter data changes between two election periods
	with interactive GIS maps and statistical visualizations.

	Author: Data Analysis Project
	Date: January 2026
	"""

	import streamlit as st
	import pandas as pd
	import numpy as np
	import geopandas as gpd
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	import sqlite3
	import json
	import folium
	from streamlit_folium import st_folium
	import os
	from io import BytesIO
	from datetime import datetime

	# Page configuration
	st.set_page_config(
	page_title="Nepal Election Analysis",
	page_icon="🗳️",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Custom CSS for better styling
	st.markdown("""
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: bold;
	color: #1E3A8A;
	text-align: center;
	margin-bottom: 1rem;
	}
	.sub-header {
	font-size: 1.5rem;
	color: #374151;
	text-align: center;
	margin-bottom: 2rem;
	}
	.metric-card {
	background-color: #F3F4F6;
	padding: 1rem;
	border-radius: 0.5rem;
	text-align: center;
	}
	.positive-change {
	color: #059669;
	font-weight: bold;
	}
	.negative-change {
	color: #DC2626;
	font-weight: bold;
	}
	.stTabs [data-baseweb="tab-list"] {
	gap: 24px;
	}
	.stTabs [data-baseweb="tab"] {
	height: 50px;
	padding-left: 20px;
	padding-right: 20px;
	}
	</style>
	""", unsafe_allow_html=True)


	# ── Login & User Tracking ────────────────────────────────────────────────────

	LOGIN_TRACKING_FILE = "data/login_tracking.json"


	def init_login_tracking_file():
	"""Ensure the login tracking JSON file exists."""
	os.makedirs(os.path.dirname(LOGIN_TRACKING_FILE), exist_ok=True)
	if not os.path.exists(LOGIN_TRACKING_FILE):
	with open(LOGIN_TRACKING_FILE, "w") as f:
	json.dump([], f)


	def record_login(name: str, location: str):
	"""Append a login record to the tracking file."""
	init_login_tracking_file()
	with open(LOGIN_TRACKING_FILE, "r") as f:
	records = json.load(f)
	records.append({
	"name": name,
	"location": location,
	"login_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	})
	with open(LOGIN_TRACKING_FILE, "w") as f:
	json.dump(records, f, indent=2)


	def load_login_records():
	"""Read all login records."""
	init_login_tracking_file()
	with open(LOGIN_TRACKING_FILE, "r") as f:
	return json.load(f)


	def show_login_page():
	"""Render the login form and validate credentials."""
	st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True)
	st.markdown('<p class="sub-header">Please log in to continue</p>', unsafe_allow_html=True)

	with st.form("login_form"):
	username = st.text_input("Username")
	password = st.text_input("Password", type="password")
	submitted = st.form_submit_button("Login")

	if submitted:
	try:
	admin_pw = st.secrets["ADMIN_PASSWORD"]
	except Exception:
	admin_pw = os.environ.get("ADMIN_PASSWORD", "")
	try:
	user_pw = st.secrets["USER_PASSWORD"]
	except Exception:
	user_pw = os.environ.get("USER_PASSWORD", "")

	if username == "limsim" and password == admin_pw:
	st.session_state["authenticated"] = True
	st.session_state["username"] = "limsim"
	st.session_state["is_admin"] = True
	st.session_state["needs_user_info"] = False
	st.session_state["user_display_name"] = "Admin (limsim)"
	st.session_state["user_location"] = ""
	st.session_state["login_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	record_login("Admin (limsim)", "—")
	st.rerun()
	elif username == "user_nepal" and password == user_pw:
	st.session_state["authenticated"] = True
	st.session_state["username"] = "user_nepal"
	st.session_state["is_admin"] = False
	st.session_state["needs_user_info"] = True
	st.rerun()
	else:
	st.error("Invalid username or password.")


	def show_user_info_page():
	"""Collect name and location from user_nepal before granting full access."""
	st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True)
	st.markdown('<p class="sub-header">Tell us about yourself</p>', unsafe_allow_html=True)

	with st.form("user_info_form"):
	name = st.text_input("Your Name")
	location = st.text_input("Your Location")
	submitted = st.form_submit_button("Continue")

	if submitted:
	if not name.strip() or not location.strip():
	st.warning("Please fill in both fields.")
	else:
	st.session_state["needs_user_info"] = False
	st.session_state["user_display_name"] = name.strip()
	st.session_state["user_location"] = location.strip()
	st.session_state["login_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	record_login(name.strip(), location.strip())
	st.rerun()


	def show_admin_dashboard():
	"""Admin-only dashboard showing login metrics and records."""
	st.header("🔐 Admin: Login Records")

	records = load_login_records()
	if not records:
	st.info("No login records yet.")
	return

	df = pd.DataFrame(records)

	col1, col2 = st.columns(2)
	with col1:
	st.metric("Total Logins", len(df))
	with col2:
	st.metric("Unique Users", df["name"].nunique())

	st.subheader("Login History")
	st.dataframe(
	df.sort_values("login_time", ascending=False).reset_index(drop=True),
	use_container_width=True,
	)

	csv_data = df.to_csv(index=False)
	st.download_button(
	label="📥 Download CSV",
	data=csv_data,
	file_name="login_records.csv",
	mime="text/csv",
	)


	def app_entry():
	"""Gate: unauthenticated → login, needs info → info page, else → main()."""
	if not st.session_state.get("authenticated", False):
	show_login_page()
	return
	if st.session_state.get("needs_user_info", False):
	show_user_info_page()
	return
	main()


	@st.cache_data
	def load_shapefiles():
	"""Load all Nepal shapefiles for provinces, districts, and local levels."""
	base_path = "data/gis/raw/NPL_new_wgs"

	# Load country boundary
	country_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_0.shp")

	# Load provinces (7 provinces)
	provinces_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_1.shp")

	# Load districts (77 districts)
	districts_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_2.shp")

	# Load local levels (municipalities)
	local_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_3.shp")

	return country_gdf, provinces_gdf, districts_gdf, local_gdf


	@st.cache_data
	def load_json_data():
	"""Load administrative data from JSON files."""
	base_path = "data/admin/web_data"

	with open(f"{base_path}/provinces.json", "r", encoding="utf-8") as f:
	provinces_data = json.load(f)

	with open(f"{base_path}/districts.json", "r", encoding="utf-8") as f:
	districts_data = json.load(f)

	with open(f"{base_path}/municipalities.json", "r", encoding="utf-8") as f:
	municipalities_data = json.load(f)

	return provinces_data, districts_data, municipalities_data


	@st.cache_data
	def load_cached_stats():
	"""Load pre-computed statistics from JSON files for instant loading."""

	with open("data/cached_stats/old_stats.json", "r", encoding="utf-8") as f:
	old_stats = json.load(f)

	with open("data/cached_stats/new_stats.json", "r", encoding="utf-8") as f:
	new_stats = json.load(f)

	# Convert to DataFrames
	old_province = pd.DataFrame(old_stats['provinces'])
	old_district = pd.DataFrame(old_stats['districts'])
	old_municipality = pd.DataFrame(old_stats['municipalities'])
	old_overall = pd.DataFrame([old_stats['overall']])
	old_age = pd.DataFrame(old_stats['age_distribution'])

	new_province = pd.DataFrame(new_stats['provinces'])
	new_district = pd.DataFrame(new_stats['districts'])
	new_municipality = pd.DataFrame(new_stats['municipalities'])
	new_overall = pd.DataFrame([new_stats['overall']])
	new_age = pd.DataFrame(new_stats['age_distribution'])

	return (old_province, old_district, old_municipality, old_overall, old_age,
	new_province, new_district, new_municipality, new_overall, new_age)


	@st.cache_data
	def get_database_stats_fast(db_path, provinces_json, districts_json, municipalities_json):
	"""Get statistics using pre-aggregated JSON data for speed."""

	# Convert JSON data to DataFrames (already aggregated - no need for heavy DB queries)
	province_stats = pd.DataFrame([
	{
	'province_id': int(k),
	'province_name': v['name'],
	'total_voters': v['total_voters'],
	'male_voters': v['male'],
	'female_voters': v['female'],
	'avg_age': v['avg_age'],
	'districts_count': v['districts_count'],
	'municipalities_count': v['municipalities_count']
	}
	for k, v in provinces_json.items()
	])

	district_stats = pd.DataFrame([
	{
	'district_id': int(k),
	'district_name': v['name'],
	'province_id': v['province_id'],
	'total_voters': v['total_voters'],
	'male_voters': v['male'],
	'female_voters': v['female'],
	'avg_age': v['avg_age'],
	'municipalities_count': v['municipalities_count']
	}
	for k, v in districts_json.items()
	])

	# Add province names to districts
	prov_name_map = {int(k): v['name'] for k, v in provinces_json.items()}
	district_stats['province_name'] = district_stats['province_id'].map(prov_name_map)

	municipality_stats = pd.DataFrame([
	{
	'municipality_id': int(k),
	'municipality_name': v['name'],
	'district_id': v['district_id'],
	'total_voters': v['total_voters'],
	'male_voters': v['male'],
	'female_voters': v['female'],
	'avg_age': v['avg_age']
	}
	for k, v in municipalities_json.items()
	])

	# Add district names to municipalities
	dist_name_map = {int(k): v['name'] for k, v in districts_json.items()}
	dist_prov_map = {int(k): v['province_id'] for k, v in districts_json.items()}
	municipality_stats['district_name'] = municipality_stats['district_id'].map(dist_name_map)
	municipality_stats['province_id'] = municipality_stats['district_id'].map(dist_prov_map)

	# Calculate overall stats from province data
	overall_stats = pd.DataFrame([{
	'total_voters': province_stats['total_voters'].sum(),
	'male_voters': province_stats['male_voters'].sum(),
	'female_voters': province_stats['female_voters'].sum(),
	'avg_age': province_stats['avg_age'].mean(),
	'min_age': 18,
	'max_age': 105
	}])

	# Age distribution - estimate from avg_age (or use simple query if needed)
	# Using estimated distribution based on typical voter demographics
	total = overall_stats['total_voters'].iloc[0]
	age_distribution = pd.DataFrame([
	{'age_group': '18-25', 'count': int(total * 0.15), 'gender': 'पुरुष'},
	{'age_group': '18-25', 'count': int(total * 0.14), 'gender': 'महिला'},
	{'age_group': '26-35', 'count': int(total * 0.18), 'gender': 'पुरुष'},
	{'age_group': '26-35', 'count': int(total * 0.17), 'gender': 'महिला'},
	{'age_group': '36-45', 'count': int(total * 0.14), 'gender': 'पुरुष'},
	{'age_group': '36-45', 'count': int(total * 0.13), 'gender': 'महिला'},
	{'age_group': '46-55', 'count': int(total * 0.10), 'gender': 'पुरुष'},
	{'age_group': '46-55', 'count': int(total * 0.09), 'gender': 'महिला'},
	{'age_group': '56-65', 'count': int(total * 0.06), 'gender': 'पुरुष'},
	{'age_group': '56-65', 'count': int(total * 0.05), 'gender': 'महिला'},
	{'age_group': '65+', 'count': int(total * 0.04), 'gender': 'पुरुष'},
	{'age_group': '65+', 'count': int(total * 0.03), 'gender': 'महिला'},
	])

	return province_stats, district_stats, municipality_stats, overall_stats, age_distribution


	@st.cache_data
	def get_age_distribution_from_db(db_path):
	"""Get actual age distribution from database - separate cached query."""
	conn = sqlite3.connect(db_path)

	age_dist_query = """
	SELECT
	CASE
	WHEN age BETWEEN 18 AND 25 THEN '18-25'
	WHEN age BETWEEN 26 AND 35 THEN '26-35'
	WHEN age BETWEEN 36 AND 45 THEN '36-45'
	WHEN age BETWEEN 46 AND 55 THEN '46-55'
	WHEN age BETWEEN 56 AND 65 THEN '56-65'
	WHEN age > 65 THEN '65+'
	ELSE 'Unknown'
	END as age_group,
	COUNT(*) as count,
	gender
	FROM voters
	GROUP BY age_group, gender
	ORDER BY age_group
	"""

	age_distribution = pd.read_sql_query(age_dist_query, conn)
	conn.close()

	return age_distribution


	@st.cache_data
	def get_old_database_stats_fast(db_path):
	"""Get statistics from old database using optimized queries."""
	conn = sqlite3.connect(db_path)

	# Get province-level aggregated stats with a simpler approach
	# First get voter counts per booth
	province_query = """
	SELECT
	p.id as province_id,
	p.name_np as province_name,
	COUNT(v.id) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age
	FROM voters v
	JOIN voting_booths vb ON v.booth_id = vb.id
	JOIN wards w ON vb.ward_id = w.id
	JOIN municipalities m ON w.municipality_id = m.id
	JOIN districts d ON m.district_id = d.id
	JOIN provinces p ON d.province_id = p.id
	GROUP BY p.id, p.name_np
	ORDER BY p.id
	"""

	# Use a faster approach - get total counts first
	total_query = """
	SELECT COUNT(*) as total_voters,
	SUM(CASE WHEN gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(age) as avg_age
	FROM voters
	"""
	overall_stats = pd.read_sql_query(total_query, conn)
	overall_stats['min_age'] = 18
	overall_stats['max_age'] = 105

	# Get province stats using indexed booth_id
	province_query_fast = """
	WITH booth_province AS (
	SELECT vb.id as booth_id, p.id as province_id, p.name_np as province_name
	FROM voting_booths vb
	JOIN wards w ON vb.ward_id = w.id
	JOIN municipalities m ON w.municipality_id = m.id
	JOIN districts d ON m.district_id = d.id
	JOIN provinces p ON d.province_id = p.id
	)
	SELECT
	bp.province_id,
	bp.province_name,
	COUNT(v.id) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age
	FROM voters v
	JOIN booth_province bp ON v.booth_id = bp.booth_id
	GROUP BY bp.province_id, bp.province_name
	ORDER BY bp.province_id
	"""

	province_stats = pd.read_sql_query(province_query_fast, conn)

	# Get district stats
	district_query_fast = """
	WITH booth_district AS (
	SELECT vb.id as booth_id, d.id as district_id, d.name_np as district_name,
	d.province_id, p.name_np as province_name
	FROM voting_booths vb
	JOIN wards w ON vb.ward_id = w.id
	JOIN municipalities m ON w.municipality_id = m.id
	JOIN districts d ON m.district_id = d.id
	JOIN provinces p ON d.province_id = p.id
	)
	SELECT
	bd.district_id,
	bd.district_name,
	bd.province_id,
	bd.province_name,
	COUNT(v.id) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age
	FROM voters v
	JOIN booth_district bd ON v.booth_id = bd.booth_id
	GROUP BY bd.district_id, bd.district_name, bd.province_id, bd.province_name
	ORDER BY bd.province_id, bd.district_id
	"""

	district_stats = pd.read_sql_query(district_query_fast, conn)

	# Get municipality stats
	municipality_query_fast = """
	WITH booth_muni AS (
	SELECT vb.id as booth_id, m.id as municipality_id, m.name_np as municipality_name,
	m.district_id, d.name_np as district_name, d.province_id
	FROM voting_booths vb
	JOIN wards w ON vb.ward_id = w.id
	JOIN municipalities m ON w.municipality_id = m.id
	JOIN districts d ON m.district_id = d.id
	)
	SELECT
	bm.municipality_id,
	bm.municipality_name,
	bm.district_id,
	bm.district_name,
	bm.province_id,
	COUNT(v.id) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age
	FROM voters v
	JOIN booth_muni bm ON v.booth_id = bm.booth_id
	GROUP BY bm.municipality_id, bm.municipality_name, bm.district_id, bm.district_name, bm.province_id
	ORDER BY bm.province_id, bm.district_id, bm.municipality_id
	"""

	municipality_stats = pd.read_sql_query(municipality_query_fast, conn)

	# Age distribution - estimated for speed
	total = overall_stats['total_voters'].iloc[0]
	age_distribution = pd.DataFrame([
	{'age_group': '18-25', 'count': int(total * 0.15), 'gender': 'पुरुष'},
	{'age_group': '18-25', 'count': int(total * 0.14), 'gender': 'महिला'},
	{'age_group': '26-35', 'count': int(total * 0.18), 'gender': 'पुरुष'},
	{'age_group': '26-35', 'count': int(total * 0.17), 'gender': 'महिला'},
	{'age_group': '36-45', 'count': int(total * 0.14), 'gender': 'पुरुष'},
	{'age_group': '36-45', 'count': int(total * 0.13), 'gender': 'महिला'},
	{'age_group': '46-55', 'count': int(total * 0.10), 'gender': 'पुरुष'},
	{'age_group': '46-55', 'count': int(total * 0.09), 'gender': 'महिला'},
	{'age_group': '56-65', 'count': int(total * 0.06), 'gender': 'पुरुष'},
	{'age_group': '56-65', 'count': int(total * 0.05), 'gender': 'महिला'},
	{'age_group': '65+', 'count': int(total * 0.04), 'gender': 'पुरुष'},
	{'age_group': '65+', 'count': int(total * 0.03), 'gender': 'महिला'},
	])

	conn.close()

	return province_stats, district_stats, municipality_stats, overall_stats, age_distribution


	def create_comparative_dataframe(old_stats, new_stats, key_col, name_col):
	"""Create comparative dataframe between old and new data."""
	merged = pd.merge(
	old_stats, new_stats,
	on=[key_col, name_col] if name_col else [key_col],
	suffixes=('_old', '_new'),
	how='outer'
	)

	# Calculate changes
	merged['voter_change'] = merged['total_voters_new'] - merged['total_voters_old']
	merged['voter_change_pct'] = ((merged['total_voters_new'] - merged['total_voters_old']) / merged['total_voters_old'] * 100).round(2)
	merged['male_change'] = merged['male_voters_new'] - merged['male_voters_old']
	merged['female_change'] = merged['female_voters_new'] - merged['female_voters_old']

	return merged


	def show_voter_search():
	"""Display voter search interface with hierarchical location filtering."""
	st.header("🔍 Voter List Search")
	st.markdown("""
	Search voters by location hierarchy: Province → District → Municipality → Ward → Booth
	""")

	# Database selection
	db_choice = st.radio(
	"Select Database",
	["Current Voter Data", "Old Voter Data"],
	horizontal=True
	)

	db_path = "voter_db_new.sqlite3" if db_choice == "Current Voter Data" else "voter_db_old.sqlite3"

	# Load location hierarchy data with caching
	@st.cache_data
	def load_provinces(_db_path):
	conn = sqlite3.connect(_db_path)
	df = pd.read_sql_query("SELECT id, name_np FROM provinces ORDER BY id", conn)
	conn.close()
	return df

	@st.cache_data
	def load_districts(_db_path, province_id=None):
	conn = sqlite3.connect(_db_path)
	if province_id:
	df = pd.read_sql_query(f"SELECT id, province_id, name_np FROM districts WHERE province_id = {province_id} ORDER BY name_np", conn)
	else:
	df = pd.read_sql_query("SELECT id, province_id, name_np FROM districts ORDER BY name_np", conn)
	conn.close()
	return df

	@st.cache_data
	def load_municipalities(_db_path, district_id=None):
	conn = sqlite3.connect(_db_path)
	if district_id:
	df = pd.read_sql_query(f"SELECT id, district_id, name_np FROM municipalities WHERE district_id = {district_id} ORDER BY name_np", conn)
	else:
	df = pd.read_sql_query("SELECT id, district_id, name_np FROM municipalities ORDER BY name_np", conn)
	conn.close()
	return df

	@st.cache_data
	def load_wards(_db_path, municipality_id=None):
	conn = sqlite3.connect(_db_path)
	if municipality_id:
	df = pd.read_sql_query(f"SELECT id, municipality_id, ward_number FROM wards WHERE municipality_id = {municipality_id} ORDER BY ward_number", conn)
	else:
	df = pd.read_sql_query("SELECT id, municipality_id, ward_number FROM wards ORDER BY ward_number", conn)
	conn.close()
	return df

	@st.cache_data
	def load_booths(_db_path, ward_id=None):
	conn = sqlite3.connect(_db_path)
	if ward_id:
	df = pd.read_sql_query(f"SELECT id, ward_id, name_np, google_map_link FROM voting_booths WHERE ward_id = {ward_id} ORDER BY name_np", conn)
	else:
	df = pd.read_sql_query("SELECT id, ward_id, name_np, google_map_link FROM voting_booths ORDER BY name_np", conn)
	conn.close()
	return df

	@st.cache_data
	def search_voters(_db_path, province_id, district_id, municipality_id, ward_id, booth_id, search_name, search_voter_id, gender_filter):
	"""Search voters based on filters."""
	query = """
	SELECT
	v.voter_id AS "Voter ID",
	v.name_np AS "Name",
	v.age AS "Age",
	v.gender AS "Gender",
	v.parents_name_np AS "Parents Name",
	v.partners_name_np AS "Partner Name",
	b.name_np AS "Booth",
	w.ward_number AS "Ward",
	m.name_np AS "Municipality",
	d.name_np AS "District",
	p.name_np AS "Province"
	FROM voters v
	JOIN voting_booths b ON v.booth_id = b.id
	JOIN wards w ON b.ward_id = w.id
	JOIN municipalities m ON w.municipality_id = m.id
	JOIN districts d ON m.district_id = d.id
	JOIN provinces p ON d.province_id = p.id
	WHERE 1=1
	"""

	params = []

	if province_id:
	query += " AND p.id = ?"
	params.append(province_id)

	if district_id:
	query += " AND d.id = ?"
	params.append(district_id)

	if municipality_id:
	query += " AND m.id = ?"
	params.append(municipality_id)

	if ward_id:
	query += " AND w.id = ?"
	params.append(ward_id)

	if booth_id:
	query += " AND b.id = ?"
	params.append(booth_id)

	if search_name:
	query += " AND v.name_np LIKE ?"
	params.append(f"%{search_name}%")

	if search_voter_id:
	query += " AND v.voter_id LIKE ?"
	params.append(f"%{search_voter_id}%")

	if gender_filter == "male":
	query += " AND v.gender = 'पुरुष'"
	elif gender_filter == "female":
	query += " AND v.gender = 'महिला'"

	query += " ORDER BY v.name_np"

	conn = sqlite3.connect(_db_path)
	results_df = pd.read_sql_query(query, conn, params=params)
	conn.close()
	return results_df

	# Load provinces
	provinces_df = load_provinces(db_path)

	st.markdown("---")
	st.subheader("📍 Location Filter")

	# Search Level Selection
	search_level = st.radio(
	"🎯 Search Level",
	["Province Level", "District Level", "Municipality Level", "Ward Level"],
	horizontal=True,
	help="Choose the geographic level to search voters"
	)

	# Province selector (always shown)
	province_options = ["-- Select Province --"] + [f"{row['id']}. {row['name_np']}" for _, row in provinces_df.iterrows()]
	selected_province = st.selectbox("🏔️ Province (प्रदेश)", province_options, key="voter_province")

	selected_province_id = None
	if selected_province != "-- Select Province --":
	selected_province_id = int(selected_province.split(".")[0])

	# Initialize variables
	selected_district_id = None
	selected_municipality_id = None
	selected_ward_id = None
	selected_booth_id = None
	wards_df = None

	# District selector (shown for District, Municipality, Ward levels)
	if search_level in ["District Level", "Municipality Level", "Ward Level"]:
	if selected_province_id:
	districts_df = load_districts(db_path, selected_province_id)
	district_options = ["-- Select District --"] + [f"{row['id']}. {row['name_np']}" for _, row in districts_df.iterrows()]
	else:
	districts_df = None
	district_options = ["-- Select Province First --"]

	selected_district = st.selectbox("🏛️ District (जिल्ला)", district_options, key="voter_district")

	if selected_district not in ["-- Select District --", "-- Select Province First --"]:
	selected_district_id = int(selected_district.split(".")[0])

	# Municipality selector (shown for Municipality and Ward levels)
	if search_level in ["Municipality Level", "Ward Level"]:
	if selected_district_id:
	municipalities_df = load_municipalities(db_path, selected_district_id)
	muni_options = ["-- Select Municipality --"] + [f"{row['id']}. {row['name_np']}" for _, row in municipalities_df.iterrows()]
	else:
	municipalities_df = None
	muni_options = ["-- Select District First --"]

	selected_municipality = st.selectbox("🏘️ Municipality (नगरपालिका)", muni_options, key="voter_municipality")

	if selected_municipality not in ["-- Select Municipality --", "-- Select District First --"]:
	selected_municipality_id = int(selected_municipality.split(".")[0])

	# Ward selector (shown only for Ward level)
	if search_level == "Ward Level":
	col_ward, col_booth = st.columns(2)

	with col_ward:
	if selected_municipality_id:
	wards_df = load_wards(db_path, selected_municipality_id)
	ward_options = ["-- All Wards --"] + [f"Ward {row['ward_number']}" for _, row in wards_df.iterrows()]
	else:
	wards_df = None
	ward_options = ["-- Select Municipality First --"]

	selected_ward = st.selectbox("🔢 Ward (वडा)", ward_options, key="voter_ward")

	if selected_ward not in ["-- All Wards --", "-- Select Municipality First --"]:
	ward_number = int(selected_ward.split(" ")[1])
	if wards_df is not None:
	ward_row = wards_df[wards_df['ward_number'] == ward_number]
	if not ward_row.empty:
	selected_ward_id = int(ward_row.iloc[0]['id'])

	with col_booth:
	# Booth selector (optional, depends on ward)
	if selected_ward_id:
	booths_df = load_booths(db_path, selected_ward_id)
	booth_options = ["-- All Booths --"] + [f"{row['id']}. {row['name_np']}" for _, row in booths_df.iterrows()]
	selected_booth = st.selectbox("🏫 Voting Booth (मतदान केन्द्र)", booth_options, key="voter_booth")

	if selected_booth != "-- All Booths --":
	selected_booth_id = int(selected_booth.split(".")[0])
	else:
	st.selectbox("🏫 Voting Booth (मतदान केन्द्र)", ["-- Select Ward First --"], key="voter_booth_disabled")

	st.markdown("---")

	# Search by name
	st.subheader("🔎 Search Options")
	col_search1, col_search2 = st.columns([3, 1])
	with col_search1:
	search_name = st.text_input("Enter voter name (नाम खोज्नुहोस्)", placeholder="e.g., राम बहादुर", key="search_name")
	with col_search2:
	search_voter_id = st.text_input("Voter ID", placeholder="e.g., 12345678", key="search_voter_id")

	# Gender filter
	gender_filter = st.radio("Gender Filter", ["All", "पुरुष (Male)", "महिला (Female)"], horizontal=True, key="gender_filter")

	gender_val = None
	if gender_filter == "पुरुष (Male)":
	gender_val = "male"
	elif gender_filter == "महिला (Female)":
	gender_val = "female"

	st.markdown("---")

	# Show current selection summary
	if selected_province_id:
	selection_parts = [f"Province: {selected_province.split('. ')[1] if '. ' in selected_province else selected_province}"]
	if selected_district_id:
	selection_parts.append(f"District: {selected_district.split('. ')[1] if '. ' in selected_district else selected_district}")
	if selected_municipality_id:
	selection_parts.append(f"Municipality: {selected_municipality.split('. ')[1] if '. ' in selected_municipality else selected_municipality}")
	if selected_ward_id:
	selection_parts.append(f"Ward: {selected_ward}")

	st.info(f"🔍 Search Level: {search_level} \| " + " → ".join(selection_parts))

	# Search button with validation based on search level
	col_btn1, col_btn2 = st.columns([1, 3])
	with col_btn1:
	search_clicked = st.button("🔍 Search Voters", type="primary", use_container_width=True)

	# Perform search with level-based validation
	if search_clicked:
	# Validate based on search level
	validation_error = None

	if search_level == "Province Level" and not selected_province_id:
	validation_error = "⚠️ Please select a Province to search."
	elif search_level == "District Level" and not selected_district_id:
	validation_error = "⚠️ Please select a District to search."
	elif search_level == "Municipality Level" and not selected_municipality_id:
	validation_error = "⚠️ Please select a Municipality to search."
	elif search_level == "Ward Level" and not selected_municipality_id:
	validation_error = "⚠️ Please select at least a Municipality to search at Ward level."

	if validation_error:
	st.warning(validation_error)
	else:
	# Show warning for large searches
	if search_level == "Province Level":
	st.warning("⚠️ Searching entire province may take time and return large results...")
	elif search_level == "District Level":
	st.warning("⏳ Searching entire district...")

	with st.spinner(f"Searching voters at {search_level}..."):
	try:
	results_df = search_voters(
	db_path,
	selected_province_id,
	selected_district_id,
	selected_municipality_id,
	selected_ward_id,
	selected_booth_id,
	search_name,
	search_voter_id,
	gender_val
	)

	# Store results in session state
	st.session_state['voter_search_results'] = results_df
	st.session_state['voter_search_done'] = True

	except Exception as e:
	st.error(f"Error searching: {str(e)}")
	st.exception(e)

	# Display results from session state
	if st.session_state.get('voter_search_done', False) and 'voter_search_results' in st.session_state:
	results_df = st.session_state['voter_search_results']

	if len(results_df) > 0:
	st.success(f"✅ Found {len(results_df):,} voters")

	# Statistics
	col1, col2, col3, col4 = st.columns(4)
	col1.metric("Total Found", f"{len(results_df):,}")
	col2.metric("Male (पुरुष)", f"{len(results_df[results_df['Gender'] == 'पुरुष']):,}")
	col3.metric("Female (महिला)", f"{len(results_df[results_df['Gender'] == 'महिला']):,}")
	col4.metric("Avg Age", f"{results_df['Age'].mean():.1f}")

	# Pagination for large datasets
	total_voters = len(results_df)
	rows_per_page = 100
	total_pages = (total_voters + rows_per_page - 1) // rows_per_page

	if total_pages > 1:
	st.markdown("---")
	col_page1, col_page2, col_page3 = st.columns([1, 2, 1])
	with col_page2:
	current_page = st.number_input(
	f"Page (1 to {total_pages})",
	min_value=1,
	max_value=total_pages,
	value=1,
	key="voter_page"
	)

	start_idx = (current_page - 1) * rows_per_page
	end_idx = min(start_idx + rows_per_page, total_voters)

	st.info(f"📄 Showing voters {start_idx + 1:,} to {end_idx:,} of {total_voters:,} (Page {current_page} of {total_pages})")

	# Display paginated results
	st.dataframe(
	results_df.iloc[start_idx:end_idx],
	use_container_width=True,
	hide_index=True,
	height=500
	)
	else:
	# Display all results if small dataset
	st.dataframe(
	results_df,
	use_container_width=True,
	hide_index=True,
	height=500
	)

	# Export options
	st.markdown("### 📥 Export Options")

	export_format = st.radio(
	"Select export format:",
	options=["CSV", "XLSX", "PDF"],
	horizontal=True,
	key="export_format"
	)

	# Download button based on selected format
	if export_format == "CSV":
	csv = results_df.to_csv(index=False)
	st.download_button(
	label=f"📥 Download All {len(results_df):,} Voters (CSV)",
	data=csv,
	file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
	mime="text/csv"
	)
	elif export_format == "XLSX":
	# Excel Export
	from io import BytesIO

	# Create Excel file in memory
	output = BytesIO()
	with pd.ExcelWriter(output, engine='openpyxl') as writer:
	results_df.to_excel(writer, index=False, sheet_name='Voters')

	# Auto-adjust column widths
	worksheet = writer.sheets['Voters']
	for idx, col in enumerate(results_df.columns):
	max_length = max(
	results_df[col].astype(str).apply(len).max(),
	len(str(col))
	)
	# Limit column width to 50 characters
	adjusted_width = min(max_length + 2, 50)
	# Convert column index to Excel column letter
	col_letter = chr(65 + idx) if idx < 26 else chr(65 + idx // 26 - 1) + chr(65 + idx % 26)
	worksheet.column_dimensions[col_letter].width = adjusted_width

	excel_data = output.getvalue()

	st.download_button(
	label=f"📥 Download All {len(results_df):,} Voters (XLSX)",
	data=excel_data,
	file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	)
	else:
	# PDF Export
	try:
	from reportlab.lib.pagesizes import A4, landscape
	from reportlab.lib import colors
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib.units import inch
	from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
	from reportlab.pdfbase import pdfmetrics
	from reportlab.pdfbase.ttfonts import TTFont

	# Create PDF in memory
	buffer = BytesIO()
	doc = SimpleDocTemplate(
	buffer,
	pagesize=landscape(A4),
	rightMargin=30,
	leftMargin=30,
	topMargin=30,
	bottomMargin=30
	)

	elements = []
	styles = getSampleStyleSheet()

	# Title
	title_style = ParagraphStyle(
	'CustomTitle',
	parent=styles['Heading1'],
	fontSize=16,
	textColor=colors.HexColor('#1E3A8A'),
	spaceAfter=12,
	alignment=1 # Center
	)

	title = Paragraph("Voter Search Results", title_style)
	elements.append(title)

	# Metadata
	meta_style = ParagraphStyle(
	'Meta',
	parent=styles['Normal'],
	fontSize=10,
	textColor=colors.grey,
	alignment=1
	)

	meta_text = f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} \| Total Voters: {len(results_df):,}"
	meta = Paragraph(meta_text, meta_style)
	elements.append(meta)
	elements.append(Spacer(1, 0.3*inch))

	# Prepare table data
	# Use actual columns from the dataframe
	all_columns = list(results_df.columns)

	# Create table data with headers
	table_data = [all_columns] # Header

	# Add rows (limit to prevent huge PDFs)
	max_rows = min(len(results_df), 5000) # Limit to 5,000 rows
	for idx, row in results_df.head(max_rows).iterrows():
	table_data.append([str(val) if pd.notna(val) else '' for val in row])

	# Create table
	table = Table(table_data)

	# Style the table
	table_style = TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1E3A8A')),
	('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
	('ALIGN', (0, 0), (-1, -1), 'CENTER'),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, 0), 10),
	('BOTTOMPADDING', (0, 0), (-1, 0), 12),
	('BACKGROUND', (0, 1), (-1, -1), colors.beige),
	('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
	('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
	('FONTSIZE', (0, 1), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 1, colors.black),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey])
	])

	table.setStyle(table_style)
	elements.append(table)

	# Add note if data was truncated
	if len(results_df) > max_rows:
	elements.append(Spacer(1, 0.2*inch))
	note = Paragraph(
	f"Note: Showing first {max_rows:,} of {len(results_df):,} voters. Download CSV for complete data.",
	meta_style
	)
	elements.append(note)

	# Build PDF
	doc.build(elements)

	# Get PDF data
	pdf_data = buffer.getvalue()
	buffer.close()

	st.download_button(
	label=f"📥 Download All {len(results_df):,} Voters (PDF)",
	data=pdf_data,
	file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
	mime="application/pdf"
	)

	if len(results_df) > max_rows:
	st.info(f"ℹ️ PDF export limited to first {max_rows:,} voters. Use CSV export for complete data.")

	except ImportError:
	st.error("📦 PDF export requires 'reportlab' library. Installing...")
	st.code("pip install reportlab", language="bash")
	st.info("Please install reportlab and restart the application to use PDF export.")
	except Exception as e:
	st.error(f"❌ Error generating PDF: {str(e)}")
	st.info("Please use CSV export as an alternative.")
	else:
	st.warning("⚠️ No voters found matching your criteria. Try broadening your search.")

	# Show location statistics based on search level
	show_stats = False
	stats_level = None
	stats_id = None

	if search_level == "Province Level" and selected_province_id:
	show_stats = True
	stats_level = "province"
	stats_id = selected_province_id
	elif search_level == "District Level" and selected_district_id:
	show_stats = True
	stats_level = "district"
	stats_id = selected_district_id
	elif search_level == "Municipality Level" and selected_municipality_id:
	show_stats = True
	stats_level = "municipality"
	stats_id = selected_municipality_id
	elif search_level == "Ward Level" and selected_municipality_id:
	show_stats = True
	stats_level = "ward" if selected_ward_id else "municipality"
	stats_id = selected_ward_id if selected_ward_id else selected_municipality_id

	if show_stats:
	st.markdown("---")
	st.subheader("📊 Location Statistics")

	try:
	conn = sqlite3.connect(db_path)

	if stats_level == "province":
	stats_query = """
	SELECT
	COUNT(*) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age,
	COUNT(DISTINCT d.id) as total_districts,
	COUNT(DISTINCT m.id) as total_municipalities
	FROM voters v
	JOIN voting_booths b ON v.booth_id = b.id
	JOIN wards w ON b.ward_id = w.id
	JOIN municipalities m ON w.municipality_id = m.id
	JOIN districts d ON m.district_id = d.id
	WHERE d.province_id = ?
	"""
	stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])

	if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
	stats = stats_df.iloc[0]
	col1, col2, col3, col4, col5, col6 = st.columns(6)
	col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
	col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
	col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
	col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
	col5.metric("Districts", int(stats['total_districts']))
	col6.metric("Municipalities", int(stats['total_municipalities']))

	elif stats_level == "district":
	stats_query = """
	SELECT
	COUNT(*) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age,
	COUNT(DISTINCT m.id) as total_municipalities,
	COUNT(DISTINCT w.id) as total_wards
	FROM voters v
	JOIN voting_booths b ON v.booth_id = b.id
	JOIN wards w ON b.ward_id = w.id
	JOIN municipalities m ON w.municipality_id = m.id
	WHERE m.district_id = ?
	"""
	stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])

	if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
	stats = stats_df.iloc[0]
	col1, col2, col3, col4, col5, col6 = st.columns(6)
	col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
	col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
	col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
	col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
	col5.metric("Municipalities", int(stats['total_municipalities']))
	col6.metric("Wards", int(stats['total_wards']))

	elif stats_level == "municipality":
	stats_query = """
	SELECT
	COUNT(*) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age,
	COUNT(DISTINCT w.id) as total_wards,
	COUNT(DISTINCT b.id) as total_booths
	FROM voters v
	JOIN voting_booths b ON v.booth_id = b.id
	JOIN wards w ON b.ward_id = w.id
	WHERE w.municipality_id = ?
	"""
	stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])

	if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
	stats = stats_df.iloc[0]
	col1, col2, col3, col4, col5, col6 = st.columns(6)
	col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
	col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
	col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
	col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
	col5.metric("Wards", int(stats['total_wards']))
	col6.metric("Booths", int(stats['total_booths']))

	elif stats_level == "ward":
	stats_query = """
	SELECT
	COUNT(*) as total_voters,
	SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
	SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
	AVG(v.age) as avg_age,
	COUNT(DISTINCT b.id) as total_booths
	FROM voters v
	JOIN voting_booths b ON v.booth_id = b.id
	JOIN wards w ON b.ward_id = w.id
	WHERE w.id = ?
	"""
	stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])

	if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
	stats = stats_df.iloc[0]
	col1, col2, col3, col4, col5 = st.columns(5)
	col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
	col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
	col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
	col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
	col5.metric("Booths", int(stats['total_booths']))

	conn.close()
	except Exception as e:
	st.error(f"Error loading statistics: {str(e)}")


	def main():
	# Header
	st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True)
	st.markdown('<p class="sub-header">Comparative Study: Old vs Current Voter Data with GIS Visualization</p>', unsafe_allow_html=True)

	# Sidebar
	with st.sidebar:
	st.image("https://upload.wikimedia.org/wikipedia/commons/thumb/9/9b/Flag_of_Nepal.svg/1200px-Flag_of_Nepal.svg.png", width=100)
	st.title("Navigation")

	pages = ["🏠 Dashboard Overview", "🗺️ GIS Maps", "🗳️ Election 2079 Results",
	"🎯 Constituency Thresholds",
	"🔍 Voter Search", "👥 Caste/Surname Search", "📊 Province Analysis", "🏛️ District Analysis", "🏘️ Local Level Analysis",
	"📊 Age Group Analysis", "📈 Comparative Study", "📋 Detailed Reports"]
	if st.session_state.get("is_admin", False):
	pages.append("🔐 Admin Dashboard")
	page = st.radio("Select Analysis View", pages)

	# Logout button
	if st.button("🚪 Logout"):
	for key in list(st.session_state.keys()):
	del st.session_state[key]
	st.rerun()

	# Load data
	with st.spinner("Loading data..."):
	try:
	country_gdf, provinces_gdf, districts_gdf, local_gdf = load_shapefiles()
	provinces_json, districts_json, municipalities_json = load_json_data()

	# Load pre-computed stats from JSON files (instant loading!)
	(old_province, old_district, old_municipality, old_overall, old_age,
	new_province, new_district, new_municipality, new_overall, new_age) = load_cached_stats()

	except Exception as e:
	st.error(f"Error loading data: {e}")
	st.exception(e)
	return

	# Page routing
	if page == "🏠 Dashboard Overview":
	show_dashboard(old_overall, new_overall, old_province, new_province, old_age, new_age)

	elif page == "🗺️ GIS Maps":
	show_gis_maps(provinces_gdf, districts_gdf, local_gdf, provinces_json, districts_json, new_province, new_district)

	elif page == "🗳️ Election 2079 Results":
	from election_gis_visualization import show_election_gis_page
	show_election_gis_page()

	elif page == "🎯 Constituency Thresholds":
	from constituency_analysis import show_constituency_analysis
	show_constituency_analysis()

	elif page == "🔍 Voter Search":
	show_voter_search()

	elif page == "👥 Caste/Surname Search":
	from castesearch import show_caste_search
	show_caste_search()

	elif page == "📊 Province Analysis":
	show_province_analysis(old_province, new_province, provinces_json)

	elif page == "🏛️ District Analysis":
	show_district_analysis(old_district, new_district, districts_json)

	elif page == "🏘️ Local Level Analysis":
	show_local_analysis(old_municipality, new_municipality, municipalities_json)

	elif page == "� Age Group Analysis":
	from age_group_analysis import show_age_group_analysis
	show_age_group_analysis()

	elif page == "�📈 Comparative Study":
	show_comparative_study(old_province, new_province, old_district, new_district,
	old_municipality, new_municipality, old_age, new_age)

	elif page == "📋 Detailed Reports":
	show_detailed_reports(old_province, new_province, old_district, new_district,
	old_municipality, new_municipality, old_overall, new_overall)

	elif page == "🔐 Admin Dashboard":
	show_admin_dashboard()


	def show_dashboard(old_overall, new_overall, old_province, new_province, old_age, new_age):
	"""Display main dashboard with key metrics."""
	st.header("📊 Dashboard Overview")

	# Key metrics
	col1, col2, col3, col4 = st.columns(4)

	old_total = int(old_overall['total_voters'].iloc[0])
	new_total = int(new_overall['total_voters'].iloc[0])
	change = new_total - old_total
	change_pct = (change / old_total) * 100

	with col1:
	st.metric(
	label="📊 Total Voters (Current)",
	value=f"{new_total:,}",
	delta=f"{change:+,} ({change_pct:+.2f}%)"
	)

	with col2:
	old_male = int(old_overall['male_voters'].iloc[0])
	new_male = int(new_overall['male_voters'].iloc[0])
	st.metric(
	label="👨 Male Voters",
	value=f"{new_male:,}",
	delta=f"{new_male - old_male:+,}"
	)

	with col3:
	old_female = int(old_overall['female_voters'].iloc[0])
	new_female = int(new_overall['female_voters'].iloc[0])
	st.metric(
	label="👩 Female Voters",
	value=f"{new_female:,}",
	delta=f"{new_female - old_female:+,}"
	)

	with col4:
	st.metric(
	label="📅 Average Age",
	value=f"{new_overall['avg_age'].iloc[0]:.1f}",
	delta=f"{new_overall['avg_age'].iloc[0] - old_overall['avg_age'].iloc[0]:.1f}"
	)

	st.markdown("---")

	# Summary cards
	col1, col2 = st.columns(2)

	with col1:
	st.subheader("🗺️ Nepal at a Glance")
	st.info(f"""
	- 7 Provinces
	- 77 Districts
	- 753 Local Levels (Municipalities & Rural Municipalities)
	- {len(new_province):,} Administrative Regions with Data
	""")

	with col2:
	st.subheader("📈 Voter Growth Summary")
	st.success(f"""
	- Previous Election: {old_total:,} voters
	- Current Election: {new_total:,} voters
	- Net Change: {change:+,} voters
	- Growth Rate: {change_pct:.2f}%
	""")

	st.markdown("---")

	# Charts row
	col1, col2 = st.columns(2)

	with col1:
	st.subheader("📊 Province-wise Voter Distribution (Current)")

	fig = px.pie(
	new_province,
	values='total_voters',
	names='province_name',
	title="Voter Distribution by Province",
	color_discrete_sequence=px.colors.qualitative.Set3
	)
	fig.update_traces(textposition='inside', textinfo='percent+label')
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	st.subheader("👥 Gender Distribution Comparison")

	gender_data = pd.DataFrame({
	'Gender': ['Male (Old)', 'Female (Old)', 'Male (New)', 'Female (New)'],
	'Count': [old_male, old_female, new_male, new_female],
	'Period': ['Previous', 'Previous', 'Current', 'Current']
	})

	fig = px.bar(
	gender_data,
	x='Gender',
	y='Count',
	color='Period',
	barmode='group',
	title="Gender Distribution: Old vs New",
	color_discrete_map={'Previous': '#3B82F6', 'Current': '#10B981'}
	)
	fig.update_layout(yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	# Age distribution comparison
	st.subheader("📊 Age Distribution Comparison")

	col1, col2 = st.columns(2)

	with col1:
	# Old age distribution
	old_age_grouped = old_age.groupby('age_group')['count'].sum().reset_index()
	fig = px.bar(
	old_age_grouped,
	x='age_group',
	y='count',
	title="Age Distribution (Previous)",
	color='age_group',
	color_discrete_sequence=px.colors.qualitative.Pastel
	)
	fig.update_layout(yaxis_tickformat=',', showlegend=False)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# New age distribution
	new_age_grouped = new_age.groupby('age_group')['count'].sum().reset_index()
	fig = px.bar(
	new_age_grouped,
	x='age_group',
	y='count',
	title="Age Distribution (Current)",
	color='age_group',
	color_discrete_sequence=px.colors.qualitative.Set2
	)
	fig.update_layout(yaxis_tickformat=',', showlegend=False)
	st.plotly_chart(fig, use_container_width=True)


	def show_gis_maps(provinces_gdf, districts_gdf, local_gdf, provinces_json, districts_json, new_province, new_district):
	"""Display interactive GIS maps."""
	st.header("🗺️ Interactive GIS Maps of Nepal")

	map_type = st.selectbox(
	"Select Map View",
	["Province Map", "District Map", "Local Level Map", "Choropleth - Voters by Province",
	"Choropleth - Voters by District"]
	)

	if map_type == "Province Map":
	st.subheader("Province Boundaries of Nepal")

	# Create folium map
	m = folium.Map(location=[28.3949, 84.1240], zoom_start=7, tiles='CartoDB positron')

	# Add province polygons
	for idx, row in provinces_gdf.iterrows():
	# Get province data
	province_id = str(row['PROVINCE'])
	prov_data = provinces_json.get(province_id, {})

	popup_html = f"""
	<b>Province {row['PROVINCE']}: {row['PR_NAME']}</b><br>
	Nepali: {prov_data.get('name', 'N/A')}<br>
	Total Voters: {prov_data.get('total_voters', 'N/A'):,}<br>
	Districts: {prov_data.get('districts_count', 'N/A')}<br>
	Municipalities: {prov_data.get('municipalities_count', 'N/A')}
	"""

	folium.GeoJson(
	row['geometry'],
	style_function=lambda x, idx=idx: {
	'fillColor': px.colors.qualitative.Set3[idx % len(px.colors.qualitative.Set3)],
	'color': 'black',
	'weight': 2,
	'fillOpacity': 0.5
	},
	popup=folium.Popup(popup_html, max_width=300)
	).add_to(m)

	st_folium(m, width=None, height=600)

	elif map_type == "District Map":
	st.subheader("District Boundaries of Nepal")

	# Province filter
	selected_province = st.selectbox(
	"Filter by Province (Optional)",
	["All Provinces"] + [f"Province {i}" for i in range(1, 8)]
	)

	m = folium.Map(location=[28.3949, 84.1240], zoom_start=7, tiles='CartoDB positron')

	filtered_districts = districts_gdf.copy()
	if selected_province != "All Provinces":
	prov_num = int(selected_province.split()[1])
	filtered_districts = districts_gdf[districts_gdf['PROVINCE'] == prov_num]

	for idx, row in filtered_districts.iterrows():
	popup_html = f"""
	<b>{row['DISTRICT']}</b><br>
	Province: {row['PR_NAME']}
	"""

	folium.GeoJson(
	row['geometry'],
	style_function=lambda x: {
	'fillColor': '#3B82F6',
	'color': 'white',
	'weight': 1,
	'fillOpacity': 0.4
	},
	popup=folium.Popup(popup_html, max_width=200)
	).add_to(m)

	st_folium(m, width=None, height=600)

	elif map_type == "Local Level Map":
	st.subheader("Local Level (Municipality/Rural Municipality) Boundaries")

	# Province filter for performance
	selected_province = st.selectbox(
	"Select Province",
	[f"Province {i}" for i in range(1, 8)]
	)
	prov_num = int(selected_province.split()[1])

	filtered_local = local_gdf[local_gdf['PROVINCE'] == prov_num]

	m = folium.Map(location=[28.3949, 84.1240], zoom_start=8, tiles='CartoDB positron')

	# Color by type
	colors = {'Gaunpalika': '#10B981', 'Nagarpalika': '#F59E0B',
	'Sub Metropolitian': '#EF4444', 'Metropolitian': '#7C3AED'}

	for idx, row in filtered_local.iterrows():
	local_type = row.get('TYPE', 'Unknown')
	color = colors.get(local_type, '#6B7280')

	popup_html = f"""
	<b>{row['LOCAL']}</b><br>
	Type: {local_type}<br>
	District: {row['DISTRICT']}
	"""

	folium.GeoJson(
	row['geometry'],
	style_function=lambda x, c=color: {
	'fillColor': c,
	'color': 'white',
	'weight': 0.5,
	'fillOpacity': 0.5
	},
	popup=folium.Popup(popup_html, max_width=200)
	).add_to(m)

	# Add legend
	st.markdown("""
	Legend:
	- 🟢 Gaunpalika (Rural Municipality)
	- 🟡 Nagarpalika (Municipality)
	- 🔴 Sub-Metropolitan City
	- 🟣 Metropolitan City
	""")

	st_folium(m, width=None, height=600)

	elif map_type == "Choropleth - Voters by Province":
	st.subheader("Voter Distribution Choropleth - Province Level")

	# Merge geodata with voter data
	provinces_gdf_merged = provinces_gdf.copy()

	# Create voter count mapping
	voter_map = {}
	for pid, pdata in provinces_json.items():
	voter_map[int(pid)] = pdata.get('total_voters', 0)

	provinces_gdf_merged['total_voters'] = provinces_gdf_merged['PROVINCE'].map(voter_map)

	fig = px.choropleth_mapbox(
	provinces_gdf_merged,
	geojson=provinces_gdf_merged.geometry.__geo_interface__,
	locations=provinces_gdf_merged.index,
	color='total_voters',
	hover_name='PR_NAME',
	hover_data=['PROVINCE', 'total_voters'],
	mapbox_style="carto-positron",
	zoom=6,
	center={"lat": 28.3949, "lon": 84.1240},
	opacity=0.7,
	color_continuous_scale='Blues',
	title="Voters by Province"
	)
	fig.update_layout(height=600)
	st.plotly_chart(fig, use_container_width=True)

	elif map_type == "Choropleth - Voters by District":
	st.subheader("Voter Distribution Choropleth - District Level")

	# Use new_district data for district-wise voter counts
	if not new_district.empty:
	fig = px.bar(
	new_district.nlargest(20, 'total_voters'),
	x='district_name',
	y='total_voters',
	color='province_name',
	title="Top 20 Districts by Voter Count",
	color_discrete_sequence=px.colors.qualitative.Set3
	)
	fig.update_layout(xaxis_tickangle=-45, yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)


	def show_province_analysis(old_province, new_province, provinces_json):
	"""Detailed province-level analysis."""
	st.header("📊 Province Analysis")

	# Create comparison dataframe
	comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')

	# Province selector
	selected_province = st.selectbox(
	"Select Province for Detailed Analysis",
	comparison['province_name'].tolist()
	)

	prov_data = comparison[comparison['province_name'] == selected_province].iloc[0]

	col1, col2, col3, col4 = st.columns(4)

	with col1:
	st.metric(
	"Total Voters (Current)",
	f"{int(prov_data['total_voters_new']):,}",
	f"{int(prov_data['voter_change']):+,}"
	)

	with col2:
	st.metric(
	"Male Voters",
	f"{int(prov_data['male_voters_new']):,}",
	f"{int(prov_data['male_change']):+,}"
	)

	with col3:
	st.metric(
	"Female Voters",
	f"{int(prov_data['female_voters_new']):,}",
	f"{int(prov_data['female_change']):+,}"
	)

	with col4:
	st.metric(
	"Growth Rate",
	f"{prov_data['voter_change_pct']:.2f}%"
	)

	st.markdown("---")

	# Province comparison charts
	col1, col2 = st.columns(2)

	with col1:
	st.subheader("Province-wise Voter Comparison")

	fig = go.Figure()
	fig.add_trace(go.Bar(
	name='Previous',
	x=comparison['province_name'],
	y=comparison['total_voters_old'],
	marker_color='#3B82F6'
	))
	fig.add_trace(go.Bar(
	name='Current',
	x=comparison['province_name'],
	y=comparison['total_voters_new'],
	marker_color='#10B981'
	))
	fig.update_layout(barmode='group', yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	st.subheader("Voter Growth by Province")

	fig = px.bar(
	comparison,
	x='province_name',
	y='voter_change',
	color='voter_change',
	color_continuous_scale=['red', 'yellow', 'green'],
	title="Net Voter Change by Province"
	)
	fig.update_layout(yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	# Province summary table
	st.subheader("Province Summary Table")
	display_cols = ['province_name', 'total_voters_old', 'total_voters_new',
	'voter_change', 'voter_change_pct', 'male_voters_new', 'female_voters_new']

	styled_df = comparison[display_cols].copy()
	styled_df.columns = ['Province', 'Previous Voters', 'Current Voters',
	'Change', 'Change %', 'Male', 'Female']

	st.dataframe(
	styled_df.style.format({
	'Previous Voters': '{:,.0f}',
	'Current Voters': '{:,.0f}',
	'Change': '{:+,.0f}',
	'Change %': '{:+.2f}%',
	'Male': '{:,.0f}',
	'Female': '{:,.0f}'
	}),
	use_container_width=True
	)


	def show_district_analysis(old_district, new_district, districts_json):
	"""Detailed district-level analysis."""
	st.header("🏛️ District Analysis")

	# Load age group data
	try:
	with open("data/cached_stats/old_age_by_district.json", "r", encoding="utf-8") as f:
	old_district_age = pd.DataFrame(json.load(f))
	with open("data/cached_stats/new_age_by_district.json", "r", encoding="utf-8") as f:
	new_district_age = pd.DataFrame(json.load(f))
	age_data_available = True
	except:
	age_data_available = False
	st.info("ℹ️ Age group data not available. Run generate_age_groups.py to generate it.")

	# Create comparison dataframe
	comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name')

	# Province filter
	provinces = comparison['province_name_new'].dropna().unique().tolist()
	selected_province = st.selectbox("Filter by Province", ["All"] + provinces)

	if selected_province != "All":
	comparison = comparison[comparison['province_name_new'] == selected_province]

	col1, col2 = st.columns(2)

	with col1:
	st.subheader("Top 10 Districts by Voter Growth")
	top_growth = comparison.nlargest(10, 'voter_change')

	fig = px.bar(
	top_growth,
	x='district_name',
	y='voter_change',
	color='voter_change_pct',
	color_continuous_scale='Greens',
	title="Top 10 Districts - Voter Increase"
	)
	fig.update_layout(xaxis_tickangle=-45, yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	st.subheader("District Voter Distribution (Current)")

	fig = px.treemap(
	comparison.head(50),
	path=['province_name_new', 'district_name'],
	values='total_voters_new',
	title="Voter Distribution Treemap"
	)
	st.plotly_chart(fig, use_container_width=True)

	# District comparison table
	st.subheader("District Comparison Table")

	display_df = comparison[['district_name', 'province_name_new', 'total_voters_old',
	'total_voters_new', 'voter_change', 'voter_change_pct']].copy()
	display_df.columns = ['District', 'Province', 'Previous', 'Current', 'Change', 'Change %']

	# Search filter
	search = st.text_input("Search District", "")
	if search:
	display_df = display_df[display_df['District'].str.contains(search, case=False, na=False)]

	st.dataframe(
	display_df.style.format({
	'Previous': '{:,.0f}',
	'Current': '{:,.0f}',
	'Change': '{:+,.0f}',
	'Change %': '{:+.2f}%'
	}).background_gradient(subset=['Change'], cmap='RdYlGn'),
	use_container_width=True,
	height=400
	)

	# AGE GROUP ANALYSIS SECTION
	if age_data_available:
	st.markdown("---")
	st.subheader("📊 Age Group Analysis by District")

	# District selector for detailed age analysis
	districts_list = sorted(comparison['district_name'].tolist())
	selected_district = st.selectbox("Select District for Age Group Analysis", districts_list, key="district_age_sel")

	# Filter age data for selected district
	district_new = new_district_age[new_district_age['district_name'] == selected_district]
	district_old = old_district_age[old_district_age['district_name'] == selected_district]

	if not district_new.empty:
	# Calculate metrics
	total_new = district_new['count'].sum()
	total_old = district_old['count'].sum()
	change = total_new - total_old
	change_pct = (change / total_old * 100) if total_old > 0 else 0

	# Metrics row
	col1, col2, col3, col4 = st.columns(4)

	with col1:
	st.metric("Total Voters (Current)", f"{total_new:,}", f"{change:+,}")

	with col2:
	male_new = district_new[district_new['gender'] == 'पुरुष']['count'].sum()
	st.metric("Male Voters", f"{male_new:,}")

	with col3:
	female_new = district_new[district_new['gender'] == 'महिला']['count'].sum()
	st.metric("Female Voters", f"{female_new:,}")

	with col4:
	st.metric("Growth Rate", f"{change_pct:+.2f}%")

	# Visualizations
	col1, col2 = st.columns(2)

	with col1:
	st.markdown("Age Distribution (Current)")

	# Aggregate by age group
	age_grouped_new = district_new.groupby('age_group')['count'].sum().reset_index()
	age_grouped_new = age_grouped_new.sort_values('age_group')

	fig = px.bar(
	age_grouped_new,
	x='age_group',
	y='count',
	title=f"Age Distribution in {selected_district}",
	color='count',
	color_continuous_scale='Blues',
	labels={'age_group': 'Age Group', 'count': 'Voters'}
	)
	fig.update_layout(yaxis_tickformat=',', showlegend=False, height=350)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	st.markdown("Gender Distribution by Age Group")

	# Gender breakdown
	gender_age = district_new[district_new['gender'].isin(['पुरुष', 'महिला'])]
	gender_age_grouped = gender_age.groupby(['age_group', 'gender'])['count'].sum().reset_index()

	fig = px.bar(
	gender_age_grouped,
	x='age_group',
	y='count',
	color='gender',
	barmode='group',
	title=f"Gender by Age in {selected_district}",
	labels={'age_group': 'Age Group', 'count': 'Voters'},
	color_discrete_map={'पुरुष': '#3B82F6', 'महिला': '#EC4899'}
	)
	fig.update_layout(yaxis_tickformat=',', height=350)
	st.plotly_chart(fig, use_container_width=True)

	# Comparison with previous election
	st.markdown("📊 Age Group Comparison: Previous vs Current")

	col1, col2 = st.columns(2)

	with col1:
	# Old vs New comparison
	age_old = district_old.groupby('age_group')['count'].sum().reset_index()
	age_old.columns = ['age_group', 'old_count']
	age_new = district_new.groupby('age_group')['count'].sum().reset_index()
	age_new.columns = ['age_group', 'new_count']

	age_comparison = pd.merge(age_old, age_new, on='age_group', how='outer').fillna(0)
	age_comparison['change'] = age_comparison['new_count'] - age_comparison['old_count']
	age_comparison['change_pct'] = (age_comparison['change'] / age_comparison['old_count'] * 100).round(2)
	age_comparison['change_pct'] = age_comparison['change_pct'].replace([np.inf, -np.inf], 0)

	fig = px.bar(
	age_comparison,
	x='age_group',
	y=['old_count', 'new_count'],
	barmode='group',
	title="Voter Count Comparison",
	labels={'value': 'Voters', 'variable': 'Period'},
	color_discrete_map={'old_count': '#F59E0B', 'new_count': '#10B981'}
	)
	fig.update_layout(yaxis_tickformat=',', height=350)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Growth rate by age group
	fig = px.bar(
	age_comparison,
	x='age_group',
	y='change_pct',
	title="Growth Rate by Age Group (%)",
	color='change_pct',
	color_continuous_scale='RdYlGn',
	labels={'age_group': 'Age Group', 'change_pct': 'Growth (%)'},
	color_continuous_midpoint=0
	)
	fig.update_layout(height=350)
	st.plotly_chart(fig, use_container_width=True)
	else:
	st.warning(f"No age group data available for {selected_district}")


	def show_local_analysis(old_municipality, new_municipality, municipalities_json):
	"""Local level (municipality) analysis."""
	st.header("🏘️ Local Level Analysis")

	# Load age group data for municipalities and wards
	try:
	with open("data/cached_stats/old_age_by_municipality.json", "r", encoding="utf-8") as f:
	old_municipality_age = pd.DataFrame(json.load(f))
	with open("data/cached_stats/new_age_by_municipality.json", "r", encoding="utf-8") as f:
	new_municipality_age = pd.DataFrame(json.load(f))
	with open("data/cached_stats/old_age_by_ward.json", "r", encoding="utf-8") as f:
	old_ward_age = pd.DataFrame(json.load(f))
	with open("data/cached_stats/new_age_by_ward.json", "r", encoding="utf-8") as f:
	new_ward_age = pd.DataFrame(json.load(f))
	age_data_available = True
	ward_data_available = True
	except:
	age_data_available = False
	ward_data_available = False
	st.info("ℹ️ Age group data not available. Run generate_age_groups.py and generate_ward_age_groups.py to generate it.")

	comparison = create_comparative_dataframe(old_municipality, new_municipality, 'municipality_id', 'municipality_name')

	# Use the _new suffixed column for district_name (or _old if _new doesn't exist)
	district_col = 'district_name_new' if 'district_name_new' in comparison.columns else 'district_name_old'
	comparison['district_name'] = comparison[district_col].fillna(comparison.get('district_name_old', ''))

	col1, col2, col3 = st.columns(3)

	with col1:
	st.metric("Total Local Levels", f"{len(comparison):,}")

	with col2:
	positive_growth = (comparison['voter_change'] > 0).sum()
	st.metric("With Positive Growth", f"{positive_growth:,}")

	with col3:
	negative_growth = (comparison['voter_change'] < 0).sum()
	st.metric("With Negative Growth", f"{negative_growth:,}")

	st.markdown("---")

	col1, col2 = st.columns(2)

	with col1:
	st.subheader("Top 20 Local Levels by Voter Count")
	top_20 = comparison.nlargest(20, 'total_voters_new')

	fig = px.bar(
	top_20,
	y='municipality_name',
	x='total_voters_new',
	orientation='h',
	color='voter_change_pct',
	color_continuous_scale='RdYlGn',
	title="Largest Local Levels"
	)
	fig.update_layout(yaxis={'categoryorder': 'total ascending'}, xaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	st.subheader("Growth Distribution")

	fig = px.histogram(
	comparison,
	x='voter_change_pct',
	nbins=50,
	title="Distribution of Voter Change %",
	labels={'voter_change_pct': 'Voter Change (%)'}
	)
	fig.add_vline(x=0, line_dash="dash", line_color="red")
	st.plotly_chart(fig, use_container_width=True)

	# Detailed table
	st.subheader("Local Level Details")

	# Filters
	col1, col2 = st.columns(2)
	with col1:
	districts = comparison['district_name'].dropna().unique().tolist()
	selected_district = st.selectbox("Filter by District", ["All"] + sorted(districts))

	with col2:
	growth_filter = st.selectbox(
	"Filter by Growth",
	["All", "Positive Growth", "Negative Growth", "No Change"]
	)

	filtered = comparison.copy()

	if selected_district != "All":
	filtered = filtered[filtered['district_name'] == selected_district]

	if growth_filter == "Positive Growth":
	filtered = filtered[filtered['voter_change'] > 0]
	elif growth_filter == "Negative Growth":
	filtered = filtered[filtered['voter_change'] < 0]
	elif growth_filter == "No Change":
	filtered = filtered[filtered['voter_change'] == 0]

	display_df = filtered[['municipality_name', 'district_name', 'total_voters_old',
	'total_voters_new', 'voter_change', 'voter_change_pct']].copy()
	display_df.columns = ['Local Level', 'District', 'Previous', 'Current', 'Change', 'Change %']

	st.dataframe(
	display_df.style.format({
	'Previous': '{:,.0f}',
	'Current': '{:,.0f}',
	'Change': '{:+,.0f}',
	'Change %': '{:+.2f}%'
	}),
	use_container_width=True,
	height=400
	)

	# AGE GROUP ANALYSIS SECTION FOR LOCAL LEVEL
	if age_data_available:
	st.markdown("---")
	st.subheader("📊 Age Group Analysis by Local Level")

	# Hierarchical selectors for local level
	col1, col2 = st.columns(2)

	with col1:
	# Province selector
	if age_data_available:
	provinces_list = sorted(new_municipality_age['province_name'].unique())
	selected_province_age = st.selectbox("Select Province", provinces_list, key="local_prov_age")

	# Filter by province
	muni_filtered = new_municipality_age[new_municipality_age['province_name'] == selected_province_age]
	old_muni_filtered = old_municipality_age[old_municipality_age['province_name'] == selected_province_age]

	with col2:
	# District selector
	if age_data_available and not muni_filtered.empty:
	districts_list = sorted(muni_filtered['district_name'].unique())
	selected_district_age = st.selectbox("Select District", districts_list, key="local_dist_age")

	# Filter by district
	muni_in_district = muni_filtered[muni_filtered['district_name'] == selected_district_age]
	old_muni_in_district = old_muni_filtered[old_muni_filtered['district_name'] == selected_district_age]

	# Municipality selector
	if age_data_available and not muni_in_district.empty:
	municipalities_list = sorted(muni_in_district['municipality_name'].unique())
	selected_municipality = st.selectbox("Select Local Level for Age Analysis", municipalities_list, key="muni_age_sel")

	# =========================== WARD POPULATION GRAPH SECTION ===========================
	st.markdown("---")
	st.subheader("📊 Ward-wise Population Distribution")
	st.markdown(f"Population breakdown by ward for {selected_municipality}")

	# Get ward-wise population data for selected municipality
	if ward_data_available:
	# Filter ward data for the selected municipality
	ward_pop_new = new_ward_age[
	(new_ward_age['municipality_name'] == selected_municipality) &
	(new_ward_age['district_name'] == selected_district_age)
	]

	ward_pop_old = old_ward_age[
	(old_ward_age['municipality_name'] == selected_municipality) &
	(old_ward_age['district_name'] == selected_district_age)
	]

	if not ward_pop_new.empty:
	# Aggregate data by ward (sum across all age groups and genders)
	ward_totals_new = ward_pop_new.groupby('ward_number')['count'].sum().reset_index()
	ward_totals_new.columns = ['ward_number', 'total_voters_new']
	ward_totals_new = ward_totals_new.sort_values('ward_number')

	# Get male and female counts separately
	ward_gender_new = ward_pop_new.groupby(['ward_number', 'gender'])['count'].sum().reset_index()
	ward_male_new = ward_gender_new[ward_gender_new['gender'] == 'पुरुष'][['ward_number', 'count']]
	ward_male_new.columns = ['ward_number', 'male_voters']
	ward_female_new = ward_gender_new[ward_gender_new['gender'] == 'महिला'][['ward_number', 'count']]
	ward_female_new.columns = ['ward_number', 'female_voters']

	# Merge all data
	ward_data_combined = ward_totals_new.merge(ward_male_new, on='ward_number', how='left')
	ward_data_combined = ward_data_combined.merge(ward_female_new, on='ward_number', how='left')
	ward_data_combined = ward_data_combined.fillna(0)

	# Get old data for comparison
	if not ward_pop_old.empty:
	ward_totals_old = ward_pop_old.groupby('ward_number')['count'].sum().reset_index()
	ward_totals_old.columns = ['ward_number', 'total_voters_old']
	ward_data_combined = ward_data_combined.merge(ward_totals_old, on='ward_number', how='left')
	ward_data_combined['total_voters_old'] = ward_data_combined['total_voters_old'].fillna(0)
	ward_data_combined['change'] = ward_data_combined['total_voters_new'] - ward_data_combined['total_voters_old']
	ward_data_combined['change_pct'] = (ward_data_combined['change'] / ward_data_combined['total_voters_old'] * 100).round(2)
	ward_data_combined['change_pct'] = ward_data_combined['change_pct'].replace([np.inf, -np.inf], 0)
	has_old_data = True
	else:
	has_old_data = False

	# Display summary metrics
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Total Wards", len(ward_data_combined))
	with col2:
	total_population = int(ward_data_combined['total_voters_new'].sum())
	st.metric("Total Population", f"{total_population:,}")
	with col3:
	avg_population = int(ward_data_combined['total_voters_new'].mean())
	st.metric("Avg per Ward", f"{avg_population:,}")
	with col4:
	max_ward = ward_data_combined.loc[ward_data_combined['total_voters_new'].idxmax()]
	st.metric("Largest Ward", f"Ward {int(max_ward['ward_number'])}")

	# Create visualizations with proper location context
	# Build full location string for chart titles
	location_full = f"{selected_municipality}, {selected_district_age}, {selected_province_age}"

	col1, col2 = st.columns(2)

	with col1:
	# Bar chart showing total population by ward
	ward_data_combined['Ward'] = 'Ward ' + ward_data_combined['ward_number'].astype(int).astype(str)

	fig1 = px.bar(
	ward_data_combined,
	x='Ward',
	y='total_voters_new',
	title=f"Population by Ward<br><sub>{location_full}</sub>",
	labels={'total_voters_new': 'Total Population', 'Ward': 'Ward'},
	color='total_voters_new',
	color_continuous_scale='Viridis',
	text='total_voters_new'
	)
	fig1.update_traces(texttemplate='%{text:,.0f}', textposition='outside')
	fig1.update_layout(
	yaxis_tickformat=',',
	xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
	height=450,
	showlegend=False,
	title_font_size=14
	)
	st.plotly_chart(fig1, use_container_width=True, key="ward_pop_chart")

	# Add download button for this chart
	try:
	img_bytes = fig1.to_image(format="png", width=1200, height=600, scale=2)
	st.download_button(
	label="📸 Download Chart as PNG",
	data=img_bytes,
	file_name=f"ward_population_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
	mime="image/png",
	use_container_width=True,
	key="download_ward_pop"
	)
	except:
	st.caption("💡 Install kaleido to enable image downloads: `pip install kaleido`")

	with col2:
	# Gender distribution by ward
	ward_gender_data = ward_data_combined.copy()
	ward_gender_data['Male'] = ward_gender_data['male_voters'].astype(int)
	ward_gender_data['Female'] = ward_gender_data['female_voters'].astype(int)

	fig2 = px.bar(
	ward_gender_data,
	x='Ward',
	y=['Male', 'Female'],
	title=f"Gender Distribution by Ward<br><sub>{location_full}</sub>",
	labels={'value': 'Population', 'variable': 'Gender'},
	color_discrete_map={'Male': '#3B82F6', 'Female': '#EC4899'},
	barmode='group'
	)
	fig2.update_layout(
	yaxis_tickformat=',',
	xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
	height=450,
	title_font_size=14
	)
	st.plotly_chart(fig2, use_container_width=True, key="ward_gender_chart")

	# Add download button for this chart
	try:
	img_bytes = fig2.to_image(format="png", width=1200, height=600, scale=2)
	st.download_button(
	label="📸 Download Chart as PNG",
	data=img_bytes,
	file_name=f"ward_gender_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
	mime="image/png",
	use_container_width=True,
	key="download_ward_gender"
	)
	except:
	st.caption("💡 Install kaleido to enable image downloads: `pip install kaleido`")

	# Show comparison with old data if available
	if has_old_data:
	st.markdown("Ward Population Change (Previous vs Current)")

	col1, col2 = st.columns(2)

	with col1:
	# Comparison bar chart
	fig3 = px.bar(
	ward_data_combined,
	x='Ward',
	y=['total_voters_old', 'total_voters_new'],
	title=f"Population Comparison by Ward<br><sub>{location_full}</sub>",
	labels={'value': 'Population', 'variable': 'Period'},
	color_discrete_map={'total_voters_old': '#F59E0B', 'total_voters_new': '#10B981'},
	barmode='group'
	)
	fig3.update_layout(
	yaxis_tickformat=',',
	xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
	height=400,
	title_font_size=14
	)
	st.plotly_chart(fig3, use_container_width=True, key="ward_comparison_chart")

	# Add download button
	try:
	img_bytes = fig3.to_image(format="png", width=1200, height=600, scale=2)
	st.download_button(
	label="📸 Download Chart as PNG",
	data=img_bytes,
	file_name=f"ward_comparison_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
	mime="image/png",
	use_container_width=True,
	key="download_ward_comparison"
	)
	except:
	st.caption("💡 Install kaleido to enable image downloads")

	with col2:
	# Growth rate by ward
	fig4 = px.bar(
	ward_data_combined,
	x='Ward',
	y='change_pct',
	title=f"Growth Rate by Ward (%)<br><sub>{location_full}</sub>",
	labels={'change_pct': 'Growth (%)'},
	color='change_pct',
	color_continuous_scale='RdYlGn',
	color_continuous_midpoint=0,
	text='change_pct'
	)
	fig4.update_traces(texttemplate='%{text:.1f}%', textposition='outside')
	fig4.update_layout(
	xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
	height=400,
	showlegend=False,
	title_font_size=14
	)
	st.plotly_chart(fig4, use_container_width=True, key="ward_growth_chart")

	# Add download button
	try:
	img_bytes = fig4.to_image(format="png", width=1200, height=600, scale=2)
	st.download_button(
	label="📸 Download Chart as PNG",
	data=img_bytes,
	file_name=f"ward_growth_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
	mime="image/png",
	use_container_width=True,
	key="download_ward_growth"
	)
	except:
	st.caption("💡 Install kaleido to enable image downloads")

	# Detailed data table
	st.markdown("Ward-wise Detailed Statistics")

	display_cols = ['Ward', 'total_voters_new', 'male_voters', 'female_voters']
	col_names = ['Ward', 'Total Population', 'Male', 'Female']

	if has_old_data:
	display_cols.extend(['total_voters_old', 'change', 'change_pct'])
	col_names.extend(['Previous Population', 'Change', 'Growth %'])

	ward_display = ward_data_combined[display_cols].copy()
	ward_display.columns = col_names

	# Format the dataframe
	format_dict = {
	'Total Population': '{:,.0f}',
	'Male': '{:,.0f}',
	'Female': '{:,.0f}'
	}

	if has_old_data:
	format_dict.update({
	'Previous Population': '{:,.0f}',
	'Change': '{:+,.0f}',
	'Growth %': '{:+.2f}%'
	})

	styled_df = ward_display.style.format(format_dict)

	if has_old_data:
	styled_df = styled_df.background_gradient(subset=['Growth %'], cmap='RdYlGn')

	st.dataframe(styled_df, use_container_width=True, height=400)

	# Download option with proper location naming
	st.markdown("📥 Download Complete Ward Data")

	# Create clean filename with location hierarchy
	clean_province = selected_province_age.replace(' ', '_').replace('/', '-')
	clean_district = selected_district_age.replace(' ', '_').replace('/', '-')
	clean_municipality = selected_municipality.replace(' ', '_').replace('/', '-')
	base_filename = f"{clean_province}_{clean_district}_{clean_municipality}_ward_data"

	col1, col2 = st.columns(2)
	with col1:
	csv = ward_display.to_csv(index=False, encoding='utf-8-sig')
	st.download_button(
	label="📥 Download Ward Data as CSV",
	data=csv,
	file_name=f"{base_filename}.csv",
	mime="text/csv",
	use_container_width=True,
	help=f"Download ward population data for {location_full}"
	)
	with col2:
	from io import BytesIO
	buffer = BytesIO()
	with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
	# Add location info sheet
	ward_display.to_excel(writer, index=False, sheet_name='Ward Population')

	# Add metadata sheet with location info
	metadata = pd.DataFrame({
	'Field': ['Province', 'District', 'Municipality', 'Analysis Date', 'Total Wards', 'Total Population'],
	'Value': [
	selected_province_age,
	selected_district_age,
	selected_municipality,
	datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	len(ward_data_combined),
	int(ward_data_combined['total_voters_new'].sum())
	]
	})
	metadata.to_excel(writer, index=False, sheet_name='Metadata')

	st.download_button(
	label="📊 Download as Excel (with Metadata)",
	data=buffer.getvalue(),
	file_name=f"{base_filename}.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	use_container_width=True,
	help=f"Download with location metadata for {location_full}"
	)

	else:
	st.warning(f"No ward population data available for {selected_municipality}")
	else:
	st.info("ℹ️ Ward population data not available. Run generate_ward_age_groups.py to generate it.")

	st.markdown("---")

	# WARD SELECTION FEATURE
	if ward_data_available:
	st.markdown("---")
	st.markdown("### 🏘️ Ward Selection (Optional)")
	st.markdown("Select specific ward(s) to analyze or leave unselected to see entire municipality data")

	# Get wards for selected municipality
	ward_filtered = new_ward_age[
	(new_ward_age['municipality_name'] == selected_municipality) &
	(new_ward_age['district_name'] == selected_district_age)
	]

	if not ward_filtered.empty:
	wards_list = sorted(ward_filtered['ward_number'].unique())

	col1, col2 = st.columns([3, 1])
	with col1:
	selected_wards = st.multiselect(
	f"Select Ward(s) from {selected_municipality}",
	options=wards_list,
	default=[],
	help="Select one or multiple wards. Leave empty to show entire municipality.",
	key="ward_multiselect"
	)

	with col2:
	st.metric("Total Wards", len(wards_list))
	if selected_wards:
	st.metric("Selected", len(selected_wards))

	# Filter data based on ward selection
	if ward_data_available and selected_wards:
	# Filter by selected wards
	ward_new_filtered = new_ward_age[
	(new_ward_age['municipality_name'] == selected_municipality) &
	(new_ward_age['district_name'] == selected_district_age) &
	(new_ward_age['ward_number'].isin(selected_wards))
	]
	ward_old_filtered = old_ward_age[
	(old_ward_age['municipality_name'] == selected_municipality) &
	(old_ward_age['district_name'] == selected_district_age) &
	(old_ward_age['ward_number'].isin(selected_wards))
	]

	muni_new = ward_new_filtered
	muni_old = ward_old_filtered
	analysis_level = f"Ward(s): {', '.join(map(str, sorted(selected_wards)))}"
	else:
	# Use municipality level data
	muni_new = muni_in_district[muni_in_district['municipality_name'] == selected_municipality]
	muni_old = old_muni_in_district[old_muni_in_district['municipality_name'] == selected_municipality]
	analysis_level = f"Entire {selected_municipality}"

	# Display analysis level
	st.info(f"📍 Analyzing: {analysis_level}")

	# ======================== CASTE GROUP ANALYSIS SECTION ========================
	st.markdown("---")
	st.subheader("👥 Caste/Ethnic Group Population Analysis")
	st.markdown(f"""
	Caste/Ethnic Group Distribution in {analysis_level}

	This section shows which caste/ethnic groups are present in the selected area and their population distribution.
	Surnames are matched against a comprehensive database of Nepali caste/ethnic groups.
	""")

	# Import caste analysis functions
	from age_group_analysis import get_caste_data_for_municipality, get_ward_level_caste_data

	# Get municipality ID for caste analysis using a simpler query
	conn_caste = sqlite3.connect("voter_db_new.sqlite3")

	# First, try to get municipality ID from the age data which already has it
	municipality_id_caste = None
	if not muni_in_district.empty:
	# Try to extract municipality_id from the age data if available
	temp_muni = muni_in_district[muni_in_district['municipality_name'] == selected_municipality]
	if not temp_muni.empty and 'municipality_id' in temp_muni.columns:
	municipality_id_caste = temp_muni.iloc[0]['municipality_id']

	# If not found, query the database
	if municipality_id_caste is None:
	muni_query_caste = """
	SELECT m.id
	FROM municipalities m
	WHERE m.name_np = ?
	"""
	muni_data_caste = pd.read_sql_query(muni_query_caste, conn_caste,
	params=(selected_municipality,))

	if not muni_data_caste.empty:
	municipality_id_caste = muni_data_caste.iloc[0]['id']

	conn_caste.close()

	# ======================== BULK DOWNLOAD ALL WARDS CASTE DATA ========================
	with st.expander("📥 Download All Ward-wise Caste/Ethnic Data", expanded=False):
	st.markdown("""
	Download caste/ethnic population data for ALL wards in this municipality at once.

	This feature generates a comprehensive Excel file with:
	- One sheet per ward showing caste/ethnic group distribution
	- A summary sheet comparing all wards
	- A combined sheet with all data
	- 📊 Charts and graphs for visual analysis
	- ❓ Unmatched surnames list
	""")

	# Import the all wards function
	from age_group_analysis import get_all_wards_caste_data

	if st.button("📊 Generate All Wards Caste Data", type="secondary", use_container_width=True, key="generate_all_wards_caste"):
	if municipality_id_caste:
	with st.spinner("Analyzing caste/ethnic data for all wards... This may take a moment."):
	try:
	all_wards_data, all_ward_numbers = get_all_wards_caste_data(municipality_id_caste)

	if all_wards_data:
	# Store in session state for download
	st.session_state['all_wards_caste_data'] = all_wards_data
	st.session_state['all_ward_numbers'] = all_ward_numbers
	st.session_state['all_wards_municipality'] = selected_municipality
	st.session_state['all_wards_district'] = selected_district_age
	st.session_state['all_wards_province'] = selected_province_age
	st.success(f"✅ Data generated for {len(all_ward_numbers)} wards!")
	st.rerun()
	else:
	st.warning("No ward data found for this municipality.")
	except Exception as e:
	st.error(f"Error: {str(e)}")
	else:
	st.error("Could not identify municipality ID.")

	# Show download button if data is available
	if 'all_wards_caste_data' in st.session_state and st.session_state.get('all_wards_municipality') == selected_municipality:
	all_wards_data = st.session_state['all_wards_caste_data']
	all_ward_numbers = st.session_state['all_ward_numbers']

	# Show summary
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	total_voters_all = sum(wd['total_voters'] for wd in all_wards_data.values())
	st.metric("Total Voters", f"{total_voters_all:,}")
	with col2:
	st.metric("Total Wards", len(all_ward_numbers))
	with col3:
	all_castes = set()
	for wd in all_wards_data.values():
	all_castes.update(wd['caste_data'].keys())
	st.metric("Caste Groups Found", len(all_castes))
	with col4:
	# Count total unmatched surnames
	all_unmatched = {}
	for wd in all_wards_data.values():
	for surname, count in wd.get('unmatched_surnames', {}).items():
	all_unmatched[surname] = all_unmatched.get(surname, 0) + count
	st.metric("Unmatched Surnames", len(all_unmatched))

	# Create Excel file with multiple sheets and charts
	from io import BytesIO
	from openpyxl.chart import BarChart, PieChart, Reference
	from openpyxl.chart.label import DataLabelList
	from openpyxl.utils.dataframe import dataframe_to_rows

	buffer = BytesIO()

	with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
	# 1. Summary Sheet - All wards comparison
	summary_data = []
	all_castes_sorted = sorted(all_castes)

	for ward_num in sorted(all_ward_numbers):
	ward_data = all_wards_data.get(ward_num, {})
	row = {'Ward': f"Ward {ward_num}", 'Total Voters': ward_data.get('total_voters', 0)}
	for caste in all_castes_sorted:
	row[caste] = ward_data.get('caste_data', {}).get(caste, 0)
	summary_data.append(row)

	# Add total row
	total_row = {'Ward': 'TOTAL', 'Total Voters': total_voters_all}
	for caste in all_castes_sorted:
	total_row[caste] = sum(all_wards_data.get(w, {}).get('caste_data', {}).get(caste, 0) for w in all_ward_numbers)
	summary_data.append(total_row)

	summary_df = pd.DataFrame(summary_data)
	summary_df.to_excel(writer, index=False, sheet_name='Ward-wise Summary')

	# 2. Combined detailed sheet
	combined_data = []
	for ward_num in sorted(all_ward_numbers):
	ward_data = all_wards_data.get(ward_num, {})
	total = ward_data.get('total_voters', 0)
	for caste, count in sorted(ward_data.get('caste_data', {}).items(), key=lambda x: x[1], reverse=True):
	pct = round(count / total * 100, 2) if total > 0 else 0
	combined_data.append({
	'Ward': ward_num,
	'Caste/Ethnic Group': caste,
	'Population': count,
	'Percentage': pct
	})

	combined_df = pd.DataFrame(combined_data)
	combined_df.to_excel(writer, index=False, sheet_name='All Wards Combined')

	# 3. Municipality Total - Caste Summary with Chart
	muni_caste_totals = {}
	for wd in all_wards_data.values():
	for caste, count in wd.get('caste_data', {}).items():
	muni_caste_totals[caste] = muni_caste_totals.get(caste, 0) + count

	muni_caste_df = pd.DataFrame([
	{'S.N.': i+1, 'Caste/Ethnic Group': caste, 'Population': count,
	'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0}
	for i, (caste, count) in enumerate(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True))
	])
	muni_caste_df.to_excel(writer, index=False, sheet_name='Municipality Total')

	# Add bar chart to Municipality Total sheet
	ws_muni = writer.sheets['Municipality Total']

	# Create bar chart for top 15 castes
	chart_data_rows = min(16, len(muni_caste_df) + 1) # +1 for header
	if chart_data_rows > 2:
	bar_chart = BarChart()
	bar_chart.type = "bar"
	bar_chart.style = 10
	bar_chart.title = f"Top Caste/Ethnic Groups - {selected_municipality}"
	bar_chart.y_axis.title = "Caste/Ethnic Group"
	bar_chart.x_axis.title = "Population"

	data = Reference(ws_muni, min_col=3, min_row=1, max_row=chart_data_rows, max_col=3)
	cats = Reference(ws_muni, min_col=2, min_row=2, max_row=chart_data_rows)
	bar_chart.add_data(data, titles_from_data=True)
	bar_chart.set_categories(cats)
	bar_chart.shape = 4
	bar_chart.width = 20
	bar_chart.height = 12
	ws_muni.add_chart(bar_chart, "F2")

	# 4. Individual ward sheets with unmatched surnames detail
	# Import SURNAME_TO_CASTE for matching info
	from age_group_analysis import SURNAME_TO_CASTE

	for ward_num in sorted(all_ward_numbers):
	ward_data = all_wards_data.get(ward_num, {})
	total = ward_data.get('total_voters', 0)

	if ward_data.get('caste_data'):
	ward_rows = []
	for i, (caste, count) in enumerate(sorted(ward_data['caste_data'].items(), key=lambda x: x[1], reverse=True), 1):
	pct = round(count / total * 100, 2) if total > 0 else 0
	ward_rows.append({
	'S.N.': i,
	'Caste/Ethnic Group': caste,
	'Population': count,
	'Percentage': pct
	})

	ward_df = pd.DataFrame(ward_rows)
	sheet_name = f"Ward {ward_num}"[:31] # Excel sheet name limit
	ward_df.to_excel(writer, index=False, sheet_name=sheet_name)

	# Get the worksheet to add more content
	ws_ward = writer.sheets[sheet_name]

	# Track the next available row
	next_row = len(ward_rows) + 4 # Leave 2 empty rows after caste data

	# ===== ALL SURNAMES SECTION =====
	ward_all_surnames = ward_data.get('surname_counts', {})
	if ward_all_surnames:
	# Add section header for ALL surnames
	ws_ward.cell(row=next_row, column=1, value="📝 All Surnames (Matched & Unmatched)")

	# Add column headers
	ws_ward.cell(row=next_row + 1, column=1, value="S.N.")
	ws_ward.cell(row=next_row + 1, column=2, value="Surname")
	ws_ward.cell(row=next_row + 1, column=3, value="Count")
	ws_ward.cell(row=next_row + 1, column=4, value="Percentage")
	ws_ward.cell(row=next_row + 1, column=5, value="Matched Caste")

	# Add all surnames data sorted by count
	sorted_all_surnames = sorted(ward_all_surnames.items(), key=lambda x: x[1], reverse=True)
	for idx, (surname, count) in enumerate(sorted_all_surnames, 1):
	pct = round(count / total * 100, 2) if total > 0 else 0
	matched_caste = SURNAME_TO_CASTE.get(surname, '❌ NOT MATCHED')
	ws_ward.cell(row=next_row + 1 + idx, column=1, value=idx)
	ws_ward.cell(row=next_row + 1 + idx, column=2, value=surname)
	ws_ward.cell(row=next_row + 1 + idx, column=3, value=count)
	ws_ward.cell(row=next_row + 1 + idx, column=4, value=f"{pct}%")
	ws_ward.cell(row=next_row + 1 + idx, column=5, value=matched_caste)

	# Update next_row for unmatched section
	next_row = next_row + len(sorted_all_surnames) + 4

	# ===== UNMATCHED SURNAMES SECTION =====
	ward_unmatched = ward_data.get('unmatched_surnames', {})
	if ward_unmatched:
	# Add section header
	ws_ward.cell(row=next_row, column=1, value="❓ Unmatched Surnames Only (अन्य/अज्ञात)")

	# Add column headers
	ws_ward.cell(row=next_row + 1, column=1, value="S.N.")
	ws_ward.cell(row=next_row + 1, column=2, value="Surname")
	ws_ward.cell(row=next_row + 1, column=3, value="Count")
	ws_ward.cell(row=next_row + 1, column=4, value="Percentage")

	# Add unmatched surnames data sorted by count
	sorted_unmatched = sorted(ward_unmatched.items(), key=lambda x: x[1], reverse=True)
	for idx, (surname, count) in enumerate(sorted_unmatched, 1):
	pct = round(count / total * 100, 2) if total > 0 else 0
	ws_ward.cell(row=next_row + 1 + idx, column=1, value=idx)
	ws_ward.cell(row=next_row + 1 + idx, column=2, value=surname)
	ws_ward.cell(row=next_row + 1 + idx, column=3, value=count)
	ws_ward.cell(row=next_row + 1 + idx, column=4, value=f"{pct}%")

	# Add summary of unmatched
	total_unmatched = sum(ward_unmatched.values())
	summary_row = next_row + len(sorted_unmatched) + 3
	ws_ward.cell(row=summary_row, column=1, value="Total Unmatched:")
	ws_ward.cell(row=summary_row, column=2, value=f"{len(ward_unmatched)} surnames")
	ws_ward.cell(row=summary_row, column=3, value=total_unmatched)
	ws_ward.cell(row=summary_row, column=4, value=f"{round(total_unmatched/total*100, 2) if total > 0 else 0}%")

	# Add pie chart to each ward sheet (for top 10)
	chart_rows = min(11, len(ward_rows) + 1)
	if chart_rows > 2:
	pie_chart = PieChart()
	pie_chart.title = f"Ward {ward_num} - Caste Distribution"

	data = Reference(ws_ward, min_col=3, min_row=1, max_row=chart_rows)
	cats = Reference(ws_ward, min_col=2, min_row=2, max_row=chart_rows)
	pie_chart.add_data(data, titles_from_data=True)
	pie_chart.set_categories(cats)
	pie_chart.width = 14
	pie_chart.height = 10
	ws_ward.add_chart(pie_chart, "F2")


	# 5. Unmatched Surnames Sheet
	all_unmatched_combined = {}
	ward_unmatched_detail = []

	for ward_num in sorted(all_ward_numbers):
	ward_data = all_wards_data.get(ward_num, {})
	unmatched = ward_data.get('unmatched_surnames', {})
	for surname, count in unmatched.items():
	all_unmatched_combined[surname] = all_unmatched_combined.get(surname, 0) + count
	ward_unmatched_detail.append({
	'Ward': ward_num,
	'Unmatched Surname': surname,
	'Count': count
	})

	# Summary of unmatched surnames
	if all_unmatched_combined:
	unmatched_summary_df = pd.DataFrame([
	{'S.N.': i+1, 'Unmatched Surname': surname, 'Total Count': count,
	'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0}
	for i, (surname, count) in enumerate(sorted(all_unmatched_combined.items(), key=lambda x: x[1], reverse=True))
	])
	unmatched_summary_df.to_excel(writer, index=False, sheet_name='Unmatched Surnames')

	# Add chart for top unmatched surnames
	ws_unmatched = writer.sheets['Unmatched Surnames']
	chart_rows = min(21, len(unmatched_summary_df) + 1)
	if chart_rows > 2:
	bar_chart_unmatched = BarChart()
	bar_chart_unmatched.type = "bar"
	bar_chart_unmatched.style = 11
	bar_chart_unmatched.title = "Top Unmatched Surnames"
	bar_chart_unmatched.y_axis.title = "Surname"
	bar_chart_unmatched.x_axis.title = "Count"

	data = Reference(ws_unmatched, min_col=3, min_row=1, max_row=chart_rows, max_col=3)
	cats = Reference(ws_unmatched, min_col=2, min_row=2, max_row=chart_rows)
	bar_chart_unmatched.add_data(data, titles_from_data=True)
	bar_chart_unmatched.set_categories(cats)
	bar_chart_unmatched.width = 18
	bar_chart_unmatched.height = 12
	ws_unmatched.add_chart(bar_chart_unmatched, "F2")

	# Ward-wise unmatched detail
	if ward_unmatched_detail:
	ward_unmatched_df = pd.DataFrame(ward_unmatched_detail)
	ward_unmatched_df.to_excel(writer, index=False, sheet_name='Unmatched by Ward')

	# 6. All Surnames Sheet (matched and unmatched)
	all_surnames_combined = {}
	for ward_num in sorted(all_ward_numbers):
	ward_data = all_wards_data.get(ward_num, {})
	for surname, count in ward_data.get('surname_counts', {}).items():
	all_surnames_combined[surname] = all_surnames_combined.get(surname, 0) + count

	if all_surnames_combined:
	# Import SURNAME_TO_CASTE for matching
	from age_group_analysis import SURNAME_TO_CASTE

	all_surnames_df = pd.DataFrame([
	{
	'S.N.': i+1,
	'Surname': surname,
	'Count': count,
	'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0,
	'Matched Caste': SURNAME_TO_CASTE.get(surname, 'NOT MATCHED')
	}
	for i, (surname, count) in enumerate(sorted(all_surnames_combined.items(), key=lambda x: x[1], reverse=True))
	])
	all_surnames_df.to_excel(writer, index=False, sheet_name='All Surnames')

	# 7. Metadata sheet
	total_unmatched_voters = sum(all_unmatched_combined.values()) if all_unmatched_combined else 0
	metadata = pd.DataFrame({
	'Field': ['Province', 'District', 'Municipality', 'Total Wards', 'Total Voters',
	'Total Caste Groups', 'Total Unique Surnames', 'Unmatched Surnames Count',
	'Unmatched Voters Count', 'Match Rate (%)', 'Generated On'],
	'Value': [
	selected_province_age,
	selected_district_age,
	selected_municipality,
	len(all_ward_numbers),
	total_voters_all,
	len(all_castes),
	len(all_surnames_combined) if all_surnames_combined else 0,
	len(all_unmatched_combined),
	total_unmatched_voters,
	round((total_voters_all - total_unmatched_voters) / total_voters_all * 100, 2) if total_voters_all > 0 else 0,
	datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	]
	})
	metadata.to_excel(writer, index=False, sheet_name='Metadata')

	# Download button
	filename = f"{selected_municipality.replace(' ', '_')}_all_wards_caste_data.xlsx"
	st.download_button(
	label="📥 Download All Wards Caste Data (Excel with Charts)",
	data=buffer.getvalue(),
	file_name=filename,
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	use_container_width=True
	)

	# Show preview charts in Streamlit
	st.markdown("---")
	st.markdown("📊 Preview Charts")

	# Municipality total caste distribution chart
	muni_caste_totals = {}
	for wd in all_wards_data.values():
	for caste, count in wd.get('caste_data', {}).items():
	muni_caste_totals[caste] = muni_caste_totals.get(caste, 0) + count

	top_castes = dict(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True)[:15])

	col1, col2 = st.columns(2)
	with col1:
	fig_bar = px.bar(
	x=list(top_castes.values()),
	y=list(top_castes.keys()),
	orientation='h',
	title=f"Top 15 Caste/Ethnic Groups - {selected_municipality}",
	labels={'x': 'Population', 'y': 'Caste/Ethnic Group'}
	)
	fig_bar.update_layout(yaxis={'categoryorder': 'total ascending'}, height=500)
	st.plotly_chart(fig_bar, use_container_width=True)

	with col2:
	# Pie chart for top 10
	top_10 = dict(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True)[:10])
	fig_pie = px.pie(
	values=list(top_10.values()),
	names=list(top_10.keys()),
	title=f"Caste Distribution (Top 10) - {selected_municipality}"
	)
	fig_pie.update_layout(height=500)
	st.plotly_chart(fig_pie, use_container_width=True)

	# Show unmatched surnames preview
	if all_unmatched:
	st.markdown("❓ Top Unmatched Surnames (Preview)")
	top_unmatched = dict(sorted(all_unmatched.items(), key=lambda x: x[1], reverse=True)[:10])
	unmatched_preview_df = pd.DataFrame([
	{'Surname': s, 'Count': c} for s, c in top_unmatched.items()
	])
	st.dataframe(unmatched_preview_df, use_container_width=True, hide_index=True)

	# Also show a preview table
	st.markdown("Preview: Ward-wise Caste Distribution Summary")
	preview_df = summary_df.head(10)
	st.dataframe(preview_df, use_container_width=True)

	st.markdown("---")

	# Analyze button
	if st.button("🔍 Analyze Caste/Ethnic Distribution", type="primary", use_container_width=True):
	with st.spinner("Analyzing caste/ethnic group distribution..."):
	if municipality_id_caste:
	try:
	# Clear any previous cache to ensure fresh data
	if 'caste_analysis_data_local' in st.session_state:
	del st.session_state['caste_analysis_data_local']

	if ward_data_available and selected_wards:
	# Ward-specific analysis - convert to list of ints
	ward_list = [int(w) for w in selected_wards]
	st.info(f"Querying municipality {municipality_id_caste} with wards {ward_list}")
	caste_data, total_analyzed, surname_counts, unmatched_surnames = get_ward_level_caste_data(municipality_id_caste, ward_list)
	else:
	# Full municipality analysis
	st.info(f"Querying entire municipality {municipality_id_caste}")
	caste_data, total_analyzed, surname_counts, unmatched_surnames = get_caste_data_for_municipality(municipality_id_caste)

	if caste_data and total_analyzed > 0:
	# Store in session state
	st.session_state['caste_analysis_data_local'] = caste_data
	st.session_state['caste_analysis_total_local'] = total_analyzed
	st.session_state['caste_analysis_scope_local'] = analysis_level
	st.session_state['surname_counts_local'] = surname_counts
	st.session_state['unmatched_surnames_local'] = unmatched_surnames
	st.success(f"✅ Analysis complete! Found {total_analyzed:,} voters across {len(caste_data)} caste/ethnic groups")
	st.rerun()
	else:
	st.warning(f"⚠️ Query returned {total_analyzed} voters and {len(caste_data) if caste_data else 0} caste groups.")
	except Exception as e:
	st.error(f"Error during analysis: {str(e)}")
	import traceback
	st.code(traceback.format_exc())
	else:
	st.error(f"❌ Could not identify municipality ID for: {selected_municipality}")

	# Display caste analysis results if available
	if 'caste_analysis_data_local' in st.session_state and st.session_state['caste_analysis_data_local']:
	caste_data = st.session_state['caste_analysis_data_local']
	total_analyzed = st.session_state['caste_analysis_total_local']
	analysis_scope_caste = st.session_state.get('caste_analysis_scope_local', 'Selected Area')

	st.markdown("---")
	st.markdown(f"### 📊 Caste/Ethnic Group Distribution Results")
	st.info(f"Analysis Scope: {analysis_scope_caste} \| Total Voters Analyzed: {total_analyzed:,}")

	# Convert to DataFrame for visualization
	caste_df = pd.DataFrame([
	{'Caste/Ethnic Group': caste, 'Population': count, 'Percentage': round(count/total_analyzed*100, 2)}
	for caste, count in sorted(caste_data.items(), key=lambda x: x[1], reverse=True)
	])

	# Display summary metrics
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Total Groups", len(caste_data))
	with col2:
	largest_group = caste_df.iloc[0]
	group_name = largest_group['Caste/Ethnic Group'].split('(')[0].strip()
	st.metric("Largest Group", group_name[:20] + "..." if len(group_name) > 20 else group_name)
	with col3:
	st.metric("Population", f"{largest_group['Population']:,}")
	with col4:
	st.metric("Share", f"{largest_group['Percentage']:.1f}%")

	# Visualizations in tabs
	tab1, tab2, tab3, tab4, tab5 = st.tabs(["📊 Bar Chart", "🥧 Pie Chart", "📋 Data Table", "📝 All Surnames", "❓ Unmatched Surnames"])

	with tab1:
	# Bar chart - top groups
	top_n = min(20, len(caste_df))
	fig = px.bar(
	caste_df.head(top_n),
	x='Population',
	y='Caste/Ethnic Group',
	orientation='h',
	title=f"Top {top_n} Caste/Ethnic Groups by Population",
	color='Population',
	color_continuous_scale='Viridis',
	text='Population'
	)
	fig.update_traces(texttemplate='%{text:,}', textposition='outside')
	fig.update_layout(
	yaxis={'categoryorder':'total ascending'},
	height=max(400, top_n * 25),
	showlegend=False
	)
	st.plotly_chart(fig, use_container_width=True)

	with tab2:
	# Pie chart - top 10 + others
	if len(caste_df) > 10:
	top_10 = caste_df.head(10).copy()
	others_count = caste_df.iloc[10:]['Population'].sum()
	others_row = pd.DataFrame([{
	'Caste/Ethnic Group': 'Others (Combined)',
	'Population': others_count,
	'Percentage': round(others_count/total_analyzed*100, 2)
	}])
	pie_data = pd.concat([top_10, others_row], ignore_index=True)
	else:
	pie_data = caste_df

	fig = px.pie(
	pie_data,
	values='Population',
	names='Caste/Ethnic Group',
	title="Caste/Ethnic Group Distribution",
	color_discrete_sequence=px.colors.qualitative.Set3
	)
	fig.update_traces(textposition='inside', textinfo='percent+label')
	fig.update_layout(height=600)
	st.plotly_chart(fig, use_container_width=True)

	with tab3:
	# Detailed table with serial numbers
	caste_df_display = caste_df.copy()
	caste_df_display.insert(0, 'S.N.', range(1, len(caste_df_display) + 1))

	st.markdown(f"Complete List - Serially Ordered by Population")
	st.dataframe(
	caste_df_display.style.format({
	'Population': '{:,}',
	'Percentage': '{:.2f}%'
	}).background_gradient(subset=['Population'], cmap='YlOrRd'),
	use_container_width=True,
	height=500
	)

	# Download buttons
	col1, col2 = st.columns(2)
	with col1:
	csv = caste_df_display.to_csv(index=False, encoding='utf-8-sig')
	st.download_button(
	label="📥 Download as CSV",
	data=csv,
	file_name=f"caste_distribution_{selected_municipality.replace(' ', '_')}.csv",
	mime="text/csv",
	use_container_width=True
	)
	with col2:
	from io import BytesIO
	buffer = BytesIO()
	with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
	caste_df_display.to_excel(writer, index=False, sheet_name='Caste Distribution')
	st.download_button(
	label="📊 Download as Excel",
	data=buffer.getvalue(),
	file_name=f"caste_distribution_{selected_municipality.replace(' ', '_')}.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	use_container_width=True
	)

	with tab4:
	# All surnames found
	st.markdown("All Surnames Found in the Data")
	st.info("This shows every surname detected, sorted by frequency")

	if 'surname_counts_local' in st.session_state:
	surname_counts = st.session_state['surname_counts_local']
	surname_df = pd.DataFrame([
	{'S.N.': i+1, 'Surname': surname, 'Count': count, 'Percentage': round(count/total_analyzed*100, 2)}
	for i, (surname, count) in enumerate(sorted(surname_counts.items(), key=lambda x: x[1], reverse=True))
	])

	st.metric("Total Unique Surnames", len(surname_df))

	st.dataframe(
	surname_df.style.format({
	'Count': '{:,}',
	'Percentage': '{:.2f}%'
	}).background_gradient(subset=['Count'], cmap='Blues'),
	use_container_width=True,
	height=500
	)

	# Download buttons
	col1, col2 = st.columns(2)
	with col1:
	csv_surnames = surname_df.to_csv(index=False, encoding='utf-8-sig')
	st.download_button(
	label="📥 Download as CSV",
	data=csv_surnames,
	file_name=f"all_surnames_{selected_municipality.replace(' ', '_')}.csv",
	mime="text/csv",
	use_container_width=True
	)
	with col2:
	from io import BytesIO
	buffer = BytesIO()
	with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
	surname_df.to_excel(writer, index=False, sheet_name='All Surnames')
	st.download_button(
	label="📊 Download as Excel",
	data=buffer.getvalue(),
	file_name=f"all_surnames_{selected_municipality.replace(' ', '_')}.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	use_container_width=True
	)

	with tab5:
	# Unmatched surnames
	st.markdown("Surnames NOT in Caste Database")
	st.warning("These surnames were found but don't match any caste/ethnic group in the database")

	if 'unmatched_surnames_local' in st.session_state:
	unmatched = st.session_state['unmatched_surnames_local']

	if unmatched:
	unmatched_df = pd.DataFrame([
	{'S.N.': i+1, 'Surname': surname, 'Count': count, 'Percentage': round(count/total_analyzed*100, 2)}
	for i, (surname, count) in enumerate(sorted(unmatched.items(), key=lambda x: x[1], reverse=True))
	])

	col1, col2, col3 = st.columns(3)
	with col1:
	st.metric("Unmatched Surnames", len(unmatched_df))
	with col2:
	total_unmatched = unmatched_df['Count'].sum()
	st.metric("Total Voters", f"{total_unmatched:,}")
	with col3:
	unmatched_pct = (total_unmatched / total_analyzed * 100)
	st.metric("% of Total", f"{unmatched_pct:.1f}%")

	st.markdown("Top 50 Unmatched Surnames")
	st.dataframe(
	unmatched_df.head(50).style.format({
	'Count': '{:,}',
	'Percentage': '{:.2f}%'
	}).background_gradient(subset=['Count'], cmap='Oranges'),
	use_container_width=True,
	height=500
	)

	# Download buttons
	col1, col2 = st.columns(2)
	with col1:
	csv_unmatched = unmatched_df.to_csv(index=False, encoding='utf-8-sig')
	st.download_button(
	label="📥 Download as CSV",
	data=csv_unmatched,
	file_name=f"unmatched_surnames_{selected_municipality.replace(' ', '_')}.csv",
	mime="text/csv",
	use_container_width=True
	)
	with col2:
	from io import BytesIO
	buffer = BytesIO()
	with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
	unmatched_df.to_excel(writer, index=False, sheet_name='Unmatched Surnames')
	st.download_button(
	label="📊 Download as Excel",
	data=buffer.getvalue(),
	file_name=f"unmatched_surnames_{selected_municipality.replace(' ', '_')}.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	use_container_width=True
	)
	else:
	st.success("✅ All surnames matched successfully!")

	st.markdown("---")
	st.subheader("📊 Age Group Analysis")

	if not muni_new.empty:
	# Calculate metrics
	total_new = muni_new['count'].sum()
	total_old = muni_old['count'].sum()
	change = total_new - total_old
	change_pct = (change / total_old * 100) if total_old > 0 else 0

	# Metrics
	col1, col2, col3, col4 = st.columns(4)

	with col1:
	st.metric("Total Voters (Current)", f"{total_new:,}", f"{change:+,}")

	with col2:
	male_new = muni_new[muni_new['gender'] == 'पुरुष']['count'].sum()
	st.metric("Male Voters", f"{male_new:,}")

	with col3:
	female_new = muni_new[muni_new['gender'] == 'महिला']['count'].sum()
	st.metric("Female Voters", f"{female_new:,}")

	with col4:
	st.metric("Growth Rate", f"{change_pct:+.2f}%")

	# Visualizations
	col1, col2 = st.columns(2)

	with col1:
	st.markdown("Age Distribution (Current)")

	# Group by age
	age_grouped = muni_new.groupby('age_group')['count'].sum().reset_index()

	title_text = f"Age Distribution in {selected_municipality}"
	if ward_data_available and selected_wards:
	if len(selected_wards) == 1:
	title_text = f"Age Distribution - Ward {selected_wards[0]}"
	else:
	title_text = f"Age Distribution - {len(selected_wards)} Wards"

	fig = px.pie(
	age_grouped,
	values='count',
	names='age_group',
	title=title_text,
	color_discrete_sequence=px.colors.qualitative.Set3
	)
	fig.update_traces(textposition='inside', textinfo='percent+label')
	fig.update_layout(height=350)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	st.markdown("Gender Distribution by Age")

	gender_data = muni_new[muni_new['gender'].isin(['पुरुष', 'महिला'])]
	gender_grouped = gender_data.groupby(['age_group', 'gender'])['count'].sum().reset_index()

	title_text_gender = f"Gender by Age in {selected_municipality}"
	if ward_data_available and selected_wards:
	if len(selected_wards) == 1:
	title_text_gender = f"Gender by Age - Ward {selected_wards[0]}"
	else:
	title_text_gender = f"Gender by Age - {len(selected_wards)} Wards"

	fig = px.bar(
	gender_grouped,
	x='age_group',
	y='count',
	color='gender',
	title=title_text_gender,
	color_discrete_map={'पुरुष': '#3B82F6', 'महिला': '#EC4899'}
	)
	fig.update_layout(yaxis_tickformat=',', height=350)
	st.plotly_chart(fig, use_container_width=True)

	# Comparison with previous election
	st.markdown("📊 Age Group Comparison: Previous vs Current")

	col1, col2 = st.columns(2)

	with col1:
	# Old vs New comparison
	age_old = muni_old.groupby('age_group')['count'].sum().reset_index()
	age_old.columns = ['age_group', 'old_count']
	age_new = muni_new.groupby('age_group')['count'].sum().reset_index()
	age_new.columns = ['age_group', 'new_count']

	age_comparison = pd.merge(age_old, age_new, on='age_group', how='outer').fillna(0)
	age_comparison['change'] = age_comparison['new_count'] - age_comparison['old_count']
	age_comparison['change_pct'] = (age_comparison['change'] / age_comparison['old_count'] * 100).round(2)
	age_comparison['change_pct'] = age_comparison['change_pct'].replace([np.inf, -np.inf], 0)

	fig = px.bar(
	age_comparison,
	x='age_group',
	y=['old_count', 'new_count'],
	barmode='group',
	title="Voter Count Comparison",
	labels={'value': 'Voters', 'variable': 'Period'},
	color_discrete_map={'old_count': '#F59E0B', 'new_count': '#10B981'}
	)
	fig.update_layout(yaxis_tickformat=',', height=350)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Growth rate by age group
	fig = px.bar(
	age_comparison,
	x='age_group',
	y='change_pct',
	title="Growth Rate by Age Group (%)",
	color='change_pct',
	color_continuous_scale='RdYlGn',
	labels={'age_group': 'Age Group', 'change_pct': 'Growth (%)'},
	color_continuous_midpoint=0
	)
	fig.update_layout(height=350)
	st.plotly_chart(fig, use_container_width=True)

	# Comparison table
	st.markdown("Age Group Statistics Table")
	age_comparison_display = age_comparison[['age_group', 'old_count', 'new_count', 'change', 'change_pct']].copy()
	age_comparison_display.columns = ['Age Group', 'Previous', 'Current', 'Change', 'Growth %']
	st.dataframe(
	age_comparison_display.style.format({
	'Previous': '{:,.0f}',
	'Current': '{:,.0f}',
	'Change': '{:+,.0f}',
	'Growth %': '{:+.2f}'
	}),
	use_container_width=True
	)

	# Ward-by-ward comparison if multiple wards selected
	if ward_data_available and selected_wards and len(selected_wards) > 1:
	st.markdown("---")
	st.markdown("📊 Ward-by-Ward Comparison")

	# Prepare ward comparison data
	ward_comparison_list = []
	for ward_num in selected_wards:
	ward_data_new = ward_new_filtered[ward_new_filtered['ward_number'] == ward_num]
	ward_data_old = ward_old_filtered[ward_old_filtered['ward_number'] == ward_num]

	total_new_ward = ward_data_new['count'].sum()
	total_old_ward = ward_data_old['count'].sum()
	change_ward = total_new_ward - total_old_ward
	change_pct_ward = (change_ward / total_old_ward * 100) if total_old_ward > 0 else 0

	male_ward = ward_data_new[ward_data_new['gender'] == 'पुरुष']['count'].sum()
	female_ward = ward_data_new[ward_data_new['gender'] == 'महिला']['count'].sum()

	ward_comparison_list.append({
	'Ward': f"Ward {ward_num}",
	'Previous': total_old_ward,
	'Current': total_new_ward,
	'Change': change_ward,
	'Growth %': change_pct_ward,
	'Male': male_ward,
	'Female': female_ward
	})

	ward_comparison_df = pd.DataFrame(ward_comparison_list)

	col1, col2 = st.columns(2)

	with col1:
	# Ward comparison bar chart
	fig = px.bar(
	ward_comparison_df,
	x='Ward',
	y='Current',
	title="Current Voters by Ward",
	color='Current',
	color_continuous_scale='Viridis'
	)
	fig.update_layout(yaxis_tickformat=',', showlegend=False)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Growth rate comparison
	fig = px.bar(
	ward_comparison_df,
	x='Ward',
	y='Growth %',
	title="Growth Rate by Ward",
	color='Growth %',
	color_continuous_scale='RdYlGn',
	color_continuous_midpoint=0
	)
	st.plotly_chart(fig, use_container_width=True)

	# Ward comparison table
	st.dataframe(
	ward_comparison_df.style.format({
	'Previous': '{:,.0f}',
	'Current': '{:,.0f}',
	'Change': '{:+,.0f}',
	'Growth %': '{:+.2f}%',
	'Male': '{:,.0f}',
	'Female': '{:,.0f}'
	}).background_gradient(subset=['Growth %'], cmap='RdYlGn'),
	use_container_width=True
	)
	else:
	st.warning(f"No age group data available for {selected_municipality}")


	def show_comparative_study(old_province, new_province, old_district, new_district,
	old_municipality, new_municipality, old_age, new_age):
	"""Comprehensive comparative study."""
	st.header("📈 Comparative Study: Old vs Current Voter Data")

	# Province comparison
	prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')

	# Summary statistics
	st.subheader("📊 Summary Statistics")

	col1, col2, col3 = st.columns(3)

	old_total = old_province['total_voters'].sum()
	new_total = new_province['total_voters'].sum()

	with col1:
	st.markdown("### Previous Election")
	st.write(f"Total Voters: {old_total:,}")
	st.write(f"Male: {old_province['male_voters'].sum():,}")
	st.write(f"Female: {old_province['female_voters'].sum():,}")
	st.write(f"Avg Age: {old_province['avg_age'].mean():.1f}")

	with col2:
	st.markdown("### Current Election")
	st.write(f"Total Voters: {new_total:,}")
	st.write(f"Male: {new_province['male_voters'].sum():,}")
	st.write(f"Female: {new_province['female_voters'].sum():,}")
	st.write(f"Avg Age: {new_province['avg_age'].mean():.1f}")

	with col3:
	st.markdown("### Change Analysis")
	change = new_total - old_total
	change_pct = (change / old_total) * 100
	st.write(f"Net Change: {change:+,}")
	st.write(f"Growth Rate: {change_pct:+.2f}%")
	st.write(f"Male Change: {new_province['male_voters'].sum() - old_province['male_voters'].sum():+,}")
	st.write(f"Female Change: {new_province['female_voters'].sum() - old_province['female_voters'].sum():+,}")

	st.markdown("---")

	# Visual comparisons
	st.subheader("📉 Visual Comparison Analysis")

	tab1, tab2, tab3, tab4 = st.tabs(["Province Level", "Gender Analysis", "Age Analysis", "Growth Trends"])

	with tab1:
	col1, col2 = st.columns(2)

	with col1:
	# Side by side comparison
	fig = make_subplots(rows=1, cols=2, subplot_titles=("Previous Election", "Current Election"),
	specs=[[{"type": "pie"}, {"type": "pie"}]])

	fig.add_trace(
	go.Pie(labels=old_province['province_name'], values=old_province['total_voters'], name="Previous"),
	row=1, col=1
	)
	fig.add_trace(
	go.Pie(labels=new_province['province_name'], values=new_province['total_voters'], name="Current"),
	row=1, col=2
	)
	fig.update_layout(title_text="Province Distribution Comparison")
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Growth by province
	fig = px.bar(
	prov_comparison,
	x='province_name',
	y=['total_voters_old', 'total_voters_new'],
	barmode='group',
	title="Province-wise Voter Count Comparison",
	labels={'value': 'Voters', 'variable': 'Period'}
	)
	fig.update_layout(yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	with tab2:
	col1, col2 = st.columns(2)

	with col1:
	# Gender ratio comparison
	gender_comparison = pd.DataFrame({
	'Province': prov_comparison['province_name'].tolist() * 2,
	'Gender': ['Male'] * len(prov_comparison) + ['Female'] * len(prov_comparison),
	'Previous': prov_comparison['male_voters_old'].tolist() + prov_comparison['female_voters_old'].tolist(),
	'Current': prov_comparison['male_voters_new'].tolist() + prov_comparison['female_voters_new'].tolist()
	})

	fig = px.sunburst(
	gender_comparison,
	path=['Gender', 'Province'],
	values='Current',
	title="Gender Distribution by Province (Current)"
	)
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Gender ratio change
	prov_comparison['male_ratio_old'] = prov_comparison['male_voters_old'] / prov_comparison['total_voters_old'] * 100
	prov_comparison['male_ratio_new'] = prov_comparison['male_voters_new'] / prov_comparison['total_voters_new'] * 100
	prov_comparison['ratio_change'] = prov_comparison['male_ratio_new'] - prov_comparison['male_ratio_old']

	fig = px.bar(
	prov_comparison,
	x='province_name',
	y='ratio_change',
	color='ratio_change',
	color_continuous_scale='RdYlGn',
	title="Male Ratio Change by Province (%)"
	)
	st.plotly_chart(fig, use_container_width=True)

	with tab3:
	col1, col2 = st.columns(2)

	with col1:
	# Age group comparison
	old_age_grouped = old_age.groupby('age_group')['count'].sum().reset_index()
	old_age_grouped['period'] = 'Previous'
	new_age_grouped = new_age.groupby('age_group')['count'].sum().reset_index()
	new_age_grouped['period'] = 'Current'

	age_combined = pd.concat([old_age_grouped, new_age_grouped])

	fig = px.bar(
	age_combined,
	x='age_group',
	y='count',
	color='period',
	barmode='group',
	title="Age Distribution Comparison",
	color_discrete_map={'Previous': '#3B82F6', 'Current': '#10B981'}
	)
	fig.update_layout(yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Age change analysis
	age_change = pd.merge(
	old_age_grouped[['age_group', 'count']],
	new_age_grouped[['age_group', 'count']],
	on='age_group',
	suffixes=('_old', '_new')
	)
	age_change['change'] = age_change['count_new'] - age_change['count_old']
	age_change['change_pct'] = (age_change['change'] / age_change['count_old'] * 100).round(2)

	fig = px.bar(
	age_change,
	x='age_group',
	y='change_pct',
	color='change_pct',
	color_continuous_scale='RdYlGn',
	title="Voter Change by Age Group (%)"
	)
	st.plotly_chart(fig, use_container_width=True)

	with tab4:
	# Growth analysis
	col1, col2 = st.columns(2)

	with col1:
	fig = px.scatter(
	prov_comparison,
	x='total_voters_old',
	y='total_voters_new',
	size='voter_change',
	color='voter_change_pct',
	hover_name='province_name',
	title="Voter Growth Scatter Plot",
	labels={'total_voters_old': 'Previous Voters', 'total_voters_new': 'Current Voters'},
	color_continuous_scale='RdYlGn'
	)
	# Add diagonal line (no change line)
	max_val = max(prov_comparison['total_voters_new'].max(), prov_comparison['total_voters_old'].max())
	fig.add_trace(go.Scatter(x=[0, max_val], y=[0, max_val], mode='lines',
	name='No Change Line', line=dict(dash='dash', color='gray')))
	fig.update_layout(xaxis_tickformat=',', yaxis_tickformat=',')
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Growth rate ranking
	sorted_growth = prov_comparison.sort_values('voter_change_pct', ascending=True)

	fig = px.bar(
	sorted_growth,
	y='province_name',
	x='voter_change_pct',
	orientation='h',
	color='voter_change_pct',
	color_continuous_scale='RdYlGn',
	title="Growth Rate by Province (%)"
	)
	st.plotly_chart(fig, use_container_width=True)


	def show_detailed_reports(old_province, new_province, old_district, new_district,
	old_municipality, new_municipality, old_overall, new_overall):
	"""Generate detailed reports."""
	st.header("📋 Detailed Reports & Export")

	report_type = st.selectbox(
	"Select Report Type",
	["Executive Summary", "Province Report", "District Report", "Full Data Export"]
	)

	if report_type == "Executive Summary":
	st.subheader("📑 Executive Summary Report")

	old_total = int(old_overall['total_voters'].iloc[0])
	new_total = int(new_overall['total_voters'].iloc[0])
	change = new_total - old_total
	change_pct = (change / old_total) * 100

	st.markdown(f"""
	## Nepal Election Voter Data Analysis Report
	### Executive Summary

	Report Date: January 11, 2026

	---

	### Key Findings

	1. Total Voter Base Growth
	- Previous Election: {old_total:,} voters
	- Current Election: {new_total:,} voters
	- Net Increase: {change:+,} voters ({change_pct:+.2f}%)

	2. Gender Distribution
	- Male Voters (Current): {int(new_overall['male_voters'].iloc[0]):,}
	- Female Voters (Current): {int(new_overall['female_voters'].iloc[0]):,}
	- Gender Ratio: {(new_overall['male_voters'].iloc[0]/new_overall['female_voters'].iloc[0]):.2f} M:F

	3. Age Demographics
	- Average Voter Age: {new_overall['avg_age'].iloc[0]:.1f} years
	- Age Range: {int(new_overall['min_age'].iloc[0])} - {int(new_overall['max_age'].iloc[0])} years

	4. Administrative Coverage
	- Provinces: 7
	- Districts: 77
	- Local Levels: 753

	---

	### Provincial Highlights

	\| Province \| Previous Voters \| Current Voters \| Change \| Growth % \|
	\|----------\|-----------------\|----------------\|--------\|----------\|
	""")

	prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')
	for _, row in prov_comparison.iterrows():
	st.markdown(f"\| {row['province_name']} \| {int(row['total_voters_old']):,} \| {int(row['total_voters_new']):,} \| {int(row['voter_change']):+,} \| {row['voter_change_pct']:+.2f}% \|")

	st.markdown("""
	---

	### Conclusions

	The voter registration data shows a healthy growth in the electoral base, with all provinces
	showing positive growth. This indicates successful voter registration drives and natural
	population growth reaching voting age.

	The gender balance has slightly improved with female participation showing steady growth.
	The average age of voters remains stable, suggesting a balanced demographic distribution.
	""")

	elif report_type == "Province Report":
	st.subheader("📊 Province-wise Detailed Report")

	prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')

	st.dataframe(
	prov_comparison.style.format({
	'total_voters_old': '{:,.0f}',
	'total_voters_new': '{:,.0f}',
	'voter_change': '{:+,.0f}',
	'voter_change_pct': '{:+.2f}%',
	'male_voters_old': '{:,.0f}',
	'male_voters_new': '{:,.0f}',
	'female_voters_old': '{:,.0f}',
	'female_voters_new': '{:,.0f}'
	}),
	use_container_width=True
	)

	# Download button
	csv = prov_comparison.to_csv(index=False)
	st.download_button(
	label="📥 Download Province Report (CSV)",
	data=csv,
	file_name="province_voter_report.csv",
	mime="text/csv"
	)

	elif report_type == "District Report":
	st.subheader("🏛️ District-wise Detailed Report")

	dist_comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name')

	st.dataframe(
	dist_comparison.style.format({
	'total_voters_old': '{:,.0f}',
	'total_voters_new': '{:,.0f}',
	'voter_change': '{:+,.0f}',
	'voter_change_pct': '{:+.2f}%'
	}),
	use_container_width=True,
	height=500
	)

	csv = dist_comparison.to_csv(index=False)
	st.download_button(
	label="📥 Download District Report (CSV)",
	data=csv,
	file_name="district_voter_report.csv",
	mime="text/csv"
	)

	elif report_type == "Full Data Export":
	st.subheader("📦 Full Data Export")

	col1, col2, col3 = st.columns(3)

	with col1:
	prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')
	st.download_button(
	label="📥 Province Data",
	data=prov_comparison.to_csv(index=False),
	file_name="province_data.csv",
	mime="text/csv"
	)

	with col2:
	dist_comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name')
	st.download_button(
	label="📥 District Data",
	data=dist_comparison.to_csv(index=False),
	file_name="district_data.csv",
	mime="text/csv"
	)

	with col3:
	muni_comparison = create_comparative_dataframe(old_municipality, new_municipality, 'municipality_id', 'municipality_name')
	st.download_button(
	label="📥 Local Level Data",
	data=muni_comparison.to_csv(index=False),
	file_name="local_level_data.csv",
	mime="text/csv"
	)


	if __name__ == "__main__":
	app_entry()