electionnepal / app.py
rockerritesh's picture
feat: add constituency winning threshold analysis page
fa2b444 unverified
"""
Nepal Election Analysis - GIS Visualization & Comparative Voter Study
=====================================================================
A comprehensive application for analyzing voter data changes between two election periods
with interactive GIS maps and statistical visualizations.
Author: Data Analysis Project
Date: January 2026
"""
import streamlit as st
import pandas as pd
import numpy as np
import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sqlite3
import json
import folium
from streamlit_folium import st_folium
import os
from io import BytesIO
from datetime import datetime
# Page configuration
st.set_page_config(
page_title="Nepal Election Analysis",
page_icon="🗳️",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for better styling
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1E3A8A;
text-align: center;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.5rem;
color: #374151;
text-align: center;
margin-bottom: 2rem;
}
.metric-card {
background-color: #F3F4F6;
padding: 1rem;
border-radius: 0.5rem;
text-align: center;
}
.positive-change {
color: #059669;
font-weight: bold;
}
.negative-change {
color: #DC2626;
font-weight: bold;
}
.stTabs [data-baseweb="tab-list"] {
gap: 24px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
padding-left: 20px;
padding-right: 20px;
}
</style>
""", unsafe_allow_html=True)
# ── Login & User Tracking ────────────────────────────────────────────────────
LOGIN_TRACKING_FILE = "data/login_tracking.json"
def init_login_tracking_file():
"""Ensure the login tracking JSON file exists."""
os.makedirs(os.path.dirname(LOGIN_TRACKING_FILE), exist_ok=True)
if not os.path.exists(LOGIN_TRACKING_FILE):
with open(LOGIN_TRACKING_FILE, "w") as f:
json.dump([], f)
def record_login(name: str, location: str):
"""Append a login record to the tracking file."""
init_login_tracking_file()
with open(LOGIN_TRACKING_FILE, "r") as f:
records = json.load(f)
records.append({
"name": name,
"location": location,
"login_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
})
with open(LOGIN_TRACKING_FILE, "w") as f:
json.dump(records, f, indent=2)
def load_login_records():
"""Read all login records."""
init_login_tracking_file()
with open(LOGIN_TRACKING_FILE, "r") as f:
return json.load(f)
def show_login_page():
"""Render the login form and validate credentials."""
st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">Please log in to continue</p>', unsafe_allow_html=True)
with st.form("login_form"):
username = st.text_input("Username")
password = st.text_input("Password", type="password")
submitted = st.form_submit_button("Login")
if submitted:
try:
admin_pw = st.secrets["ADMIN_PASSWORD"]
except Exception:
admin_pw = os.environ.get("ADMIN_PASSWORD", "")
try:
user_pw = st.secrets["USER_PASSWORD"]
except Exception:
user_pw = os.environ.get("USER_PASSWORD", "")
if username == "limsim" and password == admin_pw:
st.session_state["authenticated"] = True
st.session_state["username"] = "limsim"
st.session_state["is_admin"] = True
st.session_state["needs_user_info"] = False
st.session_state["user_display_name"] = "Admin (limsim)"
st.session_state["user_location"] = ""
st.session_state["login_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
record_login("Admin (limsim)", "—")
st.rerun()
elif username == "user_nepal" and password == user_pw:
st.session_state["authenticated"] = True
st.session_state["username"] = "user_nepal"
st.session_state["is_admin"] = False
st.session_state["needs_user_info"] = True
st.rerun()
else:
st.error("Invalid username or password.")
def show_user_info_page():
"""Collect name and location from user_nepal before granting full access."""
st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">Tell us about yourself</p>', unsafe_allow_html=True)
with st.form("user_info_form"):
name = st.text_input("Your Name")
location = st.text_input("Your Location")
submitted = st.form_submit_button("Continue")
if submitted:
if not name.strip() or not location.strip():
st.warning("Please fill in both fields.")
else:
st.session_state["needs_user_info"] = False
st.session_state["user_display_name"] = name.strip()
st.session_state["user_location"] = location.strip()
st.session_state["login_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
record_login(name.strip(), location.strip())
st.rerun()
def show_admin_dashboard():
"""Admin-only dashboard showing login metrics and records."""
st.header("🔐 Admin: Login Records")
records = load_login_records()
if not records:
st.info("No login records yet.")
return
df = pd.DataFrame(records)
col1, col2 = st.columns(2)
with col1:
st.metric("Total Logins", len(df))
with col2:
st.metric("Unique Users", df["name"].nunique())
st.subheader("Login History")
st.dataframe(
df.sort_values("login_time", ascending=False).reset_index(drop=True),
use_container_width=True,
)
csv_data = df.to_csv(index=False)
st.download_button(
label="📥 Download CSV",
data=csv_data,
file_name="login_records.csv",
mime="text/csv",
)
def app_entry():
"""Gate: unauthenticated → login, needs info → info page, else → main()."""
if not st.session_state.get("authenticated", False):
show_login_page()
return
if st.session_state.get("needs_user_info", False):
show_user_info_page()
return
main()
@st.cache_data
def load_shapefiles():
"""Load all Nepal shapefiles for provinces, districts, and local levels."""
base_path = "data/gis/raw/NPL_new_wgs"
# Load country boundary
country_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_0.shp")
# Load provinces (7 provinces)
provinces_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_1.shp")
# Load districts (77 districts)
districts_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_2.shp")
# Load local levels (municipalities)
local_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_3.shp")
return country_gdf, provinces_gdf, districts_gdf, local_gdf
@st.cache_data
def load_json_data():
"""Load administrative data from JSON files."""
base_path = "data/admin/web_data"
with open(f"{base_path}/provinces.json", "r", encoding="utf-8") as f:
provinces_data = json.load(f)
with open(f"{base_path}/districts.json", "r", encoding="utf-8") as f:
districts_data = json.load(f)
with open(f"{base_path}/municipalities.json", "r", encoding="utf-8") as f:
municipalities_data = json.load(f)
return provinces_data, districts_data, municipalities_data
@st.cache_data
def load_cached_stats():
"""Load pre-computed statistics from JSON files for instant loading."""
with open("data/cached_stats/old_stats.json", "r", encoding="utf-8") as f:
old_stats = json.load(f)
with open("data/cached_stats/new_stats.json", "r", encoding="utf-8") as f:
new_stats = json.load(f)
# Convert to DataFrames
old_province = pd.DataFrame(old_stats['provinces'])
old_district = pd.DataFrame(old_stats['districts'])
old_municipality = pd.DataFrame(old_stats['municipalities'])
old_overall = pd.DataFrame([old_stats['overall']])
old_age = pd.DataFrame(old_stats['age_distribution'])
new_province = pd.DataFrame(new_stats['provinces'])
new_district = pd.DataFrame(new_stats['districts'])
new_municipality = pd.DataFrame(new_stats['municipalities'])
new_overall = pd.DataFrame([new_stats['overall']])
new_age = pd.DataFrame(new_stats['age_distribution'])
return (old_province, old_district, old_municipality, old_overall, old_age,
new_province, new_district, new_municipality, new_overall, new_age)
@st.cache_data
def get_database_stats_fast(db_path, provinces_json, districts_json, municipalities_json):
"""Get statistics using pre-aggregated JSON data for speed."""
# Convert JSON data to DataFrames (already aggregated - no need for heavy DB queries)
province_stats = pd.DataFrame([
{
'province_id': int(k),
'province_name': v['name'],
'total_voters': v['total_voters'],
'male_voters': v['male'],
'female_voters': v['female'],
'avg_age': v['avg_age'],
'districts_count': v['districts_count'],
'municipalities_count': v['municipalities_count']
}
for k, v in provinces_json.items()
])
district_stats = pd.DataFrame([
{
'district_id': int(k),
'district_name': v['name'],
'province_id': v['province_id'],
'total_voters': v['total_voters'],
'male_voters': v['male'],
'female_voters': v['female'],
'avg_age': v['avg_age'],
'municipalities_count': v['municipalities_count']
}
for k, v in districts_json.items()
])
# Add province names to districts
prov_name_map = {int(k): v['name'] for k, v in provinces_json.items()}
district_stats['province_name'] = district_stats['province_id'].map(prov_name_map)
municipality_stats = pd.DataFrame([
{
'municipality_id': int(k),
'municipality_name': v['name'],
'district_id': v['district_id'],
'total_voters': v['total_voters'],
'male_voters': v['male'],
'female_voters': v['female'],
'avg_age': v['avg_age']
}
for k, v in municipalities_json.items()
])
# Add district names to municipalities
dist_name_map = {int(k): v['name'] for k, v in districts_json.items()}
dist_prov_map = {int(k): v['province_id'] for k, v in districts_json.items()}
municipality_stats['district_name'] = municipality_stats['district_id'].map(dist_name_map)
municipality_stats['province_id'] = municipality_stats['district_id'].map(dist_prov_map)
# Calculate overall stats from province data
overall_stats = pd.DataFrame([{
'total_voters': province_stats['total_voters'].sum(),
'male_voters': province_stats['male_voters'].sum(),
'female_voters': province_stats['female_voters'].sum(),
'avg_age': province_stats['avg_age'].mean(),
'min_age': 18,
'max_age': 105
}])
# Age distribution - estimate from avg_age (or use simple query if needed)
# Using estimated distribution based on typical voter demographics
total = overall_stats['total_voters'].iloc[0]
age_distribution = pd.DataFrame([
{'age_group': '18-25', 'count': int(total * 0.15), 'gender': 'पुरुष'},
{'age_group': '18-25', 'count': int(total * 0.14), 'gender': 'महिला'},
{'age_group': '26-35', 'count': int(total * 0.18), 'gender': 'पुरुष'},
{'age_group': '26-35', 'count': int(total * 0.17), 'gender': 'महिला'},
{'age_group': '36-45', 'count': int(total * 0.14), 'gender': 'पुरुष'},
{'age_group': '36-45', 'count': int(total * 0.13), 'gender': 'महिला'},
{'age_group': '46-55', 'count': int(total * 0.10), 'gender': 'पुरुष'},
{'age_group': '46-55', 'count': int(total * 0.09), 'gender': 'महिला'},
{'age_group': '56-65', 'count': int(total * 0.06), 'gender': 'पुरुष'},
{'age_group': '56-65', 'count': int(total * 0.05), 'gender': 'महिला'},
{'age_group': '65+', 'count': int(total * 0.04), 'gender': 'पुरुष'},
{'age_group': '65+', 'count': int(total * 0.03), 'gender': 'महिला'},
])
return province_stats, district_stats, municipality_stats, overall_stats, age_distribution
@st.cache_data
def get_age_distribution_from_db(db_path):
"""Get actual age distribution from database - separate cached query."""
conn = sqlite3.connect(db_path)
age_dist_query = """
SELECT
CASE
WHEN age BETWEEN 18 AND 25 THEN '18-25'
WHEN age BETWEEN 26 AND 35 THEN '26-35'
WHEN age BETWEEN 36 AND 45 THEN '36-45'
WHEN age BETWEEN 46 AND 55 THEN '46-55'
WHEN age BETWEEN 56 AND 65 THEN '56-65'
WHEN age > 65 THEN '65+'
ELSE 'Unknown'
END as age_group,
COUNT(*) as count,
gender
FROM voters
GROUP BY age_group, gender
ORDER BY age_group
"""
age_distribution = pd.read_sql_query(age_dist_query, conn)
conn.close()
return age_distribution
@st.cache_data
def get_old_database_stats_fast(db_path):
"""Get statistics from old database using optimized queries."""
conn = sqlite3.connect(db_path)
# Get province-level aggregated stats with a simpler approach
# First get voter counts per booth
province_query = """
SELECT
p.id as province_id,
p.name_np as province_name,
COUNT(v.id) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age
FROM voters v
JOIN voting_booths vb ON v.booth_id = vb.id
JOIN wards w ON vb.ward_id = w.id
JOIN municipalities m ON w.municipality_id = m.id
JOIN districts d ON m.district_id = d.id
JOIN provinces p ON d.province_id = p.id
GROUP BY p.id, p.name_np
ORDER BY p.id
"""
# Use a faster approach - get total counts first
total_query = """
SELECT COUNT(*) as total_voters,
SUM(CASE WHEN gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(age) as avg_age
FROM voters
"""
overall_stats = pd.read_sql_query(total_query, conn)
overall_stats['min_age'] = 18
overall_stats['max_age'] = 105
# Get province stats using indexed booth_id
province_query_fast = """
WITH booth_province AS (
SELECT vb.id as booth_id, p.id as province_id, p.name_np as province_name
FROM voting_booths vb
JOIN wards w ON vb.ward_id = w.id
JOIN municipalities m ON w.municipality_id = m.id
JOIN districts d ON m.district_id = d.id
JOIN provinces p ON d.province_id = p.id
)
SELECT
bp.province_id,
bp.province_name,
COUNT(v.id) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age
FROM voters v
JOIN booth_province bp ON v.booth_id = bp.booth_id
GROUP BY bp.province_id, bp.province_name
ORDER BY bp.province_id
"""
province_stats = pd.read_sql_query(province_query_fast, conn)
# Get district stats
district_query_fast = """
WITH booth_district AS (
SELECT vb.id as booth_id, d.id as district_id, d.name_np as district_name,
d.province_id, p.name_np as province_name
FROM voting_booths vb
JOIN wards w ON vb.ward_id = w.id
JOIN municipalities m ON w.municipality_id = m.id
JOIN districts d ON m.district_id = d.id
JOIN provinces p ON d.province_id = p.id
)
SELECT
bd.district_id,
bd.district_name,
bd.province_id,
bd.province_name,
COUNT(v.id) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age
FROM voters v
JOIN booth_district bd ON v.booth_id = bd.booth_id
GROUP BY bd.district_id, bd.district_name, bd.province_id, bd.province_name
ORDER BY bd.province_id, bd.district_id
"""
district_stats = pd.read_sql_query(district_query_fast, conn)
# Get municipality stats
municipality_query_fast = """
WITH booth_muni AS (
SELECT vb.id as booth_id, m.id as municipality_id, m.name_np as municipality_name,
m.district_id, d.name_np as district_name, d.province_id
FROM voting_booths vb
JOIN wards w ON vb.ward_id = w.id
JOIN municipalities m ON w.municipality_id = m.id
JOIN districts d ON m.district_id = d.id
)
SELECT
bm.municipality_id,
bm.municipality_name,
bm.district_id,
bm.district_name,
bm.province_id,
COUNT(v.id) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age
FROM voters v
JOIN booth_muni bm ON v.booth_id = bm.booth_id
GROUP BY bm.municipality_id, bm.municipality_name, bm.district_id, bm.district_name, bm.province_id
ORDER BY bm.province_id, bm.district_id, bm.municipality_id
"""
municipality_stats = pd.read_sql_query(municipality_query_fast, conn)
# Age distribution - estimated for speed
total = overall_stats['total_voters'].iloc[0]
age_distribution = pd.DataFrame([
{'age_group': '18-25', 'count': int(total * 0.15), 'gender': 'पुरुष'},
{'age_group': '18-25', 'count': int(total * 0.14), 'gender': 'महिला'},
{'age_group': '26-35', 'count': int(total * 0.18), 'gender': 'पुरुष'},
{'age_group': '26-35', 'count': int(total * 0.17), 'gender': 'महिला'},
{'age_group': '36-45', 'count': int(total * 0.14), 'gender': 'पुरुष'},
{'age_group': '36-45', 'count': int(total * 0.13), 'gender': 'महिला'},
{'age_group': '46-55', 'count': int(total * 0.10), 'gender': 'पुरुष'},
{'age_group': '46-55', 'count': int(total * 0.09), 'gender': 'महिला'},
{'age_group': '56-65', 'count': int(total * 0.06), 'gender': 'पुरुष'},
{'age_group': '56-65', 'count': int(total * 0.05), 'gender': 'महिला'},
{'age_group': '65+', 'count': int(total * 0.04), 'gender': 'पुरुष'},
{'age_group': '65+', 'count': int(total * 0.03), 'gender': 'महिला'},
])
conn.close()
return province_stats, district_stats, municipality_stats, overall_stats, age_distribution
def create_comparative_dataframe(old_stats, new_stats, key_col, name_col):
"""Create comparative dataframe between old and new data."""
merged = pd.merge(
old_stats, new_stats,
on=[key_col, name_col] if name_col else [key_col],
suffixes=('_old', '_new'),
how='outer'
)
# Calculate changes
merged['voter_change'] = merged['total_voters_new'] - merged['total_voters_old']
merged['voter_change_pct'] = ((merged['total_voters_new'] - merged['total_voters_old']) / merged['total_voters_old'] * 100).round(2)
merged['male_change'] = merged['male_voters_new'] - merged['male_voters_old']
merged['female_change'] = merged['female_voters_new'] - merged['female_voters_old']
return merged
def show_voter_search():
"""Display voter search interface with hierarchical location filtering."""
st.header("🔍 Voter List Search")
st.markdown("""
Search voters by location hierarchy: **Province → District → Municipality → Ward → Booth**
""")
# Database selection
db_choice = st.radio(
"Select Database",
["Current Voter Data", "Old Voter Data"],
horizontal=True
)
db_path = "voter_db_new.sqlite3" if db_choice == "Current Voter Data" else "voter_db_old.sqlite3"
# Load location hierarchy data with caching
@st.cache_data
def load_provinces(_db_path):
conn = sqlite3.connect(_db_path)
df = pd.read_sql_query("SELECT id, name_np FROM provinces ORDER BY id", conn)
conn.close()
return df
@st.cache_data
def load_districts(_db_path, province_id=None):
conn = sqlite3.connect(_db_path)
if province_id:
df = pd.read_sql_query(f"SELECT id, province_id, name_np FROM districts WHERE province_id = {province_id} ORDER BY name_np", conn)
else:
df = pd.read_sql_query("SELECT id, province_id, name_np FROM districts ORDER BY name_np", conn)
conn.close()
return df
@st.cache_data
def load_municipalities(_db_path, district_id=None):
conn = sqlite3.connect(_db_path)
if district_id:
df = pd.read_sql_query(f"SELECT id, district_id, name_np FROM municipalities WHERE district_id = {district_id} ORDER BY name_np", conn)
else:
df = pd.read_sql_query("SELECT id, district_id, name_np FROM municipalities ORDER BY name_np", conn)
conn.close()
return df
@st.cache_data
def load_wards(_db_path, municipality_id=None):
conn = sqlite3.connect(_db_path)
if municipality_id:
df = pd.read_sql_query(f"SELECT id, municipality_id, ward_number FROM wards WHERE municipality_id = {municipality_id} ORDER BY ward_number", conn)
else:
df = pd.read_sql_query("SELECT id, municipality_id, ward_number FROM wards ORDER BY ward_number", conn)
conn.close()
return df
@st.cache_data
def load_booths(_db_path, ward_id=None):
conn = sqlite3.connect(_db_path)
if ward_id:
df = pd.read_sql_query(f"SELECT id, ward_id, name_np, google_map_link FROM voting_booths WHERE ward_id = {ward_id} ORDER BY name_np", conn)
else:
df = pd.read_sql_query("SELECT id, ward_id, name_np, google_map_link FROM voting_booths ORDER BY name_np", conn)
conn.close()
return df
@st.cache_data
def search_voters(_db_path, province_id, district_id, municipality_id, ward_id, booth_id, search_name, search_voter_id, gender_filter):
"""Search voters based on filters."""
query = """
SELECT
v.voter_id AS "Voter ID",
v.name_np AS "Name",
v.age AS "Age",
v.gender AS "Gender",
v.parents_name_np AS "Parents Name",
v.partners_name_np AS "Partner Name",
b.name_np AS "Booth",
w.ward_number AS "Ward",
m.name_np AS "Municipality",
d.name_np AS "District",
p.name_np AS "Province"
FROM voters v
JOIN voting_booths b ON v.booth_id = b.id
JOIN wards w ON b.ward_id = w.id
JOIN municipalities m ON w.municipality_id = m.id
JOIN districts d ON m.district_id = d.id
JOIN provinces p ON d.province_id = p.id
WHERE 1=1
"""
params = []
if province_id:
query += " AND p.id = ?"
params.append(province_id)
if district_id:
query += " AND d.id = ?"
params.append(district_id)
if municipality_id:
query += " AND m.id = ?"
params.append(municipality_id)
if ward_id:
query += " AND w.id = ?"
params.append(ward_id)
if booth_id:
query += " AND b.id = ?"
params.append(booth_id)
if search_name:
query += " AND v.name_np LIKE ?"
params.append(f"%{search_name}%")
if search_voter_id:
query += " AND v.voter_id LIKE ?"
params.append(f"%{search_voter_id}%")
if gender_filter == "male":
query += " AND v.gender = 'पुरुष'"
elif gender_filter == "female":
query += " AND v.gender = 'महिला'"
query += " ORDER BY v.name_np"
conn = sqlite3.connect(_db_path)
results_df = pd.read_sql_query(query, conn, params=params)
conn.close()
return results_df
# Load provinces
provinces_df = load_provinces(db_path)
st.markdown("---")
st.subheader("📍 Location Filter")
# Search Level Selection
search_level = st.radio(
"🎯 Search Level",
["Province Level", "District Level", "Municipality Level", "Ward Level"],
horizontal=True,
help="Choose the geographic level to search voters"
)
# Province selector (always shown)
province_options = ["-- Select Province --"] + [f"{row['id']}. {row['name_np']}" for _, row in provinces_df.iterrows()]
selected_province = st.selectbox("🏔️ Province (प्रदेश)", province_options, key="voter_province")
selected_province_id = None
if selected_province != "-- Select Province --":
selected_province_id = int(selected_province.split(".")[0])
# Initialize variables
selected_district_id = None
selected_municipality_id = None
selected_ward_id = None
selected_booth_id = None
wards_df = None
# District selector (shown for District, Municipality, Ward levels)
if search_level in ["District Level", "Municipality Level", "Ward Level"]:
if selected_province_id:
districts_df = load_districts(db_path, selected_province_id)
district_options = ["-- Select District --"] + [f"{row['id']}. {row['name_np']}" for _, row in districts_df.iterrows()]
else:
districts_df = None
district_options = ["-- Select Province First --"]
selected_district = st.selectbox("🏛️ District (जिल्ला)", district_options, key="voter_district")
if selected_district not in ["-- Select District --", "-- Select Province First --"]:
selected_district_id = int(selected_district.split(".")[0])
# Municipality selector (shown for Municipality and Ward levels)
if search_level in ["Municipality Level", "Ward Level"]:
if selected_district_id:
municipalities_df = load_municipalities(db_path, selected_district_id)
muni_options = ["-- Select Municipality --"] + [f"{row['id']}. {row['name_np']}" for _, row in municipalities_df.iterrows()]
else:
municipalities_df = None
muni_options = ["-- Select District First --"]
selected_municipality = st.selectbox("🏘️ Municipality (नगरपालिका)", muni_options, key="voter_municipality")
if selected_municipality not in ["-- Select Municipality --", "-- Select District First --"]:
selected_municipality_id = int(selected_municipality.split(".")[0])
# Ward selector (shown only for Ward level)
if search_level == "Ward Level":
col_ward, col_booth = st.columns(2)
with col_ward:
if selected_municipality_id:
wards_df = load_wards(db_path, selected_municipality_id)
ward_options = ["-- All Wards --"] + [f"Ward {row['ward_number']}" for _, row in wards_df.iterrows()]
else:
wards_df = None
ward_options = ["-- Select Municipality First --"]
selected_ward = st.selectbox("🔢 Ward (वडा)", ward_options, key="voter_ward")
if selected_ward not in ["-- All Wards --", "-- Select Municipality First --"]:
ward_number = int(selected_ward.split(" ")[1])
if wards_df is not None:
ward_row = wards_df[wards_df['ward_number'] == ward_number]
if not ward_row.empty:
selected_ward_id = int(ward_row.iloc[0]['id'])
with col_booth:
# Booth selector (optional, depends on ward)
if selected_ward_id:
booths_df = load_booths(db_path, selected_ward_id)
booth_options = ["-- All Booths --"] + [f"{row['id']}. {row['name_np']}" for _, row in booths_df.iterrows()]
selected_booth = st.selectbox("🏫 Voting Booth (मतदान केन्द्र)", booth_options, key="voter_booth")
if selected_booth != "-- All Booths --":
selected_booth_id = int(selected_booth.split(".")[0])
else:
st.selectbox("🏫 Voting Booth (मतदान केन्द्र)", ["-- Select Ward First --"], key="voter_booth_disabled")
st.markdown("---")
# Search by name
st.subheader("🔎 Search Options")
col_search1, col_search2 = st.columns([3, 1])
with col_search1:
search_name = st.text_input("Enter voter name (नाम खोज्नुहोस्)", placeholder="e.g., राम बहादुर", key="search_name")
with col_search2:
search_voter_id = st.text_input("Voter ID", placeholder="e.g., 12345678", key="search_voter_id")
# Gender filter
gender_filter = st.radio("Gender Filter", ["All", "पुरुष (Male)", "महिला (Female)"], horizontal=True, key="gender_filter")
gender_val = None
if gender_filter == "पुरुष (Male)":
gender_val = "male"
elif gender_filter == "महिला (Female)":
gender_val = "female"
st.markdown("---")
# Show current selection summary
if selected_province_id:
selection_parts = [f"**Province:** {selected_province.split('. ')[1] if '. ' in selected_province else selected_province}"]
if selected_district_id:
selection_parts.append(f"**District:** {selected_district.split('. ')[1] if '. ' in selected_district else selected_district}")
if selected_municipality_id:
selection_parts.append(f"**Municipality:** {selected_municipality.split('. ')[1] if '. ' in selected_municipality else selected_municipality}")
if selected_ward_id:
selection_parts.append(f"**Ward:** {selected_ward}")
st.info(f"🔍 **Search Level:** {search_level} | " + " → ".join(selection_parts))
# Search button with validation based on search level
col_btn1, col_btn2 = st.columns([1, 3])
with col_btn1:
search_clicked = st.button("🔍 Search Voters", type="primary", use_container_width=True)
# Perform search with level-based validation
if search_clicked:
# Validate based on search level
validation_error = None
if search_level == "Province Level" and not selected_province_id:
validation_error = "⚠️ Please select a Province to search."
elif search_level == "District Level" and not selected_district_id:
validation_error = "⚠️ Please select a District to search."
elif search_level == "Municipality Level" and not selected_municipality_id:
validation_error = "⚠️ Please select a Municipality to search."
elif search_level == "Ward Level" and not selected_municipality_id:
validation_error = "⚠️ Please select at least a Municipality to search at Ward level."
if validation_error:
st.warning(validation_error)
else:
# Show warning for large searches
if search_level == "Province Level":
st.warning("⚠️ Searching entire province may take time and return large results...")
elif search_level == "District Level":
st.warning("⏳ Searching entire district...")
with st.spinner(f"Searching voters at {search_level}..."):
try:
results_df = search_voters(
db_path,
selected_province_id,
selected_district_id,
selected_municipality_id,
selected_ward_id,
selected_booth_id,
search_name,
search_voter_id,
gender_val
)
# Store results in session state
st.session_state['voter_search_results'] = results_df
st.session_state['voter_search_done'] = True
except Exception as e:
st.error(f"Error searching: {str(e)}")
st.exception(e)
# Display results from session state
if st.session_state.get('voter_search_done', False) and 'voter_search_results' in st.session_state:
results_df = st.session_state['voter_search_results']
if len(results_df) > 0:
st.success(f"✅ Found {len(results_df):,} voters")
# Statistics
col1, col2, col3, col4 = st.columns(4)
col1.metric("Total Found", f"{len(results_df):,}")
col2.metric("Male (पुरुष)", f"{len(results_df[results_df['Gender'] == 'पुरुष']):,}")
col3.metric("Female (महिला)", f"{len(results_df[results_df['Gender'] == 'महिला']):,}")
col4.metric("Avg Age", f"{results_df['Age'].mean():.1f}")
# Pagination for large datasets
total_voters = len(results_df)
rows_per_page = 100
total_pages = (total_voters + rows_per_page - 1) // rows_per_page
if total_pages > 1:
st.markdown("---")
col_page1, col_page2, col_page3 = st.columns([1, 2, 1])
with col_page2:
current_page = st.number_input(
f"Page (1 to {total_pages})",
min_value=1,
max_value=total_pages,
value=1,
key="voter_page"
)
start_idx = (current_page - 1) * rows_per_page
end_idx = min(start_idx + rows_per_page, total_voters)
st.info(f"📄 Showing voters {start_idx + 1:,} to {end_idx:,} of {total_voters:,} (Page {current_page} of {total_pages})")
# Display paginated results
st.dataframe(
results_df.iloc[start_idx:end_idx],
use_container_width=True,
hide_index=True,
height=500
)
else:
# Display all results if small dataset
st.dataframe(
results_df,
use_container_width=True,
hide_index=True,
height=500
)
# Export options
st.markdown("### 📥 Export Options")
export_format = st.radio(
"Select export format:",
options=["CSV", "XLSX", "PDF"],
horizontal=True,
key="export_format"
)
# Download button based on selected format
if export_format == "CSV":
csv = results_df.to_csv(index=False)
st.download_button(
label=f"📥 Download All {len(results_df):,} Voters (CSV)",
data=csv,
file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
mime="text/csv"
)
elif export_format == "XLSX":
# Excel Export
from io import BytesIO
# Create Excel file in memory
output = BytesIO()
with pd.ExcelWriter(output, engine='openpyxl') as writer:
results_df.to_excel(writer, index=False, sheet_name='Voters')
# Auto-adjust column widths
worksheet = writer.sheets['Voters']
for idx, col in enumerate(results_df.columns):
max_length = max(
results_df[col].astype(str).apply(len).max(),
len(str(col))
)
# Limit column width to 50 characters
adjusted_width = min(max_length + 2, 50)
# Convert column index to Excel column letter
col_letter = chr(65 + idx) if idx < 26 else chr(65 + idx // 26 - 1) + chr(65 + idx % 26)
worksheet.column_dimensions[col_letter].width = adjusted_width
excel_data = output.getvalue()
st.download_button(
label=f"📥 Download All {len(results_df):,} Voters (XLSX)",
data=excel_data,
file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)
else:
# PDF Export
try:
from reportlab.lib.pagesizes import A4, landscape
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# Create PDF in memory
buffer = BytesIO()
doc = SimpleDocTemplate(
buffer,
pagesize=landscape(A4),
rightMargin=30,
leftMargin=30,
topMargin=30,
bottomMargin=30
)
elements = []
styles = getSampleStyleSheet()
# Title
title_style = ParagraphStyle(
'CustomTitle',
parent=styles['Heading1'],
fontSize=16,
textColor=colors.HexColor('#1E3A8A'),
spaceAfter=12,
alignment=1 # Center
)
title = Paragraph("Voter Search Results", title_style)
elements.append(title)
# Metadata
meta_style = ParagraphStyle(
'Meta',
parent=styles['Normal'],
fontSize=10,
textColor=colors.grey,
alignment=1
)
meta_text = f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Total Voters: {len(results_df):,}"
meta = Paragraph(meta_text, meta_style)
elements.append(meta)
elements.append(Spacer(1, 0.3*inch))
# Prepare table data
# Use actual columns from the dataframe
all_columns = list(results_df.columns)
# Create table data with headers
table_data = [all_columns] # Header
# Add rows (limit to prevent huge PDFs)
max_rows = min(len(results_df), 5000) # Limit to 5,000 rows
for idx, row in results_df.head(max_rows).iterrows():
table_data.append([str(val) if pd.notna(val) else '' for val in row])
# Create table
table = Table(table_data)
# Style the table
table_style = TableStyle([
('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1E3A8A')),
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
('FONTSIZE', (0, 0), (-1, 0), 10),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), colors.beige),
('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
('FONTSIZE', (0, 1), (-1, -1), 8),
('GRID', (0, 0), (-1, -1), 1, colors.black),
('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey])
])
table.setStyle(table_style)
elements.append(table)
# Add note if data was truncated
if len(results_df) > max_rows:
elements.append(Spacer(1, 0.2*inch))
note = Paragraph(
f"Note: Showing first {max_rows:,} of {len(results_df):,} voters. Download CSV for complete data.",
meta_style
)
elements.append(note)
# Build PDF
doc.build(elements)
# Get PDF data
pdf_data = buffer.getvalue()
buffer.close()
st.download_button(
label=f"📥 Download All {len(results_df):,} Voters (PDF)",
data=pdf_data,
file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
mime="application/pdf"
)
if len(results_df) > max_rows:
st.info(f"ℹ️ PDF export limited to first {max_rows:,} voters. Use CSV export for complete data.")
except ImportError:
st.error("📦 PDF export requires 'reportlab' library. Installing...")
st.code("pip install reportlab", language="bash")
st.info("Please install reportlab and restart the application to use PDF export.")
except Exception as e:
st.error(f"❌ Error generating PDF: {str(e)}")
st.info("Please use CSV export as an alternative.")
else:
st.warning("⚠️ No voters found matching your criteria. Try broadening your search.")
# Show location statistics based on search level
show_stats = False
stats_level = None
stats_id = None
if search_level == "Province Level" and selected_province_id:
show_stats = True
stats_level = "province"
stats_id = selected_province_id
elif search_level == "District Level" and selected_district_id:
show_stats = True
stats_level = "district"
stats_id = selected_district_id
elif search_level == "Municipality Level" and selected_municipality_id:
show_stats = True
stats_level = "municipality"
stats_id = selected_municipality_id
elif search_level == "Ward Level" and selected_municipality_id:
show_stats = True
stats_level = "ward" if selected_ward_id else "municipality"
stats_id = selected_ward_id if selected_ward_id else selected_municipality_id
if show_stats:
st.markdown("---")
st.subheader("📊 Location Statistics")
try:
conn = sqlite3.connect(db_path)
if stats_level == "province":
stats_query = """
SELECT
COUNT(*) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age,
COUNT(DISTINCT d.id) as total_districts,
COUNT(DISTINCT m.id) as total_municipalities
FROM voters v
JOIN voting_booths b ON v.booth_id = b.id
JOIN wards w ON b.ward_id = w.id
JOIN municipalities m ON w.municipality_id = m.id
JOIN districts d ON m.district_id = d.id
WHERE d.province_id = ?
"""
stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])
if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
stats = stats_df.iloc[0]
col1, col2, col3, col4, col5, col6 = st.columns(6)
col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
col5.metric("Districts", int(stats['total_districts']))
col6.metric("Municipalities", int(stats['total_municipalities']))
elif stats_level == "district":
stats_query = """
SELECT
COUNT(*) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age,
COUNT(DISTINCT m.id) as total_municipalities,
COUNT(DISTINCT w.id) as total_wards
FROM voters v
JOIN voting_booths b ON v.booth_id = b.id
JOIN wards w ON b.ward_id = w.id
JOIN municipalities m ON w.municipality_id = m.id
WHERE m.district_id = ?
"""
stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])
if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
stats = stats_df.iloc[0]
col1, col2, col3, col4, col5, col6 = st.columns(6)
col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
col5.metric("Municipalities", int(stats['total_municipalities']))
col6.metric("Wards", int(stats['total_wards']))
elif stats_level == "municipality":
stats_query = """
SELECT
COUNT(*) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age,
COUNT(DISTINCT w.id) as total_wards,
COUNT(DISTINCT b.id) as total_booths
FROM voters v
JOIN voting_booths b ON v.booth_id = b.id
JOIN wards w ON b.ward_id = w.id
WHERE w.municipality_id = ?
"""
stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])
if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
stats = stats_df.iloc[0]
col1, col2, col3, col4, col5, col6 = st.columns(6)
col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
col5.metric("Wards", int(stats['total_wards']))
col6.metric("Booths", int(stats['total_booths']))
elif stats_level == "ward":
stats_query = """
SELECT
COUNT(*) as total_voters,
SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters,
SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters,
AVG(v.age) as avg_age,
COUNT(DISTINCT b.id) as total_booths
FROM voters v
JOIN voting_booths b ON v.booth_id = b.id
JOIN wards w ON b.ward_id = w.id
WHERE w.id = ?
"""
stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id])
if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0:
stats = stats_df.iloc[0]
col1, col2, col3, col4, col5 = st.columns(5)
col1.metric("Total Voters", f"{int(stats['total_voters']):,}")
col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}")
col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}")
col4.metric("Avg Age", f"{stats['avg_age']:.1f}")
col5.metric("Booths", int(stats['total_booths']))
conn.close()
except Exception as e:
st.error(f"Error loading statistics: {str(e)}")
def main():
# Header
st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">Comparative Study: Old vs Current Voter Data with GIS Visualization</p>', unsafe_allow_html=True)
# Sidebar
with st.sidebar:
st.image("https://upload.wikimedia.org/wikipedia/commons/thumb/9/9b/Flag_of_Nepal.svg/1200px-Flag_of_Nepal.svg.png", width=100)
st.title("Navigation")
pages = ["🏠 Dashboard Overview", "🗺️ GIS Maps", "🗳️ Election 2079 Results",
"🎯 Constituency Thresholds",
"🔍 Voter Search", "👥 Caste/Surname Search", "📊 Province Analysis", "🏛️ District Analysis", "🏘️ Local Level Analysis",
"📊 Age Group Analysis", "📈 Comparative Study", "📋 Detailed Reports"]
if st.session_state.get("is_admin", False):
pages.append("🔐 Admin Dashboard")
page = st.radio("Select Analysis View", pages)
# Logout button
if st.button("🚪 Logout"):
for key in list(st.session_state.keys()):
del st.session_state[key]
st.rerun()
# Load data
with st.spinner("Loading data..."):
try:
country_gdf, provinces_gdf, districts_gdf, local_gdf = load_shapefiles()
provinces_json, districts_json, municipalities_json = load_json_data()
# Load pre-computed stats from JSON files (instant loading!)
(old_province, old_district, old_municipality, old_overall, old_age,
new_province, new_district, new_municipality, new_overall, new_age) = load_cached_stats()
except Exception as e:
st.error(f"Error loading data: {e}")
st.exception(e)
return
# Page routing
if page == "🏠 Dashboard Overview":
show_dashboard(old_overall, new_overall, old_province, new_province, old_age, new_age)
elif page == "🗺️ GIS Maps":
show_gis_maps(provinces_gdf, districts_gdf, local_gdf, provinces_json, districts_json, new_province, new_district)
elif page == "🗳️ Election 2079 Results":
from election_gis_visualization import show_election_gis_page
show_election_gis_page()
elif page == "🎯 Constituency Thresholds":
from constituency_analysis import show_constituency_analysis
show_constituency_analysis()
elif page == "🔍 Voter Search":
show_voter_search()
elif page == "👥 Caste/Surname Search":
from castesearch import show_caste_search
show_caste_search()
elif page == "📊 Province Analysis":
show_province_analysis(old_province, new_province, provinces_json)
elif page == "🏛️ District Analysis":
show_district_analysis(old_district, new_district, districts_json)
elif page == "🏘️ Local Level Analysis":
show_local_analysis(old_municipality, new_municipality, municipalities_json)
elif page == "� Age Group Analysis":
from age_group_analysis import show_age_group_analysis
show_age_group_analysis()
elif page == "�📈 Comparative Study":
show_comparative_study(old_province, new_province, old_district, new_district,
old_municipality, new_municipality, old_age, new_age)
elif page == "📋 Detailed Reports":
show_detailed_reports(old_province, new_province, old_district, new_district,
old_municipality, new_municipality, old_overall, new_overall)
elif page == "🔐 Admin Dashboard":
show_admin_dashboard()
def show_dashboard(old_overall, new_overall, old_province, new_province, old_age, new_age):
"""Display main dashboard with key metrics."""
st.header("📊 Dashboard Overview")
# Key metrics
col1, col2, col3, col4 = st.columns(4)
old_total = int(old_overall['total_voters'].iloc[0])
new_total = int(new_overall['total_voters'].iloc[0])
change = new_total - old_total
change_pct = (change / old_total) * 100
with col1:
st.metric(
label="📊 Total Voters (Current)",
value=f"{new_total:,}",
delta=f"{change:+,} ({change_pct:+.2f}%)"
)
with col2:
old_male = int(old_overall['male_voters'].iloc[0])
new_male = int(new_overall['male_voters'].iloc[0])
st.metric(
label="👨 Male Voters",
value=f"{new_male:,}",
delta=f"{new_male - old_male:+,}"
)
with col3:
old_female = int(old_overall['female_voters'].iloc[0])
new_female = int(new_overall['female_voters'].iloc[0])
st.metric(
label="👩 Female Voters",
value=f"{new_female:,}",
delta=f"{new_female - old_female:+,}"
)
with col4:
st.metric(
label="📅 Average Age",
value=f"{new_overall['avg_age'].iloc[0]:.1f}",
delta=f"{new_overall['avg_age'].iloc[0] - old_overall['avg_age'].iloc[0]:.1f}"
)
st.markdown("---")
# Summary cards
col1, col2 = st.columns(2)
with col1:
st.subheader("🗺️ Nepal at a Glance")
st.info(f"""
- **7 Provinces**
- **77 Districts**
- **753 Local Levels** (Municipalities & Rural Municipalities)
- **{len(new_province):,}** Administrative Regions with Data
""")
with col2:
st.subheader("📈 Voter Growth Summary")
st.success(f"""
- **Previous Election:** {old_total:,} voters
- **Current Election:** {new_total:,} voters
- **Net Change:** {change:+,} voters
- **Growth Rate:** {change_pct:.2f}%
""")
st.markdown("---")
# Charts row
col1, col2 = st.columns(2)
with col1:
st.subheader("📊 Province-wise Voter Distribution (Current)")
fig = px.pie(
new_province,
values='total_voters',
names='province_name',
title="Voter Distribution by Province",
color_discrete_sequence=px.colors.qualitative.Set3
)
fig.update_traces(textposition='inside', textinfo='percent+label')
st.plotly_chart(fig, use_container_width=True)
with col2:
st.subheader("👥 Gender Distribution Comparison")
gender_data = pd.DataFrame({
'Gender': ['Male (Old)', 'Female (Old)', 'Male (New)', 'Female (New)'],
'Count': [old_male, old_female, new_male, new_female],
'Period': ['Previous', 'Previous', 'Current', 'Current']
})
fig = px.bar(
gender_data,
x='Gender',
y='Count',
color='Period',
barmode='group',
title="Gender Distribution: Old vs New",
color_discrete_map={'Previous': '#3B82F6', 'Current': '#10B981'}
)
fig.update_layout(yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
# Age distribution comparison
st.subheader("📊 Age Distribution Comparison")
col1, col2 = st.columns(2)
with col1:
# Old age distribution
old_age_grouped = old_age.groupby('age_group')['count'].sum().reset_index()
fig = px.bar(
old_age_grouped,
x='age_group',
y='count',
title="Age Distribution (Previous)",
color='age_group',
color_discrete_sequence=px.colors.qualitative.Pastel
)
fig.update_layout(yaxis_tickformat=',', showlegend=False)
st.plotly_chart(fig, use_container_width=True)
with col2:
# New age distribution
new_age_grouped = new_age.groupby('age_group')['count'].sum().reset_index()
fig = px.bar(
new_age_grouped,
x='age_group',
y='count',
title="Age Distribution (Current)",
color='age_group',
color_discrete_sequence=px.colors.qualitative.Set2
)
fig.update_layout(yaxis_tickformat=',', showlegend=False)
st.plotly_chart(fig, use_container_width=True)
def show_gis_maps(provinces_gdf, districts_gdf, local_gdf, provinces_json, districts_json, new_province, new_district):
"""Display interactive GIS maps."""
st.header("🗺️ Interactive GIS Maps of Nepal")
map_type = st.selectbox(
"Select Map View",
["Province Map", "District Map", "Local Level Map", "Choropleth - Voters by Province",
"Choropleth - Voters by District"]
)
if map_type == "Province Map":
st.subheader("Province Boundaries of Nepal")
# Create folium map
m = folium.Map(location=[28.3949, 84.1240], zoom_start=7, tiles='CartoDB positron')
# Add province polygons
for idx, row in provinces_gdf.iterrows():
# Get province data
province_id = str(row['PROVINCE'])
prov_data = provinces_json.get(province_id, {})
popup_html = f"""
<b>Province {row['PROVINCE']}: {row['PR_NAME']}</b><br>
Nepali: {prov_data.get('name', 'N/A')}<br>
Total Voters: {prov_data.get('total_voters', 'N/A'):,}<br>
Districts: {prov_data.get('districts_count', 'N/A')}<br>
Municipalities: {prov_data.get('municipalities_count', 'N/A')}
"""
folium.GeoJson(
row['geometry'],
style_function=lambda x, idx=idx: {
'fillColor': px.colors.qualitative.Set3[idx % len(px.colors.qualitative.Set3)],
'color': 'black',
'weight': 2,
'fillOpacity': 0.5
},
popup=folium.Popup(popup_html, max_width=300)
).add_to(m)
st_folium(m, width=None, height=600)
elif map_type == "District Map":
st.subheader("District Boundaries of Nepal")
# Province filter
selected_province = st.selectbox(
"Filter by Province (Optional)",
["All Provinces"] + [f"Province {i}" for i in range(1, 8)]
)
m = folium.Map(location=[28.3949, 84.1240], zoom_start=7, tiles='CartoDB positron')
filtered_districts = districts_gdf.copy()
if selected_province != "All Provinces":
prov_num = int(selected_province.split()[1])
filtered_districts = districts_gdf[districts_gdf['PROVINCE'] == prov_num]
for idx, row in filtered_districts.iterrows():
popup_html = f"""
<b>{row['DISTRICT']}</b><br>
Province: {row['PR_NAME']}
"""
folium.GeoJson(
row['geometry'],
style_function=lambda x: {
'fillColor': '#3B82F6',
'color': 'white',
'weight': 1,
'fillOpacity': 0.4
},
popup=folium.Popup(popup_html, max_width=200)
).add_to(m)
st_folium(m, width=None, height=600)
elif map_type == "Local Level Map":
st.subheader("Local Level (Municipality/Rural Municipality) Boundaries")
# Province filter for performance
selected_province = st.selectbox(
"Select Province",
[f"Province {i}" for i in range(1, 8)]
)
prov_num = int(selected_province.split()[1])
filtered_local = local_gdf[local_gdf['PROVINCE'] == prov_num]
m = folium.Map(location=[28.3949, 84.1240], zoom_start=8, tiles='CartoDB positron')
# Color by type
colors = {'Gaunpalika': '#10B981', 'Nagarpalika': '#F59E0B',
'Sub Metropolitian': '#EF4444', 'Metropolitian': '#7C3AED'}
for idx, row in filtered_local.iterrows():
local_type = row.get('TYPE', 'Unknown')
color = colors.get(local_type, '#6B7280')
popup_html = f"""
<b>{row['LOCAL']}</b><br>
Type: {local_type}<br>
District: {row['DISTRICT']}
"""
folium.GeoJson(
row['geometry'],
style_function=lambda x, c=color: {
'fillColor': c,
'color': 'white',
'weight': 0.5,
'fillOpacity': 0.5
},
popup=folium.Popup(popup_html, max_width=200)
).add_to(m)
# Add legend
st.markdown("""
**Legend:**
- 🟢 Gaunpalika (Rural Municipality)
- 🟡 Nagarpalika (Municipality)
- 🔴 Sub-Metropolitan City
- 🟣 Metropolitan City
""")
st_folium(m, width=None, height=600)
elif map_type == "Choropleth - Voters by Province":
st.subheader("Voter Distribution Choropleth - Province Level")
# Merge geodata with voter data
provinces_gdf_merged = provinces_gdf.copy()
# Create voter count mapping
voter_map = {}
for pid, pdata in provinces_json.items():
voter_map[int(pid)] = pdata.get('total_voters', 0)
provinces_gdf_merged['total_voters'] = provinces_gdf_merged['PROVINCE'].map(voter_map)
fig = px.choropleth_mapbox(
provinces_gdf_merged,
geojson=provinces_gdf_merged.geometry.__geo_interface__,
locations=provinces_gdf_merged.index,
color='total_voters',
hover_name='PR_NAME',
hover_data=['PROVINCE', 'total_voters'],
mapbox_style="carto-positron",
zoom=6,
center={"lat": 28.3949, "lon": 84.1240},
opacity=0.7,
color_continuous_scale='Blues',
title="Voters by Province"
)
fig.update_layout(height=600)
st.plotly_chart(fig, use_container_width=True)
elif map_type == "Choropleth - Voters by District":
st.subheader("Voter Distribution Choropleth - District Level")
# Use new_district data for district-wise voter counts
if not new_district.empty:
fig = px.bar(
new_district.nlargest(20, 'total_voters'),
x='district_name',
y='total_voters',
color='province_name',
title="Top 20 Districts by Voter Count",
color_discrete_sequence=px.colors.qualitative.Set3
)
fig.update_layout(xaxis_tickangle=-45, yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
def show_province_analysis(old_province, new_province, provinces_json):
"""Detailed province-level analysis."""
st.header("📊 Province Analysis")
# Create comparison dataframe
comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')
# Province selector
selected_province = st.selectbox(
"Select Province for Detailed Analysis",
comparison['province_name'].tolist()
)
prov_data = comparison[comparison['province_name'] == selected_province].iloc[0]
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric(
"Total Voters (Current)",
f"{int(prov_data['total_voters_new']):,}",
f"{int(prov_data['voter_change']):+,}"
)
with col2:
st.metric(
"Male Voters",
f"{int(prov_data['male_voters_new']):,}",
f"{int(prov_data['male_change']):+,}"
)
with col3:
st.metric(
"Female Voters",
f"{int(prov_data['female_voters_new']):,}",
f"{int(prov_data['female_change']):+,}"
)
with col4:
st.metric(
"Growth Rate",
f"{prov_data['voter_change_pct']:.2f}%"
)
st.markdown("---")
# Province comparison charts
col1, col2 = st.columns(2)
with col1:
st.subheader("Province-wise Voter Comparison")
fig = go.Figure()
fig.add_trace(go.Bar(
name='Previous',
x=comparison['province_name'],
y=comparison['total_voters_old'],
marker_color='#3B82F6'
))
fig.add_trace(go.Bar(
name='Current',
x=comparison['province_name'],
y=comparison['total_voters_new'],
marker_color='#10B981'
))
fig.update_layout(barmode='group', yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
with col2:
st.subheader("Voter Growth by Province")
fig = px.bar(
comparison,
x='province_name',
y='voter_change',
color='voter_change',
color_continuous_scale=['red', 'yellow', 'green'],
title="Net Voter Change by Province"
)
fig.update_layout(yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
# Province summary table
st.subheader("Province Summary Table")
display_cols = ['province_name', 'total_voters_old', 'total_voters_new',
'voter_change', 'voter_change_pct', 'male_voters_new', 'female_voters_new']
styled_df = comparison[display_cols].copy()
styled_df.columns = ['Province', 'Previous Voters', 'Current Voters',
'Change', 'Change %', 'Male', 'Female']
st.dataframe(
styled_df.style.format({
'Previous Voters': '{:,.0f}',
'Current Voters': '{:,.0f}',
'Change': '{:+,.0f}',
'Change %': '{:+.2f}%',
'Male': '{:,.0f}',
'Female': '{:,.0f}'
}),
use_container_width=True
)
def show_district_analysis(old_district, new_district, districts_json):
"""Detailed district-level analysis."""
st.header("🏛️ District Analysis")
# Load age group data
try:
with open("data/cached_stats/old_age_by_district.json", "r", encoding="utf-8") as f:
old_district_age = pd.DataFrame(json.load(f))
with open("data/cached_stats/new_age_by_district.json", "r", encoding="utf-8") as f:
new_district_age = pd.DataFrame(json.load(f))
age_data_available = True
except:
age_data_available = False
st.info("ℹ️ Age group data not available. Run generate_age_groups.py to generate it.")
# Create comparison dataframe
comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name')
# Province filter
provinces = comparison['province_name_new'].dropna().unique().tolist()
selected_province = st.selectbox("Filter by Province", ["All"] + provinces)
if selected_province != "All":
comparison = comparison[comparison['province_name_new'] == selected_province]
col1, col2 = st.columns(2)
with col1:
st.subheader("Top 10 Districts by Voter Growth")
top_growth = comparison.nlargest(10, 'voter_change')
fig = px.bar(
top_growth,
x='district_name',
y='voter_change',
color='voter_change_pct',
color_continuous_scale='Greens',
title="Top 10 Districts - Voter Increase"
)
fig.update_layout(xaxis_tickangle=-45, yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
with col2:
st.subheader("District Voter Distribution (Current)")
fig = px.treemap(
comparison.head(50),
path=['province_name_new', 'district_name'],
values='total_voters_new',
title="Voter Distribution Treemap"
)
st.plotly_chart(fig, use_container_width=True)
# District comparison table
st.subheader("District Comparison Table")
display_df = comparison[['district_name', 'province_name_new', 'total_voters_old',
'total_voters_new', 'voter_change', 'voter_change_pct']].copy()
display_df.columns = ['District', 'Province', 'Previous', 'Current', 'Change', 'Change %']
# Search filter
search = st.text_input("Search District", "")
if search:
display_df = display_df[display_df['District'].str.contains(search, case=False, na=False)]
st.dataframe(
display_df.style.format({
'Previous': '{:,.0f}',
'Current': '{:,.0f}',
'Change': '{:+,.0f}',
'Change %': '{:+.2f}%'
}).background_gradient(subset=['Change'], cmap='RdYlGn'),
use_container_width=True,
height=400
)
# AGE GROUP ANALYSIS SECTION
if age_data_available:
st.markdown("---")
st.subheader("📊 Age Group Analysis by District")
# District selector for detailed age analysis
districts_list = sorted(comparison['district_name'].tolist())
selected_district = st.selectbox("Select District for Age Group Analysis", districts_list, key="district_age_sel")
# Filter age data for selected district
district_new = new_district_age[new_district_age['district_name'] == selected_district]
district_old = old_district_age[old_district_age['district_name'] == selected_district]
if not district_new.empty:
# Calculate metrics
total_new = district_new['count'].sum()
total_old = district_old['count'].sum()
change = total_new - total_old
change_pct = (change / total_old * 100) if total_old > 0 else 0
# Metrics row
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Total Voters (Current)", f"{total_new:,}", f"{change:+,}")
with col2:
male_new = district_new[district_new['gender'] == 'पुरुष']['count'].sum()
st.metric("Male Voters", f"{male_new:,}")
with col3:
female_new = district_new[district_new['gender'] == 'महिला']['count'].sum()
st.metric("Female Voters", f"{female_new:,}")
with col4:
st.metric("Growth Rate", f"{change_pct:+.2f}%")
# Visualizations
col1, col2 = st.columns(2)
with col1:
st.markdown("**Age Distribution (Current)**")
# Aggregate by age group
age_grouped_new = district_new.groupby('age_group')['count'].sum().reset_index()
age_grouped_new = age_grouped_new.sort_values('age_group')
fig = px.bar(
age_grouped_new,
x='age_group',
y='count',
title=f"Age Distribution in {selected_district}",
color='count',
color_continuous_scale='Blues',
labels={'age_group': 'Age Group', 'count': 'Voters'}
)
fig.update_layout(yaxis_tickformat=',', showlegend=False, height=350)
st.plotly_chart(fig, use_container_width=True)
with col2:
st.markdown("**Gender Distribution by Age Group**")
# Gender breakdown
gender_age = district_new[district_new['gender'].isin(['पुरुष', 'महिला'])]
gender_age_grouped = gender_age.groupby(['age_group', 'gender'])['count'].sum().reset_index()
fig = px.bar(
gender_age_grouped,
x='age_group',
y='count',
color='gender',
barmode='group',
title=f"Gender by Age in {selected_district}",
labels={'age_group': 'Age Group', 'count': 'Voters'},
color_discrete_map={'पुरुष': '#3B82F6', 'महिला': '#EC4899'}
)
fig.update_layout(yaxis_tickformat=',', height=350)
st.plotly_chart(fig, use_container_width=True)
# Comparison with previous election
st.markdown("**📊 Age Group Comparison: Previous vs Current**")
col1, col2 = st.columns(2)
with col1:
# Old vs New comparison
age_old = district_old.groupby('age_group')['count'].sum().reset_index()
age_old.columns = ['age_group', 'old_count']
age_new = district_new.groupby('age_group')['count'].sum().reset_index()
age_new.columns = ['age_group', 'new_count']
age_comparison = pd.merge(age_old, age_new, on='age_group', how='outer').fillna(0)
age_comparison['change'] = age_comparison['new_count'] - age_comparison['old_count']
age_comparison['change_pct'] = (age_comparison['change'] / age_comparison['old_count'] * 100).round(2)
age_comparison['change_pct'] = age_comparison['change_pct'].replace([np.inf, -np.inf], 0)
fig = px.bar(
age_comparison,
x='age_group',
y=['old_count', 'new_count'],
barmode='group',
title="Voter Count Comparison",
labels={'value': 'Voters', 'variable': 'Period'},
color_discrete_map={'old_count': '#F59E0B', 'new_count': '#10B981'}
)
fig.update_layout(yaxis_tickformat=',', height=350)
st.plotly_chart(fig, use_container_width=True)
with col2:
# Growth rate by age group
fig = px.bar(
age_comparison,
x='age_group',
y='change_pct',
title="Growth Rate by Age Group (%)",
color='change_pct',
color_continuous_scale='RdYlGn',
labels={'age_group': 'Age Group', 'change_pct': 'Growth (%)'},
color_continuous_midpoint=0
)
fig.update_layout(height=350)
st.plotly_chart(fig, use_container_width=True)
else:
st.warning(f"No age group data available for {selected_district}")
def show_local_analysis(old_municipality, new_municipality, municipalities_json):
"""Local level (municipality) analysis."""
st.header("🏘️ Local Level Analysis")
# Load age group data for municipalities and wards
try:
with open("data/cached_stats/old_age_by_municipality.json", "r", encoding="utf-8") as f:
old_municipality_age = pd.DataFrame(json.load(f))
with open("data/cached_stats/new_age_by_municipality.json", "r", encoding="utf-8") as f:
new_municipality_age = pd.DataFrame(json.load(f))
with open("data/cached_stats/old_age_by_ward.json", "r", encoding="utf-8") as f:
old_ward_age = pd.DataFrame(json.load(f))
with open("data/cached_stats/new_age_by_ward.json", "r", encoding="utf-8") as f:
new_ward_age = pd.DataFrame(json.load(f))
age_data_available = True
ward_data_available = True
except:
age_data_available = False
ward_data_available = False
st.info("ℹ️ Age group data not available. Run generate_age_groups.py and generate_ward_age_groups.py to generate it.")
comparison = create_comparative_dataframe(old_municipality, new_municipality, 'municipality_id', 'municipality_name')
# Use the _new suffixed column for district_name (or _old if _new doesn't exist)
district_col = 'district_name_new' if 'district_name_new' in comparison.columns else 'district_name_old'
comparison['district_name'] = comparison[district_col].fillna(comparison.get('district_name_old', ''))
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Total Local Levels", f"{len(comparison):,}")
with col2:
positive_growth = (comparison['voter_change'] > 0).sum()
st.metric("With Positive Growth", f"{positive_growth:,}")
with col3:
negative_growth = (comparison['voter_change'] < 0).sum()
st.metric("With Negative Growth", f"{negative_growth:,}")
st.markdown("---")
col1, col2 = st.columns(2)
with col1:
st.subheader("Top 20 Local Levels by Voter Count")
top_20 = comparison.nlargest(20, 'total_voters_new')
fig = px.bar(
top_20,
y='municipality_name',
x='total_voters_new',
orientation='h',
color='voter_change_pct',
color_continuous_scale='RdYlGn',
title="Largest Local Levels"
)
fig.update_layout(yaxis={'categoryorder': 'total ascending'}, xaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
with col2:
st.subheader("Growth Distribution")
fig = px.histogram(
comparison,
x='voter_change_pct',
nbins=50,
title="Distribution of Voter Change %",
labels={'voter_change_pct': 'Voter Change (%)'}
)
fig.add_vline(x=0, line_dash="dash", line_color="red")
st.plotly_chart(fig, use_container_width=True)
# Detailed table
st.subheader("Local Level Details")
# Filters
col1, col2 = st.columns(2)
with col1:
districts = comparison['district_name'].dropna().unique().tolist()
selected_district = st.selectbox("Filter by District", ["All"] + sorted(districts))
with col2:
growth_filter = st.selectbox(
"Filter by Growth",
["All", "Positive Growth", "Negative Growth", "No Change"]
)
filtered = comparison.copy()
if selected_district != "All":
filtered = filtered[filtered['district_name'] == selected_district]
if growth_filter == "Positive Growth":
filtered = filtered[filtered['voter_change'] > 0]
elif growth_filter == "Negative Growth":
filtered = filtered[filtered['voter_change'] < 0]
elif growth_filter == "No Change":
filtered = filtered[filtered['voter_change'] == 0]
display_df = filtered[['municipality_name', 'district_name', 'total_voters_old',
'total_voters_new', 'voter_change', 'voter_change_pct']].copy()
display_df.columns = ['Local Level', 'District', 'Previous', 'Current', 'Change', 'Change %']
st.dataframe(
display_df.style.format({
'Previous': '{:,.0f}',
'Current': '{:,.0f}',
'Change': '{:+,.0f}',
'Change %': '{:+.2f}%'
}),
use_container_width=True,
height=400
)
# AGE GROUP ANALYSIS SECTION FOR LOCAL LEVEL
if age_data_available:
st.markdown("---")
st.subheader("📊 Age Group Analysis by Local Level")
# Hierarchical selectors for local level
col1, col2 = st.columns(2)
with col1:
# Province selector
if age_data_available:
provinces_list = sorted(new_municipality_age['province_name'].unique())
selected_province_age = st.selectbox("Select Province", provinces_list, key="local_prov_age")
# Filter by province
muni_filtered = new_municipality_age[new_municipality_age['province_name'] == selected_province_age]
old_muni_filtered = old_municipality_age[old_municipality_age['province_name'] == selected_province_age]
with col2:
# District selector
if age_data_available and not muni_filtered.empty:
districts_list = sorted(muni_filtered['district_name'].unique())
selected_district_age = st.selectbox("Select District", districts_list, key="local_dist_age")
# Filter by district
muni_in_district = muni_filtered[muni_filtered['district_name'] == selected_district_age]
old_muni_in_district = old_muni_filtered[old_muni_filtered['district_name'] == selected_district_age]
# Municipality selector
if age_data_available and not muni_in_district.empty:
municipalities_list = sorted(muni_in_district['municipality_name'].unique())
selected_municipality = st.selectbox("Select Local Level for Age Analysis", municipalities_list, key="muni_age_sel")
# =========================== WARD POPULATION GRAPH SECTION ===========================
st.markdown("---")
st.subheader("📊 Ward-wise Population Distribution")
st.markdown(f"**Population breakdown by ward for {selected_municipality}**")
# Get ward-wise population data for selected municipality
if ward_data_available:
# Filter ward data for the selected municipality
ward_pop_new = new_ward_age[
(new_ward_age['municipality_name'] == selected_municipality) &
(new_ward_age['district_name'] == selected_district_age)
]
ward_pop_old = old_ward_age[
(old_ward_age['municipality_name'] == selected_municipality) &
(old_ward_age['district_name'] == selected_district_age)
]
if not ward_pop_new.empty:
# Aggregate data by ward (sum across all age groups and genders)
ward_totals_new = ward_pop_new.groupby('ward_number')['count'].sum().reset_index()
ward_totals_new.columns = ['ward_number', 'total_voters_new']
ward_totals_new = ward_totals_new.sort_values('ward_number')
# Get male and female counts separately
ward_gender_new = ward_pop_new.groupby(['ward_number', 'gender'])['count'].sum().reset_index()
ward_male_new = ward_gender_new[ward_gender_new['gender'] == 'पुरुष'][['ward_number', 'count']]
ward_male_new.columns = ['ward_number', 'male_voters']
ward_female_new = ward_gender_new[ward_gender_new['gender'] == 'महिला'][['ward_number', 'count']]
ward_female_new.columns = ['ward_number', 'female_voters']
# Merge all data
ward_data_combined = ward_totals_new.merge(ward_male_new, on='ward_number', how='left')
ward_data_combined = ward_data_combined.merge(ward_female_new, on='ward_number', how='left')
ward_data_combined = ward_data_combined.fillna(0)
# Get old data for comparison
if not ward_pop_old.empty:
ward_totals_old = ward_pop_old.groupby('ward_number')['count'].sum().reset_index()
ward_totals_old.columns = ['ward_number', 'total_voters_old']
ward_data_combined = ward_data_combined.merge(ward_totals_old, on='ward_number', how='left')
ward_data_combined['total_voters_old'] = ward_data_combined['total_voters_old'].fillna(0)
ward_data_combined['change'] = ward_data_combined['total_voters_new'] - ward_data_combined['total_voters_old']
ward_data_combined['change_pct'] = (ward_data_combined['change'] / ward_data_combined['total_voters_old'] * 100).round(2)
ward_data_combined['change_pct'] = ward_data_combined['change_pct'].replace([np.inf, -np.inf], 0)
has_old_data = True
else:
has_old_data = False
# Display summary metrics
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Total Wards", len(ward_data_combined))
with col2:
total_population = int(ward_data_combined['total_voters_new'].sum())
st.metric("Total Population", f"{total_population:,}")
with col3:
avg_population = int(ward_data_combined['total_voters_new'].mean())
st.metric("Avg per Ward", f"{avg_population:,}")
with col4:
max_ward = ward_data_combined.loc[ward_data_combined['total_voters_new'].idxmax()]
st.metric("Largest Ward", f"Ward {int(max_ward['ward_number'])}")
# Create visualizations with proper location context
# Build full location string for chart titles
location_full = f"{selected_municipality}, {selected_district_age}, {selected_province_age}"
col1, col2 = st.columns(2)
with col1:
# Bar chart showing total population by ward
ward_data_combined['Ward'] = 'Ward ' + ward_data_combined['ward_number'].astype(int).astype(str)
fig1 = px.bar(
ward_data_combined,
x='Ward',
y='total_voters_new',
title=f"Population by Ward<br><sub>{location_full}</sub>",
labels={'total_voters_new': 'Total Population', 'Ward': 'Ward'},
color='total_voters_new',
color_continuous_scale='Viridis',
text='total_voters_new'
)
fig1.update_traces(texttemplate='%{text:,.0f}', textposition='outside')
fig1.update_layout(
yaxis_tickformat=',',
xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
height=450,
showlegend=False,
title_font_size=14
)
st.plotly_chart(fig1, use_container_width=True, key="ward_pop_chart")
# Add download button for this chart
try:
img_bytes = fig1.to_image(format="png", width=1200, height=600, scale=2)
st.download_button(
label="📸 Download Chart as PNG",
data=img_bytes,
file_name=f"ward_population_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
mime="image/png",
use_container_width=True,
key="download_ward_pop"
)
except:
st.caption("💡 Install kaleido to enable image downloads: `pip install kaleido`")
with col2:
# Gender distribution by ward
ward_gender_data = ward_data_combined.copy()
ward_gender_data['Male'] = ward_gender_data['male_voters'].astype(int)
ward_gender_data['Female'] = ward_gender_data['female_voters'].astype(int)
fig2 = px.bar(
ward_gender_data,
x='Ward',
y=['Male', 'Female'],
title=f"Gender Distribution by Ward<br><sub>{location_full}</sub>",
labels={'value': 'Population', 'variable': 'Gender'},
color_discrete_map={'Male': '#3B82F6', 'Female': '#EC4899'},
barmode='group'
)
fig2.update_layout(
yaxis_tickformat=',',
xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
height=450,
title_font_size=14
)
st.plotly_chart(fig2, use_container_width=True, key="ward_gender_chart")
# Add download button for this chart
try:
img_bytes = fig2.to_image(format="png", width=1200, height=600, scale=2)
st.download_button(
label="📸 Download Chart as PNG",
data=img_bytes,
file_name=f"ward_gender_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
mime="image/png",
use_container_width=True,
key="download_ward_gender"
)
except:
st.caption("💡 Install kaleido to enable image downloads: `pip install kaleido`")
# Show comparison with old data if available
if has_old_data:
st.markdown("**Ward Population Change (Previous vs Current)**")
col1, col2 = st.columns(2)
with col1:
# Comparison bar chart
fig3 = px.bar(
ward_data_combined,
x='Ward',
y=['total_voters_old', 'total_voters_new'],
title=f"Population Comparison by Ward<br><sub>{location_full}</sub>",
labels={'value': 'Population', 'variable': 'Period'},
color_discrete_map={'total_voters_old': '#F59E0B', 'total_voters_new': '#10B981'},
barmode='group'
)
fig3.update_layout(
yaxis_tickformat=',',
xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
height=400,
title_font_size=14
)
st.plotly_chart(fig3, use_container_width=True, key="ward_comparison_chart")
# Add download button
try:
img_bytes = fig3.to_image(format="png", width=1200, height=600, scale=2)
st.download_button(
label="📸 Download Chart as PNG",
data=img_bytes,
file_name=f"ward_comparison_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
mime="image/png",
use_container_width=True,
key="download_ward_comparison"
)
except:
st.caption("💡 Install kaleido to enable image downloads")
with col2:
# Growth rate by ward
fig4 = px.bar(
ward_data_combined,
x='Ward',
y='change_pct',
title=f"Growth Rate by Ward (%)<br><sub>{location_full}</sub>",
labels={'change_pct': 'Growth (%)'},
color='change_pct',
color_continuous_scale='RdYlGn',
color_continuous_midpoint=0,
text='change_pct'
)
fig4.update_traces(texttemplate='%{text:.1f}%', textposition='outside')
fig4.update_layout(
xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]},
height=400,
showlegend=False,
title_font_size=14
)
st.plotly_chart(fig4, use_container_width=True, key="ward_growth_chart")
# Add download button
try:
img_bytes = fig4.to_image(format="png", width=1200, height=600, scale=2)
st.download_button(
label="📸 Download Chart as PNG",
data=img_bytes,
file_name=f"ward_growth_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png",
mime="image/png",
use_container_width=True,
key="download_ward_growth"
)
except:
st.caption("💡 Install kaleido to enable image downloads")
# Detailed data table
st.markdown("**Ward-wise Detailed Statistics**")
display_cols = ['Ward', 'total_voters_new', 'male_voters', 'female_voters']
col_names = ['Ward', 'Total Population', 'Male', 'Female']
if has_old_data:
display_cols.extend(['total_voters_old', 'change', 'change_pct'])
col_names.extend(['Previous Population', 'Change', 'Growth %'])
ward_display = ward_data_combined[display_cols].copy()
ward_display.columns = col_names
# Format the dataframe
format_dict = {
'Total Population': '{:,.0f}',
'Male': '{:,.0f}',
'Female': '{:,.0f}'
}
if has_old_data:
format_dict.update({
'Previous Population': '{:,.0f}',
'Change': '{:+,.0f}',
'Growth %': '{:+.2f}%'
})
styled_df = ward_display.style.format(format_dict)
if has_old_data:
styled_df = styled_df.background_gradient(subset=['Growth %'], cmap='RdYlGn')
st.dataframe(styled_df, use_container_width=True, height=400)
# Download option with proper location naming
st.markdown("**📥 Download Complete Ward Data**")
# Create clean filename with location hierarchy
clean_province = selected_province_age.replace(' ', '_').replace('/', '-')
clean_district = selected_district_age.replace(' ', '_').replace('/', '-')
clean_municipality = selected_municipality.replace(' ', '_').replace('/', '-')
base_filename = f"{clean_province}_{clean_district}_{clean_municipality}_ward_data"
col1, col2 = st.columns(2)
with col1:
csv = ward_display.to_csv(index=False, encoding='utf-8-sig')
st.download_button(
label="📥 Download Ward Data as CSV",
data=csv,
file_name=f"{base_filename}.csv",
mime="text/csv",
use_container_width=True,
help=f"Download ward population data for {location_full}"
)
with col2:
from io import BytesIO
buffer = BytesIO()
with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
# Add location info sheet
ward_display.to_excel(writer, index=False, sheet_name='Ward Population')
# Add metadata sheet with location info
metadata = pd.DataFrame({
'Field': ['Province', 'District', 'Municipality', 'Analysis Date', 'Total Wards', 'Total Population'],
'Value': [
selected_province_age,
selected_district_age,
selected_municipality,
datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
len(ward_data_combined),
int(ward_data_combined['total_voters_new'].sum())
]
})
metadata.to_excel(writer, index=False, sheet_name='Metadata')
st.download_button(
label="📊 Download as Excel (with Metadata)",
data=buffer.getvalue(),
file_name=f"{base_filename}.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
use_container_width=True,
help=f"Download with location metadata for {location_full}"
)
else:
st.warning(f"No ward population data available for {selected_municipality}")
else:
st.info("ℹ️ Ward population data not available. Run generate_ward_age_groups.py to generate it.")
st.markdown("---")
# WARD SELECTION FEATURE
if ward_data_available:
st.markdown("---")
st.markdown("### 🏘️ Ward Selection (Optional)")
st.markdown("*Select specific ward(s) to analyze or leave unselected to see entire municipality data*")
# Get wards for selected municipality
ward_filtered = new_ward_age[
(new_ward_age['municipality_name'] == selected_municipality) &
(new_ward_age['district_name'] == selected_district_age)
]
if not ward_filtered.empty:
wards_list = sorted(ward_filtered['ward_number'].unique())
col1, col2 = st.columns([3, 1])
with col1:
selected_wards = st.multiselect(
f"Select Ward(s) from {selected_municipality}",
options=wards_list,
default=[],
help="Select one or multiple wards. Leave empty to show entire municipality.",
key="ward_multiselect"
)
with col2:
st.metric("Total Wards", len(wards_list))
if selected_wards:
st.metric("Selected", len(selected_wards))
# Filter data based on ward selection
if ward_data_available and selected_wards:
# Filter by selected wards
ward_new_filtered = new_ward_age[
(new_ward_age['municipality_name'] == selected_municipality) &
(new_ward_age['district_name'] == selected_district_age) &
(new_ward_age['ward_number'].isin(selected_wards))
]
ward_old_filtered = old_ward_age[
(old_ward_age['municipality_name'] == selected_municipality) &
(old_ward_age['district_name'] == selected_district_age) &
(old_ward_age['ward_number'].isin(selected_wards))
]
muni_new = ward_new_filtered
muni_old = ward_old_filtered
analysis_level = f"Ward(s): {', '.join(map(str, sorted(selected_wards)))}"
else:
# Use municipality level data
muni_new = muni_in_district[muni_in_district['municipality_name'] == selected_municipality]
muni_old = old_muni_in_district[old_muni_in_district['municipality_name'] == selected_municipality]
analysis_level = f"Entire {selected_municipality}"
# Display analysis level
st.info(f"📍 **Analyzing**: {analysis_level}")
# ======================== CASTE GROUP ANALYSIS SECTION ========================
st.markdown("---")
st.subheader("👥 Caste/Ethnic Group Population Analysis")
st.markdown(f"""
**Caste/Ethnic Group Distribution in {analysis_level}**
This section shows which caste/ethnic groups are present in the selected area and their population distribution.
Surnames are matched against a comprehensive database of Nepali caste/ethnic groups.
""")
# Import caste analysis functions
from age_group_analysis import get_caste_data_for_municipality, get_ward_level_caste_data
# Get municipality ID for caste analysis using a simpler query
conn_caste = sqlite3.connect("voter_db_new.sqlite3")
# First, try to get municipality ID from the age data which already has it
municipality_id_caste = None
if not muni_in_district.empty:
# Try to extract municipality_id from the age data if available
temp_muni = muni_in_district[muni_in_district['municipality_name'] == selected_municipality]
if not temp_muni.empty and 'municipality_id' in temp_muni.columns:
municipality_id_caste = temp_muni.iloc[0]['municipality_id']
# If not found, query the database
if municipality_id_caste is None:
muni_query_caste = """
SELECT m.id
FROM municipalities m
WHERE m.name_np = ?
"""
muni_data_caste = pd.read_sql_query(muni_query_caste, conn_caste,
params=(selected_municipality,))
if not muni_data_caste.empty:
municipality_id_caste = muni_data_caste.iloc[0]['id']
conn_caste.close()
# ======================== BULK DOWNLOAD ALL WARDS CASTE DATA ========================
with st.expander("📥 Download All Ward-wise Caste/Ethnic Data", expanded=False):
st.markdown("""
**Download caste/ethnic population data for ALL wards in this municipality at once.**
This feature generates a comprehensive Excel file with:
- One sheet per ward showing caste/ethnic group distribution
- A summary sheet comparing all wards
- A combined sheet with all data
- **📊 Charts and graphs for visual analysis**
- **❓ Unmatched surnames list**
""")
# Import the all wards function
from age_group_analysis import get_all_wards_caste_data
if st.button("📊 Generate All Wards Caste Data", type="secondary", use_container_width=True, key="generate_all_wards_caste"):
if municipality_id_caste:
with st.spinner("Analyzing caste/ethnic data for all wards... This may take a moment."):
try:
all_wards_data, all_ward_numbers = get_all_wards_caste_data(municipality_id_caste)
if all_wards_data:
# Store in session state for download
st.session_state['all_wards_caste_data'] = all_wards_data
st.session_state['all_ward_numbers'] = all_ward_numbers
st.session_state['all_wards_municipality'] = selected_municipality
st.session_state['all_wards_district'] = selected_district_age
st.session_state['all_wards_province'] = selected_province_age
st.success(f"✅ Data generated for {len(all_ward_numbers)} wards!")
st.rerun()
else:
st.warning("No ward data found for this municipality.")
except Exception as e:
st.error(f"Error: {str(e)}")
else:
st.error("Could not identify municipality ID.")
# Show download button if data is available
if 'all_wards_caste_data' in st.session_state and st.session_state.get('all_wards_municipality') == selected_municipality:
all_wards_data = st.session_state['all_wards_caste_data']
all_ward_numbers = st.session_state['all_ward_numbers']
# Show summary
col1, col2, col3, col4 = st.columns(4)
with col1:
total_voters_all = sum(wd['total_voters'] for wd in all_wards_data.values())
st.metric("Total Voters", f"{total_voters_all:,}")
with col2:
st.metric("Total Wards", len(all_ward_numbers))
with col3:
all_castes = set()
for wd in all_wards_data.values():
all_castes.update(wd['caste_data'].keys())
st.metric("Caste Groups Found", len(all_castes))
with col4:
# Count total unmatched surnames
all_unmatched = {}
for wd in all_wards_data.values():
for surname, count in wd.get('unmatched_surnames', {}).items():
all_unmatched[surname] = all_unmatched.get(surname, 0) + count
st.metric("Unmatched Surnames", len(all_unmatched))
# Create Excel file with multiple sheets and charts
from io import BytesIO
from openpyxl.chart import BarChart, PieChart, Reference
from openpyxl.chart.label import DataLabelList
from openpyxl.utils.dataframe import dataframe_to_rows
buffer = BytesIO()
with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
# 1. Summary Sheet - All wards comparison
summary_data = []
all_castes_sorted = sorted(all_castes)
for ward_num in sorted(all_ward_numbers):
ward_data = all_wards_data.get(ward_num, {})
row = {'Ward': f"Ward {ward_num}", 'Total Voters': ward_data.get('total_voters', 0)}
for caste in all_castes_sorted:
row[caste] = ward_data.get('caste_data', {}).get(caste, 0)
summary_data.append(row)
# Add total row
total_row = {'Ward': 'TOTAL', 'Total Voters': total_voters_all}
for caste in all_castes_sorted:
total_row[caste] = sum(all_wards_data.get(w, {}).get('caste_data', {}).get(caste, 0) for w in all_ward_numbers)
summary_data.append(total_row)
summary_df = pd.DataFrame(summary_data)
summary_df.to_excel(writer, index=False, sheet_name='Ward-wise Summary')
# 2. Combined detailed sheet
combined_data = []
for ward_num in sorted(all_ward_numbers):
ward_data = all_wards_data.get(ward_num, {})
total = ward_data.get('total_voters', 0)
for caste, count in sorted(ward_data.get('caste_data', {}).items(), key=lambda x: x[1], reverse=True):
pct = round(count / total * 100, 2) if total > 0 else 0
combined_data.append({
'Ward': ward_num,
'Caste/Ethnic Group': caste,
'Population': count,
'Percentage': pct
})
combined_df = pd.DataFrame(combined_data)
combined_df.to_excel(writer, index=False, sheet_name='All Wards Combined')
# 3. Municipality Total - Caste Summary with Chart
muni_caste_totals = {}
for wd in all_wards_data.values():
for caste, count in wd.get('caste_data', {}).items():
muni_caste_totals[caste] = muni_caste_totals.get(caste, 0) + count
muni_caste_df = pd.DataFrame([
{'S.N.': i+1, 'Caste/Ethnic Group': caste, 'Population': count,
'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0}
for i, (caste, count) in enumerate(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True))
])
muni_caste_df.to_excel(writer, index=False, sheet_name='Municipality Total')
# Add bar chart to Municipality Total sheet
ws_muni = writer.sheets['Municipality Total']
# Create bar chart for top 15 castes
chart_data_rows = min(16, len(muni_caste_df) + 1) # +1 for header
if chart_data_rows > 2:
bar_chart = BarChart()
bar_chart.type = "bar"
bar_chart.style = 10
bar_chart.title = f"Top Caste/Ethnic Groups - {selected_municipality}"
bar_chart.y_axis.title = "Caste/Ethnic Group"
bar_chart.x_axis.title = "Population"
data = Reference(ws_muni, min_col=3, min_row=1, max_row=chart_data_rows, max_col=3)
cats = Reference(ws_muni, min_col=2, min_row=2, max_row=chart_data_rows)
bar_chart.add_data(data, titles_from_data=True)
bar_chart.set_categories(cats)
bar_chart.shape = 4
bar_chart.width = 20
bar_chart.height = 12
ws_muni.add_chart(bar_chart, "F2")
# 4. Individual ward sheets with unmatched surnames detail
# Import SURNAME_TO_CASTE for matching info
from age_group_analysis import SURNAME_TO_CASTE
for ward_num in sorted(all_ward_numbers):
ward_data = all_wards_data.get(ward_num, {})
total = ward_data.get('total_voters', 0)
if ward_data.get('caste_data'):
ward_rows = []
for i, (caste, count) in enumerate(sorted(ward_data['caste_data'].items(), key=lambda x: x[1], reverse=True), 1):
pct = round(count / total * 100, 2) if total > 0 else 0
ward_rows.append({
'S.N.': i,
'Caste/Ethnic Group': caste,
'Population': count,
'Percentage': pct
})
ward_df = pd.DataFrame(ward_rows)
sheet_name = f"Ward {ward_num}"[:31] # Excel sheet name limit
ward_df.to_excel(writer, index=False, sheet_name=sheet_name)
# Get the worksheet to add more content
ws_ward = writer.sheets[sheet_name]
# Track the next available row
next_row = len(ward_rows) + 4 # Leave 2 empty rows after caste data
# ===== ALL SURNAMES SECTION =====
ward_all_surnames = ward_data.get('surname_counts', {})
if ward_all_surnames:
# Add section header for ALL surnames
ws_ward.cell(row=next_row, column=1, value="📝 All Surnames (Matched & Unmatched)")
# Add column headers
ws_ward.cell(row=next_row + 1, column=1, value="S.N.")
ws_ward.cell(row=next_row + 1, column=2, value="Surname")
ws_ward.cell(row=next_row + 1, column=3, value="Count")
ws_ward.cell(row=next_row + 1, column=4, value="Percentage")
ws_ward.cell(row=next_row + 1, column=5, value="Matched Caste")
# Add all surnames data sorted by count
sorted_all_surnames = sorted(ward_all_surnames.items(), key=lambda x: x[1], reverse=True)
for idx, (surname, count) in enumerate(sorted_all_surnames, 1):
pct = round(count / total * 100, 2) if total > 0 else 0
matched_caste = SURNAME_TO_CASTE.get(surname, '❌ NOT MATCHED')
ws_ward.cell(row=next_row + 1 + idx, column=1, value=idx)
ws_ward.cell(row=next_row + 1 + idx, column=2, value=surname)
ws_ward.cell(row=next_row + 1 + idx, column=3, value=count)
ws_ward.cell(row=next_row + 1 + idx, column=4, value=f"{pct}%")
ws_ward.cell(row=next_row + 1 + idx, column=5, value=matched_caste)
# Update next_row for unmatched section
next_row = next_row + len(sorted_all_surnames) + 4
# ===== UNMATCHED SURNAMES SECTION =====
ward_unmatched = ward_data.get('unmatched_surnames', {})
if ward_unmatched:
# Add section header
ws_ward.cell(row=next_row, column=1, value="❓ Unmatched Surnames Only (अन्य/अज्ञात)")
# Add column headers
ws_ward.cell(row=next_row + 1, column=1, value="S.N.")
ws_ward.cell(row=next_row + 1, column=2, value="Surname")
ws_ward.cell(row=next_row + 1, column=3, value="Count")
ws_ward.cell(row=next_row + 1, column=4, value="Percentage")
# Add unmatched surnames data sorted by count
sorted_unmatched = sorted(ward_unmatched.items(), key=lambda x: x[1], reverse=True)
for idx, (surname, count) in enumerate(sorted_unmatched, 1):
pct = round(count / total * 100, 2) if total > 0 else 0
ws_ward.cell(row=next_row + 1 + idx, column=1, value=idx)
ws_ward.cell(row=next_row + 1 + idx, column=2, value=surname)
ws_ward.cell(row=next_row + 1 + idx, column=3, value=count)
ws_ward.cell(row=next_row + 1 + idx, column=4, value=f"{pct}%")
# Add summary of unmatched
total_unmatched = sum(ward_unmatched.values())
summary_row = next_row + len(sorted_unmatched) + 3
ws_ward.cell(row=summary_row, column=1, value="Total Unmatched:")
ws_ward.cell(row=summary_row, column=2, value=f"{len(ward_unmatched)} surnames")
ws_ward.cell(row=summary_row, column=3, value=total_unmatched)
ws_ward.cell(row=summary_row, column=4, value=f"{round(total_unmatched/total*100, 2) if total > 0 else 0}%")
# Add pie chart to each ward sheet (for top 10)
chart_rows = min(11, len(ward_rows) + 1)
if chart_rows > 2:
pie_chart = PieChart()
pie_chart.title = f"Ward {ward_num} - Caste Distribution"
data = Reference(ws_ward, min_col=3, min_row=1, max_row=chart_rows)
cats = Reference(ws_ward, min_col=2, min_row=2, max_row=chart_rows)
pie_chart.add_data(data, titles_from_data=True)
pie_chart.set_categories(cats)
pie_chart.width = 14
pie_chart.height = 10
ws_ward.add_chart(pie_chart, "F2")
# 5. Unmatched Surnames Sheet
all_unmatched_combined = {}
ward_unmatched_detail = []
for ward_num in sorted(all_ward_numbers):
ward_data = all_wards_data.get(ward_num, {})
unmatched = ward_data.get('unmatched_surnames', {})
for surname, count in unmatched.items():
all_unmatched_combined[surname] = all_unmatched_combined.get(surname, 0) + count
ward_unmatched_detail.append({
'Ward': ward_num,
'Unmatched Surname': surname,
'Count': count
})
# Summary of unmatched surnames
if all_unmatched_combined:
unmatched_summary_df = pd.DataFrame([
{'S.N.': i+1, 'Unmatched Surname': surname, 'Total Count': count,
'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0}
for i, (surname, count) in enumerate(sorted(all_unmatched_combined.items(), key=lambda x: x[1], reverse=True))
])
unmatched_summary_df.to_excel(writer, index=False, sheet_name='Unmatched Surnames')
# Add chart for top unmatched surnames
ws_unmatched = writer.sheets['Unmatched Surnames']
chart_rows = min(21, len(unmatched_summary_df) + 1)
if chart_rows > 2:
bar_chart_unmatched = BarChart()
bar_chart_unmatched.type = "bar"
bar_chart_unmatched.style = 11
bar_chart_unmatched.title = "Top Unmatched Surnames"
bar_chart_unmatched.y_axis.title = "Surname"
bar_chart_unmatched.x_axis.title = "Count"
data = Reference(ws_unmatched, min_col=3, min_row=1, max_row=chart_rows, max_col=3)
cats = Reference(ws_unmatched, min_col=2, min_row=2, max_row=chart_rows)
bar_chart_unmatched.add_data(data, titles_from_data=True)
bar_chart_unmatched.set_categories(cats)
bar_chart_unmatched.width = 18
bar_chart_unmatched.height = 12
ws_unmatched.add_chart(bar_chart_unmatched, "F2")
# Ward-wise unmatched detail
if ward_unmatched_detail:
ward_unmatched_df = pd.DataFrame(ward_unmatched_detail)
ward_unmatched_df.to_excel(writer, index=False, sheet_name='Unmatched by Ward')
# 6. All Surnames Sheet (matched and unmatched)
all_surnames_combined = {}
for ward_num in sorted(all_ward_numbers):
ward_data = all_wards_data.get(ward_num, {})
for surname, count in ward_data.get('surname_counts', {}).items():
all_surnames_combined[surname] = all_surnames_combined.get(surname, 0) + count
if all_surnames_combined:
# Import SURNAME_TO_CASTE for matching
from age_group_analysis import SURNAME_TO_CASTE
all_surnames_df = pd.DataFrame([
{
'S.N.': i+1,
'Surname': surname,
'Count': count,
'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0,
'Matched Caste': SURNAME_TO_CASTE.get(surname, 'NOT MATCHED')
}
for i, (surname, count) in enumerate(sorted(all_surnames_combined.items(), key=lambda x: x[1], reverse=True))
])
all_surnames_df.to_excel(writer, index=False, sheet_name='All Surnames')
# 7. Metadata sheet
total_unmatched_voters = sum(all_unmatched_combined.values()) if all_unmatched_combined else 0
metadata = pd.DataFrame({
'Field': ['Province', 'District', 'Municipality', 'Total Wards', 'Total Voters',
'Total Caste Groups', 'Total Unique Surnames', 'Unmatched Surnames Count',
'Unmatched Voters Count', 'Match Rate (%)', 'Generated On'],
'Value': [
selected_province_age,
selected_district_age,
selected_municipality,
len(all_ward_numbers),
total_voters_all,
len(all_castes),
len(all_surnames_combined) if all_surnames_combined else 0,
len(all_unmatched_combined),
total_unmatched_voters,
round((total_voters_all - total_unmatched_voters) / total_voters_all * 100, 2) if total_voters_all > 0 else 0,
datetime.now().strftime("%Y-%m-%d %H:%M:%S")
]
})
metadata.to_excel(writer, index=False, sheet_name='Metadata')
# Download button
filename = f"{selected_municipality.replace(' ', '_')}_all_wards_caste_data.xlsx"
st.download_button(
label="📥 Download All Wards Caste Data (Excel with Charts)",
data=buffer.getvalue(),
file_name=filename,
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
use_container_width=True
)
# Show preview charts in Streamlit
st.markdown("---")
st.markdown("**📊 Preview Charts**")
# Municipality total caste distribution chart
muni_caste_totals = {}
for wd in all_wards_data.values():
for caste, count in wd.get('caste_data', {}).items():
muni_caste_totals[caste] = muni_caste_totals.get(caste, 0) + count
top_castes = dict(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True)[:15])
col1, col2 = st.columns(2)
with col1:
fig_bar = px.bar(
x=list(top_castes.values()),
y=list(top_castes.keys()),
orientation='h',
title=f"Top 15 Caste/Ethnic Groups - {selected_municipality}",
labels={'x': 'Population', 'y': 'Caste/Ethnic Group'}
)
fig_bar.update_layout(yaxis={'categoryorder': 'total ascending'}, height=500)
st.plotly_chart(fig_bar, use_container_width=True)
with col2:
# Pie chart for top 10
top_10 = dict(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True)[:10])
fig_pie = px.pie(
values=list(top_10.values()),
names=list(top_10.keys()),
title=f"Caste Distribution (Top 10) - {selected_municipality}"
)
fig_pie.update_layout(height=500)
st.plotly_chart(fig_pie, use_container_width=True)
# Show unmatched surnames preview
if all_unmatched:
st.markdown("**❓ Top Unmatched Surnames (Preview)**")
top_unmatched = dict(sorted(all_unmatched.items(), key=lambda x: x[1], reverse=True)[:10])
unmatched_preview_df = pd.DataFrame([
{'Surname': s, 'Count': c} for s, c in top_unmatched.items()
])
st.dataframe(unmatched_preview_df, use_container_width=True, hide_index=True)
# Also show a preview table
st.markdown("**Preview: Ward-wise Caste Distribution Summary**")
preview_df = summary_df.head(10)
st.dataframe(preview_df, use_container_width=True)
st.markdown("---")
# Analyze button
if st.button("🔍 Analyze Caste/Ethnic Distribution", type="primary", use_container_width=True):
with st.spinner("Analyzing caste/ethnic group distribution..."):
if municipality_id_caste:
try:
# Clear any previous cache to ensure fresh data
if 'caste_analysis_data_local' in st.session_state:
del st.session_state['caste_analysis_data_local']
if ward_data_available and selected_wards:
# Ward-specific analysis - convert to list of ints
ward_list = [int(w) for w in selected_wards]
st.info(f"Querying municipality {municipality_id_caste} with wards {ward_list}")
caste_data, total_analyzed, surname_counts, unmatched_surnames = get_ward_level_caste_data(municipality_id_caste, ward_list)
else:
# Full municipality analysis
st.info(f"Querying entire municipality {municipality_id_caste}")
caste_data, total_analyzed, surname_counts, unmatched_surnames = get_caste_data_for_municipality(municipality_id_caste)
if caste_data and total_analyzed > 0:
# Store in session state
st.session_state['caste_analysis_data_local'] = caste_data
st.session_state['caste_analysis_total_local'] = total_analyzed
st.session_state['caste_analysis_scope_local'] = analysis_level
st.session_state['surname_counts_local'] = surname_counts
st.session_state['unmatched_surnames_local'] = unmatched_surnames
st.success(f"✅ Analysis complete! Found {total_analyzed:,} voters across {len(caste_data)} caste/ethnic groups")
st.rerun()
else:
st.warning(f"⚠️ Query returned {total_analyzed} voters and {len(caste_data) if caste_data else 0} caste groups.")
except Exception as e:
st.error(f"Error during analysis: {str(e)}")
import traceback
st.code(traceback.format_exc())
else:
st.error(f"❌ Could not identify municipality ID for: {selected_municipality}")
# Display caste analysis results if available
if 'caste_analysis_data_local' in st.session_state and st.session_state['caste_analysis_data_local']:
caste_data = st.session_state['caste_analysis_data_local']
total_analyzed = st.session_state['caste_analysis_total_local']
analysis_scope_caste = st.session_state.get('caste_analysis_scope_local', 'Selected Area')
st.markdown("---")
st.markdown(f"### 📊 Caste/Ethnic Group Distribution Results")
st.info(f"**Analysis Scope:** {analysis_scope_caste} | **Total Voters Analyzed:** {total_analyzed:,}")
# Convert to DataFrame for visualization
caste_df = pd.DataFrame([
{'Caste/Ethnic Group': caste, 'Population': count, 'Percentage': round(count/total_analyzed*100, 2)}
for caste, count in sorted(caste_data.items(), key=lambda x: x[1], reverse=True)
])
# Display summary metrics
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Total Groups", len(caste_data))
with col2:
largest_group = caste_df.iloc[0]
group_name = largest_group['Caste/Ethnic Group'].split('(')[0].strip()
st.metric("Largest Group", group_name[:20] + "..." if len(group_name) > 20 else group_name)
with col3:
st.metric("Population", f"{largest_group['Population']:,}")
with col4:
st.metric("Share", f"{largest_group['Percentage']:.1f}%")
# Visualizations in tabs
tab1, tab2, tab3, tab4, tab5 = st.tabs(["📊 Bar Chart", "🥧 Pie Chart", "📋 Data Table", "📝 All Surnames", "❓ Unmatched Surnames"])
with tab1:
# Bar chart - top groups
top_n = min(20, len(caste_df))
fig = px.bar(
caste_df.head(top_n),
x='Population',
y='Caste/Ethnic Group',
orientation='h',
title=f"Top {top_n} Caste/Ethnic Groups by Population",
color='Population',
color_continuous_scale='Viridis',
text='Population'
)
fig.update_traces(texttemplate='%{text:,}', textposition='outside')
fig.update_layout(
yaxis={'categoryorder':'total ascending'},
height=max(400, top_n * 25),
showlegend=False
)
st.plotly_chart(fig, use_container_width=True)
with tab2:
# Pie chart - top 10 + others
if len(caste_df) > 10:
top_10 = caste_df.head(10).copy()
others_count = caste_df.iloc[10:]['Population'].sum()
others_row = pd.DataFrame([{
'Caste/Ethnic Group': 'Others (Combined)',
'Population': others_count,
'Percentage': round(others_count/total_analyzed*100, 2)
}])
pie_data = pd.concat([top_10, others_row], ignore_index=True)
else:
pie_data = caste_df
fig = px.pie(
pie_data,
values='Population',
names='Caste/Ethnic Group',
title="Caste/Ethnic Group Distribution",
color_discrete_sequence=px.colors.qualitative.Set3
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(height=600)
st.plotly_chart(fig, use_container_width=True)
with tab3:
# Detailed table with serial numbers
caste_df_display = caste_df.copy()
caste_df_display.insert(0, 'S.N.', range(1, len(caste_df_display) + 1))
st.markdown(f"**Complete List - Serially Ordered by Population**")
st.dataframe(
caste_df_display.style.format({
'Population': '{:,}',
'Percentage': '{:.2f}%'
}).background_gradient(subset=['Population'], cmap='YlOrRd'),
use_container_width=True,
height=500
)
# Download buttons
col1, col2 = st.columns(2)
with col1:
csv = caste_df_display.to_csv(index=False, encoding='utf-8-sig')
st.download_button(
label="📥 Download as CSV",
data=csv,
file_name=f"caste_distribution_{selected_municipality.replace(' ', '_')}.csv",
mime="text/csv",
use_container_width=True
)
with col2:
from io import BytesIO
buffer = BytesIO()
with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
caste_df_display.to_excel(writer, index=False, sheet_name='Caste Distribution')
st.download_button(
label="📊 Download as Excel",
data=buffer.getvalue(),
file_name=f"caste_distribution_{selected_municipality.replace(' ', '_')}.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
use_container_width=True
)
with tab4:
# All surnames found
st.markdown("**All Surnames Found in the Data**")
st.info("This shows every surname detected, sorted by frequency")
if 'surname_counts_local' in st.session_state:
surname_counts = st.session_state['surname_counts_local']
surname_df = pd.DataFrame([
{'S.N.': i+1, 'Surname': surname, 'Count': count, 'Percentage': round(count/total_analyzed*100, 2)}
for i, (surname, count) in enumerate(sorted(surname_counts.items(), key=lambda x: x[1], reverse=True))
])
st.metric("Total Unique Surnames", len(surname_df))
st.dataframe(
surname_df.style.format({
'Count': '{:,}',
'Percentage': '{:.2f}%'
}).background_gradient(subset=['Count'], cmap='Blues'),
use_container_width=True,
height=500
)
# Download buttons
col1, col2 = st.columns(2)
with col1:
csv_surnames = surname_df.to_csv(index=False, encoding='utf-8-sig')
st.download_button(
label="📥 Download as CSV",
data=csv_surnames,
file_name=f"all_surnames_{selected_municipality.replace(' ', '_')}.csv",
mime="text/csv",
use_container_width=True
)
with col2:
from io import BytesIO
buffer = BytesIO()
with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
surname_df.to_excel(writer, index=False, sheet_name='All Surnames')
st.download_button(
label="📊 Download as Excel",
data=buffer.getvalue(),
file_name=f"all_surnames_{selected_municipality.replace(' ', '_')}.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
use_container_width=True
)
with tab5:
# Unmatched surnames
st.markdown("**Surnames NOT in Caste Database**")
st.warning("These surnames were found but don't match any caste/ethnic group in the database")
if 'unmatched_surnames_local' in st.session_state:
unmatched = st.session_state['unmatched_surnames_local']
if unmatched:
unmatched_df = pd.DataFrame([
{'S.N.': i+1, 'Surname': surname, 'Count': count, 'Percentage': round(count/total_analyzed*100, 2)}
for i, (surname, count) in enumerate(sorted(unmatched.items(), key=lambda x: x[1], reverse=True))
])
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Unmatched Surnames", len(unmatched_df))
with col2:
total_unmatched = unmatched_df['Count'].sum()
st.metric("Total Voters", f"{total_unmatched:,}")
with col3:
unmatched_pct = (total_unmatched / total_analyzed * 100)
st.metric("% of Total", f"{unmatched_pct:.1f}%")
st.markdown("**Top 50 Unmatched Surnames**")
st.dataframe(
unmatched_df.head(50).style.format({
'Count': '{:,}',
'Percentage': '{:.2f}%'
}).background_gradient(subset=['Count'], cmap='Oranges'),
use_container_width=True,
height=500
)
# Download buttons
col1, col2 = st.columns(2)
with col1:
csv_unmatched = unmatched_df.to_csv(index=False, encoding='utf-8-sig')
st.download_button(
label="📥 Download as CSV",
data=csv_unmatched,
file_name=f"unmatched_surnames_{selected_municipality.replace(' ', '_')}.csv",
mime="text/csv",
use_container_width=True
)
with col2:
from io import BytesIO
buffer = BytesIO()
with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
unmatched_df.to_excel(writer, index=False, sheet_name='Unmatched Surnames')
st.download_button(
label="📊 Download as Excel",
data=buffer.getvalue(),
file_name=f"unmatched_surnames_{selected_municipality.replace(' ', '_')}.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
use_container_width=True
)
else:
st.success("✅ All surnames matched successfully!")
st.markdown("---")
st.subheader("📊 Age Group Analysis")
if not muni_new.empty:
# Calculate metrics
total_new = muni_new['count'].sum()
total_old = muni_old['count'].sum()
change = total_new - total_old
change_pct = (change / total_old * 100) if total_old > 0 else 0
# Metrics
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Total Voters (Current)", f"{total_new:,}", f"{change:+,}")
with col2:
male_new = muni_new[muni_new['gender'] == 'पुरुष']['count'].sum()
st.metric("Male Voters", f"{male_new:,}")
with col3:
female_new = muni_new[muni_new['gender'] == 'महिला']['count'].sum()
st.metric("Female Voters", f"{female_new:,}")
with col4:
st.metric("Growth Rate", f"{change_pct:+.2f}%")
# Visualizations
col1, col2 = st.columns(2)
with col1:
st.markdown("**Age Distribution (Current)**")
# Group by age
age_grouped = muni_new.groupby('age_group')['count'].sum().reset_index()
title_text = f"Age Distribution in {selected_municipality}"
if ward_data_available and selected_wards:
if len(selected_wards) == 1:
title_text = f"Age Distribution - Ward {selected_wards[0]}"
else:
title_text = f"Age Distribution - {len(selected_wards)} Wards"
fig = px.pie(
age_grouped,
values='count',
names='age_group',
title=title_text,
color_discrete_sequence=px.colors.qualitative.Set3
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(height=350)
st.plotly_chart(fig, use_container_width=True)
with col2:
st.markdown("**Gender Distribution by Age**")
gender_data = muni_new[muni_new['gender'].isin(['पुरुष', 'महिला'])]
gender_grouped = gender_data.groupby(['age_group', 'gender'])['count'].sum().reset_index()
title_text_gender = f"Gender by Age in {selected_municipality}"
if ward_data_available and selected_wards:
if len(selected_wards) == 1:
title_text_gender = f"Gender by Age - Ward {selected_wards[0]}"
else:
title_text_gender = f"Gender by Age - {len(selected_wards)} Wards"
fig = px.bar(
gender_grouped,
x='age_group',
y='count',
color='gender',
title=title_text_gender,
color_discrete_map={'पुरुष': '#3B82F6', 'महिला': '#EC4899'}
)
fig.update_layout(yaxis_tickformat=',', height=350)
st.plotly_chart(fig, use_container_width=True)
# Comparison with previous election
st.markdown("**📊 Age Group Comparison: Previous vs Current**")
col1, col2 = st.columns(2)
with col1:
# Old vs New comparison
age_old = muni_old.groupby('age_group')['count'].sum().reset_index()
age_old.columns = ['age_group', 'old_count']
age_new = muni_new.groupby('age_group')['count'].sum().reset_index()
age_new.columns = ['age_group', 'new_count']
age_comparison = pd.merge(age_old, age_new, on='age_group', how='outer').fillna(0)
age_comparison['change'] = age_comparison['new_count'] - age_comparison['old_count']
age_comparison['change_pct'] = (age_comparison['change'] / age_comparison['old_count'] * 100).round(2)
age_comparison['change_pct'] = age_comparison['change_pct'].replace([np.inf, -np.inf], 0)
fig = px.bar(
age_comparison,
x='age_group',
y=['old_count', 'new_count'],
barmode='group',
title="Voter Count Comparison",
labels={'value': 'Voters', 'variable': 'Period'},
color_discrete_map={'old_count': '#F59E0B', 'new_count': '#10B981'}
)
fig.update_layout(yaxis_tickformat=',', height=350)
st.plotly_chart(fig, use_container_width=True)
with col2:
# Growth rate by age group
fig = px.bar(
age_comparison,
x='age_group',
y='change_pct',
title="Growth Rate by Age Group (%)",
color='change_pct',
color_continuous_scale='RdYlGn',
labels={'age_group': 'Age Group', 'change_pct': 'Growth (%)'},
color_continuous_midpoint=0
)
fig.update_layout(height=350)
st.plotly_chart(fig, use_container_width=True)
# Comparison table
st.markdown("**Age Group Statistics Table**")
age_comparison_display = age_comparison[['age_group', 'old_count', 'new_count', 'change', 'change_pct']].copy()
age_comparison_display.columns = ['Age Group', 'Previous', 'Current', 'Change', 'Growth %']
st.dataframe(
age_comparison_display.style.format({
'Previous': '{:,.0f}',
'Current': '{:,.0f}',
'Change': '{:+,.0f}',
'Growth %': '{:+.2f}'
}),
use_container_width=True
)
# Ward-by-ward comparison if multiple wards selected
if ward_data_available and selected_wards and len(selected_wards) > 1:
st.markdown("---")
st.markdown("**📊 Ward-by-Ward Comparison**")
# Prepare ward comparison data
ward_comparison_list = []
for ward_num in selected_wards:
ward_data_new = ward_new_filtered[ward_new_filtered['ward_number'] == ward_num]
ward_data_old = ward_old_filtered[ward_old_filtered['ward_number'] == ward_num]
total_new_ward = ward_data_new['count'].sum()
total_old_ward = ward_data_old['count'].sum()
change_ward = total_new_ward - total_old_ward
change_pct_ward = (change_ward / total_old_ward * 100) if total_old_ward > 0 else 0
male_ward = ward_data_new[ward_data_new['gender'] == 'पुरुष']['count'].sum()
female_ward = ward_data_new[ward_data_new['gender'] == 'महिला']['count'].sum()
ward_comparison_list.append({
'Ward': f"Ward {ward_num}",
'Previous': total_old_ward,
'Current': total_new_ward,
'Change': change_ward,
'Growth %': change_pct_ward,
'Male': male_ward,
'Female': female_ward
})
ward_comparison_df = pd.DataFrame(ward_comparison_list)
col1, col2 = st.columns(2)
with col1:
# Ward comparison bar chart
fig = px.bar(
ward_comparison_df,
x='Ward',
y='Current',
title="Current Voters by Ward",
color='Current',
color_continuous_scale='Viridis'
)
fig.update_layout(yaxis_tickformat=',', showlegend=False)
st.plotly_chart(fig, use_container_width=True)
with col2:
# Growth rate comparison
fig = px.bar(
ward_comparison_df,
x='Ward',
y='Growth %',
title="Growth Rate by Ward",
color='Growth %',
color_continuous_scale='RdYlGn',
color_continuous_midpoint=0
)
st.plotly_chart(fig, use_container_width=True)
# Ward comparison table
st.dataframe(
ward_comparison_df.style.format({
'Previous': '{:,.0f}',
'Current': '{:,.0f}',
'Change': '{:+,.0f}',
'Growth %': '{:+.2f}%',
'Male': '{:,.0f}',
'Female': '{:,.0f}'
}).background_gradient(subset=['Growth %'], cmap='RdYlGn'),
use_container_width=True
)
else:
st.warning(f"No age group data available for {selected_municipality}")
def show_comparative_study(old_province, new_province, old_district, new_district,
old_municipality, new_municipality, old_age, new_age):
"""Comprehensive comparative study."""
st.header("📈 Comparative Study: Old vs Current Voter Data")
# Province comparison
prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')
# Summary statistics
st.subheader("📊 Summary Statistics")
col1, col2, col3 = st.columns(3)
old_total = old_province['total_voters'].sum()
new_total = new_province['total_voters'].sum()
with col1:
st.markdown("### Previous Election")
st.write(f"**Total Voters:** {old_total:,}")
st.write(f"**Male:** {old_province['male_voters'].sum():,}")
st.write(f"**Female:** {old_province['female_voters'].sum():,}")
st.write(f"**Avg Age:** {old_province['avg_age'].mean():.1f}")
with col2:
st.markdown("### Current Election")
st.write(f"**Total Voters:** {new_total:,}")
st.write(f"**Male:** {new_province['male_voters'].sum():,}")
st.write(f"**Female:** {new_province['female_voters'].sum():,}")
st.write(f"**Avg Age:** {new_province['avg_age'].mean():.1f}")
with col3:
st.markdown("### Change Analysis")
change = new_total - old_total
change_pct = (change / old_total) * 100
st.write(f"**Net Change:** {change:+,}")
st.write(f"**Growth Rate:** {change_pct:+.2f}%")
st.write(f"**Male Change:** {new_province['male_voters'].sum() - old_province['male_voters'].sum():+,}")
st.write(f"**Female Change:** {new_province['female_voters'].sum() - old_province['female_voters'].sum():+,}")
st.markdown("---")
# Visual comparisons
st.subheader("📉 Visual Comparison Analysis")
tab1, tab2, tab3, tab4 = st.tabs(["Province Level", "Gender Analysis", "Age Analysis", "Growth Trends"])
with tab1:
col1, col2 = st.columns(2)
with col1:
# Side by side comparison
fig = make_subplots(rows=1, cols=2, subplot_titles=("Previous Election", "Current Election"),
specs=[[{"type": "pie"}, {"type": "pie"}]])
fig.add_trace(
go.Pie(labels=old_province['province_name'], values=old_province['total_voters'], name="Previous"),
row=1, col=1
)
fig.add_trace(
go.Pie(labels=new_province['province_name'], values=new_province['total_voters'], name="Current"),
row=1, col=2
)
fig.update_layout(title_text="Province Distribution Comparison")
st.plotly_chart(fig, use_container_width=True)
with col2:
# Growth by province
fig = px.bar(
prov_comparison,
x='province_name',
y=['total_voters_old', 'total_voters_new'],
barmode='group',
title="Province-wise Voter Count Comparison",
labels={'value': 'Voters', 'variable': 'Period'}
)
fig.update_layout(yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
with tab2:
col1, col2 = st.columns(2)
with col1:
# Gender ratio comparison
gender_comparison = pd.DataFrame({
'Province': prov_comparison['province_name'].tolist() * 2,
'Gender': ['Male'] * len(prov_comparison) + ['Female'] * len(prov_comparison),
'Previous': prov_comparison['male_voters_old'].tolist() + prov_comparison['female_voters_old'].tolist(),
'Current': prov_comparison['male_voters_new'].tolist() + prov_comparison['female_voters_new'].tolist()
})
fig = px.sunburst(
gender_comparison,
path=['Gender', 'Province'],
values='Current',
title="Gender Distribution by Province (Current)"
)
st.plotly_chart(fig, use_container_width=True)
with col2:
# Gender ratio change
prov_comparison['male_ratio_old'] = prov_comparison['male_voters_old'] / prov_comparison['total_voters_old'] * 100
prov_comparison['male_ratio_new'] = prov_comparison['male_voters_new'] / prov_comparison['total_voters_new'] * 100
prov_comparison['ratio_change'] = prov_comparison['male_ratio_new'] - prov_comparison['male_ratio_old']
fig = px.bar(
prov_comparison,
x='province_name',
y='ratio_change',
color='ratio_change',
color_continuous_scale='RdYlGn',
title="Male Ratio Change by Province (%)"
)
st.plotly_chart(fig, use_container_width=True)
with tab3:
col1, col2 = st.columns(2)
with col1:
# Age group comparison
old_age_grouped = old_age.groupby('age_group')['count'].sum().reset_index()
old_age_grouped['period'] = 'Previous'
new_age_grouped = new_age.groupby('age_group')['count'].sum().reset_index()
new_age_grouped['period'] = 'Current'
age_combined = pd.concat([old_age_grouped, new_age_grouped])
fig = px.bar(
age_combined,
x='age_group',
y='count',
color='period',
barmode='group',
title="Age Distribution Comparison",
color_discrete_map={'Previous': '#3B82F6', 'Current': '#10B981'}
)
fig.update_layout(yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
with col2:
# Age change analysis
age_change = pd.merge(
old_age_grouped[['age_group', 'count']],
new_age_grouped[['age_group', 'count']],
on='age_group',
suffixes=('_old', '_new')
)
age_change['change'] = age_change['count_new'] - age_change['count_old']
age_change['change_pct'] = (age_change['change'] / age_change['count_old'] * 100).round(2)
fig = px.bar(
age_change,
x='age_group',
y='change_pct',
color='change_pct',
color_continuous_scale='RdYlGn',
title="Voter Change by Age Group (%)"
)
st.plotly_chart(fig, use_container_width=True)
with tab4:
# Growth analysis
col1, col2 = st.columns(2)
with col1:
fig = px.scatter(
prov_comparison,
x='total_voters_old',
y='total_voters_new',
size='voter_change',
color='voter_change_pct',
hover_name='province_name',
title="Voter Growth Scatter Plot",
labels={'total_voters_old': 'Previous Voters', 'total_voters_new': 'Current Voters'},
color_continuous_scale='RdYlGn'
)
# Add diagonal line (no change line)
max_val = max(prov_comparison['total_voters_new'].max(), prov_comparison['total_voters_old'].max())
fig.add_trace(go.Scatter(x=[0, max_val], y=[0, max_val], mode='lines',
name='No Change Line', line=dict(dash='dash', color='gray')))
fig.update_layout(xaxis_tickformat=',', yaxis_tickformat=',')
st.plotly_chart(fig, use_container_width=True)
with col2:
# Growth rate ranking
sorted_growth = prov_comparison.sort_values('voter_change_pct', ascending=True)
fig = px.bar(
sorted_growth,
y='province_name',
x='voter_change_pct',
orientation='h',
color='voter_change_pct',
color_continuous_scale='RdYlGn',
title="Growth Rate by Province (%)"
)
st.plotly_chart(fig, use_container_width=True)
def show_detailed_reports(old_province, new_province, old_district, new_district,
old_municipality, new_municipality, old_overall, new_overall):
"""Generate detailed reports."""
st.header("📋 Detailed Reports & Export")
report_type = st.selectbox(
"Select Report Type",
["Executive Summary", "Province Report", "District Report", "Full Data Export"]
)
if report_type == "Executive Summary":
st.subheader("📑 Executive Summary Report")
old_total = int(old_overall['total_voters'].iloc[0])
new_total = int(new_overall['total_voters'].iloc[0])
change = new_total - old_total
change_pct = (change / old_total) * 100
st.markdown(f"""
## Nepal Election Voter Data Analysis Report
### Executive Summary
**Report Date:** January 11, 2026
---
### Key Findings
1. **Total Voter Base Growth**
- Previous Election: **{old_total:,}** voters
- Current Election: **{new_total:,}** voters
- Net Increase: **{change:+,}** voters ({change_pct:+.2f}%)
2. **Gender Distribution**
- Male Voters (Current): **{int(new_overall['male_voters'].iloc[0]):,}**
- Female Voters (Current): **{int(new_overall['female_voters'].iloc[0]):,}**
- Gender Ratio: **{(new_overall['male_voters'].iloc[0]/new_overall['female_voters'].iloc[0]):.2f}** M:F
3. **Age Demographics**
- Average Voter Age: **{new_overall['avg_age'].iloc[0]:.1f}** years
- Age Range: **{int(new_overall['min_age'].iloc[0])}** - **{int(new_overall['max_age'].iloc[0])}** years
4. **Administrative Coverage**
- Provinces: **7**
- Districts: **77**
- Local Levels: **753**
---
### Provincial Highlights
| Province | Previous Voters | Current Voters | Change | Growth % |
|----------|-----------------|----------------|--------|----------|
""")
prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')
for _, row in prov_comparison.iterrows():
st.markdown(f"| {row['province_name']} | {int(row['total_voters_old']):,} | {int(row['total_voters_new']):,} | {int(row['voter_change']):+,} | {row['voter_change_pct']:+.2f}% |")
st.markdown("""
---
### Conclusions
The voter registration data shows a healthy growth in the electoral base, with all provinces
showing positive growth. This indicates successful voter registration drives and natural
population growth reaching voting age.
The gender balance has slightly improved with female participation showing steady growth.
The average age of voters remains stable, suggesting a balanced demographic distribution.
""")
elif report_type == "Province Report":
st.subheader("📊 Province-wise Detailed Report")
prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')
st.dataframe(
prov_comparison.style.format({
'total_voters_old': '{:,.0f}',
'total_voters_new': '{:,.0f}',
'voter_change': '{:+,.0f}',
'voter_change_pct': '{:+.2f}%',
'male_voters_old': '{:,.0f}',
'male_voters_new': '{:,.0f}',
'female_voters_old': '{:,.0f}',
'female_voters_new': '{:,.0f}'
}),
use_container_width=True
)
# Download button
csv = prov_comparison.to_csv(index=False)
st.download_button(
label="📥 Download Province Report (CSV)",
data=csv,
file_name="province_voter_report.csv",
mime="text/csv"
)
elif report_type == "District Report":
st.subheader("🏛️ District-wise Detailed Report")
dist_comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name')
st.dataframe(
dist_comparison.style.format({
'total_voters_old': '{:,.0f}',
'total_voters_new': '{:,.0f}',
'voter_change': '{:+,.0f}',
'voter_change_pct': '{:+.2f}%'
}),
use_container_width=True,
height=500
)
csv = dist_comparison.to_csv(index=False)
st.download_button(
label="📥 Download District Report (CSV)",
data=csv,
file_name="district_voter_report.csv",
mime="text/csv"
)
elif report_type == "Full Data Export":
st.subheader("📦 Full Data Export")
col1, col2, col3 = st.columns(3)
with col1:
prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name')
st.download_button(
label="📥 Province Data",
data=prov_comparison.to_csv(index=False),
file_name="province_data.csv",
mime="text/csv"
)
with col2:
dist_comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name')
st.download_button(
label="📥 District Data",
data=dist_comparison.to_csv(index=False),
file_name="district_data.csv",
mime="text/csv"
)
with col3:
muni_comparison = create_comparative_dataframe(old_municipality, new_municipality, 'municipality_id', 'municipality_name')
st.download_button(
label="📥 Local Level Data",
data=muni_comparison.to_csv(index=False),
file_name="local_level_data.csv",
mime="text/csv"
)
if __name__ == "__main__":
app_entry()