Spaces:
Paused
Paused
| """ | |
| Nepal Election Analysis - GIS Visualization & Comparative Voter Study | |
| ===================================================================== | |
| A comprehensive application for analyzing voter data changes between two election periods | |
| with interactive GIS maps and statistical visualizations. | |
| Author: Data Analysis Project | |
| Date: January 2026 | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import geopandas as gpd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| import sqlite3 | |
| import json | |
| import folium | |
| from streamlit_folium import st_folium | |
| import os | |
| from io import BytesIO | |
| from datetime import datetime | |
| # Page configuration | |
| st.set_page_config( | |
| page_title="Nepal Election Analysis", | |
| page_icon="🗳️", | |
| layout="wide", | |
| initial_sidebar_state="expanded" | |
| ) | |
| # Custom CSS for better styling | |
| st.markdown(""" | |
| <style> | |
| .main-header { | |
| font-size: 2.5rem; | |
| font-weight: bold; | |
| color: #1E3A8A; | |
| text-align: center; | |
| margin-bottom: 1rem; | |
| } | |
| .sub-header { | |
| font-size: 1.5rem; | |
| color: #374151; | |
| text-align: center; | |
| margin-bottom: 2rem; | |
| } | |
| .metric-card { | |
| background-color: #F3F4F6; | |
| padding: 1rem; | |
| border-radius: 0.5rem; | |
| text-align: center; | |
| } | |
| .positive-change { | |
| color: #059669; | |
| font-weight: bold; | |
| } | |
| .negative-change { | |
| color: #DC2626; | |
| font-weight: bold; | |
| } | |
| .stTabs [data-baseweb="tab-list"] { | |
| gap: 24px; | |
| } | |
| .stTabs [data-baseweb="tab"] { | |
| height: 50px; | |
| padding-left: 20px; | |
| padding-right: 20px; | |
| } | |
| </style> | |
| """, unsafe_allow_html=True) | |
| # ── Login & User Tracking ──────────────────────────────────────────────────── | |
| LOGIN_TRACKING_FILE = "data/login_tracking.json" | |
| def init_login_tracking_file(): | |
| """Ensure the login tracking JSON file exists.""" | |
| os.makedirs(os.path.dirname(LOGIN_TRACKING_FILE), exist_ok=True) | |
| if not os.path.exists(LOGIN_TRACKING_FILE): | |
| with open(LOGIN_TRACKING_FILE, "w") as f: | |
| json.dump([], f) | |
| def record_login(name: str, location: str): | |
| """Append a login record to the tracking file.""" | |
| init_login_tracking_file() | |
| with open(LOGIN_TRACKING_FILE, "r") as f: | |
| records = json.load(f) | |
| records.append({ | |
| "name": name, | |
| "location": location, | |
| "login_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), | |
| }) | |
| with open(LOGIN_TRACKING_FILE, "w") as f: | |
| json.dump(records, f, indent=2) | |
| def load_login_records(): | |
| """Read all login records.""" | |
| init_login_tracking_file() | |
| with open(LOGIN_TRACKING_FILE, "r") as f: | |
| return json.load(f) | |
| def show_login_page(): | |
| """Render the login form and validate credentials.""" | |
| st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True) | |
| st.markdown('<p class="sub-header">Please log in to continue</p>', unsafe_allow_html=True) | |
| with st.form("login_form"): | |
| username = st.text_input("Username") | |
| password = st.text_input("Password", type="password") | |
| submitted = st.form_submit_button("Login") | |
| if submitted: | |
| try: | |
| admin_pw = st.secrets["ADMIN_PASSWORD"] | |
| except Exception: | |
| admin_pw = os.environ.get("ADMIN_PASSWORD", "") | |
| try: | |
| user_pw = st.secrets["USER_PASSWORD"] | |
| except Exception: | |
| user_pw = os.environ.get("USER_PASSWORD", "") | |
| if username == "limsim" and password == admin_pw: | |
| st.session_state["authenticated"] = True | |
| st.session_state["username"] = "limsim" | |
| st.session_state["is_admin"] = True | |
| st.session_state["needs_user_info"] = False | |
| st.session_state["user_display_name"] = "Admin (limsim)" | |
| st.session_state["user_location"] = "" | |
| st.session_state["login_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
| record_login("Admin (limsim)", "—") | |
| st.rerun() | |
| elif username == "user_nepal" and password == user_pw: | |
| st.session_state["authenticated"] = True | |
| st.session_state["username"] = "user_nepal" | |
| st.session_state["is_admin"] = False | |
| st.session_state["needs_user_info"] = True | |
| st.rerun() | |
| else: | |
| st.error("Invalid username or password.") | |
| def show_user_info_page(): | |
| """Collect name and location from user_nepal before granting full access.""" | |
| st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True) | |
| st.markdown('<p class="sub-header">Tell us about yourself</p>', unsafe_allow_html=True) | |
| with st.form("user_info_form"): | |
| name = st.text_input("Your Name") | |
| location = st.text_input("Your Location") | |
| submitted = st.form_submit_button("Continue") | |
| if submitted: | |
| if not name.strip() or not location.strip(): | |
| st.warning("Please fill in both fields.") | |
| else: | |
| st.session_state["needs_user_info"] = False | |
| st.session_state["user_display_name"] = name.strip() | |
| st.session_state["user_location"] = location.strip() | |
| st.session_state["login_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
| record_login(name.strip(), location.strip()) | |
| st.rerun() | |
| def show_admin_dashboard(): | |
| """Admin-only dashboard showing login metrics and records.""" | |
| st.header("🔐 Admin: Login Records") | |
| records = load_login_records() | |
| if not records: | |
| st.info("No login records yet.") | |
| return | |
| df = pd.DataFrame(records) | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.metric("Total Logins", len(df)) | |
| with col2: | |
| st.metric("Unique Users", df["name"].nunique()) | |
| st.subheader("Login History") | |
| st.dataframe( | |
| df.sort_values("login_time", ascending=False).reset_index(drop=True), | |
| use_container_width=True, | |
| ) | |
| csv_data = df.to_csv(index=False) | |
| st.download_button( | |
| label="📥 Download CSV", | |
| data=csv_data, | |
| file_name="login_records.csv", | |
| mime="text/csv", | |
| ) | |
| def app_entry(): | |
| """Gate: unauthenticated → login, needs info → info page, else → main().""" | |
| if not st.session_state.get("authenticated", False): | |
| show_login_page() | |
| return | |
| if st.session_state.get("needs_user_info", False): | |
| show_user_info_page() | |
| return | |
| main() | |
| def load_shapefiles(): | |
| """Load all Nepal shapefiles for provinces, districts, and local levels.""" | |
| base_path = "data/gis/raw/NPL_new_wgs" | |
| # Load country boundary | |
| country_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_0.shp") | |
| # Load provinces (7 provinces) | |
| provinces_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_1.shp") | |
| # Load districts (77 districts) | |
| districts_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_2.shp") | |
| # Load local levels (municipalities) | |
| local_gdf = gpd.read_file(f"{base_path}/hermes_NPL_new_wgs_3.shp") | |
| return country_gdf, provinces_gdf, districts_gdf, local_gdf | |
| def load_json_data(): | |
| """Load administrative data from JSON files.""" | |
| base_path = "data/admin/web_data" | |
| with open(f"{base_path}/provinces.json", "r", encoding="utf-8") as f: | |
| provinces_data = json.load(f) | |
| with open(f"{base_path}/districts.json", "r", encoding="utf-8") as f: | |
| districts_data = json.load(f) | |
| with open(f"{base_path}/municipalities.json", "r", encoding="utf-8") as f: | |
| municipalities_data = json.load(f) | |
| return provinces_data, districts_data, municipalities_data | |
| def load_cached_stats(): | |
| """Load pre-computed statistics from JSON files for instant loading.""" | |
| with open("data/cached_stats/old_stats.json", "r", encoding="utf-8") as f: | |
| old_stats = json.load(f) | |
| with open("data/cached_stats/new_stats.json", "r", encoding="utf-8") as f: | |
| new_stats = json.load(f) | |
| # Convert to DataFrames | |
| old_province = pd.DataFrame(old_stats['provinces']) | |
| old_district = pd.DataFrame(old_stats['districts']) | |
| old_municipality = pd.DataFrame(old_stats['municipalities']) | |
| old_overall = pd.DataFrame([old_stats['overall']]) | |
| old_age = pd.DataFrame(old_stats['age_distribution']) | |
| new_province = pd.DataFrame(new_stats['provinces']) | |
| new_district = pd.DataFrame(new_stats['districts']) | |
| new_municipality = pd.DataFrame(new_stats['municipalities']) | |
| new_overall = pd.DataFrame([new_stats['overall']]) | |
| new_age = pd.DataFrame(new_stats['age_distribution']) | |
| return (old_province, old_district, old_municipality, old_overall, old_age, | |
| new_province, new_district, new_municipality, new_overall, new_age) | |
| def get_database_stats_fast(db_path, provinces_json, districts_json, municipalities_json): | |
| """Get statistics using pre-aggregated JSON data for speed.""" | |
| # Convert JSON data to DataFrames (already aggregated - no need for heavy DB queries) | |
| province_stats = pd.DataFrame([ | |
| { | |
| 'province_id': int(k), | |
| 'province_name': v['name'], | |
| 'total_voters': v['total_voters'], | |
| 'male_voters': v['male'], | |
| 'female_voters': v['female'], | |
| 'avg_age': v['avg_age'], | |
| 'districts_count': v['districts_count'], | |
| 'municipalities_count': v['municipalities_count'] | |
| } | |
| for k, v in provinces_json.items() | |
| ]) | |
| district_stats = pd.DataFrame([ | |
| { | |
| 'district_id': int(k), | |
| 'district_name': v['name'], | |
| 'province_id': v['province_id'], | |
| 'total_voters': v['total_voters'], | |
| 'male_voters': v['male'], | |
| 'female_voters': v['female'], | |
| 'avg_age': v['avg_age'], | |
| 'municipalities_count': v['municipalities_count'] | |
| } | |
| for k, v in districts_json.items() | |
| ]) | |
| # Add province names to districts | |
| prov_name_map = {int(k): v['name'] for k, v in provinces_json.items()} | |
| district_stats['province_name'] = district_stats['province_id'].map(prov_name_map) | |
| municipality_stats = pd.DataFrame([ | |
| { | |
| 'municipality_id': int(k), | |
| 'municipality_name': v['name'], | |
| 'district_id': v['district_id'], | |
| 'total_voters': v['total_voters'], | |
| 'male_voters': v['male'], | |
| 'female_voters': v['female'], | |
| 'avg_age': v['avg_age'] | |
| } | |
| for k, v in municipalities_json.items() | |
| ]) | |
| # Add district names to municipalities | |
| dist_name_map = {int(k): v['name'] for k, v in districts_json.items()} | |
| dist_prov_map = {int(k): v['province_id'] for k, v in districts_json.items()} | |
| municipality_stats['district_name'] = municipality_stats['district_id'].map(dist_name_map) | |
| municipality_stats['province_id'] = municipality_stats['district_id'].map(dist_prov_map) | |
| # Calculate overall stats from province data | |
| overall_stats = pd.DataFrame([{ | |
| 'total_voters': province_stats['total_voters'].sum(), | |
| 'male_voters': province_stats['male_voters'].sum(), | |
| 'female_voters': province_stats['female_voters'].sum(), | |
| 'avg_age': province_stats['avg_age'].mean(), | |
| 'min_age': 18, | |
| 'max_age': 105 | |
| }]) | |
| # Age distribution - estimate from avg_age (or use simple query if needed) | |
| # Using estimated distribution based on typical voter demographics | |
| total = overall_stats['total_voters'].iloc[0] | |
| age_distribution = pd.DataFrame([ | |
| {'age_group': '18-25', 'count': int(total * 0.15), 'gender': 'पुरुष'}, | |
| {'age_group': '18-25', 'count': int(total * 0.14), 'gender': 'महिला'}, | |
| {'age_group': '26-35', 'count': int(total * 0.18), 'gender': 'पुरुष'}, | |
| {'age_group': '26-35', 'count': int(total * 0.17), 'gender': 'महिला'}, | |
| {'age_group': '36-45', 'count': int(total * 0.14), 'gender': 'पुरुष'}, | |
| {'age_group': '36-45', 'count': int(total * 0.13), 'gender': 'महिला'}, | |
| {'age_group': '46-55', 'count': int(total * 0.10), 'gender': 'पुरुष'}, | |
| {'age_group': '46-55', 'count': int(total * 0.09), 'gender': 'महिला'}, | |
| {'age_group': '56-65', 'count': int(total * 0.06), 'gender': 'पुरुष'}, | |
| {'age_group': '56-65', 'count': int(total * 0.05), 'gender': 'महिला'}, | |
| {'age_group': '65+', 'count': int(total * 0.04), 'gender': 'पुरुष'}, | |
| {'age_group': '65+', 'count': int(total * 0.03), 'gender': 'महिला'}, | |
| ]) | |
| return province_stats, district_stats, municipality_stats, overall_stats, age_distribution | |
| def get_age_distribution_from_db(db_path): | |
| """Get actual age distribution from database - separate cached query.""" | |
| conn = sqlite3.connect(db_path) | |
| age_dist_query = """ | |
| SELECT | |
| CASE | |
| WHEN age BETWEEN 18 AND 25 THEN '18-25' | |
| WHEN age BETWEEN 26 AND 35 THEN '26-35' | |
| WHEN age BETWEEN 36 AND 45 THEN '36-45' | |
| WHEN age BETWEEN 46 AND 55 THEN '46-55' | |
| WHEN age BETWEEN 56 AND 65 THEN '56-65' | |
| WHEN age > 65 THEN '65+' | |
| ELSE 'Unknown' | |
| END as age_group, | |
| COUNT(*) as count, | |
| gender | |
| FROM voters | |
| GROUP BY age_group, gender | |
| ORDER BY age_group | |
| """ | |
| age_distribution = pd.read_sql_query(age_dist_query, conn) | |
| conn.close() | |
| return age_distribution | |
| def get_old_database_stats_fast(db_path): | |
| """Get statistics from old database using optimized queries.""" | |
| conn = sqlite3.connect(db_path) | |
| # Get province-level aggregated stats with a simpler approach | |
| # First get voter counts per booth | |
| province_query = """ | |
| SELECT | |
| p.id as province_id, | |
| p.name_np as province_name, | |
| COUNT(v.id) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age | |
| FROM voters v | |
| JOIN voting_booths vb ON v.booth_id = vb.id | |
| JOIN wards w ON vb.ward_id = w.id | |
| JOIN municipalities m ON w.municipality_id = m.id | |
| JOIN districts d ON m.district_id = d.id | |
| JOIN provinces p ON d.province_id = p.id | |
| GROUP BY p.id, p.name_np | |
| ORDER BY p.id | |
| """ | |
| # Use a faster approach - get total counts first | |
| total_query = """ | |
| SELECT COUNT(*) as total_voters, | |
| SUM(CASE WHEN gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(age) as avg_age | |
| FROM voters | |
| """ | |
| overall_stats = pd.read_sql_query(total_query, conn) | |
| overall_stats['min_age'] = 18 | |
| overall_stats['max_age'] = 105 | |
| # Get province stats using indexed booth_id | |
| province_query_fast = """ | |
| WITH booth_province AS ( | |
| SELECT vb.id as booth_id, p.id as province_id, p.name_np as province_name | |
| FROM voting_booths vb | |
| JOIN wards w ON vb.ward_id = w.id | |
| JOIN municipalities m ON w.municipality_id = m.id | |
| JOIN districts d ON m.district_id = d.id | |
| JOIN provinces p ON d.province_id = p.id | |
| ) | |
| SELECT | |
| bp.province_id, | |
| bp.province_name, | |
| COUNT(v.id) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age | |
| FROM voters v | |
| JOIN booth_province bp ON v.booth_id = bp.booth_id | |
| GROUP BY bp.province_id, bp.province_name | |
| ORDER BY bp.province_id | |
| """ | |
| province_stats = pd.read_sql_query(province_query_fast, conn) | |
| # Get district stats | |
| district_query_fast = """ | |
| WITH booth_district AS ( | |
| SELECT vb.id as booth_id, d.id as district_id, d.name_np as district_name, | |
| d.province_id, p.name_np as province_name | |
| FROM voting_booths vb | |
| JOIN wards w ON vb.ward_id = w.id | |
| JOIN municipalities m ON w.municipality_id = m.id | |
| JOIN districts d ON m.district_id = d.id | |
| JOIN provinces p ON d.province_id = p.id | |
| ) | |
| SELECT | |
| bd.district_id, | |
| bd.district_name, | |
| bd.province_id, | |
| bd.province_name, | |
| COUNT(v.id) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age | |
| FROM voters v | |
| JOIN booth_district bd ON v.booth_id = bd.booth_id | |
| GROUP BY bd.district_id, bd.district_name, bd.province_id, bd.province_name | |
| ORDER BY bd.province_id, bd.district_id | |
| """ | |
| district_stats = pd.read_sql_query(district_query_fast, conn) | |
| # Get municipality stats | |
| municipality_query_fast = """ | |
| WITH booth_muni AS ( | |
| SELECT vb.id as booth_id, m.id as municipality_id, m.name_np as municipality_name, | |
| m.district_id, d.name_np as district_name, d.province_id | |
| FROM voting_booths vb | |
| JOIN wards w ON vb.ward_id = w.id | |
| JOIN municipalities m ON w.municipality_id = m.id | |
| JOIN districts d ON m.district_id = d.id | |
| ) | |
| SELECT | |
| bm.municipality_id, | |
| bm.municipality_name, | |
| bm.district_id, | |
| bm.district_name, | |
| bm.province_id, | |
| COUNT(v.id) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age | |
| FROM voters v | |
| JOIN booth_muni bm ON v.booth_id = bm.booth_id | |
| GROUP BY bm.municipality_id, bm.municipality_name, bm.district_id, bm.district_name, bm.province_id | |
| ORDER BY bm.province_id, bm.district_id, bm.municipality_id | |
| """ | |
| municipality_stats = pd.read_sql_query(municipality_query_fast, conn) | |
| # Age distribution - estimated for speed | |
| total = overall_stats['total_voters'].iloc[0] | |
| age_distribution = pd.DataFrame([ | |
| {'age_group': '18-25', 'count': int(total * 0.15), 'gender': 'पुरुष'}, | |
| {'age_group': '18-25', 'count': int(total * 0.14), 'gender': 'महिला'}, | |
| {'age_group': '26-35', 'count': int(total * 0.18), 'gender': 'पुरुष'}, | |
| {'age_group': '26-35', 'count': int(total * 0.17), 'gender': 'महिला'}, | |
| {'age_group': '36-45', 'count': int(total * 0.14), 'gender': 'पुरुष'}, | |
| {'age_group': '36-45', 'count': int(total * 0.13), 'gender': 'महिला'}, | |
| {'age_group': '46-55', 'count': int(total * 0.10), 'gender': 'पुरुष'}, | |
| {'age_group': '46-55', 'count': int(total * 0.09), 'gender': 'महिला'}, | |
| {'age_group': '56-65', 'count': int(total * 0.06), 'gender': 'पुरुष'}, | |
| {'age_group': '56-65', 'count': int(total * 0.05), 'gender': 'महिला'}, | |
| {'age_group': '65+', 'count': int(total * 0.04), 'gender': 'पुरुष'}, | |
| {'age_group': '65+', 'count': int(total * 0.03), 'gender': 'महिला'}, | |
| ]) | |
| conn.close() | |
| return province_stats, district_stats, municipality_stats, overall_stats, age_distribution | |
| def create_comparative_dataframe(old_stats, new_stats, key_col, name_col): | |
| """Create comparative dataframe between old and new data.""" | |
| merged = pd.merge( | |
| old_stats, new_stats, | |
| on=[key_col, name_col] if name_col else [key_col], | |
| suffixes=('_old', '_new'), | |
| how='outer' | |
| ) | |
| # Calculate changes | |
| merged['voter_change'] = merged['total_voters_new'] - merged['total_voters_old'] | |
| merged['voter_change_pct'] = ((merged['total_voters_new'] - merged['total_voters_old']) / merged['total_voters_old'] * 100).round(2) | |
| merged['male_change'] = merged['male_voters_new'] - merged['male_voters_old'] | |
| merged['female_change'] = merged['female_voters_new'] - merged['female_voters_old'] | |
| return merged | |
| def show_voter_search(): | |
| """Display voter search interface with hierarchical location filtering.""" | |
| st.header("🔍 Voter List Search") | |
| st.markdown(""" | |
| Search voters by location hierarchy: **Province → District → Municipality → Ward → Booth** | |
| """) | |
| # Database selection | |
| db_choice = st.radio( | |
| "Select Database", | |
| ["Current Voter Data", "Old Voter Data"], | |
| horizontal=True | |
| ) | |
| db_path = "voter_db_new.sqlite3" if db_choice == "Current Voter Data" else "voter_db_old.sqlite3" | |
| # Load location hierarchy data with caching | |
| def load_provinces(_db_path): | |
| conn = sqlite3.connect(_db_path) | |
| df = pd.read_sql_query("SELECT id, name_np FROM provinces ORDER BY id", conn) | |
| conn.close() | |
| return df | |
| def load_districts(_db_path, province_id=None): | |
| conn = sqlite3.connect(_db_path) | |
| if province_id: | |
| df = pd.read_sql_query(f"SELECT id, province_id, name_np FROM districts WHERE province_id = {province_id} ORDER BY name_np", conn) | |
| else: | |
| df = pd.read_sql_query("SELECT id, province_id, name_np FROM districts ORDER BY name_np", conn) | |
| conn.close() | |
| return df | |
| def load_municipalities(_db_path, district_id=None): | |
| conn = sqlite3.connect(_db_path) | |
| if district_id: | |
| df = pd.read_sql_query(f"SELECT id, district_id, name_np FROM municipalities WHERE district_id = {district_id} ORDER BY name_np", conn) | |
| else: | |
| df = pd.read_sql_query("SELECT id, district_id, name_np FROM municipalities ORDER BY name_np", conn) | |
| conn.close() | |
| return df | |
| def load_wards(_db_path, municipality_id=None): | |
| conn = sqlite3.connect(_db_path) | |
| if municipality_id: | |
| df = pd.read_sql_query(f"SELECT id, municipality_id, ward_number FROM wards WHERE municipality_id = {municipality_id} ORDER BY ward_number", conn) | |
| else: | |
| df = pd.read_sql_query("SELECT id, municipality_id, ward_number FROM wards ORDER BY ward_number", conn) | |
| conn.close() | |
| return df | |
| def load_booths(_db_path, ward_id=None): | |
| conn = sqlite3.connect(_db_path) | |
| if ward_id: | |
| df = pd.read_sql_query(f"SELECT id, ward_id, name_np, google_map_link FROM voting_booths WHERE ward_id = {ward_id} ORDER BY name_np", conn) | |
| else: | |
| df = pd.read_sql_query("SELECT id, ward_id, name_np, google_map_link FROM voting_booths ORDER BY name_np", conn) | |
| conn.close() | |
| return df | |
| def search_voters(_db_path, province_id, district_id, municipality_id, ward_id, booth_id, search_name, search_voter_id, gender_filter): | |
| """Search voters based on filters.""" | |
| query = """ | |
| SELECT | |
| v.voter_id AS "Voter ID", | |
| v.name_np AS "Name", | |
| v.age AS "Age", | |
| v.gender AS "Gender", | |
| v.parents_name_np AS "Parents Name", | |
| v.partners_name_np AS "Partner Name", | |
| b.name_np AS "Booth", | |
| w.ward_number AS "Ward", | |
| m.name_np AS "Municipality", | |
| d.name_np AS "District", | |
| p.name_np AS "Province" | |
| FROM voters v | |
| JOIN voting_booths b ON v.booth_id = b.id | |
| JOIN wards w ON b.ward_id = w.id | |
| JOIN municipalities m ON w.municipality_id = m.id | |
| JOIN districts d ON m.district_id = d.id | |
| JOIN provinces p ON d.province_id = p.id | |
| WHERE 1=1 | |
| """ | |
| params = [] | |
| if province_id: | |
| query += " AND p.id = ?" | |
| params.append(province_id) | |
| if district_id: | |
| query += " AND d.id = ?" | |
| params.append(district_id) | |
| if municipality_id: | |
| query += " AND m.id = ?" | |
| params.append(municipality_id) | |
| if ward_id: | |
| query += " AND w.id = ?" | |
| params.append(ward_id) | |
| if booth_id: | |
| query += " AND b.id = ?" | |
| params.append(booth_id) | |
| if search_name: | |
| query += " AND v.name_np LIKE ?" | |
| params.append(f"%{search_name}%") | |
| if search_voter_id: | |
| query += " AND v.voter_id LIKE ?" | |
| params.append(f"%{search_voter_id}%") | |
| if gender_filter == "male": | |
| query += " AND v.gender = 'पुरुष'" | |
| elif gender_filter == "female": | |
| query += " AND v.gender = 'महिला'" | |
| query += " ORDER BY v.name_np" | |
| conn = sqlite3.connect(_db_path) | |
| results_df = pd.read_sql_query(query, conn, params=params) | |
| conn.close() | |
| return results_df | |
| # Load provinces | |
| provinces_df = load_provinces(db_path) | |
| st.markdown("---") | |
| st.subheader("📍 Location Filter") | |
| # Search Level Selection | |
| search_level = st.radio( | |
| "🎯 Search Level", | |
| ["Province Level", "District Level", "Municipality Level", "Ward Level"], | |
| horizontal=True, | |
| help="Choose the geographic level to search voters" | |
| ) | |
| # Province selector (always shown) | |
| province_options = ["-- Select Province --"] + [f"{row['id']}. {row['name_np']}" for _, row in provinces_df.iterrows()] | |
| selected_province = st.selectbox("🏔️ Province (प्रदेश)", province_options, key="voter_province") | |
| selected_province_id = None | |
| if selected_province != "-- Select Province --": | |
| selected_province_id = int(selected_province.split(".")[0]) | |
| # Initialize variables | |
| selected_district_id = None | |
| selected_municipality_id = None | |
| selected_ward_id = None | |
| selected_booth_id = None | |
| wards_df = None | |
| # District selector (shown for District, Municipality, Ward levels) | |
| if search_level in ["District Level", "Municipality Level", "Ward Level"]: | |
| if selected_province_id: | |
| districts_df = load_districts(db_path, selected_province_id) | |
| district_options = ["-- Select District --"] + [f"{row['id']}. {row['name_np']}" for _, row in districts_df.iterrows()] | |
| else: | |
| districts_df = None | |
| district_options = ["-- Select Province First --"] | |
| selected_district = st.selectbox("🏛️ District (जिल्ला)", district_options, key="voter_district") | |
| if selected_district not in ["-- Select District --", "-- Select Province First --"]: | |
| selected_district_id = int(selected_district.split(".")[0]) | |
| # Municipality selector (shown for Municipality and Ward levels) | |
| if search_level in ["Municipality Level", "Ward Level"]: | |
| if selected_district_id: | |
| municipalities_df = load_municipalities(db_path, selected_district_id) | |
| muni_options = ["-- Select Municipality --"] + [f"{row['id']}. {row['name_np']}" for _, row in municipalities_df.iterrows()] | |
| else: | |
| municipalities_df = None | |
| muni_options = ["-- Select District First --"] | |
| selected_municipality = st.selectbox("🏘️ Municipality (नगरपालिका)", muni_options, key="voter_municipality") | |
| if selected_municipality not in ["-- Select Municipality --", "-- Select District First --"]: | |
| selected_municipality_id = int(selected_municipality.split(".")[0]) | |
| # Ward selector (shown only for Ward level) | |
| if search_level == "Ward Level": | |
| col_ward, col_booth = st.columns(2) | |
| with col_ward: | |
| if selected_municipality_id: | |
| wards_df = load_wards(db_path, selected_municipality_id) | |
| ward_options = ["-- All Wards --"] + [f"Ward {row['ward_number']}" for _, row in wards_df.iterrows()] | |
| else: | |
| wards_df = None | |
| ward_options = ["-- Select Municipality First --"] | |
| selected_ward = st.selectbox("🔢 Ward (वडा)", ward_options, key="voter_ward") | |
| if selected_ward not in ["-- All Wards --", "-- Select Municipality First --"]: | |
| ward_number = int(selected_ward.split(" ")[1]) | |
| if wards_df is not None: | |
| ward_row = wards_df[wards_df['ward_number'] == ward_number] | |
| if not ward_row.empty: | |
| selected_ward_id = int(ward_row.iloc[0]['id']) | |
| with col_booth: | |
| # Booth selector (optional, depends on ward) | |
| if selected_ward_id: | |
| booths_df = load_booths(db_path, selected_ward_id) | |
| booth_options = ["-- All Booths --"] + [f"{row['id']}. {row['name_np']}" for _, row in booths_df.iterrows()] | |
| selected_booth = st.selectbox("🏫 Voting Booth (मतदान केन्द्र)", booth_options, key="voter_booth") | |
| if selected_booth != "-- All Booths --": | |
| selected_booth_id = int(selected_booth.split(".")[0]) | |
| else: | |
| st.selectbox("🏫 Voting Booth (मतदान केन्द्र)", ["-- Select Ward First --"], key="voter_booth_disabled") | |
| st.markdown("---") | |
| # Search by name | |
| st.subheader("🔎 Search Options") | |
| col_search1, col_search2 = st.columns([3, 1]) | |
| with col_search1: | |
| search_name = st.text_input("Enter voter name (नाम खोज्नुहोस्)", placeholder="e.g., राम बहादुर", key="search_name") | |
| with col_search2: | |
| search_voter_id = st.text_input("Voter ID", placeholder="e.g., 12345678", key="search_voter_id") | |
| # Gender filter | |
| gender_filter = st.radio("Gender Filter", ["All", "पुरुष (Male)", "महिला (Female)"], horizontal=True, key="gender_filter") | |
| gender_val = None | |
| if gender_filter == "पुरुष (Male)": | |
| gender_val = "male" | |
| elif gender_filter == "महिला (Female)": | |
| gender_val = "female" | |
| st.markdown("---") | |
| # Show current selection summary | |
| if selected_province_id: | |
| selection_parts = [f"**Province:** {selected_province.split('. ')[1] if '. ' in selected_province else selected_province}"] | |
| if selected_district_id: | |
| selection_parts.append(f"**District:** {selected_district.split('. ')[1] if '. ' in selected_district else selected_district}") | |
| if selected_municipality_id: | |
| selection_parts.append(f"**Municipality:** {selected_municipality.split('. ')[1] if '. ' in selected_municipality else selected_municipality}") | |
| if selected_ward_id: | |
| selection_parts.append(f"**Ward:** {selected_ward}") | |
| st.info(f"🔍 **Search Level:** {search_level} | " + " → ".join(selection_parts)) | |
| # Search button with validation based on search level | |
| col_btn1, col_btn2 = st.columns([1, 3]) | |
| with col_btn1: | |
| search_clicked = st.button("🔍 Search Voters", type="primary", use_container_width=True) | |
| # Perform search with level-based validation | |
| if search_clicked: | |
| # Validate based on search level | |
| validation_error = None | |
| if search_level == "Province Level" and not selected_province_id: | |
| validation_error = "⚠️ Please select a Province to search." | |
| elif search_level == "District Level" and not selected_district_id: | |
| validation_error = "⚠️ Please select a District to search." | |
| elif search_level == "Municipality Level" and not selected_municipality_id: | |
| validation_error = "⚠️ Please select a Municipality to search." | |
| elif search_level == "Ward Level" and not selected_municipality_id: | |
| validation_error = "⚠️ Please select at least a Municipality to search at Ward level." | |
| if validation_error: | |
| st.warning(validation_error) | |
| else: | |
| # Show warning for large searches | |
| if search_level == "Province Level": | |
| st.warning("⚠️ Searching entire province may take time and return large results...") | |
| elif search_level == "District Level": | |
| st.warning("⏳ Searching entire district...") | |
| with st.spinner(f"Searching voters at {search_level}..."): | |
| try: | |
| results_df = search_voters( | |
| db_path, | |
| selected_province_id, | |
| selected_district_id, | |
| selected_municipality_id, | |
| selected_ward_id, | |
| selected_booth_id, | |
| search_name, | |
| search_voter_id, | |
| gender_val | |
| ) | |
| # Store results in session state | |
| st.session_state['voter_search_results'] = results_df | |
| st.session_state['voter_search_done'] = True | |
| except Exception as e: | |
| st.error(f"Error searching: {str(e)}") | |
| st.exception(e) | |
| # Display results from session state | |
| if st.session_state.get('voter_search_done', False) and 'voter_search_results' in st.session_state: | |
| results_df = st.session_state['voter_search_results'] | |
| if len(results_df) > 0: | |
| st.success(f"✅ Found {len(results_df):,} voters") | |
| # Statistics | |
| col1, col2, col3, col4 = st.columns(4) | |
| col1.metric("Total Found", f"{len(results_df):,}") | |
| col2.metric("Male (पुरुष)", f"{len(results_df[results_df['Gender'] == 'पुरुष']):,}") | |
| col3.metric("Female (महिला)", f"{len(results_df[results_df['Gender'] == 'महिला']):,}") | |
| col4.metric("Avg Age", f"{results_df['Age'].mean():.1f}") | |
| # Pagination for large datasets | |
| total_voters = len(results_df) | |
| rows_per_page = 100 | |
| total_pages = (total_voters + rows_per_page - 1) // rows_per_page | |
| if total_pages > 1: | |
| st.markdown("---") | |
| col_page1, col_page2, col_page3 = st.columns([1, 2, 1]) | |
| with col_page2: | |
| current_page = st.number_input( | |
| f"Page (1 to {total_pages})", | |
| min_value=1, | |
| max_value=total_pages, | |
| value=1, | |
| key="voter_page" | |
| ) | |
| start_idx = (current_page - 1) * rows_per_page | |
| end_idx = min(start_idx + rows_per_page, total_voters) | |
| st.info(f"📄 Showing voters {start_idx + 1:,} to {end_idx:,} of {total_voters:,} (Page {current_page} of {total_pages})") | |
| # Display paginated results | |
| st.dataframe( | |
| results_df.iloc[start_idx:end_idx], | |
| use_container_width=True, | |
| hide_index=True, | |
| height=500 | |
| ) | |
| else: | |
| # Display all results if small dataset | |
| st.dataframe( | |
| results_df, | |
| use_container_width=True, | |
| hide_index=True, | |
| height=500 | |
| ) | |
| # Export options | |
| st.markdown("### 📥 Export Options") | |
| export_format = st.radio( | |
| "Select export format:", | |
| options=["CSV", "XLSX", "PDF"], | |
| horizontal=True, | |
| key="export_format" | |
| ) | |
| # Download button based on selected format | |
| if export_format == "CSV": | |
| csv = results_df.to_csv(index=False) | |
| st.download_button( | |
| label=f"📥 Download All {len(results_df):,} Voters (CSV)", | |
| data=csv, | |
| file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", | |
| mime="text/csv" | |
| ) | |
| elif export_format == "XLSX": | |
| # Excel Export | |
| from io import BytesIO | |
| # Create Excel file in memory | |
| output = BytesIO() | |
| with pd.ExcelWriter(output, engine='openpyxl') as writer: | |
| results_df.to_excel(writer, index=False, sheet_name='Voters') | |
| # Auto-adjust column widths | |
| worksheet = writer.sheets['Voters'] | |
| for idx, col in enumerate(results_df.columns): | |
| max_length = max( | |
| results_df[col].astype(str).apply(len).max(), | |
| len(str(col)) | |
| ) | |
| # Limit column width to 50 characters | |
| adjusted_width = min(max_length + 2, 50) | |
| # Convert column index to Excel column letter | |
| col_letter = chr(65 + idx) if idx < 26 else chr(65 + idx // 26 - 1) + chr(65 + idx % 26) | |
| worksheet.column_dimensions[col_letter].width = adjusted_width | |
| excel_data = output.getvalue() | |
| st.download_button( | |
| label=f"📥 Download All {len(results_df):,} Voters (XLSX)", | |
| data=excel_data, | |
| file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | |
| ) | |
| else: | |
| # PDF Export | |
| try: | |
| from reportlab.lib.pagesizes import A4, landscape | |
| from reportlab.lib import colors | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.lib.units import inch | |
| from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak | |
| from reportlab.pdfbase import pdfmetrics | |
| from reportlab.pdfbase.ttfonts import TTFont | |
| # Create PDF in memory | |
| buffer = BytesIO() | |
| doc = SimpleDocTemplate( | |
| buffer, | |
| pagesize=landscape(A4), | |
| rightMargin=30, | |
| leftMargin=30, | |
| topMargin=30, | |
| bottomMargin=30 | |
| ) | |
| elements = [] | |
| styles = getSampleStyleSheet() | |
| # Title | |
| title_style = ParagraphStyle( | |
| 'CustomTitle', | |
| parent=styles['Heading1'], | |
| fontSize=16, | |
| textColor=colors.HexColor('#1E3A8A'), | |
| spaceAfter=12, | |
| alignment=1 # Center | |
| ) | |
| title = Paragraph("Voter Search Results", title_style) | |
| elements.append(title) | |
| # Metadata | |
| meta_style = ParagraphStyle( | |
| 'Meta', | |
| parent=styles['Normal'], | |
| fontSize=10, | |
| textColor=colors.grey, | |
| alignment=1 | |
| ) | |
| meta_text = f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | Total Voters: {len(results_df):,}" | |
| meta = Paragraph(meta_text, meta_style) | |
| elements.append(meta) | |
| elements.append(Spacer(1, 0.3*inch)) | |
| # Prepare table data | |
| # Use actual columns from the dataframe | |
| all_columns = list(results_df.columns) | |
| # Create table data with headers | |
| table_data = [all_columns] # Header | |
| # Add rows (limit to prevent huge PDFs) | |
| max_rows = min(len(results_df), 5000) # Limit to 5,000 rows | |
| for idx, row in results_df.head(max_rows).iterrows(): | |
| table_data.append([str(val) if pd.notna(val) else '' for val in row]) | |
| # Create table | |
| table = Table(table_data) | |
| # Style the table | |
| table_style = TableStyle([ | |
| ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1E3A8A')), | |
| ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke), | |
| ('ALIGN', (0, 0), (-1, -1), 'CENTER'), | |
| ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'), | |
| ('FONTSIZE', (0, 0), (-1, 0), 10), | |
| ('BOTTOMPADDING', (0, 0), (-1, 0), 12), | |
| ('BACKGROUND', (0, 1), (-1, -1), colors.beige), | |
| ('TEXTCOLOR', (0, 1), (-1, -1), colors.black), | |
| ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'), | |
| ('FONTSIZE', (0, 1), (-1, -1), 8), | |
| ('GRID', (0, 0), (-1, -1), 1, colors.black), | |
| ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey]) | |
| ]) | |
| table.setStyle(table_style) | |
| elements.append(table) | |
| # Add note if data was truncated | |
| if len(results_df) > max_rows: | |
| elements.append(Spacer(1, 0.2*inch)) | |
| note = Paragraph( | |
| f"Note: Showing first {max_rows:,} of {len(results_df):,} voters. Download CSV for complete data.", | |
| meta_style | |
| ) | |
| elements.append(note) | |
| # Build PDF | |
| doc.build(elements) | |
| # Get PDF data | |
| pdf_data = buffer.getvalue() | |
| buffer.close() | |
| st.download_button( | |
| label=f"📥 Download All {len(results_df):,} Voters (PDF)", | |
| data=pdf_data, | |
| file_name=f"voter_search_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf", | |
| mime="application/pdf" | |
| ) | |
| if len(results_df) > max_rows: | |
| st.info(f"ℹ️ PDF export limited to first {max_rows:,} voters. Use CSV export for complete data.") | |
| except ImportError: | |
| st.error("📦 PDF export requires 'reportlab' library. Installing...") | |
| st.code("pip install reportlab", language="bash") | |
| st.info("Please install reportlab and restart the application to use PDF export.") | |
| except Exception as e: | |
| st.error(f"❌ Error generating PDF: {str(e)}") | |
| st.info("Please use CSV export as an alternative.") | |
| else: | |
| st.warning("⚠️ No voters found matching your criteria. Try broadening your search.") | |
| # Show location statistics based on search level | |
| show_stats = False | |
| stats_level = None | |
| stats_id = None | |
| if search_level == "Province Level" and selected_province_id: | |
| show_stats = True | |
| stats_level = "province" | |
| stats_id = selected_province_id | |
| elif search_level == "District Level" and selected_district_id: | |
| show_stats = True | |
| stats_level = "district" | |
| stats_id = selected_district_id | |
| elif search_level == "Municipality Level" and selected_municipality_id: | |
| show_stats = True | |
| stats_level = "municipality" | |
| stats_id = selected_municipality_id | |
| elif search_level == "Ward Level" and selected_municipality_id: | |
| show_stats = True | |
| stats_level = "ward" if selected_ward_id else "municipality" | |
| stats_id = selected_ward_id if selected_ward_id else selected_municipality_id | |
| if show_stats: | |
| st.markdown("---") | |
| st.subheader("📊 Location Statistics") | |
| try: | |
| conn = sqlite3.connect(db_path) | |
| if stats_level == "province": | |
| stats_query = """ | |
| SELECT | |
| COUNT(*) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age, | |
| COUNT(DISTINCT d.id) as total_districts, | |
| COUNT(DISTINCT m.id) as total_municipalities | |
| FROM voters v | |
| JOIN voting_booths b ON v.booth_id = b.id | |
| JOIN wards w ON b.ward_id = w.id | |
| JOIN municipalities m ON w.municipality_id = m.id | |
| JOIN districts d ON m.district_id = d.id | |
| WHERE d.province_id = ? | |
| """ | |
| stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id]) | |
| if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0: | |
| stats = stats_df.iloc[0] | |
| col1, col2, col3, col4, col5, col6 = st.columns(6) | |
| col1.metric("Total Voters", f"{int(stats['total_voters']):,}") | |
| col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}") | |
| col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}") | |
| col4.metric("Avg Age", f"{stats['avg_age']:.1f}") | |
| col5.metric("Districts", int(stats['total_districts'])) | |
| col6.metric("Municipalities", int(stats['total_municipalities'])) | |
| elif stats_level == "district": | |
| stats_query = """ | |
| SELECT | |
| COUNT(*) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age, | |
| COUNT(DISTINCT m.id) as total_municipalities, | |
| COUNT(DISTINCT w.id) as total_wards | |
| FROM voters v | |
| JOIN voting_booths b ON v.booth_id = b.id | |
| JOIN wards w ON b.ward_id = w.id | |
| JOIN municipalities m ON w.municipality_id = m.id | |
| WHERE m.district_id = ? | |
| """ | |
| stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id]) | |
| if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0: | |
| stats = stats_df.iloc[0] | |
| col1, col2, col3, col4, col5, col6 = st.columns(6) | |
| col1.metric("Total Voters", f"{int(stats['total_voters']):,}") | |
| col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}") | |
| col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}") | |
| col4.metric("Avg Age", f"{stats['avg_age']:.1f}") | |
| col5.metric("Municipalities", int(stats['total_municipalities'])) | |
| col6.metric("Wards", int(stats['total_wards'])) | |
| elif stats_level == "municipality": | |
| stats_query = """ | |
| SELECT | |
| COUNT(*) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age, | |
| COUNT(DISTINCT w.id) as total_wards, | |
| COUNT(DISTINCT b.id) as total_booths | |
| FROM voters v | |
| JOIN voting_booths b ON v.booth_id = b.id | |
| JOIN wards w ON b.ward_id = w.id | |
| WHERE w.municipality_id = ? | |
| """ | |
| stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id]) | |
| if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0: | |
| stats = stats_df.iloc[0] | |
| col1, col2, col3, col4, col5, col6 = st.columns(6) | |
| col1.metric("Total Voters", f"{int(stats['total_voters']):,}") | |
| col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}") | |
| col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}") | |
| col4.metric("Avg Age", f"{stats['avg_age']:.1f}") | |
| col5.metric("Wards", int(stats['total_wards'])) | |
| col6.metric("Booths", int(stats['total_booths'])) | |
| elif stats_level == "ward": | |
| stats_query = """ | |
| SELECT | |
| COUNT(*) as total_voters, | |
| SUM(CASE WHEN v.gender = 'पुरुष' THEN 1 ELSE 0 END) as male_voters, | |
| SUM(CASE WHEN v.gender = 'महिला' THEN 1 ELSE 0 END) as female_voters, | |
| AVG(v.age) as avg_age, | |
| COUNT(DISTINCT b.id) as total_booths | |
| FROM voters v | |
| JOIN voting_booths b ON v.booth_id = b.id | |
| JOIN wards w ON b.ward_id = w.id | |
| WHERE w.id = ? | |
| """ | |
| stats_df = pd.read_sql_query(stats_query, conn, params=[stats_id]) | |
| if not stats_df.empty and stats_df.iloc[0]['total_voters'] > 0: | |
| stats = stats_df.iloc[0] | |
| col1, col2, col3, col4, col5 = st.columns(5) | |
| col1.metric("Total Voters", f"{int(stats['total_voters']):,}") | |
| col2.metric("Male (पुरुष)", f"{int(stats['male_voters']):,}") | |
| col3.metric("Female (महिला)", f"{int(stats['female_voters']):,}") | |
| col4.metric("Avg Age", f"{stats['avg_age']:.1f}") | |
| col5.metric("Booths", int(stats['total_booths'])) | |
| conn.close() | |
| except Exception as e: | |
| st.error(f"Error loading statistics: {str(e)}") | |
| def main(): | |
| # Header | |
| st.markdown('<h1 class="main-header">🗳️ Nepal Election Voter Analysis</h1>', unsafe_allow_html=True) | |
| st.markdown('<p class="sub-header">Comparative Study: Old vs Current Voter Data with GIS Visualization</p>', unsafe_allow_html=True) | |
| # Sidebar | |
| with st.sidebar: | |
| st.image("https://upload.wikimedia.org/wikipedia/commons/thumb/9/9b/Flag_of_Nepal.svg/1200px-Flag_of_Nepal.svg.png", width=100) | |
| st.title("Navigation") | |
| pages = ["🏠 Dashboard Overview", "🗺️ GIS Maps", "🗳️ Election 2079 Results", | |
| "🎯 Constituency Thresholds", | |
| "🔍 Voter Search", "👥 Caste/Surname Search", "📊 Province Analysis", "🏛️ District Analysis", "🏘️ Local Level Analysis", | |
| "📊 Age Group Analysis", "📈 Comparative Study", "📋 Detailed Reports"] | |
| if st.session_state.get("is_admin", False): | |
| pages.append("🔐 Admin Dashboard") | |
| page = st.radio("Select Analysis View", pages) | |
| # Logout button | |
| if st.button("🚪 Logout"): | |
| for key in list(st.session_state.keys()): | |
| del st.session_state[key] | |
| st.rerun() | |
| # Load data | |
| with st.spinner("Loading data..."): | |
| try: | |
| country_gdf, provinces_gdf, districts_gdf, local_gdf = load_shapefiles() | |
| provinces_json, districts_json, municipalities_json = load_json_data() | |
| # Load pre-computed stats from JSON files (instant loading!) | |
| (old_province, old_district, old_municipality, old_overall, old_age, | |
| new_province, new_district, new_municipality, new_overall, new_age) = load_cached_stats() | |
| except Exception as e: | |
| st.error(f"Error loading data: {e}") | |
| st.exception(e) | |
| return | |
| # Page routing | |
| if page == "🏠 Dashboard Overview": | |
| show_dashboard(old_overall, new_overall, old_province, new_province, old_age, new_age) | |
| elif page == "🗺️ GIS Maps": | |
| show_gis_maps(provinces_gdf, districts_gdf, local_gdf, provinces_json, districts_json, new_province, new_district) | |
| elif page == "🗳️ Election 2079 Results": | |
| from election_gis_visualization import show_election_gis_page | |
| show_election_gis_page() | |
| elif page == "🎯 Constituency Thresholds": | |
| from constituency_analysis import show_constituency_analysis | |
| show_constituency_analysis() | |
| elif page == "🔍 Voter Search": | |
| show_voter_search() | |
| elif page == "👥 Caste/Surname Search": | |
| from castesearch import show_caste_search | |
| show_caste_search() | |
| elif page == "📊 Province Analysis": | |
| show_province_analysis(old_province, new_province, provinces_json) | |
| elif page == "🏛️ District Analysis": | |
| show_district_analysis(old_district, new_district, districts_json) | |
| elif page == "🏘️ Local Level Analysis": | |
| show_local_analysis(old_municipality, new_municipality, municipalities_json) | |
| elif page == "� Age Group Analysis": | |
| from age_group_analysis import show_age_group_analysis | |
| show_age_group_analysis() | |
| elif page == "�📈 Comparative Study": | |
| show_comparative_study(old_province, new_province, old_district, new_district, | |
| old_municipality, new_municipality, old_age, new_age) | |
| elif page == "📋 Detailed Reports": | |
| show_detailed_reports(old_province, new_province, old_district, new_district, | |
| old_municipality, new_municipality, old_overall, new_overall) | |
| elif page == "🔐 Admin Dashboard": | |
| show_admin_dashboard() | |
| def show_dashboard(old_overall, new_overall, old_province, new_province, old_age, new_age): | |
| """Display main dashboard with key metrics.""" | |
| st.header("📊 Dashboard Overview") | |
| # Key metrics | |
| col1, col2, col3, col4 = st.columns(4) | |
| old_total = int(old_overall['total_voters'].iloc[0]) | |
| new_total = int(new_overall['total_voters'].iloc[0]) | |
| change = new_total - old_total | |
| change_pct = (change / old_total) * 100 | |
| with col1: | |
| st.metric( | |
| label="📊 Total Voters (Current)", | |
| value=f"{new_total:,}", | |
| delta=f"{change:+,} ({change_pct:+.2f}%)" | |
| ) | |
| with col2: | |
| old_male = int(old_overall['male_voters'].iloc[0]) | |
| new_male = int(new_overall['male_voters'].iloc[0]) | |
| st.metric( | |
| label="👨 Male Voters", | |
| value=f"{new_male:,}", | |
| delta=f"{new_male - old_male:+,}" | |
| ) | |
| with col3: | |
| old_female = int(old_overall['female_voters'].iloc[0]) | |
| new_female = int(new_overall['female_voters'].iloc[0]) | |
| st.metric( | |
| label="👩 Female Voters", | |
| value=f"{new_female:,}", | |
| delta=f"{new_female - old_female:+,}" | |
| ) | |
| with col4: | |
| st.metric( | |
| label="📅 Average Age", | |
| value=f"{new_overall['avg_age'].iloc[0]:.1f}", | |
| delta=f"{new_overall['avg_age'].iloc[0] - old_overall['avg_age'].iloc[0]:.1f}" | |
| ) | |
| st.markdown("---") | |
| # Summary cards | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("🗺️ Nepal at a Glance") | |
| st.info(f""" | |
| - **7 Provinces** | |
| - **77 Districts** | |
| - **753 Local Levels** (Municipalities & Rural Municipalities) | |
| - **{len(new_province):,}** Administrative Regions with Data | |
| """) | |
| with col2: | |
| st.subheader("📈 Voter Growth Summary") | |
| st.success(f""" | |
| - **Previous Election:** {old_total:,} voters | |
| - **Current Election:** {new_total:,} voters | |
| - **Net Change:** {change:+,} voters | |
| - **Growth Rate:** {change_pct:.2f}% | |
| """) | |
| st.markdown("---") | |
| # Charts row | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("📊 Province-wise Voter Distribution (Current)") | |
| fig = px.pie( | |
| new_province, | |
| values='total_voters', | |
| names='province_name', | |
| title="Voter Distribution by Province", | |
| color_discrete_sequence=px.colors.qualitative.Set3 | |
| ) | |
| fig.update_traces(textposition='inside', textinfo='percent+label') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| st.subheader("👥 Gender Distribution Comparison") | |
| gender_data = pd.DataFrame({ | |
| 'Gender': ['Male (Old)', 'Female (Old)', 'Male (New)', 'Female (New)'], | |
| 'Count': [old_male, old_female, new_male, new_female], | |
| 'Period': ['Previous', 'Previous', 'Current', 'Current'] | |
| }) | |
| fig = px.bar( | |
| gender_data, | |
| x='Gender', | |
| y='Count', | |
| color='Period', | |
| barmode='group', | |
| title="Gender Distribution: Old vs New", | |
| color_discrete_map={'Previous': '#3B82F6', 'Current': '#10B981'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Age distribution comparison | |
| st.subheader("📊 Age Distribution Comparison") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Old age distribution | |
| old_age_grouped = old_age.groupby('age_group')['count'].sum().reset_index() | |
| fig = px.bar( | |
| old_age_grouped, | |
| x='age_group', | |
| y='count', | |
| title="Age Distribution (Previous)", | |
| color='age_group', | |
| color_discrete_sequence=px.colors.qualitative.Pastel | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', showlegend=False) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # New age distribution | |
| new_age_grouped = new_age.groupby('age_group')['count'].sum().reset_index() | |
| fig = px.bar( | |
| new_age_grouped, | |
| x='age_group', | |
| y='count', | |
| title="Age Distribution (Current)", | |
| color='age_group', | |
| color_discrete_sequence=px.colors.qualitative.Set2 | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', showlegend=False) | |
| st.plotly_chart(fig, use_container_width=True) | |
| def show_gis_maps(provinces_gdf, districts_gdf, local_gdf, provinces_json, districts_json, new_province, new_district): | |
| """Display interactive GIS maps.""" | |
| st.header("🗺️ Interactive GIS Maps of Nepal") | |
| map_type = st.selectbox( | |
| "Select Map View", | |
| ["Province Map", "District Map", "Local Level Map", "Choropleth - Voters by Province", | |
| "Choropleth - Voters by District"] | |
| ) | |
| if map_type == "Province Map": | |
| st.subheader("Province Boundaries of Nepal") | |
| # Create folium map | |
| m = folium.Map(location=[28.3949, 84.1240], zoom_start=7, tiles='CartoDB positron') | |
| # Add province polygons | |
| for idx, row in provinces_gdf.iterrows(): | |
| # Get province data | |
| province_id = str(row['PROVINCE']) | |
| prov_data = provinces_json.get(province_id, {}) | |
| popup_html = f""" | |
| <b>Province {row['PROVINCE']}: {row['PR_NAME']}</b><br> | |
| Nepali: {prov_data.get('name', 'N/A')}<br> | |
| Total Voters: {prov_data.get('total_voters', 'N/A'):,}<br> | |
| Districts: {prov_data.get('districts_count', 'N/A')}<br> | |
| Municipalities: {prov_data.get('municipalities_count', 'N/A')} | |
| """ | |
| folium.GeoJson( | |
| row['geometry'], | |
| style_function=lambda x, idx=idx: { | |
| 'fillColor': px.colors.qualitative.Set3[idx % len(px.colors.qualitative.Set3)], | |
| 'color': 'black', | |
| 'weight': 2, | |
| 'fillOpacity': 0.5 | |
| }, | |
| popup=folium.Popup(popup_html, max_width=300) | |
| ).add_to(m) | |
| st_folium(m, width=None, height=600) | |
| elif map_type == "District Map": | |
| st.subheader("District Boundaries of Nepal") | |
| # Province filter | |
| selected_province = st.selectbox( | |
| "Filter by Province (Optional)", | |
| ["All Provinces"] + [f"Province {i}" for i in range(1, 8)] | |
| ) | |
| m = folium.Map(location=[28.3949, 84.1240], zoom_start=7, tiles='CartoDB positron') | |
| filtered_districts = districts_gdf.copy() | |
| if selected_province != "All Provinces": | |
| prov_num = int(selected_province.split()[1]) | |
| filtered_districts = districts_gdf[districts_gdf['PROVINCE'] == prov_num] | |
| for idx, row in filtered_districts.iterrows(): | |
| popup_html = f""" | |
| <b>{row['DISTRICT']}</b><br> | |
| Province: {row['PR_NAME']} | |
| """ | |
| folium.GeoJson( | |
| row['geometry'], | |
| style_function=lambda x: { | |
| 'fillColor': '#3B82F6', | |
| 'color': 'white', | |
| 'weight': 1, | |
| 'fillOpacity': 0.4 | |
| }, | |
| popup=folium.Popup(popup_html, max_width=200) | |
| ).add_to(m) | |
| st_folium(m, width=None, height=600) | |
| elif map_type == "Local Level Map": | |
| st.subheader("Local Level (Municipality/Rural Municipality) Boundaries") | |
| # Province filter for performance | |
| selected_province = st.selectbox( | |
| "Select Province", | |
| [f"Province {i}" for i in range(1, 8)] | |
| ) | |
| prov_num = int(selected_province.split()[1]) | |
| filtered_local = local_gdf[local_gdf['PROVINCE'] == prov_num] | |
| m = folium.Map(location=[28.3949, 84.1240], zoom_start=8, tiles='CartoDB positron') | |
| # Color by type | |
| colors = {'Gaunpalika': '#10B981', 'Nagarpalika': '#F59E0B', | |
| 'Sub Metropolitian': '#EF4444', 'Metropolitian': '#7C3AED'} | |
| for idx, row in filtered_local.iterrows(): | |
| local_type = row.get('TYPE', 'Unknown') | |
| color = colors.get(local_type, '#6B7280') | |
| popup_html = f""" | |
| <b>{row['LOCAL']}</b><br> | |
| Type: {local_type}<br> | |
| District: {row['DISTRICT']} | |
| """ | |
| folium.GeoJson( | |
| row['geometry'], | |
| style_function=lambda x, c=color: { | |
| 'fillColor': c, | |
| 'color': 'white', | |
| 'weight': 0.5, | |
| 'fillOpacity': 0.5 | |
| }, | |
| popup=folium.Popup(popup_html, max_width=200) | |
| ).add_to(m) | |
| # Add legend | |
| st.markdown(""" | |
| **Legend:** | |
| - 🟢 Gaunpalika (Rural Municipality) | |
| - 🟡 Nagarpalika (Municipality) | |
| - 🔴 Sub-Metropolitan City | |
| - 🟣 Metropolitan City | |
| """) | |
| st_folium(m, width=None, height=600) | |
| elif map_type == "Choropleth - Voters by Province": | |
| st.subheader("Voter Distribution Choropleth - Province Level") | |
| # Merge geodata with voter data | |
| provinces_gdf_merged = provinces_gdf.copy() | |
| # Create voter count mapping | |
| voter_map = {} | |
| for pid, pdata in provinces_json.items(): | |
| voter_map[int(pid)] = pdata.get('total_voters', 0) | |
| provinces_gdf_merged['total_voters'] = provinces_gdf_merged['PROVINCE'].map(voter_map) | |
| fig = px.choropleth_mapbox( | |
| provinces_gdf_merged, | |
| geojson=provinces_gdf_merged.geometry.__geo_interface__, | |
| locations=provinces_gdf_merged.index, | |
| color='total_voters', | |
| hover_name='PR_NAME', | |
| hover_data=['PROVINCE', 'total_voters'], | |
| mapbox_style="carto-positron", | |
| zoom=6, | |
| center={"lat": 28.3949, "lon": 84.1240}, | |
| opacity=0.7, | |
| color_continuous_scale='Blues', | |
| title="Voters by Province" | |
| ) | |
| fig.update_layout(height=600) | |
| st.plotly_chart(fig, use_container_width=True) | |
| elif map_type == "Choropleth - Voters by District": | |
| st.subheader("Voter Distribution Choropleth - District Level") | |
| # Use new_district data for district-wise voter counts | |
| if not new_district.empty: | |
| fig = px.bar( | |
| new_district.nlargest(20, 'total_voters'), | |
| x='district_name', | |
| y='total_voters', | |
| color='province_name', | |
| title="Top 20 Districts by Voter Count", | |
| color_discrete_sequence=px.colors.qualitative.Set3 | |
| ) | |
| fig.update_layout(xaxis_tickangle=-45, yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| def show_province_analysis(old_province, new_province, provinces_json): | |
| """Detailed province-level analysis.""" | |
| st.header("📊 Province Analysis") | |
| # Create comparison dataframe | |
| comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name') | |
| # Province selector | |
| selected_province = st.selectbox( | |
| "Select Province for Detailed Analysis", | |
| comparison['province_name'].tolist() | |
| ) | |
| prov_data = comparison[comparison['province_name'] == selected_province].iloc[0] | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric( | |
| "Total Voters (Current)", | |
| f"{int(prov_data['total_voters_new']):,}", | |
| f"{int(prov_data['voter_change']):+,}" | |
| ) | |
| with col2: | |
| st.metric( | |
| "Male Voters", | |
| f"{int(prov_data['male_voters_new']):,}", | |
| f"{int(prov_data['male_change']):+,}" | |
| ) | |
| with col3: | |
| st.metric( | |
| "Female Voters", | |
| f"{int(prov_data['female_voters_new']):,}", | |
| f"{int(prov_data['female_change']):+,}" | |
| ) | |
| with col4: | |
| st.metric( | |
| "Growth Rate", | |
| f"{prov_data['voter_change_pct']:.2f}%" | |
| ) | |
| st.markdown("---") | |
| # Province comparison charts | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("Province-wise Voter Comparison") | |
| fig = go.Figure() | |
| fig.add_trace(go.Bar( | |
| name='Previous', | |
| x=comparison['province_name'], | |
| y=comparison['total_voters_old'], | |
| marker_color='#3B82F6' | |
| )) | |
| fig.add_trace(go.Bar( | |
| name='Current', | |
| x=comparison['province_name'], | |
| y=comparison['total_voters_new'], | |
| marker_color='#10B981' | |
| )) | |
| fig.update_layout(barmode='group', yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| st.subheader("Voter Growth by Province") | |
| fig = px.bar( | |
| comparison, | |
| x='province_name', | |
| y='voter_change', | |
| color='voter_change', | |
| color_continuous_scale=['red', 'yellow', 'green'], | |
| title="Net Voter Change by Province" | |
| ) | |
| fig.update_layout(yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Province summary table | |
| st.subheader("Province Summary Table") | |
| display_cols = ['province_name', 'total_voters_old', 'total_voters_new', | |
| 'voter_change', 'voter_change_pct', 'male_voters_new', 'female_voters_new'] | |
| styled_df = comparison[display_cols].copy() | |
| styled_df.columns = ['Province', 'Previous Voters', 'Current Voters', | |
| 'Change', 'Change %', 'Male', 'Female'] | |
| st.dataframe( | |
| styled_df.style.format({ | |
| 'Previous Voters': '{:,.0f}', | |
| 'Current Voters': '{:,.0f}', | |
| 'Change': '{:+,.0f}', | |
| 'Change %': '{:+.2f}%', | |
| 'Male': '{:,.0f}', | |
| 'Female': '{:,.0f}' | |
| }), | |
| use_container_width=True | |
| ) | |
| def show_district_analysis(old_district, new_district, districts_json): | |
| """Detailed district-level analysis.""" | |
| st.header("🏛️ District Analysis") | |
| # Load age group data | |
| try: | |
| with open("data/cached_stats/old_age_by_district.json", "r", encoding="utf-8") as f: | |
| old_district_age = pd.DataFrame(json.load(f)) | |
| with open("data/cached_stats/new_age_by_district.json", "r", encoding="utf-8") as f: | |
| new_district_age = pd.DataFrame(json.load(f)) | |
| age_data_available = True | |
| except: | |
| age_data_available = False | |
| st.info("ℹ️ Age group data not available. Run generate_age_groups.py to generate it.") | |
| # Create comparison dataframe | |
| comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name') | |
| # Province filter | |
| provinces = comparison['province_name_new'].dropna().unique().tolist() | |
| selected_province = st.selectbox("Filter by Province", ["All"] + provinces) | |
| if selected_province != "All": | |
| comparison = comparison[comparison['province_name_new'] == selected_province] | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("Top 10 Districts by Voter Growth") | |
| top_growth = comparison.nlargest(10, 'voter_change') | |
| fig = px.bar( | |
| top_growth, | |
| x='district_name', | |
| y='voter_change', | |
| color='voter_change_pct', | |
| color_continuous_scale='Greens', | |
| title="Top 10 Districts - Voter Increase" | |
| ) | |
| fig.update_layout(xaxis_tickangle=-45, yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| st.subheader("District Voter Distribution (Current)") | |
| fig = px.treemap( | |
| comparison.head(50), | |
| path=['province_name_new', 'district_name'], | |
| values='total_voters_new', | |
| title="Voter Distribution Treemap" | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # District comparison table | |
| st.subheader("District Comparison Table") | |
| display_df = comparison[['district_name', 'province_name_new', 'total_voters_old', | |
| 'total_voters_new', 'voter_change', 'voter_change_pct']].copy() | |
| display_df.columns = ['District', 'Province', 'Previous', 'Current', 'Change', 'Change %'] | |
| # Search filter | |
| search = st.text_input("Search District", "") | |
| if search: | |
| display_df = display_df[display_df['District'].str.contains(search, case=False, na=False)] | |
| st.dataframe( | |
| display_df.style.format({ | |
| 'Previous': '{:,.0f}', | |
| 'Current': '{:,.0f}', | |
| 'Change': '{:+,.0f}', | |
| 'Change %': '{:+.2f}%' | |
| }).background_gradient(subset=['Change'], cmap='RdYlGn'), | |
| use_container_width=True, | |
| height=400 | |
| ) | |
| # AGE GROUP ANALYSIS SECTION | |
| if age_data_available: | |
| st.markdown("---") | |
| st.subheader("📊 Age Group Analysis by District") | |
| # District selector for detailed age analysis | |
| districts_list = sorted(comparison['district_name'].tolist()) | |
| selected_district = st.selectbox("Select District for Age Group Analysis", districts_list, key="district_age_sel") | |
| # Filter age data for selected district | |
| district_new = new_district_age[new_district_age['district_name'] == selected_district] | |
| district_old = old_district_age[old_district_age['district_name'] == selected_district] | |
| if not district_new.empty: | |
| # Calculate metrics | |
| total_new = district_new['count'].sum() | |
| total_old = district_old['count'].sum() | |
| change = total_new - total_old | |
| change_pct = (change / total_old * 100) if total_old > 0 else 0 | |
| # Metrics row | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Total Voters (Current)", f"{total_new:,}", f"{change:+,}") | |
| with col2: | |
| male_new = district_new[district_new['gender'] == 'पुरुष']['count'].sum() | |
| st.metric("Male Voters", f"{male_new:,}") | |
| with col3: | |
| female_new = district_new[district_new['gender'] == 'महिला']['count'].sum() | |
| st.metric("Female Voters", f"{female_new:,}") | |
| with col4: | |
| st.metric("Growth Rate", f"{change_pct:+.2f}%") | |
| # Visualizations | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("**Age Distribution (Current)**") | |
| # Aggregate by age group | |
| age_grouped_new = district_new.groupby('age_group')['count'].sum().reset_index() | |
| age_grouped_new = age_grouped_new.sort_values('age_group') | |
| fig = px.bar( | |
| age_grouped_new, | |
| x='age_group', | |
| y='count', | |
| title=f"Age Distribution in {selected_district}", | |
| color='count', | |
| color_continuous_scale='Blues', | |
| labels={'age_group': 'Age Group', 'count': 'Voters'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', showlegend=False, height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| st.markdown("**Gender Distribution by Age Group**") | |
| # Gender breakdown | |
| gender_age = district_new[district_new['gender'].isin(['पुरुष', 'महिला'])] | |
| gender_age_grouped = gender_age.groupby(['age_group', 'gender'])['count'].sum().reset_index() | |
| fig = px.bar( | |
| gender_age_grouped, | |
| x='age_group', | |
| y='count', | |
| color='gender', | |
| barmode='group', | |
| title=f"Gender by Age in {selected_district}", | |
| labels={'age_group': 'Age Group', 'count': 'Voters'}, | |
| color_discrete_map={'पुरुष': '#3B82F6', 'महिला': '#EC4899'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Comparison with previous election | |
| st.markdown("**📊 Age Group Comparison: Previous vs Current**") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Old vs New comparison | |
| age_old = district_old.groupby('age_group')['count'].sum().reset_index() | |
| age_old.columns = ['age_group', 'old_count'] | |
| age_new = district_new.groupby('age_group')['count'].sum().reset_index() | |
| age_new.columns = ['age_group', 'new_count'] | |
| age_comparison = pd.merge(age_old, age_new, on='age_group', how='outer').fillna(0) | |
| age_comparison['change'] = age_comparison['new_count'] - age_comparison['old_count'] | |
| age_comparison['change_pct'] = (age_comparison['change'] / age_comparison['old_count'] * 100).round(2) | |
| age_comparison['change_pct'] = age_comparison['change_pct'].replace([np.inf, -np.inf], 0) | |
| fig = px.bar( | |
| age_comparison, | |
| x='age_group', | |
| y=['old_count', 'new_count'], | |
| barmode='group', | |
| title="Voter Count Comparison", | |
| labels={'value': 'Voters', 'variable': 'Period'}, | |
| color_discrete_map={'old_count': '#F59E0B', 'new_count': '#10B981'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Growth rate by age group | |
| fig = px.bar( | |
| age_comparison, | |
| x='age_group', | |
| y='change_pct', | |
| title="Growth Rate by Age Group (%)", | |
| color='change_pct', | |
| color_continuous_scale='RdYlGn', | |
| labels={'age_group': 'Age Group', 'change_pct': 'Growth (%)'}, | |
| color_continuous_midpoint=0 | |
| ) | |
| fig.update_layout(height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| else: | |
| st.warning(f"No age group data available for {selected_district}") | |
| def show_local_analysis(old_municipality, new_municipality, municipalities_json): | |
| """Local level (municipality) analysis.""" | |
| st.header("🏘️ Local Level Analysis") | |
| # Load age group data for municipalities and wards | |
| try: | |
| with open("data/cached_stats/old_age_by_municipality.json", "r", encoding="utf-8") as f: | |
| old_municipality_age = pd.DataFrame(json.load(f)) | |
| with open("data/cached_stats/new_age_by_municipality.json", "r", encoding="utf-8") as f: | |
| new_municipality_age = pd.DataFrame(json.load(f)) | |
| with open("data/cached_stats/old_age_by_ward.json", "r", encoding="utf-8") as f: | |
| old_ward_age = pd.DataFrame(json.load(f)) | |
| with open("data/cached_stats/new_age_by_ward.json", "r", encoding="utf-8") as f: | |
| new_ward_age = pd.DataFrame(json.load(f)) | |
| age_data_available = True | |
| ward_data_available = True | |
| except: | |
| age_data_available = False | |
| ward_data_available = False | |
| st.info("ℹ️ Age group data not available. Run generate_age_groups.py and generate_ward_age_groups.py to generate it.") | |
| comparison = create_comparative_dataframe(old_municipality, new_municipality, 'municipality_id', 'municipality_name') | |
| # Use the _new suffixed column for district_name (or _old if _new doesn't exist) | |
| district_col = 'district_name_new' if 'district_name_new' in comparison.columns else 'district_name_old' | |
| comparison['district_name'] = comparison[district_col].fillna(comparison.get('district_name_old', '')) | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| st.metric("Total Local Levels", f"{len(comparison):,}") | |
| with col2: | |
| positive_growth = (comparison['voter_change'] > 0).sum() | |
| st.metric("With Positive Growth", f"{positive_growth:,}") | |
| with col3: | |
| negative_growth = (comparison['voter_change'] < 0).sum() | |
| st.metric("With Negative Growth", f"{negative_growth:,}") | |
| st.markdown("---") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.subheader("Top 20 Local Levels by Voter Count") | |
| top_20 = comparison.nlargest(20, 'total_voters_new') | |
| fig = px.bar( | |
| top_20, | |
| y='municipality_name', | |
| x='total_voters_new', | |
| orientation='h', | |
| color='voter_change_pct', | |
| color_continuous_scale='RdYlGn', | |
| title="Largest Local Levels" | |
| ) | |
| fig.update_layout(yaxis={'categoryorder': 'total ascending'}, xaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| st.subheader("Growth Distribution") | |
| fig = px.histogram( | |
| comparison, | |
| x='voter_change_pct', | |
| nbins=50, | |
| title="Distribution of Voter Change %", | |
| labels={'voter_change_pct': 'Voter Change (%)'} | |
| ) | |
| fig.add_vline(x=0, line_dash="dash", line_color="red") | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Detailed table | |
| st.subheader("Local Level Details") | |
| # Filters | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| districts = comparison['district_name'].dropna().unique().tolist() | |
| selected_district = st.selectbox("Filter by District", ["All"] + sorted(districts)) | |
| with col2: | |
| growth_filter = st.selectbox( | |
| "Filter by Growth", | |
| ["All", "Positive Growth", "Negative Growth", "No Change"] | |
| ) | |
| filtered = comparison.copy() | |
| if selected_district != "All": | |
| filtered = filtered[filtered['district_name'] == selected_district] | |
| if growth_filter == "Positive Growth": | |
| filtered = filtered[filtered['voter_change'] > 0] | |
| elif growth_filter == "Negative Growth": | |
| filtered = filtered[filtered['voter_change'] < 0] | |
| elif growth_filter == "No Change": | |
| filtered = filtered[filtered['voter_change'] == 0] | |
| display_df = filtered[['municipality_name', 'district_name', 'total_voters_old', | |
| 'total_voters_new', 'voter_change', 'voter_change_pct']].copy() | |
| display_df.columns = ['Local Level', 'District', 'Previous', 'Current', 'Change', 'Change %'] | |
| st.dataframe( | |
| display_df.style.format({ | |
| 'Previous': '{:,.0f}', | |
| 'Current': '{:,.0f}', | |
| 'Change': '{:+,.0f}', | |
| 'Change %': '{:+.2f}%' | |
| }), | |
| use_container_width=True, | |
| height=400 | |
| ) | |
| # AGE GROUP ANALYSIS SECTION FOR LOCAL LEVEL | |
| if age_data_available: | |
| st.markdown("---") | |
| st.subheader("📊 Age Group Analysis by Local Level") | |
| # Hierarchical selectors for local level | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Province selector | |
| if age_data_available: | |
| provinces_list = sorted(new_municipality_age['province_name'].unique()) | |
| selected_province_age = st.selectbox("Select Province", provinces_list, key="local_prov_age") | |
| # Filter by province | |
| muni_filtered = new_municipality_age[new_municipality_age['province_name'] == selected_province_age] | |
| old_muni_filtered = old_municipality_age[old_municipality_age['province_name'] == selected_province_age] | |
| with col2: | |
| # District selector | |
| if age_data_available and not muni_filtered.empty: | |
| districts_list = sorted(muni_filtered['district_name'].unique()) | |
| selected_district_age = st.selectbox("Select District", districts_list, key="local_dist_age") | |
| # Filter by district | |
| muni_in_district = muni_filtered[muni_filtered['district_name'] == selected_district_age] | |
| old_muni_in_district = old_muni_filtered[old_muni_filtered['district_name'] == selected_district_age] | |
| # Municipality selector | |
| if age_data_available and not muni_in_district.empty: | |
| municipalities_list = sorted(muni_in_district['municipality_name'].unique()) | |
| selected_municipality = st.selectbox("Select Local Level for Age Analysis", municipalities_list, key="muni_age_sel") | |
| # =========================== WARD POPULATION GRAPH SECTION =========================== | |
| st.markdown("---") | |
| st.subheader("📊 Ward-wise Population Distribution") | |
| st.markdown(f"**Population breakdown by ward for {selected_municipality}**") | |
| # Get ward-wise population data for selected municipality | |
| if ward_data_available: | |
| # Filter ward data for the selected municipality | |
| ward_pop_new = new_ward_age[ | |
| (new_ward_age['municipality_name'] == selected_municipality) & | |
| (new_ward_age['district_name'] == selected_district_age) | |
| ] | |
| ward_pop_old = old_ward_age[ | |
| (old_ward_age['municipality_name'] == selected_municipality) & | |
| (old_ward_age['district_name'] == selected_district_age) | |
| ] | |
| if not ward_pop_new.empty: | |
| # Aggregate data by ward (sum across all age groups and genders) | |
| ward_totals_new = ward_pop_new.groupby('ward_number')['count'].sum().reset_index() | |
| ward_totals_new.columns = ['ward_number', 'total_voters_new'] | |
| ward_totals_new = ward_totals_new.sort_values('ward_number') | |
| # Get male and female counts separately | |
| ward_gender_new = ward_pop_new.groupby(['ward_number', 'gender'])['count'].sum().reset_index() | |
| ward_male_new = ward_gender_new[ward_gender_new['gender'] == 'पुरुष'][['ward_number', 'count']] | |
| ward_male_new.columns = ['ward_number', 'male_voters'] | |
| ward_female_new = ward_gender_new[ward_gender_new['gender'] == 'महिला'][['ward_number', 'count']] | |
| ward_female_new.columns = ['ward_number', 'female_voters'] | |
| # Merge all data | |
| ward_data_combined = ward_totals_new.merge(ward_male_new, on='ward_number', how='left') | |
| ward_data_combined = ward_data_combined.merge(ward_female_new, on='ward_number', how='left') | |
| ward_data_combined = ward_data_combined.fillna(0) | |
| # Get old data for comparison | |
| if not ward_pop_old.empty: | |
| ward_totals_old = ward_pop_old.groupby('ward_number')['count'].sum().reset_index() | |
| ward_totals_old.columns = ['ward_number', 'total_voters_old'] | |
| ward_data_combined = ward_data_combined.merge(ward_totals_old, on='ward_number', how='left') | |
| ward_data_combined['total_voters_old'] = ward_data_combined['total_voters_old'].fillna(0) | |
| ward_data_combined['change'] = ward_data_combined['total_voters_new'] - ward_data_combined['total_voters_old'] | |
| ward_data_combined['change_pct'] = (ward_data_combined['change'] / ward_data_combined['total_voters_old'] * 100).round(2) | |
| ward_data_combined['change_pct'] = ward_data_combined['change_pct'].replace([np.inf, -np.inf], 0) | |
| has_old_data = True | |
| else: | |
| has_old_data = False | |
| # Display summary metrics | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Total Wards", len(ward_data_combined)) | |
| with col2: | |
| total_population = int(ward_data_combined['total_voters_new'].sum()) | |
| st.metric("Total Population", f"{total_population:,}") | |
| with col3: | |
| avg_population = int(ward_data_combined['total_voters_new'].mean()) | |
| st.metric("Avg per Ward", f"{avg_population:,}") | |
| with col4: | |
| max_ward = ward_data_combined.loc[ward_data_combined['total_voters_new'].idxmax()] | |
| st.metric("Largest Ward", f"Ward {int(max_ward['ward_number'])}") | |
| # Create visualizations with proper location context | |
| # Build full location string for chart titles | |
| location_full = f"{selected_municipality}, {selected_district_age}, {selected_province_age}" | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Bar chart showing total population by ward | |
| ward_data_combined['Ward'] = 'Ward ' + ward_data_combined['ward_number'].astype(int).astype(str) | |
| fig1 = px.bar( | |
| ward_data_combined, | |
| x='Ward', | |
| y='total_voters_new', | |
| title=f"Population by Ward<br><sub>{location_full}</sub>", | |
| labels={'total_voters_new': 'Total Population', 'Ward': 'Ward'}, | |
| color='total_voters_new', | |
| color_continuous_scale='Viridis', | |
| text='total_voters_new' | |
| ) | |
| fig1.update_traces(texttemplate='%{text:,.0f}', textposition='outside') | |
| fig1.update_layout( | |
| yaxis_tickformat=',', | |
| xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]}, | |
| height=450, | |
| showlegend=False, | |
| title_font_size=14 | |
| ) | |
| st.plotly_chart(fig1, use_container_width=True, key="ward_pop_chart") | |
| # Add download button for this chart | |
| try: | |
| img_bytes = fig1.to_image(format="png", width=1200, height=600, scale=2) | |
| st.download_button( | |
| label="📸 Download Chart as PNG", | |
| data=img_bytes, | |
| file_name=f"ward_population_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png", | |
| mime="image/png", | |
| use_container_width=True, | |
| key="download_ward_pop" | |
| ) | |
| except: | |
| st.caption("💡 Install kaleido to enable image downloads: `pip install kaleido`") | |
| with col2: | |
| # Gender distribution by ward | |
| ward_gender_data = ward_data_combined.copy() | |
| ward_gender_data['Male'] = ward_gender_data['male_voters'].astype(int) | |
| ward_gender_data['Female'] = ward_gender_data['female_voters'].astype(int) | |
| fig2 = px.bar( | |
| ward_gender_data, | |
| x='Ward', | |
| y=['Male', 'Female'], | |
| title=f"Gender Distribution by Ward<br><sub>{location_full}</sub>", | |
| labels={'value': 'Population', 'variable': 'Gender'}, | |
| color_discrete_map={'Male': '#3B82F6', 'Female': '#EC4899'}, | |
| barmode='group' | |
| ) | |
| fig2.update_layout( | |
| yaxis_tickformat=',', | |
| xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]}, | |
| height=450, | |
| title_font_size=14 | |
| ) | |
| st.plotly_chart(fig2, use_container_width=True, key="ward_gender_chart") | |
| # Add download button for this chart | |
| try: | |
| img_bytes = fig2.to_image(format="png", width=1200, height=600, scale=2) | |
| st.download_button( | |
| label="📸 Download Chart as PNG", | |
| data=img_bytes, | |
| file_name=f"ward_gender_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png", | |
| mime="image/png", | |
| use_container_width=True, | |
| key="download_ward_gender" | |
| ) | |
| except: | |
| st.caption("💡 Install kaleido to enable image downloads: `pip install kaleido`") | |
| # Show comparison with old data if available | |
| if has_old_data: | |
| st.markdown("**Ward Population Change (Previous vs Current)**") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Comparison bar chart | |
| fig3 = px.bar( | |
| ward_data_combined, | |
| x='Ward', | |
| y=['total_voters_old', 'total_voters_new'], | |
| title=f"Population Comparison by Ward<br><sub>{location_full}</sub>", | |
| labels={'value': 'Population', 'variable': 'Period'}, | |
| color_discrete_map={'total_voters_old': '#F59E0B', 'total_voters_new': '#10B981'}, | |
| barmode='group' | |
| ) | |
| fig3.update_layout( | |
| yaxis_tickformat=',', | |
| xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]}, | |
| height=400, | |
| title_font_size=14 | |
| ) | |
| st.plotly_chart(fig3, use_container_width=True, key="ward_comparison_chart") | |
| # Add download button | |
| try: | |
| img_bytes = fig3.to_image(format="png", width=1200, height=600, scale=2) | |
| st.download_button( | |
| label="📸 Download Chart as PNG", | |
| data=img_bytes, | |
| file_name=f"ward_comparison_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png", | |
| mime="image/png", | |
| use_container_width=True, | |
| key="download_ward_comparison" | |
| ) | |
| except: | |
| st.caption("💡 Install kaleido to enable image downloads") | |
| with col2: | |
| # Growth rate by ward | |
| fig4 = px.bar( | |
| ward_data_combined, | |
| x='Ward', | |
| y='change_pct', | |
| title=f"Growth Rate by Ward (%)<br><sub>{location_full}</sub>", | |
| labels={'change_pct': 'Growth (%)'}, | |
| color='change_pct', | |
| color_continuous_scale='RdYlGn', | |
| color_continuous_midpoint=0, | |
| text='change_pct' | |
| ) | |
| fig4.update_traces(texttemplate='%{text:.1f}%', textposition='outside') | |
| fig4.update_layout( | |
| xaxis={'categoryorder': 'array', 'categoryarray': [f'Ward {i}' for i in sorted(ward_data_combined['ward_number'].astype(int))]}, | |
| height=400, | |
| showlegend=False, | |
| title_font_size=14 | |
| ) | |
| st.plotly_chart(fig4, use_container_width=True, key="ward_growth_chart") | |
| # Add download button | |
| try: | |
| img_bytes = fig4.to_image(format="png", width=1200, height=600, scale=2) | |
| st.download_button( | |
| label="📸 Download Chart as PNG", | |
| data=img_bytes, | |
| file_name=f"ward_growth_{selected_municipality.replace(' ', '_')}_{selected_district_age.replace(' ', '_')}.png", | |
| mime="image/png", | |
| use_container_width=True, | |
| key="download_ward_growth" | |
| ) | |
| except: | |
| st.caption("💡 Install kaleido to enable image downloads") | |
| # Detailed data table | |
| st.markdown("**Ward-wise Detailed Statistics**") | |
| display_cols = ['Ward', 'total_voters_new', 'male_voters', 'female_voters'] | |
| col_names = ['Ward', 'Total Population', 'Male', 'Female'] | |
| if has_old_data: | |
| display_cols.extend(['total_voters_old', 'change', 'change_pct']) | |
| col_names.extend(['Previous Population', 'Change', 'Growth %']) | |
| ward_display = ward_data_combined[display_cols].copy() | |
| ward_display.columns = col_names | |
| # Format the dataframe | |
| format_dict = { | |
| 'Total Population': '{:,.0f}', | |
| 'Male': '{:,.0f}', | |
| 'Female': '{:,.0f}' | |
| } | |
| if has_old_data: | |
| format_dict.update({ | |
| 'Previous Population': '{:,.0f}', | |
| 'Change': '{:+,.0f}', | |
| 'Growth %': '{:+.2f}%' | |
| }) | |
| styled_df = ward_display.style.format(format_dict) | |
| if has_old_data: | |
| styled_df = styled_df.background_gradient(subset=['Growth %'], cmap='RdYlGn') | |
| st.dataframe(styled_df, use_container_width=True, height=400) | |
| # Download option with proper location naming | |
| st.markdown("**📥 Download Complete Ward Data**") | |
| # Create clean filename with location hierarchy | |
| clean_province = selected_province_age.replace(' ', '_').replace('/', '-') | |
| clean_district = selected_district_age.replace(' ', '_').replace('/', '-') | |
| clean_municipality = selected_municipality.replace(' ', '_').replace('/', '-') | |
| base_filename = f"{clean_province}_{clean_district}_{clean_municipality}_ward_data" | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| csv = ward_display.to_csv(index=False, encoding='utf-8-sig') | |
| st.download_button( | |
| label="📥 Download Ward Data as CSV", | |
| data=csv, | |
| file_name=f"{base_filename}.csv", | |
| mime="text/csv", | |
| use_container_width=True, | |
| help=f"Download ward population data for {location_full}" | |
| ) | |
| with col2: | |
| from io import BytesIO | |
| buffer = BytesIO() | |
| with pd.ExcelWriter(buffer, engine='openpyxl') as writer: | |
| # Add location info sheet | |
| ward_display.to_excel(writer, index=False, sheet_name='Ward Population') | |
| # Add metadata sheet with location info | |
| metadata = pd.DataFrame({ | |
| 'Field': ['Province', 'District', 'Municipality', 'Analysis Date', 'Total Wards', 'Total Population'], | |
| 'Value': [ | |
| selected_province_age, | |
| selected_district_age, | |
| selected_municipality, | |
| datetime.now().strftime("%Y-%m-%d %H:%M:%S"), | |
| len(ward_data_combined), | |
| int(ward_data_combined['total_voters_new'].sum()) | |
| ] | |
| }) | |
| metadata.to_excel(writer, index=False, sheet_name='Metadata') | |
| st.download_button( | |
| label="📊 Download as Excel (with Metadata)", | |
| data=buffer.getvalue(), | |
| file_name=f"{base_filename}.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | |
| use_container_width=True, | |
| help=f"Download with location metadata for {location_full}" | |
| ) | |
| else: | |
| st.warning(f"No ward population data available for {selected_municipality}") | |
| else: | |
| st.info("ℹ️ Ward population data not available. Run generate_ward_age_groups.py to generate it.") | |
| st.markdown("---") | |
| # WARD SELECTION FEATURE | |
| if ward_data_available: | |
| st.markdown("---") | |
| st.markdown("### 🏘️ Ward Selection (Optional)") | |
| st.markdown("*Select specific ward(s) to analyze or leave unselected to see entire municipality data*") | |
| # Get wards for selected municipality | |
| ward_filtered = new_ward_age[ | |
| (new_ward_age['municipality_name'] == selected_municipality) & | |
| (new_ward_age['district_name'] == selected_district_age) | |
| ] | |
| if not ward_filtered.empty: | |
| wards_list = sorted(ward_filtered['ward_number'].unique()) | |
| col1, col2 = st.columns([3, 1]) | |
| with col1: | |
| selected_wards = st.multiselect( | |
| f"Select Ward(s) from {selected_municipality}", | |
| options=wards_list, | |
| default=[], | |
| help="Select one or multiple wards. Leave empty to show entire municipality.", | |
| key="ward_multiselect" | |
| ) | |
| with col2: | |
| st.metric("Total Wards", len(wards_list)) | |
| if selected_wards: | |
| st.metric("Selected", len(selected_wards)) | |
| # Filter data based on ward selection | |
| if ward_data_available and selected_wards: | |
| # Filter by selected wards | |
| ward_new_filtered = new_ward_age[ | |
| (new_ward_age['municipality_name'] == selected_municipality) & | |
| (new_ward_age['district_name'] == selected_district_age) & | |
| (new_ward_age['ward_number'].isin(selected_wards)) | |
| ] | |
| ward_old_filtered = old_ward_age[ | |
| (old_ward_age['municipality_name'] == selected_municipality) & | |
| (old_ward_age['district_name'] == selected_district_age) & | |
| (old_ward_age['ward_number'].isin(selected_wards)) | |
| ] | |
| muni_new = ward_new_filtered | |
| muni_old = ward_old_filtered | |
| analysis_level = f"Ward(s): {', '.join(map(str, sorted(selected_wards)))}" | |
| else: | |
| # Use municipality level data | |
| muni_new = muni_in_district[muni_in_district['municipality_name'] == selected_municipality] | |
| muni_old = old_muni_in_district[old_muni_in_district['municipality_name'] == selected_municipality] | |
| analysis_level = f"Entire {selected_municipality}" | |
| # Display analysis level | |
| st.info(f"📍 **Analyzing**: {analysis_level}") | |
| # ======================== CASTE GROUP ANALYSIS SECTION ======================== | |
| st.markdown("---") | |
| st.subheader("👥 Caste/Ethnic Group Population Analysis") | |
| st.markdown(f""" | |
| **Caste/Ethnic Group Distribution in {analysis_level}** | |
| This section shows which caste/ethnic groups are present in the selected area and their population distribution. | |
| Surnames are matched against a comprehensive database of Nepali caste/ethnic groups. | |
| """) | |
| # Import caste analysis functions | |
| from age_group_analysis import get_caste_data_for_municipality, get_ward_level_caste_data | |
| # Get municipality ID for caste analysis using a simpler query | |
| conn_caste = sqlite3.connect("voter_db_new.sqlite3") | |
| # First, try to get municipality ID from the age data which already has it | |
| municipality_id_caste = None | |
| if not muni_in_district.empty: | |
| # Try to extract municipality_id from the age data if available | |
| temp_muni = muni_in_district[muni_in_district['municipality_name'] == selected_municipality] | |
| if not temp_muni.empty and 'municipality_id' in temp_muni.columns: | |
| municipality_id_caste = temp_muni.iloc[0]['municipality_id'] | |
| # If not found, query the database | |
| if municipality_id_caste is None: | |
| muni_query_caste = """ | |
| SELECT m.id | |
| FROM municipalities m | |
| WHERE m.name_np = ? | |
| """ | |
| muni_data_caste = pd.read_sql_query(muni_query_caste, conn_caste, | |
| params=(selected_municipality,)) | |
| if not muni_data_caste.empty: | |
| municipality_id_caste = muni_data_caste.iloc[0]['id'] | |
| conn_caste.close() | |
| # ======================== BULK DOWNLOAD ALL WARDS CASTE DATA ======================== | |
| with st.expander("📥 Download All Ward-wise Caste/Ethnic Data", expanded=False): | |
| st.markdown(""" | |
| **Download caste/ethnic population data for ALL wards in this municipality at once.** | |
| This feature generates a comprehensive Excel file with: | |
| - One sheet per ward showing caste/ethnic group distribution | |
| - A summary sheet comparing all wards | |
| - A combined sheet with all data | |
| - **📊 Charts and graphs for visual analysis** | |
| - **❓ Unmatched surnames list** | |
| """) | |
| # Import the all wards function | |
| from age_group_analysis import get_all_wards_caste_data | |
| if st.button("📊 Generate All Wards Caste Data", type="secondary", use_container_width=True, key="generate_all_wards_caste"): | |
| if municipality_id_caste: | |
| with st.spinner("Analyzing caste/ethnic data for all wards... This may take a moment."): | |
| try: | |
| all_wards_data, all_ward_numbers = get_all_wards_caste_data(municipality_id_caste) | |
| if all_wards_data: | |
| # Store in session state for download | |
| st.session_state['all_wards_caste_data'] = all_wards_data | |
| st.session_state['all_ward_numbers'] = all_ward_numbers | |
| st.session_state['all_wards_municipality'] = selected_municipality | |
| st.session_state['all_wards_district'] = selected_district_age | |
| st.session_state['all_wards_province'] = selected_province_age | |
| st.success(f"✅ Data generated for {len(all_ward_numbers)} wards!") | |
| st.rerun() | |
| else: | |
| st.warning("No ward data found for this municipality.") | |
| except Exception as e: | |
| st.error(f"Error: {str(e)}") | |
| else: | |
| st.error("Could not identify municipality ID.") | |
| # Show download button if data is available | |
| if 'all_wards_caste_data' in st.session_state and st.session_state.get('all_wards_municipality') == selected_municipality: | |
| all_wards_data = st.session_state['all_wards_caste_data'] | |
| all_ward_numbers = st.session_state['all_ward_numbers'] | |
| # Show summary | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| total_voters_all = sum(wd['total_voters'] for wd in all_wards_data.values()) | |
| st.metric("Total Voters", f"{total_voters_all:,}") | |
| with col2: | |
| st.metric("Total Wards", len(all_ward_numbers)) | |
| with col3: | |
| all_castes = set() | |
| for wd in all_wards_data.values(): | |
| all_castes.update(wd['caste_data'].keys()) | |
| st.metric("Caste Groups Found", len(all_castes)) | |
| with col4: | |
| # Count total unmatched surnames | |
| all_unmatched = {} | |
| for wd in all_wards_data.values(): | |
| for surname, count in wd.get('unmatched_surnames', {}).items(): | |
| all_unmatched[surname] = all_unmatched.get(surname, 0) + count | |
| st.metric("Unmatched Surnames", len(all_unmatched)) | |
| # Create Excel file with multiple sheets and charts | |
| from io import BytesIO | |
| from openpyxl.chart import BarChart, PieChart, Reference | |
| from openpyxl.chart.label import DataLabelList | |
| from openpyxl.utils.dataframe import dataframe_to_rows | |
| buffer = BytesIO() | |
| with pd.ExcelWriter(buffer, engine='openpyxl') as writer: | |
| # 1. Summary Sheet - All wards comparison | |
| summary_data = [] | |
| all_castes_sorted = sorted(all_castes) | |
| for ward_num in sorted(all_ward_numbers): | |
| ward_data = all_wards_data.get(ward_num, {}) | |
| row = {'Ward': f"Ward {ward_num}", 'Total Voters': ward_data.get('total_voters', 0)} | |
| for caste in all_castes_sorted: | |
| row[caste] = ward_data.get('caste_data', {}).get(caste, 0) | |
| summary_data.append(row) | |
| # Add total row | |
| total_row = {'Ward': 'TOTAL', 'Total Voters': total_voters_all} | |
| for caste in all_castes_sorted: | |
| total_row[caste] = sum(all_wards_data.get(w, {}).get('caste_data', {}).get(caste, 0) for w in all_ward_numbers) | |
| summary_data.append(total_row) | |
| summary_df = pd.DataFrame(summary_data) | |
| summary_df.to_excel(writer, index=False, sheet_name='Ward-wise Summary') | |
| # 2. Combined detailed sheet | |
| combined_data = [] | |
| for ward_num in sorted(all_ward_numbers): | |
| ward_data = all_wards_data.get(ward_num, {}) | |
| total = ward_data.get('total_voters', 0) | |
| for caste, count in sorted(ward_data.get('caste_data', {}).items(), key=lambda x: x[1], reverse=True): | |
| pct = round(count / total * 100, 2) if total > 0 else 0 | |
| combined_data.append({ | |
| 'Ward': ward_num, | |
| 'Caste/Ethnic Group': caste, | |
| 'Population': count, | |
| 'Percentage': pct | |
| }) | |
| combined_df = pd.DataFrame(combined_data) | |
| combined_df.to_excel(writer, index=False, sheet_name='All Wards Combined') | |
| # 3. Municipality Total - Caste Summary with Chart | |
| muni_caste_totals = {} | |
| for wd in all_wards_data.values(): | |
| for caste, count in wd.get('caste_data', {}).items(): | |
| muni_caste_totals[caste] = muni_caste_totals.get(caste, 0) + count | |
| muni_caste_df = pd.DataFrame([ | |
| {'S.N.': i+1, 'Caste/Ethnic Group': caste, 'Population': count, | |
| 'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0} | |
| for i, (caste, count) in enumerate(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True)) | |
| ]) | |
| muni_caste_df.to_excel(writer, index=False, sheet_name='Municipality Total') | |
| # Add bar chart to Municipality Total sheet | |
| ws_muni = writer.sheets['Municipality Total'] | |
| # Create bar chart for top 15 castes | |
| chart_data_rows = min(16, len(muni_caste_df) + 1) # +1 for header | |
| if chart_data_rows > 2: | |
| bar_chart = BarChart() | |
| bar_chart.type = "bar" | |
| bar_chart.style = 10 | |
| bar_chart.title = f"Top Caste/Ethnic Groups - {selected_municipality}" | |
| bar_chart.y_axis.title = "Caste/Ethnic Group" | |
| bar_chart.x_axis.title = "Population" | |
| data = Reference(ws_muni, min_col=3, min_row=1, max_row=chart_data_rows, max_col=3) | |
| cats = Reference(ws_muni, min_col=2, min_row=2, max_row=chart_data_rows) | |
| bar_chart.add_data(data, titles_from_data=True) | |
| bar_chart.set_categories(cats) | |
| bar_chart.shape = 4 | |
| bar_chart.width = 20 | |
| bar_chart.height = 12 | |
| ws_muni.add_chart(bar_chart, "F2") | |
| # 4. Individual ward sheets with unmatched surnames detail | |
| # Import SURNAME_TO_CASTE for matching info | |
| from age_group_analysis import SURNAME_TO_CASTE | |
| for ward_num in sorted(all_ward_numbers): | |
| ward_data = all_wards_data.get(ward_num, {}) | |
| total = ward_data.get('total_voters', 0) | |
| if ward_data.get('caste_data'): | |
| ward_rows = [] | |
| for i, (caste, count) in enumerate(sorted(ward_data['caste_data'].items(), key=lambda x: x[1], reverse=True), 1): | |
| pct = round(count / total * 100, 2) if total > 0 else 0 | |
| ward_rows.append({ | |
| 'S.N.': i, | |
| 'Caste/Ethnic Group': caste, | |
| 'Population': count, | |
| 'Percentage': pct | |
| }) | |
| ward_df = pd.DataFrame(ward_rows) | |
| sheet_name = f"Ward {ward_num}"[:31] # Excel sheet name limit | |
| ward_df.to_excel(writer, index=False, sheet_name=sheet_name) | |
| # Get the worksheet to add more content | |
| ws_ward = writer.sheets[sheet_name] | |
| # Track the next available row | |
| next_row = len(ward_rows) + 4 # Leave 2 empty rows after caste data | |
| # ===== ALL SURNAMES SECTION ===== | |
| ward_all_surnames = ward_data.get('surname_counts', {}) | |
| if ward_all_surnames: | |
| # Add section header for ALL surnames | |
| ws_ward.cell(row=next_row, column=1, value="📝 All Surnames (Matched & Unmatched)") | |
| # Add column headers | |
| ws_ward.cell(row=next_row + 1, column=1, value="S.N.") | |
| ws_ward.cell(row=next_row + 1, column=2, value="Surname") | |
| ws_ward.cell(row=next_row + 1, column=3, value="Count") | |
| ws_ward.cell(row=next_row + 1, column=4, value="Percentage") | |
| ws_ward.cell(row=next_row + 1, column=5, value="Matched Caste") | |
| # Add all surnames data sorted by count | |
| sorted_all_surnames = sorted(ward_all_surnames.items(), key=lambda x: x[1], reverse=True) | |
| for idx, (surname, count) in enumerate(sorted_all_surnames, 1): | |
| pct = round(count / total * 100, 2) if total > 0 else 0 | |
| matched_caste = SURNAME_TO_CASTE.get(surname, '❌ NOT MATCHED') | |
| ws_ward.cell(row=next_row + 1 + idx, column=1, value=idx) | |
| ws_ward.cell(row=next_row + 1 + idx, column=2, value=surname) | |
| ws_ward.cell(row=next_row + 1 + idx, column=3, value=count) | |
| ws_ward.cell(row=next_row + 1 + idx, column=4, value=f"{pct}%") | |
| ws_ward.cell(row=next_row + 1 + idx, column=5, value=matched_caste) | |
| # Update next_row for unmatched section | |
| next_row = next_row + len(sorted_all_surnames) + 4 | |
| # ===== UNMATCHED SURNAMES SECTION ===== | |
| ward_unmatched = ward_data.get('unmatched_surnames', {}) | |
| if ward_unmatched: | |
| # Add section header | |
| ws_ward.cell(row=next_row, column=1, value="❓ Unmatched Surnames Only (अन्य/अज्ञात)") | |
| # Add column headers | |
| ws_ward.cell(row=next_row + 1, column=1, value="S.N.") | |
| ws_ward.cell(row=next_row + 1, column=2, value="Surname") | |
| ws_ward.cell(row=next_row + 1, column=3, value="Count") | |
| ws_ward.cell(row=next_row + 1, column=4, value="Percentage") | |
| # Add unmatched surnames data sorted by count | |
| sorted_unmatched = sorted(ward_unmatched.items(), key=lambda x: x[1], reverse=True) | |
| for idx, (surname, count) in enumerate(sorted_unmatched, 1): | |
| pct = round(count / total * 100, 2) if total > 0 else 0 | |
| ws_ward.cell(row=next_row + 1 + idx, column=1, value=idx) | |
| ws_ward.cell(row=next_row + 1 + idx, column=2, value=surname) | |
| ws_ward.cell(row=next_row + 1 + idx, column=3, value=count) | |
| ws_ward.cell(row=next_row + 1 + idx, column=4, value=f"{pct}%") | |
| # Add summary of unmatched | |
| total_unmatched = sum(ward_unmatched.values()) | |
| summary_row = next_row + len(sorted_unmatched) + 3 | |
| ws_ward.cell(row=summary_row, column=1, value="Total Unmatched:") | |
| ws_ward.cell(row=summary_row, column=2, value=f"{len(ward_unmatched)} surnames") | |
| ws_ward.cell(row=summary_row, column=3, value=total_unmatched) | |
| ws_ward.cell(row=summary_row, column=4, value=f"{round(total_unmatched/total*100, 2) if total > 0 else 0}%") | |
| # Add pie chart to each ward sheet (for top 10) | |
| chart_rows = min(11, len(ward_rows) + 1) | |
| if chart_rows > 2: | |
| pie_chart = PieChart() | |
| pie_chart.title = f"Ward {ward_num} - Caste Distribution" | |
| data = Reference(ws_ward, min_col=3, min_row=1, max_row=chart_rows) | |
| cats = Reference(ws_ward, min_col=2, min_row=2, max_row=chart_rows) | |
| pie_chart.add_data(data, titles_from_data=True) | |
| pie_chart.set_categories(cats) | |
| pie_chart.width = 14 | |
| pie_chart.height = 10 | |
| ws_ward.add_chart(pie_chart, "F2") | |
| # 5. Unmatched Surnames Sheet | |
| all_unmatched_combined = {} | |
| ward_unmatched_detail = [] | |
| for ward_num in sorted(all_ward_numbers): | |
| ward_data = all_wards_data.get(ward_num, {}) | |
| unmatched = ward_data.get('unmatched_surnames', {}) | |
| for surname, count in unmatched.items(): | |
| all_unmatched_combined[surname] = all_unmatched_combined.get(surname, 0) + count | |
| ward_unmatched_detail.append({ | |
| 'Ward': ward_num, | |
| 'Unmatched Surname': surname, | |
| 'Count': count | |
| }) | |
| # Summary of unmatched surnames | |
| if all_unmatched_combined: | |
| unmatched_summary_df = pd.DataFrame([ | |
| {'S.N.': i+1, 'Unmatched Surname': surname, 'Total Count': count, | |
| 'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0} | |
| for i, (surname, count) in enumerate(sorted(all_unmatched_combined.items(), key=lambda x: x[1], reverse=True)) | |
| ]) | |
| unmatched_summary_df.to_excel(writer, index=False, sheet_name='Unmatched Surnames') | |
| # Add chart for top unmatched surnames | |
| ws_unmatched = writer.sheets['Unmatched Surnames'] | |
| chart_rows = min(21, len(unmatched_summary_df) + 1) | |
| if chart_rows > 2: | |
| bar_chart_unmatched = BarChart() | |
| bar_chart_unmatched.type = "bar" | |
| bar_chart_unmatched.style = 11 | |
| bar_chart_unmatched.title = "Top Unmatched Surnames" | |
| bar_chart_unmatched.y_axis.title = "Surname" | |
| bar_chart_unmatched.x_axis.title = "Count" | |
| data = Reference(ws_unmatched, min_col=3, min_row=1, max_row=chart_rows, max_col=3) | |
| cats = Reference(ws_unmatched, min_col=2, min_row=2, max_row=chart_rows) | |
| bar_chart_unmatched.add_data(data, titles_from_data=True) | |
| bar_chart_unmatched.set_categories(cats) | |
| bar_chart_unmatched.width = 18 | |
| bar_chart_unmatched.height = 12 | |
| ws_unmatched.add_chart(bar_chart_unmatched, "F2") | |
| # Ward-wise unmatched detail | |
| if ward_unmatched_detail: | |
| ward_unmatched_df = pd.DataFrame(ward_unmatched_detail) | |
| ward_unmatched_df.to_excel(writer, index=False, sheet_name='Unmatched by Ward') | |
| # 6. All Surnames Sheet (matched and unmatched) | |
| all_surnames_combined = {} | |
| for ward_num in sorted(all_ward_numbers): | |
| ward_data = all_wards_data.get(ward_num, {}) | |
| for surname, count in ward_data.get('surname_counts', {}).items(): | |
| all_surnames_combined[surname] = all_surnames_combined.get(surname, 0) + count | |
| if all_surnames_combined: | |
| # Import SURNAME_TO_CASTE for matching | |
| from age_group_analysis import SURNAME_TO_CASTE | |
| all_surnames_df = pd.DataFrame([ | |
| { | |
| 'S.N.': i+1, | |
| 'Surname': surname, | |
| 'Count': count, | |
| 'Percentage': round(count/total_voters_all*100, 2) if total_voters_all > 0 else 0, | |
| 'Matched Caste': SURNAME_TO_CASTE.get(surname, 'NOT MATCHED') | |
| } | |
| for i, (surname, count) in enumerate(sorted(all_surnames_combined.items(), key=lambda x: x[1], reverse=True)) | |
| ]) | |
| all_surnames_df.to_excel(writer, index=False, sheet_name='All Surnames') | |
| # 7. Metadata sheet | |
| total_unmatched_voters = sum(all_unmatched_combined.values()) if all_unmatched_combined else 0 | |
| metadata = pd.DataFrame({ | |
| 'Field': ['Province', 'District', 'Municipality', 'Total Wards', 'Total Voters', | |
| 'Total Caste Groups', 'Total Unique Surnames', 'Unmatched Surnames Count', | |
| 'Unmatched Voters Count', 'Match Rate (%)', 'Generated On'], | |
| 'Value': [ | |
| selected_province_age, | |
| selected_district_age, | |
| selected_municipality, | |
| len(all_ward_numbers), | |
| total_voters_all, | |
| len(all_castes), | |
| len(all_surnames_combined) if all_surnames_combined else 0, | |
| len(all_unmatched_combined), | |
| total_unmatched_voters, | |
| round((total_voters_all - total_unmatched_voters) / total_voters_all * 100, 2) if total_voters_all > 0 else 0, | |
| datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
| ] | |
| }) | |
| metadata.to_excel(writer, index=False, sheet_name='Metadata') | |
| # Download button | |
| filename = f"{selected_municipality.replace(' ', '_')}_all_wards_caste_data.xlsx" | |
| st.download_button( | |
| label="📥 Download All Wards Caste Data (Excel with Charts)", | |
| data=buffer.getvalue(), | |
| file_name=filename, | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | |
| use_container_width=True | |
| ) | |
| # Show preview charts in Streamlit | |
| st.markdown("---") | |
| st.markdown("**📊 Preview Charts**") | |
| # Municipality total caste distribution chart | |
| muni_caste_totals = {} | |
| for wd in all_wards_data.values(): | |
| for caste, count in wd.get('caste_data', {}).items(): | |
| muni_caste_totals[caste] = muni_caste_totals.get(caste, 0) + count | |
| top_castes = dict(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True)[:15]) | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| fig_bar = px.bar( | |
| x=list(top_castes.values()), | |
| y=list(top_castes.keys()), | |
| orientation='h', | |
| title=f"Top 15 Caste/Ethnic Groups - {selected_municipality}", | |
| labels={'x': 'Population', 'y': 'Caste/Ethnic Group'} | |
| ) | |
| fig_bar.update_layout(yaxis={'categoryorder': 'total ascending'}, height=500) | |
| st.plotly_chart(fig_bar, use_container_width=True) | |
| with col2: | |
| # Pie chart for top 10 | |
| top_10 = dict(sorted(muni_caste_totals.items(), key=lambda x: x[1], reverse=True)[:10]) | |
| fig_pie = px.pie( | |
| values=list(top_10.values()), | |
| names=list(top_10.keys()), | |
| title=f"Caste Distribution (Top 10) - {selected_municipality}" | |
| ) | |
| fig_pie.update_layout(height=500) | |
| st.plotly_chart(fig_pie, use_container_width=True) | |
| # Show unmatched surnames preview | |
| if all_unmatched: | |
| st.markdown("**❓ Top Unmatched Surnames (Preview)**") | |
| top_unmatched = dict(sorted(all_unmatched.items(), key=lambda x: x[1], reverse=True)[:10]) | |
| unmatched_preview_df = pd.DataFrame([ | |
| {'Surname': s, 'Count': c} for s, c in top_unmatched.items() | |
| ]) | |
| st.dataframe(unmatched_preview_df, use_container_width=True, hide_index=True) | |
| # Also show a preview table | |
| st.markdown("**Preview: Ward-wise Caste Distribution Summary**") | |
| preview_df = summary_df.head(10) | |
| st.dataframe(preview_df, use_container_width=True) | |
| st.markdown("---") | |
| # Analyze button | |
| if st.button("🔍 Analyze Caste/Ethnic Distribution", type="primary", use_container_width=True): | |
| with st.spinner("Analyzing caste/ethnic group distribution..."): | |
| if municipality_id_caste: | |
| try: | |
| # Clear any previous cache to ensure fresh data | |
| if 'caste_analysis_data_local' in st.session_state: | |
| del st.session_state['caste_analysis_data_local'] | |
| if ward_data_available and selected_wards: | |
| # Ward-specific analysis - convert to list of ints | |
| ward_list = [int(w) for w in selected_wards] | |
| st.info(f"Querying municipality {municipality_id_caste} with wards {ward_list}") | |
| caste_data, total_analyzed, surname_counts, unmatched_surnames = get_ward_level_caste_data(municipality_id_caste, ward_list) | |
| else: | |
| # Full municipality analysis | |
| st.info(f"Querying entire municipality {municipality_id_caste}") | |
| caste_data, total_analyzed, surname_counts, unmatched_surnames = get_caste_data_for_municipality(municipality_id_caste) | |
| if caste_data and total_analyzed > 0: | |
| # Store in session state | |
| st.session_state['caste_analysis_data_local'] = caste_data | |
| st.session_state['caste_analysis_total_local'] = total_analyzed | |
| st.session_state['caste_analysis_scope_local'] = analysis_level | |
| st.session_state['surname_counts_local'] = surname_counts | |
| st.session_state['unmatched_surnames_local'] = unmatched_surnames | |
| st.success(f"✅ Analysis complete! Found {total_analyzed:,} voters across {len(caste_data)} caste/ethnic groups") | |
| st.rerun() | |
| else: | |
| st.warning(f"⚠️ Query returned {total_analyzed} voters and {len(caste_data) if caste_data else 0} caste groups.") | |
| except Exception as e: | |
| st.error(f"Error during analysis: {str(e)}") | |
| import traceback | |
| st.code(traceback.format_exc()) | |
| else: | |
| st.error(f"❌ Could not identify municipality ID for: {selected_municipality}") | |
| # Display caste analysis results if available | |
| if 'caste_analysis_data_local' in st.session_state and st.session_state['caste_analysis_data_local']: | |
| caste_data = st.session_state['caste_analysis_data_local'] | |
| total_analyzed = st.session_state['caste_analysis_total_local'] | |
| analysis_scope_caste = st.session_state.get('caste_analysis_scope_local', 'Selected Area') | |
| st.markdown("---") | |
| st.markdown(f"### 📊 Caste/Ethnic Group Distribution Results") | |
| st.info(f"**Analysis Scope:** {analysis_scope_caste} | **Total Voters Analyzed:** {total_analyzed:,}") | |
| # Convert to DataFrame for visualization | |
| caste_df = pd.DataFrame([ | |
| {'Caste/Ethnic Group': caste, 'Population': count, 'Percentage': round(count/total_analyzed*100, 2)} | |
| for caste, count in sorted(caste_data.items(), key=lambda x: x[1], reverse=True) | |
| ]) | |
| # Display summary metrics | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Total Groups", len(caste_data)) | |
| with col2: | |
| largest_group = caste_df.iloc[0] | |
| group_name = largest_group['Caste/Ethnic Group'].split('(')[0].strip() | |
| st.metric("Largest Group", group_name[:20] + "..." if len(group_name) > 20 else group_name) | |
| with col3: | |
| st.metric("Population", f"{largest_group['Population']:,}") | |
| with col4: | |
| st.metric("Share", f"{largest_group['Percentage']:.1f}%") | |
| # Visualizations in tabs | |
| tab1, tab2, tab3, tab4, tab5 = st.tabs(["📊 Bar Chart", "🥧 Pie Chart", "📋 Data Table", "📝 All Surnames", "❓ Unmatched Surnames"]) | |
| with tab1: | |
| # Bar chart - top groups | |
| top_n = min(20, len(caste_df)) | |
| fig = px.bar( | |
| caste_df.head(top_n), | |
| x='Population', | |
| y='Caste/Ethnic Group', | |
| orientation='h', | |
| title=f"Top {top_n} Caste/Ethnic Groups by Population", | |
| color='Population', | |
| color_continuous_scale='Viridis', | |
| text='Population' | |
| ) | |
| fig.update_traces(texttemplate='%{text:,}', textposition='outside') | |
| fig.update_layout( | |
| yaxis={'categoryorder':'total ascending'}, | |
| height=max(400, top_n * 25), | |
| showlegend=False | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tab2: | |
| # Pie chart - top 10 + others | |
| if len(caste_df) > 10: | |
| top_10 = caste_df.head(10).copy() | |
| others_count = caste_df.iloc[10:]['Population'].sum() | |
| others_row = pd.DataFrame([{ | |
| 'Caste/Ethnic Group': 'Others (Combined)', | |
| 'Population': others_count, | |
| 'Percentage': round(others_count/total_analyzed*100, 2) | |
| }]) | |
| pie_data = pd.concat([top_10, others_row], ignore_index=True) | |
| else: | |
| pie_data = caste_df | |
| fig = px.pie( | |
| pie_data, | |
| values='Population', | |
| names='Caste/Ethnic Group', | |
| title="Caste/Ethnic Group Distribution", | |
| color_discrete_sequence=px.colors.qualitative.Set3 | |
| ) | |
| fig.update_traces(textposition='inside', textinfo='percent+label') | |
| fig.update_layout(height=600) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tab3: | |
| # Detailed table with serial numbers | |
| caste_df_display = caste_df.copy() | |
| caste_df_display.insert(0, 'S.N.', range(1, len(caste_df_display) + 1)) | |
| st.markdown(f"**Complete List - Serially Ordered by Population**") | |
| st.dataframe( | |
| caste_df_display.style.format({ | |
| 'Population': '{:,}', | |
| 'Percentage': '{:.2f}%' | |
| }).background_gradient(subset=['Population'], cmap='YlOrRd'), | |
| use_container_width=True, | |
| height=500 | |
| ) | |
| # Download buttons | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| csv = caste_df_display.to_csv(index=False, encoding='utf-8-sig') | |
| st.download_button( | |
| label="📥 Download as CSV", | |
| data=csv, | |
| file_name=f"caste_distribution_{selected_municipality.replace(' ', '_')}.csv", | |
| mime="text/csv", | |
| use_container_width=True | |
| ) | |
| with col2: | |
| from io import BytesIO | |
| buffer = BytesIO() | |
| with pd.ExcelWriter(buffer, engine='openpyxl') as writer: | |
| caste_df_display.to_excel(writer, index=False, sheet_name='Caste Distribution') | |
| st.download_button( | |
| label="📊 Download as Excel", | |
| data=buffer.getvalue(), | |
| file_name=f"caste_distribution_{selected_municipality.replace(' ', '_')}.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | |
| use_container_width=True | |
| ) | |
| with tab4: | |
| # All surnames found | |
| st.markdown("**All Surnames Found in the Data**") | |
| st.info("This shows every surname detected, sorted by frequency") | |
| if 'surname_counts_local' in st.session_state: | |
| surname_counts = st.session_state['surname_counts_local'] | |
| surname_df = pd.DataFrame([ | |
| {'S.N.': i+1, 'Surname': surname, 'Count': count, 'Percentage': round(count/total_analyzed*100, 2)} | |
| for i, (surname, count) in enumerate(sorted(surname_counts.items(), key=lambda x: x[1], reverse=True)) | |
| ]) | |
| st.metric("Total Unique Surnames", len(surname_df)) | |
| st.dataframe( | |
| surname_df.style.format({ | |
| 'Count': '{:,}', | |
| 'Percentage': '{:.2f}%' | |
| }).background_gradient(subset=['Count'], cmap='Blues'), | |
| use_container_width=True, | |
| height=500 | |
| ) | |
| # Download buttons | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| csv_surnames = surname_df.to_csv(index=False, encoding='utf-8-sig') | |
| st.download_button( | |
| label="📥 Download as CSV", | |
| data=csv_surnames, | |
| file_name=f"all_surnames_{selected_municipality.replace(' ', '_')}.csv", | |
| mime="text/csv", | |
| use_container_width=True | |
| ) | |
| with col2: | |
| from io import BytesIO | |
| buffer = BytesIO() | |
| with pd.ExcelWriter(buffer, engine='openpyxl') as writer: | |
| surname_df.to_excel(writer, index=False, sheet_name='All Surnames') | |
| st.download_button( | |
| label="📊 Download as Excel", | |
| data=buffer.getvalue(), | |
| file_name=f"all_surnames_{selected_municipality.replace(' ', '_')}.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | |
| use_container_width=True | |
| ) | |
| with tab5: | |
| # Unmatched surnames | |
| st.markdown("**Surnames NOT in Caste Database**") | |
| st.warning("These surnames were found but don't match any caste/ethnic group in the database") | |
| if 'unmatched_surnames_local' in st.session_state: | |
| unmatched = st.session_state['unmatched_surnames_local'] | |
| if unmatched: | |
| unmatched_df = pd.DataFrame([ | |
| {'S.N.': i+1, 'Surname': surname, 'Count': count, 'Percentage': round(count/total_analyzed*100, 2)} | |
| for i, (surname, count) in enumerate(sorted(unmatched.items(), key=lambda x: x[1], reverse=True)) | |
| ]) | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| st.metric("Unmatched Surnames", len(unmatched_df)) | |
| with col2: | |
| total_unmatched = unmatched_df['Count'].sum() | |
| st.metric("Total Voters", f"{total_unmatched:,}") | |
| with col3: | |
| unmatched_pct = (total_unmatched / total_analyzed * 100) | |
| st.metric("% of Total", f"{unmatched_pct:.1f}%") | |
| st.markdown("**Top 50 Unmatched Surnames**") | |
| st.dataframe( | |
| unmatched_df.head(50).style.format({ | |
| 'Count': '{:,}', | |
| 'Percentage': '{:.2f}%' | |
| }).background_gradient(subset=['Count'], cmap='Oranges'), | |
| use_container_width=True, | |
| height=500 | |
| ) | |
| # Download buttons | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| csv_unmatched = unmatched_df.to_csv(index=False, encoding='utf-8-sig') | |
| st.download_button( | |
| label="📥 Download as CSV", | |
| data=csv_unmatched, | |
| file_name=f"unmatched_surnames_{selected_municipality.replace(' ', '_')}.csv", | |
| mime="text/csv", | |
| use_container_width=True | |
| ) | |
| with col2: | |
| from io import BytesIO | |
| buffer = BytesIO() | |
| with pd.ExcelWriter(buffer, engine='openpyxl') as writer: | |
| unmatched_df.to_excel(writer, index=False, sheet_name='Unmatched Surnames') | |
| st.download_button( | |
| label="📊 Download as Excel", | |
| data=buffer.getvalue(), | |
| file_name=f"unmatched_surnames_{selected_municipality.replace(' ', '_')}.xlsx", | |
| mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | |
| use_container_width=True | |
| ) | |
| else: | |
| st.success("✅ All surnames matched successfully!") | |
| st.markdown("---") | |
| st.subheader("📊 Age Group Analysis") | |
| if not muni_new.empty: | |
| # Calculate metrics | |
| total_new = muni_new['count'].sum() | |
| total_old = muni_old['count'].sum() | |
| change = total_new - total_old | |
| change_pct = (change / total_old * 100) if total_old > 0 else 0 | |
| # Metrics | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.metric("Total Voters (Current)", f"{total_new:,}", f"{change:+,}") | |
| with col2: | |
| male_new = muni_new[muni_new['gender'] == 'पुरुष']['count'].sum() | |
| st.metric("Male Voters", f"{male_new:,}") | |
| with col3: | |
| female_new = muni_new[muni_new['gender'] == 'महिला']['count'].sum() | |
| st.metric("Female Voters", f"{female_new:,}") | |
| with col4: | |
| st.metric("Growth Rate", f"{change_pct:+.2f}%") | |
| # Visualizations | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("**Age Distribution (Current)**") | |
| # Group by age | |
| age_grouped = muni_new.groupby('age_group')['count'].sum().reset_index() | |
| title_text = f"Age Distribution in {selected_municipality}" | |
| if ward_data_available and selected_wards: | |
| if len(selected_wards) == 1: | |
| title_text = f"Age Distribution - Ward {selected_wards[0]}" | |
| else: | |
| title_text = f"Age Distribution - {len(selected_wards)} Wards" | |
| fig = px.pie( | |
| age_grouped, | |
| values='count', | |
| names='age_group', | |
| title=title_text, | |
| color_discrete_sequence=px.colors.qualitative.Set3 | |
| ) | |
| fig.update_traces(textposition='inside', textinfo='percent+label') | |
| fig.update_layout(height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| st.markdown("**Gender Distribution by Age**") | |
| gender_data = muni_new[muni_new['gender'].isin(['पुरुष', 'महिला'])] | |
| gender_grouped = gender_data.groupby(['age_group', 'gender'])['count'].sum().reset_index() | |
| title_text_gender = f"Gender by Age in {selected_municipality}" | |
| if ward_data_available and selected_wards: | |
| if len(selected_wards) == 1: | |
| title_text_gender = f"Gender by Age - Ward {selected_wards[0]}" | |
| else: | |
| title_text_gender = f"Gender by Age - {len(selected_wards)} Wards" | |
| fig = px.bar( | |
| gender_grouped, | |
| x='age_group', | |
| y='count', | |
| color='gender', | |
| title=title_text_gender, | |
| color_discrete_map={'पुरुष': '#3B82F6', 'महिला': '#EC4899'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Comparison with previous election | |
| st.markdown("**📊 Age Group Comparison: Previous vs Current**") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Old vs New comparison | |
| age_old = muni_old.groupby('age_group')['count'].sum().reset_index() | |
| age_old.columns = ['age_group', 'old_count'] | |
| age_new = muni_new.groupby('age_group')['count'].sum().reset_index() | |
| age_new.columns = ['age_group', 'new_count'] | |
| age_comparison = pd.merge(age_old, age_new, on='age_group', how='outer').fillna(0) | |
| age_comparison['change'] = age_comparison['new_count'] - age_comparison['old_count'] | |
| age_comparison['change_pct'] = (age_comparison['change'] / age_comparison['old_count'] * 100).round(2) | |
| age_comparison['change_pct'] = age_comparison['change_pct'].replace([np.inf, -np.inf], 0) | |
| fig = px.bar( | |
| age_comparison, | |
| x='age_group', | |
| y=['old_count', 'new_count'], | |
| barmode='group', | |
| title="Voter Count Comparison", | |
| labels={'value': 'Voters', 'variable': 'Period'}, | |
| color_discrete_map={'old_count': '#F59E0B', 'new_count': '#10B981'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Growth rate by age group | |
| fig = px.bar( | |
| age_comparison, | |
| x='age_group', | |
| y='change_pct', | |
| title="Growth Rate by Age Group (%)", | |
| color='change_pct', | |
| color_continuous_scale='RdYlGn', | |
| labels={'age_group': 'Age Group', 'change_pct': 'Growth (%)'}, | |
| color_continuous_midpoint=0 | |
| ) | |
| fig.update_layout(height=350) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Comparison table | |
| st.markdown("**Age Group Statistics Table**") | |
| age_comparison_display = age_comparison[['age_group', 'old_count', 'new_count', 'change', 'change_pct']].copy() | |
| age_comparison_display.columns = ['Age Group', 'Previous', 'Current', 'Change', 'Growth %'] | |
| st.dataframe( | |
| age_comparison_display.style.format({ | |
| 'Previous': '{:,.0f}', | |
| 'Current': '{:,.0f}', | |
| 'Change': '{:+,.0f}', | |
| 'Growth %': '{:+.2f}' | |
| }), | |
| use_container_width=True | |
| ) | |
| # Ward-by-ward comparison if multiple wards selected | |
| if ward_data_available and selected_wards and len(selected_wards) > 1: | |
| st.markdown("---") | |
| st.markdown("**📊 Ward-by-Ward Comparison**") | |
| # Prepare ward comparison data | |
| ward_comparison_list = [] | |
| for ward_num in selected_wards: | |
| ward_data_new = ward_new_filtered[ward_new_filtered['ward_number'] == ward_num] | |
| ward_data_old = ward_old_filtered[ward_old_filtered['ward_number'] == ward_num] | |
| total_new_ward = ward_data_new['count'].sum() | |
| total_old_ward = ward_data_old['count'].sum() | |
| change_ward = total_new_ward - total_old_ward | |
| change_pct_ward = (change_ward / total_old_ward * 100) if total_old_ward > 0 else 0 | |
| male_ward = ward_data_new[ward_data_new['gender'] == 'पुरुष']['count'].sum() | |
| female_ward = ward_data_new[ward_data_new['gender'] == 'महिला']['count'].sum() | |
| ward_comparison_list.append({ | |
| 'Ward': f"Ward {ward_num}", | |
| 'Previous': total_old_ward, | |
| 'Current': total_new_ward, | |
| 'Change': change_ward, | |
| 'Growth %': change_pct_ward, | |
| 'Male': male_ward, | |
| 'Female': female_ward | |
| }) | |
| ward_comparison_df = pd.DataFrame(ward_comparison_list) | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Ward comparison bar chart | |
| fig = px.bar( | |
| ward_comparison_df, | |
| x='Ward', | |
| y='Current', | |
| title="Current Voters by Ward", | |
| color='Current', | |
| color_continuous_scale='Viridis' | |
| ) | |
| fig.update_layout(yaxis_tickformat=',', showlegend=False) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Growth rate comparison | |
| fig = px.bar( | |
| ward_comparison_df, | |
| x='Ward', | |
| y='Growth %', | |
| title="Growth Rate by Ward", | |
| color='Growth %', | |
| color_continuous_scale='RdYlGn', | |
| color_continuous_midpoint=0 | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Ward comparison table | |
| st.dataframe( | |
| ward_comparison_df.style.format({ | |
| 'Previous': '{:,.0f}', | |
| 'Current': '{:,.0f}', | |
| 'Change': '{:+,.0f}', | |
| 'Growth %': '{:+.2f}%', | |
| 'Male': '{:,.0f}', | |
| 'Female': '{:,.0f}' | |
| }).background_gradient(subset=['Growth %'], cmap='RdYlGn'), | |
| use_container_width=True | |
| ) | |
| else: | |
| st.warning(f"No age group data available for {selected_municipality}") | |
| def show_comparative_study(old_province, new_province, old_district, new_district, | |
| old_municipality, new_municipality, old_age, new_age): | |
| """Comprehensive comparative study.""" | |
| st.header("📈 Comparative Study: Old vs Current Voter Data") | |
| # Province comparison | |
| prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name') | |
| # Summary statistics | |
| st.subheader("📊 Summary Statistics") | |
| col1, col2, col3 = st.columns(3) | |
| old_total = old_province['total_voters'].sum() | |
| new_total = new_province['total_voters'].sum() | |
| with col1: | |
| st.markdown("### Previous Election") | |
| st.write(f"**Total Voters:** {old_total:,}") | |
| st.write(f"**Male:** {old_province['male_voters'].sum():,}") | |
| st.write(f"**Female:** {old_province['female_voters'].sum():,}") | |
| st.write(f"**Avg Age:** {old_province['avg_age'].mean():.1f}") | |
| with col2: | |
| st.markdown("### Current Election") | |
| st.write(f"**Total Voters:** {new_total:,}") | |
| st.write(f"**Male:** {new_province['male_voters'].sum():,}") | |
| st.write(f"**Female:** {new_province['female_voters'].sum():,}") | |
| st.write(f"**Avg Age:** {new_province['avg_age'].mean():.1f}") | |
| with col3: | |
| st.markdown("### Change Analysis") | |
| change = new_total - old_total | |
| change_pct = (change / old_total) * 100 | |
| st.write(f"**Net Change:** {change:+,}") | |
| st.write(f"**Growth Rate:** {change_pct:+.2f}%") | |
| st.write(f"**Male Change:** {new_province['male_voters'].sum() - old_province['male_voters'].sum():+,}") | |
| st.write(f"**Female Change:** {new_province['female_voters'].sum() - old_province['female_voters'].sum():+,}") | |
| st.markdown("---") | |
| # Visual comparisons | |
| st.subheader("📉 Visual Comparison Analysis") | |
| tab1, tab2, tab3, tab4 = st.tabs(["Province Level", "Gender Analysis", "Age Analysis", "Growth Trends"]) | |
| with tab1: | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Side by side comparison | |
| fig = make_subplots(rows=1, cols=2, subplot_titles=("Previous Election", "Current Election"), | |
| specs=[[{"type": "pie"}, {"type": "pie"}]]) | |
| fig.add_trace( | |
| go.Pie(labels=old_province['province_name'], values=old_province['total_voters'], name="Previous"), | |
| row=1, col=1 | |
| ) | |
| fig.add_trace( | |
| go.Pie(labels=new_province['province_name'], values=new_province['total_voters'], name="Current"), | |
| row=1, col=2 | |
| ) | |
| fig.update_layout(title_text="Province Distribution Comparison") | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Growth by province | |
| fig = px.bar( | |
| prov_comparison, | |
| x='province_name', | |
| y=['total_voters_old', 'total_voters_new'], | |
| barmode='group', | |
| title="Province-wise Voter Count Comparison", | |
| labels={'value': 'Voters', 'variable': 'Period'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tab2: | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Gender ratio comparison | |
| gender_comparison = pd.DataFrame({ | |
| 'Province': prov_comparison['province_name'].tolist() * 2, | |
| 'Gender': ['Male'] * len(prov_comparison) + ['Female'] * len(prov_comparison), | |
| 'Previous': prov_comparison['male_voters_old'].tolist() + prov_comparison['female_voters_old'].tolist(), | |
| 'Current': prov_comparison['male_voters_new'].tolist() + prov_comparison['female_voters_new'].tolist() | |
| }) | |
| fig = px.sunburst( | |
| gender_comparison, | |
| path=['Gender', 'Province'], | |
| values='Current', | |
| title="Gender Distribution by Province (Current)" | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Gender ratio change | |
| prov_comparison['male_ratio_old'] = prov_comparison['male_voters_old'] / prov_comparison['total_voters_old'] * 100 | |
| prov_comparison['male_ratio_new'] = prov_comparison['male_voters_new'] / prov_comparison['total_voters_new'] * 100 | |
| prov_comparison['ratio_change'] = prov_comparison['male_ratio_new'] - prov_comparison['male_ratio_old'] | |
| fig = px.bar( | |
| prov_comparison, | |
| x='province_name', | |
| y='ratio_change', | |
| color='ratio_change', | |
| color_continuous_scale='RdYlGn', | |
| title="Male Ratio Change by Province (%)" | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tab3: | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| # Age group comparison | |
| old_age_grouped = old_age.groupby('age_group')['count'].sum().reset_index() | |
| old_age_grouped['period'] = 'Previous' | |
| new_age_grouped = new_age.groupby('age_group')['count'].sum().reset_index() | |
| new_age_grouped['period'] = 'Current' | |
| age_combined = pd.concat([old_age_grouped, new_age_grouped]) | |
| fig = px.bar( | |
| age_combined, | |
| x='age_group', | |
| y='count', | |
| color='period', | |
| barmode='group', | |
| title="Age Distribution Comparison", | |
| color_discrete_map={'Previous': '#3B82F6', 'Current': '#10B981'} | |
| ) | |
| fig.update_layout(yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Age change analysis | |
| age_change = pd.merge( | |
| old_age_grouped[['age_group', 'count']], | |
| new_age_grouped[['age_group', 'count']], | |
| on='age_group', | |
| suffixes=('_old', '_new') | |
| ) | |
| age_change['change'] = age_change['count_new'] - age_change['count_old'] | |
| age_change['change_pct'] = (age_change['change'] / age_change['count_old'] * 100).round(2) | |
| fig = px.bar( | |
| age_change, | |
| x='age_group', | |
| y='change_pct', | |
| color='change_pct', | |
| color_continuous_scale='RdYlGn', | |
| title="Voter Change by Age Group (%)" | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with tab4: | |
| # Growth analysis | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| fig = px.scatter( | |
| prov_comparison, | |
| x='total_voters_old', | |
| y='total_voters_new', | |
| size='voter_change', | |
| color='voter_change_pct', | |
| hover_name='province_name', | |
| title="Voter Growth Scatter Plot", | |
| labels={'total_voters_old': 'Previous Voters', 'total_voters_new': 'Current Voters'}, | |
| color_continuous_scale='RdYlGn' | |
| ) | |
| # Add diagonal line (no change line) | |
| max_val = max(prov_comparison['total_voters_new'].max(), prov_comparison['total_voters_old'].max()) | |
| fig.add_trace(go.Scatter(x=[0, max_val], y=[0, max_val], mode='lines', | |
| name='No Change Line', line=dict(dash='dash', color='gray'))) | |
| fig.update_layout(xaxis_tickformat=',', yaxis_tickformat=',') | |
| st.plotly_chart(fig, use_container_width=True) | |
| with col2: | |
| # Growth rate ranking | |
| sorted_growth = prov_comparison.sort_values('voter_change_pct', ascending=True) | |
| fig = px.bar( | |
| sorted_growth, | |
| y='province_name', | |
| x='voter_change_pct', | |
| orientation='h', | |
| color='voter_change_pct', | |
| color_continuous_scale='RdYlGn', | |
| title="Growth Rate by Province (%)" | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| def show_detailed_reports(old_province, new_province, old_district, new_district, | |
| old_municipality, new_municipality, old_overall, new_overall): | |
| """Generate detailed reports.""" | |
| st.header("📋 Detailed Reports & Export") | |
| report_type = st.selectbox( | |
| "Select Report Type", | |
| ["Executive Summary", "Province Report", "District Report", "Full Data Export"] | |
| ) | |
| if report_type == "Executive Summary": | |
| st.subheader("📑 Executive Summary Report") | |
| old_total = int(old_overall['total_voters'].iloc[0]) | |
| new_total = int(new_overall['total_voters'].iloc[0]) | |
| change = new_total - old_total | |
| change_pct = (change / old_total) * 100 | |
| st.markdown(f""" | |
| ## Nepal Election Voter Data Analysis Report | |
| ### Executive Summary | |
| **Report Date:** January 11, 2026 | |
| --- | |
| ### Key Findings | |
| 1. **Total Voter Base Growth** | |
| - Previous Election: **{old_total:,}** voters | |
| - Current Election: **{new_total:,}** voters | |
| - Net Increase: **{change:+,}** voters ({change_pct:+.2f}%) | |
| 2. **Gender Distribution** | |
| - Male Voters (Current): **{int(new_overall['male_voters'].iloc[0]):,}** | |
| - Female Voters (Current): **{int(new_overall['female_voters'].iloc[0]):,}** | |
| - Gender Ratio: **{(new_overall['male_voters'].iloc[0]/new_overall['female_voters'].iloc[0]):.2f}** M:F | |
| 3. **Age Demographics** | |
| - Average Voter Age: **{new_overall['avg_age'].iloc[0]:.1f}** years | |
| - Age Range: **{int(new_overall['min_age'].iloc[0])}** - **{int(new_overall['max_age'].iloc[0])}** years | |
| 4. **Administrative Coverage** | |
| - Provinces: **7** | |
| - Districts: **77** | |
| - Local Levels: **753** | |
| --- | |
| ### Provincial Highlights | |
| | Province | Previous Voters | Current Voters | Change | Growth % | | |
| |----------|-----------------|----------------|--------|----------| | |
| """) | |
| prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name') | |
| for _, row in prov_comparison.iterrows(): | |
| st.markdown(f"| {row['province_name']} | {int(row['total_voters_old']):,} | {int(row['total_voters_new']):,} | {int(row['voter_change']):+,} | {row['voter_change_pct']:+.2f}% |") | |
| st.markdown(""" | |
| --- | |
| ### Conclusions | |
| The voter registration data shows a healthy growth in the electoral base, with all provinces | |
| showing positive growth. This indicates successful voter registration drives and natural | |
| population growth reaching voting age. | |
| The gender balance has slightly improved with female participation showing steady growth. | |
| The average age of voters remains stable, suggesting a balanced demographic distribution. | |
| """) | |
| elif report_type == "Province Report": | |
| st.subheader("📊 Province-wise Detailed Report") | |
| prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name') | |
| st.dataframe( | |
| prov_comparison.style.format({ | |
| 'total_voters_old': '{:,.0f}', | |
| 'total_voters_new': '{:,.0f}', | |
| 'voter_change': '{:+,.0f}', | |
| 'voter_change_pct': '{:+.2f}%', | |
| 'male_voters_old': '{:,.0f}', | |
| 'male_voters_new': '{:,.0f}', | |
| 'female_voters_old': '{:,.0f}', | |
| 'female_voters_new': '{:,.0f}' | |
| }), | |
| use_container_width=True | |
| ) | |
| # Download button | |
| csv = prov_comparison.to_csv(index=False) | |
| st.download_button( | |
| label="📥 Download Province Report (CSV)", | |
| data=csv, | |
| file_name="province_voter_report.csv", | |
| mime="text/csv" | |
| ) | |
| elif report_type == "District Report": | |
| st.subheader("🏛️ District-wise Detailed Report") | |
| dist_comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name') | |
| st.dataframe( | |
| dist_comparison.style.format({ | |
| 'total_voters_old': '{:,.0f}', | |
| 'total_voters_new': '{:,.0f}', | |
| 'voter_change': '{:+,.0f}', | |
| 'voter_change_pct': '{:+.2f}%' | |
| }), | |
| use_container_width=True, | |
| height=500 | |
| ) | |
| csv = dist_comparison.to_csv(index=False) | |
| st.download_button( | |
| label="📥 Download District Report (CSV)", | |
| data=csv, | |
| file_name="district_voter_report.csv", | |
| mime="text/csv" | |
| ) | |
| elif report_type == "Full Data Export": | |
| st.subheader("📦 Full Data Export") | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| prov_comparison = create_comparative_dataframe(old_province, new_province, 'province_id', 'province_name') | |
| st.download_button( | |
| label="📥 Province Data", | |
| data=prov_comparison.to_csv(index=False), | |
| file_name="province_data.csv", | |
| mime="text/csv" | |
| ) | |
| with col2: | |
| dist_comparison = create_comparative_dataframe(old_district, new_district, 'district_id', 'district_name') | |
| st.download_button( | |
| label="📥 District Data", | |
| data=dist_comparison.to_csv(index=False), | |
| file_name="district_data.csv", | |
| mime="text/csv" | |
| ) | |
| with col3: | |
| muni_comparison = create_comparative_dataframe(old_municipality, new_municipality, 'municipality_id', 'municipality_name') | |
| st.download_button( | |
| label="📥 Local Level Data", | |
| data=muni_comparison.to_csv(index=False), | |
| file_name="local_level_data.csv", | |
| mime="text/csv" | |
| ) | |
| if __name__ == "__main__": | |
| app_entry() | |