# electionnepal / constituency_analysis.py
# rockerritesh's picture
# feat: add constituency winning threshold analysis page
# fa2b444 unverified
"""
Constituency Threshold Analysis Page
=====================================
Displays winning threshold analysis for all 165 federal constituencies.
Shows voter counts, estimated turnout, and votes needed to win under
different race scenarios.
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import json
import os
from constituency_utils import CONSTITUENCY_MAPPING, PROVINCE_NAMES
def _sum_voters(municipalities):
    """Aggregate the voter counts of a list of municipality records."""
    return {
        'total_voters': sum(m['total_voters'] for m in municipalities),
        'male_voters': sum(m['male_voters'] for m in municipalities),
        'female_voters': sum(m['female_voters'] for m in municipalities),
        'n_municipalities': len(municipalities),
    }


def _match_districts(admin_structure):
    """Map administrative district IDs to their CONSTITUENCY_MAPPING district.

    Returns a dict keyed by integer district ID whose values hold the
    ``province_id`` and the matching ``const_district_name``.
    """
    matched = {}
    for prov_id_str, prov_data in admin_structure.items():
        prov_id = int(prov_id_str)
        if prov_id not in CONSTITUENCY_MAPPING:
            continue
        for dist_id_str, dist_data in prov_data.get('districts', {}).items():
            admin_name = dist_data['name_nepali']
            for const_dist_name in CONSTITUENCY_MAPPING[prov_id]:
                # The name comparison is intentionally loose: a substring in
                # either direction, or the same first four characters.  The
                # first candidate that matches wins.
                if (const_dist_name in admin_name
                        or admin_name in const_dist_name
                        or const_dist_name[:4] == admin_name[:4]):
                    matched[int(dist_id_str)] = {
                        'province_id': prov_id,
                        'const_district_name': const_dist_name,
                    }
                    break
    return matched


def _constituency_origins():
    """Index every constituency name to its first (province_id, district)."""
    origins = {}
    for pid, districts in CONSTITUENCY_MAPPING.items():
        for dname, clist in districts.items():
            for cname in clist:
                # setdefault keeps the first occurrence in mapping order.
                origins.setdefault(cname, (pid, dname))
    return origins


def load_constituency_data():
    """Return per-constituency voter totals as a DataFrame.

    If ``data/constituency_analysis_report.csv`` exists it is read and
    returned unchanged.  Otherwise the table is built from
    ``data/cached_stats/new_stats.json`` (municipality voter counts) and
    ``data/admin/web_data/complete_administrative_structure.json``
    (province/district structure) together with ``CONSTITUENCY_MAPPING``:

    1. administrative districts are matched to mapping districts by name;
    2. municipalities are grouped by ``district_id``;
    3. each district's municipalities, ordered by ``municipality_id``, are
       split into near-equal consecutive chunks, one per constituency, and
       their voter counts are summed.

    NOTE(review): step 3 distributes municipalities evenly by ID order; no
    municipality-to-constituency assignment is read here, so the per-
    constituency figures look like an approximation -- confirm.

    Returns:
        On the build path, a DataFrame sorted by ``total_voters``
        (descending) with the columns ``constituency``, ``province_name``,
        ``district_name``, ``province_id``, ``total_voters``,
        ``male_voters``, ``female_voters``, ``n_municipalities`` and
        ``female_pct``.  On the CSV path, whatever the file contains.

    Raises:
        FileNotFoundError: if the CSV is absent and a JSON source is missing.
    """
    csv_path = "data/constituency_analysis_report.csv"
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)

    # The JSON sources carry Nepali (Devanagari) text, so decode them as
    # UTF-8 explicitly instead of relying on the platform default codec.
    with open("data/cached_stats/new_stats.json", encoding="utf-8") as f:
        stats_new = json.load(f)
    with open("data/admin/web_data/complete_administrative_structure.json",
              encoding="utf-8") as f:
        admin_structure = json.load(f)

    district_lookup = _match_districts(admin_structure)

    # Group municipalities by district.
    # assumes m['district_id'] is an int like the keys of district_lookup
    # -- TODO confirm against the stats file.
    district_municipalities = {}
    for m in stats_new['municipalities']:
        district_municipalities.setdefault(m['district_id'], []).append(m)

    # Assign municipalities to constituencies.
    constituency_voter_data = {}
    for dist_id, municipalities in district_municipalities.items():
        info = district_lookup.get(dist_id)
        if info is None:
            continue
        constituencies = (
            CONSTITUENCY_MAPPING[info['province_id']][info['const_district_name']]
        )
        if not constituencies:
            # Nothing to assign to; also avoids dividing by zero below.
            continue
        ordered = sorted(municipalities, key=lambda x: x['municipality_id'])
        # The first `remainder` constituencies receive one extra municipality.
        chunk_size, remainder = divmod(len(ordered), len(constituencies))
        start = 0
        for i, const_name in enumerate(constituencies):
            take = chunk_size + (1 if i < remainder else 0)
            constituency_voter_data[const_name] = _sum_voters(
                ordered[start:start + take]
            )
            start += take

    # Build DataFrame
    origins = _constituency_origins()
    records = []
    for const_name, data in constituency_voter_data.items():
        prov_id, dist_name = origins.get(const_name, (None, None))
        records.append({
            'constituency': const_name,
            'province_name': (PROVINCE_NAMES[prov_id]['np']
                              if prov_id is not None else 'Unknown'),
            'district_name': dist_name,
            'province_id': prov_id,
            'total_voters': data['total_voters'],
            'male_voters': data['male_voters'],
            'female_voters': data['female_voters'],
            'n_municipalities': data['n_municipalities'],
        })

    columns = [
        'constituency', 'province_name', 'district_name', 'province_id',
        'total_voters', 'male_voters', 'female_voters', 'n_municipalities',
    ]
    df = pd.DataFrame(records, columns=columns)
    if df.empty:
        # No district matched: return an empty frame that still has the full
        # schema rather than failing with a KeyError on a missing column.
        df['female_pct'] = pd.Series(dtype='float64')
        return df

    df['female_pct'] = (df['female_voters'] / df['total_voters'] * 100).round(1)
    return df.sort_values('total_voters', ascending=False).reset_index(drop=True)
def _render_threshold_table(df_filtered, turnout_pct):
    """Tab 1: formatted per-constituency threshold table plus CSV download."""
    st.subheader("Constituency-wise Winning Thresholds")
    st.caption(f"Assumed turnout: {turnout_pct}% | Showing {len(df_filtered)} constituencies")

    display_df = df_filtered[[
        'constituency', 'province_name', 'district_name',
        'total_voters', 'male_voters', 'female_voters', 'female_pct',
        'est_turnout', 'threshold_25', 'threshold_33', 'threshold_40'
    ]].rename(columns={
        'constituency': 'Constituency',
        'province_name': 'Province',
        'district_name': 'District',
        'total_voters': 'Total Voters',
        'male_voters': 'Male',
        'female_voters': 'Female',
        'female_pct': 'Female %',
        'est_turnout': 'Est. Turnout',
        'threshold_25': 'Win @ 25%',
        'threshold_33': 'Win @ 33%',
        'threshold_40': 'Win @ 40%',
    })

    st.dataframe(
        display_df.style.format({
            'Total Voters': '{:,.0f}',
            'Male': '{:,.0f}',
            'Female': '{:,.0f}',
            'Female %': '{:.1f}%',
            'Est. Turnout': '{:,.0f}',
            'Win @ 25%': '{:,.0f}',
            'Win @ 33%': '{:,.0f}',
            'Win @ 40%': '{:,.0f}',
        }).background_gradient(subset=['Win @ 33%'], cmap='RdYlGn_r'),
        use_container_width=True,
        height=600,
    )

    # Download
    csv = display_df.to_csv(index=False)
    st.download_button("📥 Download Threshold Data (CSV)", csv,
                       "constituency_thresholds.csv", "text/csv")


def _render_visual_analysis(df_filtered, turnout_pct):
    """Tab 2: threshold bar chart, voter histogram and voters-vs-female scatter."""
    st.subheader("Threshold Distribution")

    # Bar chart - sorted by threshold
    fig_bar = px.bar(
        df_filtered.sort_values('threshold_33'),
        x='constituency', y='threshold_33',
        color='province_name',
        title=f'Votes Needed to Win (33% share, {turnout_pct}% turnout)',
        labels={'threshold_33': 'Votes Needed', 'constituency': 'Constituency',
                'province_name': 'Province'},
        height=500,
    )
    fig_bar.update_layout(xaxis_tickangle=90, xaxis_tickfont_size=7,
                          showlegend=True, legend_title_text='Province')
    st.plotly_chart(fig_bar, use_container_width=True)

    col1, col2 = st.columns(2)
    with col1:
        fig_hist = px.histogram(
            df_filtered, x='total_voters', nbins=30, color='province_name',
            title='Voter Count Distribution',
            labels={'total_voters': 'Total Voters', 'count': 'Count'},
        )
        st.plotly_chart(fig_hist, use_container_width=True)
    with col2:
        fig_scatter = px.scatter(
            df_filtered, x='total_voters', y='female_pct',
            color='province_name', size='threshold_33',
            hover_data=['constituency', 'district_name'],
            title='Voters vs Female % (size = threshold)',
            labels={'total_voters': 'Total Voters', 'female_pct': 'Female %'},
        )
        st.plotly_chart(fig_scatter, use_container_width=True)


def _show_top_ten(frame, by, value_format, *, largest):
    """Render the ten rows of ``frame`` with the largest/smallest ``by`` value."""
    pick = frame.nlargest if largest else frame.nsmallest
    top = pick(10, by)[
        ['constituency', 'province_name', 'total_voters', by]
    ].reset_index(drop=True)
    top.index += 1  # show 1-based positions instead of 0-based
    st.dataframe(
        top.style.format({'total_voters': '{:,.0f}', by: value_format}),
        use_container_width=True,
    )


def _render_rankings(df_filtered):
    """Tab 3: easiest/hardest constituencies and highest female voter share."""
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("🟢 Easiest to Win (lowest threshold)")
        _show_top_ten(df_filtered, 'threshold_33', '{:,.0f}', largest=False)
    with col2:
        st.subheader("🔴 Hardest to Win (highest threshold)")
        _show_top_ten(df_filtered, 'threshold_33', '{:,.0f}', largest=True)

    st.markdown("---")
    st.subheader("👩 Highest Female Voter %")
    _show_top_ten(df_filtered, 'female_pct', '{:.1f}%', largest=True)


def _render_province_summary(df):
    """Tab 4: per-province aggregate table and average-threshold bar chart.

    Works on the unfiltered frame, so the province filter does not apply.
    """
    st.subheader("Province-level Threshold Summary")

    prov_summary = df.groupby('province_name').agg(
        constituencies=('constituency', 'count'),
        total_voters=('total_voters', 'sum'),
        avg_threshold=('threshold_33', 'mean'),
        min_threshold=('threshold_33', 'min'),
        max_threshold=('threshold_33', 'max'),
        avg_female_pct=('female_pct', 'mean'),
    ).round(
        # Round per column: vote counts to whole numbers, but keep one
        # decimal of the female share because it is displayed as '{:.1f}%'.
        {'avg_threshold': 0, 'avg_female_pct': 1}
    ).reset_index()

    prov_summary = prov_summary.rename(columns={
        'province_name': 'Province',
        'constituencies': 'Constituencies',
        'total_voters': 'Total Voters',
        'avg_threshold': 'Avg Threshold (33%)',
        'min_threshold': 'Min Threshold',
        'max_threshold': 'Max Threshold',
        'avg_female_pct': 'Avg Female %',
    })

    st.dataframe(
        prov_summary.style.format({
            'Total Voters': '{:,.0f}',
            'Avg Threshold (33%)': '{:,.0f}',
            'Min Threshold': '{:,.0f}',
            'Max Threshold': '{:,.0f}',
            'Avg Female %': '{:.1f}%',
        }),
        use_container_width=True,
    )

    # Province comparison bar chart
    fig_prov = px.bar(
        prov_summary, x='Province', y='Avg Threshold (33%)',
        color='Province',
        title='Average Winning Threshold by Province',
        text='Constituencies',
    )
    fig_prov.update_traces(textposition='outside')
    st.plotly_chart(fig_prov, use_container_width=True)


def show_constituency_analysis():
    """Render the constituency winning-threshold analysis page.

    Loads the constituency table, reads the assumed turnout from a sidebar
    slider, derives the estimated turnout and the votes needed at 25%, 33%
    and 40% vote share, then renders headline metrics, a province filter and
    four tabs (threshold table, charts, rankings, province summary).

    Takes no arguments and returns nothing; all output goes to Streamlit.
    """
    st.header("🎯 Constituency Winning Threshold Analysis")
    st.markdown("Estimate votes needed to win each of Nepal's federal constituencies under different race scenarios.")

    # Load data
    df = load_constituency_data()

    # ── Configurable Parameters ──
    st.sidebar.markdown("---")
    st.sidebar.subheader("⚙️ Threshold Parameters")
    turnout_pct = st.sidebar.slider("Assumed Turnout %", 40, 80, 60, 5)

    # ── Calculate thresholds ──
    # Vote shares modelled: 25% (4-way race), 33% (3-way race), 40% (2-way race).
    df['est_turnout'] = (df['total_voters'] * turnout_pct / 100).astype(int)
    for pct in (25, 33, 40):
        df[f'threshold_{pct}'] = (df['total_voters'] * turnout_pct / 100 * pct / 100).astype(int)
    # method='min' gives tied constituencies the same whole-number rank; the
    # default average rank is fractional and would be truncated by astype(int).
    df['rank'] = df['total_voters'].rank(method='min', ascending=False).astype(int)

    # ── Top Metrics ──
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Constituencies", f"{len(df)}")
    col2.metric("Total Voters", f"{df['total_voters'].sum():,}")
    col3.metric("Avg Threshold (33%)", f"{df['threshold_33'].mean():,.0f}")
    col4.metric("Turnout Assumption", f"{turnout_pct}%")
    st.markdown("---")

    # ── Province Filter ──
    provinces = ["All Provinces"] + sorted(df['province_name'].unique().tolist())
    selected_province = st.selectbox("Filter by Province", provinces)
    if selected_province != "All Provinces":
        df_filtered = df[df['province_name'] == selected_province].copy()
    else:
        df_filtered = df.copy()

    # ── Tab Layout ──
    tab1, tab2, tab3, tab4 = st.tabs([
        "📊 Threshold Table",
        "📈 Visual Analysis",
        "🏆 Rankings",
        "🗺️ Province Summary"
    ])

    with tab1:
        _render_threshold_table(df_filtered, turnout_pct)
    with tab2:
        _render_visual_analysis(df_filtered, turnout_pct)
    with tab3:
        _render_rankings(df_filtered)
    with tab4:
        _render_province_summary(df)