Spaces:

rockerritesh
/

electionnepal

Paused

App Files Files Community

electionnepal / constituency_analysis.py

rockerritesh

feat: add constituency winning threshold analysis page

fa2b444 unverified about 1 month ago

raw

history blame contribute delete

13.4 kB

	"""
	Constituency Threshold Analysis Page
	=====================================
	Displays winning threshold analysis for all 165 federal constituencies.
	Shows voter counts, estimated turnout, and votes needed to win under
	different race scenarios.
	"""

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go
	import json
	import os

	from constituency_utils import CONSTITUENCY_MAPPING, PROVINCE_NAMES


	def _match_const_district(admin_name, candidates):
	    """Pick the entry of *candidates* that best matches *admin_name*.

	    Three passes are made over the candidates, strictest rule first:
	    exact equality, then substring containment in either direction, then an
	    identical 4-character prefix. Running the passes separately means a loose
	    prefix hit on one name can never shadow an exact hit on another name that
	    merely comes later in the mapping (e.g. two district names that start
	    with the same four characters).

	    Returns the matching candidate, or None when no rule matches.
	    """
	    names = list(candidates)
	    # Pass 1: identical spelling.
	    for name in names:
	        if name == admin_name:
	            return name
	    # Pass 2: one spelling is contained in the other.
	    for name in names:
	        if name in admin_name or admin_name in name:
	            return name
	    # Pass 3: last resort, same first four characters.
	    for name in names:
	        if name[:4] == admin_name[:4]:
	            return name
	    return None


	def _sum_voters(municipalities):
	    """Aggregate the voter counts of a list of municipality records."""
	    return {
	        'total_voters': sum(m['total_voters'] for m in municipalities),
	        'male_voters': sum(m['male_voters'] for m in municipalities),
	        'female_voters': sum(m['female_voters'] for m in municipalities),
	        'n_municipalities': len(municipalities),
	    }


	def load_constituency_data():
	    """Build constituency voter data from cached stats and constituency mapping.

	    If ``data/constituency_analysis_report.csv`` exists it is read and returned
	    as-is (its columns are whatever the CSV contains). Otherwise the table is
	    rebuilt from ``data/cached_stats/new_stats.json`` and
	    ``data/admin/web_data/complete_administrative_structure.json``:

	    1. every administrative district is matched by name to a district key of
	       ``CONSTITUENCY_MAPPING`` for the same province;
	    2. the district's municipalities are ordered by ``municipality_id`` and
	       split into consecutive, near-equal chunks, one per constituency
	       (earlier constituencies receive the leftover municipalities);
	    3. voter counts are summed per chunk.

	    NOTE: step 2 is a purely positional split of the municipality list; no
	    municipality-to-constituency lookup table is consulted.
	    NOTE(review): if two administrative districts resolve to the same mapping
	    key, the later one overwrites the earlier one's figures.

	    Returns:
	        pandas.DataFrame with columns ``constituency``, ``province_name``,
	        ``district_name``, ``province_id``, ``total_voters``, ``male_voters``,
	        ``female_voters``, ``n_municipalities`` and ``female_pct``, sorted by
	        ``total_voters`` descending. The frame is empty (but has the same
	        columns) when nothing could be matched.

	    Raises:
	        OSError: a source JSON file is missing or unreadable.
	        KeyError: a source record lacks one of the expected fields.
	    """
	    csv_path = "data/constituency_analysis_report.csv"
	    if os.path.exists(csv_path):
	        return pd.read_csv(csv_path)

	    # The JSON files carry Nepali (Devanagari) text, so do not depend on the
	    # platform's default encoding.
	    with open("data/cached_stats/new_stats.json", encoding="utf-8") as f:
	        stats_new = json.load(f)
	    with open("data/admin/web_data/complete_administrative_structure.json",
	              encoding="utf-8") as f:
	        admin_structure = json.load(f)

	    # district id -> (province id, district key used in CONSTITUENCY_MAPPING)
	    district_lookup = {}
	    for prov_id_str, prov_data in admin_structure.items():
	        prov_id = int(prov_id_str)
	        if prov_id not in CONSTITUENCY_MAPPING:
	            continue
	        for dist_id_str, dist_data in prov_data.get('districts', {}).items():
	            matched = _match_const_district(
	                dist_data['name_nepali'], CONSTITUENCY_MAPPING[prov_id]
	            )
	            if matched is not None:
	                district_lookup[int(dist_id_str)] = (prov_id, matched)

	    # Group municipalities by district.
	    district_municipalities = {}
	    for m in stats_new['municipalities']:
	        district_municipalities.setdefault(m['district_id'], []).append(m)

	    # Assign municipalities to constituencies. Keyed by constituency name so
	    # a repeated name keeps a single row (last write wins).
	    records_by_constituency = {}
	    for dist_id, municipalities in district_municipalities.items():
	        target = district_lookup.get(dist_id)
	        if target is None:
	            continue
	        prov_id, const_dist = target
	        constituencies = CONSTITUENCY_MAPPING[prov_id][const_dist]
	        if not constituencies:
	            # Nothing to assign to; also avoids dividing by zero below.
	            continue

	        ordered = sorted(municipalities, key=lambda m: m['municipality_id'])
	        chunk_size, remainder = divmod(len(ordered), len(constituencies))
	        province_name = PROVINCE_NAMES.get(prov_id, {}).get('np', 'Unknown')

	        start = 0
	        for i, const_name in enumerate(constituencies):
	            # The first `remainder` constituencies take one extra municipality.
	            take = chunk_size + (1 if i < remainder else 0)
	            record = {
	                'constituency': const_name,
	                'province_name': province_name,
	                'district_name': const_dist,
	                'province_id': prov_id,
	            }
	            record.update(_sum_voters(ordered[start:start + take]))
	            records_by_constituency[const_name] = record
	            start += take

	    columns = [
	        'constituency', 'province_name', 'district_name', 'province_id',
	        'total_voters', 'male_voters', 'female_voters', 'n_municipalities',
	    ]
	    df = pd.DataFrame(list(records_by_constituency.values()), columns=columns)
	    if df.empty:
	        # Keep the full schema so callers can still select every column.
	        df['female_pct'] = pd.Series(dtype='float64')
	        return df

	    df['female_pct'] = (df['female_voters'] / df['total_voters'] * 100).round(1)
	    return df.sort_values('total_voters', ascending=False).reset_index(drop=True)


	def _render_threshold_table(df_filtered, turnout_pct):
	    """Tab 1: styled per-constituency threshold table plus a CSV download."""
	    st.subheader("Constituency-wise Winning Thresholds")
	    # "\\|" emits the same backslash + pipe the caption always contained
	    # (presumably so Markdown shows a literal "|"), without relying on the
	    # invalid "\|" escape sequence that newer Python versions warn about.
	    st.caption(f"Assumed turnout: {turnout_pct}% \\| Showing {len(df_filtered)} constituencies")

	    display_df = df_filtered[[
	        'constituency', 'province_name', 'district_name',
	        'total_voters', 'male_voters', 'female_voters', 'female_pct',
	        'est_turnout', 'threshold_25', 'threshold_33', 'threshold_40'
	    ]].rename(columns={
	        'constituency': 'Constituency',
	        'province_name': 'Province',
	        'district_name': 'District',
	        'total_voters': 'Total Voters',
	        'male_voters': 'Male',
	        'female_voters': 'Female',
	        'female_pct': 'Female %',
	        'est_turnout': 'Est. Turnout',
	        'threshold_25': 'Win @ 25%',
	        'threshold_33': 'Win @ 33%',
	        'threshold_40': 'Win @ 40%',
	    })

	    st.dataframe(
	        display_df.style.format({
	            'Total Voters': '{:,.0f}',
	            'Male': '{:,.0f}',
	            'Female': '{:,.0f}',
	            'Female %': '{:.1f}%',
	            'Est. Turnout': '{:,.0f}',
	            'Win @ 25%': '{:,.0f}',
	            'Win @ 33%': '{:,.0f}',
	            'Win @ 40%': '{:,.0f}',
	        }).background_gradient(subset=['Win @ 33%'], cmap='RdYlGn_r'),
	        use_container_width=True,
	        height=600,
	    )

	    # Download exactly what is shown (renamed columns, current filter).
	    csv = display_df.to_csv(index=False)
	    st.download_button("📥 Download Threshold Data (CSV)", csv,
	                       "constituency_thresholds.csv", "text/csv")


	def _render_visual_analysis(df_filtered, turnout_pct):
	    """Tab 2: threshold bar chart, voter-count histogram and scatter plot."""
	    st.subheader("Threshold Distribution")

	    # Bar chart, constituencies ordered by ascending threshold.
	    fig_bar = px.bar(
	        df_filtered.sort_values('threshold_33'),
	        x='constituency', y='threshold_33',
	        color='province_name',
	        title=f'Votes Needed to Win (33% share, {turnout_pct}% turnout)',
	        labels={'threshold_33': 'Votes Needed', 'constituency': 'Constituency',
	                'province_name': 'Province'},
	        height=500,
	    )
	    fig_bar.update_layout(xaxis_tickangle=90, xaxis_tickfont_size=7,
	                          showlegend=True, legend_title_text='Province')
	    st.plotly_chart(fig_bar, use_container_width=True)

	    col1, col2 = st.columns(2)
	    with col1:
	        fig_hist = px.histogram(
	            df_filtered, x='total_voters', nbins=30, color='province_name',
	            title='Voter Count Distribution',
	            labels={'total_voters': 'Total Voters', 'count': 'Count'},
	        )
	        st.plotly_chart(fig_hist, use_container_width=True)

	    with col2:
	        fig_scatter = px.scatter(
	            df_filtered, x='total_voters', y='female_pct',
	            color='province_name', size='threshold_33',
	            hover_data=['constituency', 'district_name'],
	            title='Voters vs Female % (size = threshold)',
	            labels={'total_voters': 'Total Voters', 'female_pct': 'Female %'},
	        )
	        st.plotly_chart(fig_scatter, use_container_width=True)


	def _show_top10(frame, column, value_format, largest):
	    """Display the 10 rows of *frame* with the largest/smallest *column*."""
	    pick = frame.nlargest if largest else frame.nsmallest
	    top = pick(10, column)[
	        ['constituency', 'province_name', 'total_voters', column]
	    ].reset_index(drop=True)
	    top.index += 1  # show positions 1..10 instead of 0..9
	    st.dataframe(top.style.format({
	        'total_voters': '{:,.0f}', column: value_format
	    }), use_container_width=True)


	def _render_rankings(df_filtered):
	    """Tab 3: easiest / hardest constituencies and highest female share."""
	    col1, col2 = st.columns(2)

	    with col1:
	        st.subheader("🟢 Easiest to Win (lowest threshold)")
	        _show_top10(df_filtered, 'threshold_33', '{:,.0f}', largest=False)

	    with col2:
	        st.subheader("🔴 Hardest to Win (highest threshold)")
	        _show_top10(df_filtered, 'threshold_33', '{:,.0f}', largest=True)

	    st.markdown("---")
	    st.subheader("👩 Highest Female Voter %")
	    _show_top10(df_filtered, 'female_pct', '{:.1f}%', largest=True)


	def _render_province_summary(df):
	    """Tab 4: per-province aggregate table and average-threshold bar chart."""
	    st.subheader("Province-level Threshold Summary")

	    # Round per column: a blanket round(0) would also flatten the female
	    # share to a whole number, which is then displayed with one decimal.
	    prov_summary = df.groupby('province_name').agg(
	        constituencies=('constituency', 'count'),
	        total_voters=('total_voters', 'sum'),
	        avg_threshold=('threshold_33', 'mean'),
	        min_threshold=('threshold_33', 'min'),
	        max_threshold=('threshold_33', 'max'),
	        avg_female_pct=('female_pct', 'mean'),
	    ).round({'avg_threshold': 0, 'avg_female_pct': 1}).reset_index()

	    prov_summary = prov_summary.rename(columns={
	        'province_name': 'Province',
	        'constituencies': 'Constituencies',
	        'total_voters': 'Total Voters',
	        'avg_threshold': 'Avg Threshold (33%)',
	        'min_threshold': 'Min Threshold',
	        'max_threshold': 'Max Threshold',
	        'avg_female_pct': 'Avg Female %',
	    })

	    st.dataframe(
	        prov_summary.style.format({
	            'Total Voters': '{:,.0f}',
	            'Avg Threshold (33%)': '{:,.0f}',
	            'Min Threshold': '{:,.0f}',
	            'Max Threshold': '{:,.0f}',
	            'Avg Female %': '{:.1f}%',
	        }),
	        use_container_width=True,
	    )

	    # Province comparison bar chart, labelled with the constituency count.
	    fig_prov = px.bar(
	        prov_summary, x='Province', y='Avg Threshold (33%)',
	        color='Province',
	        title='Average Winning Threshold by Province',
	        text='Constituencies',
	    )
	    fig_prov.update_traces(textposition='outside')
	    st.plotly_chart(fig_prov, use_container_width=True)


	def show_constituency_analysis():
	    """Main page for constituency threshold analysis.

	    Loads the constituency table via ``load_constituency_data()``, reads the
	    assumed turnout from a sidebar slider, derives the estimated turnout and
	    the votes needed at 25 %, 33 % and 40 % vote share, then renders headline
	    metrics, a province filter and four tabs (threshold table, charts,
	    rankings, province summary).

	    The loaded frame must provide the columns ``constituency``,
	    ``province_name``, ``district_name``, ``total_voters``, ``male_voters``,
	    ``female_voters`` and ``female_pct``.

	    Returns None; all output goes to the Streamlit page.
	    """
	    st.header("🎯 Constituency Winning Threshold Analysis")
	    st.markdown("Estimate votes needed to win each of Nepal's federal constituencies under different race scenarios.")

	    # Load data
	    df = load_constituency_data()
	    if df.empty:
	        # Nothing to analyse; the aggregates and charts below need rows.
	        st.warning("No constituency data available.")
	        return

	    # ── Configurable Parameters ──
	    st.sidebar.markdown("---")
	    st.sidebar.subheader("⚙️ Threshold Parameters")
	    turnout_pct = st.sidebar.slider("Assumed Turnout %", 40, 80, 60, 5)

	    # Winning vote shares that are modelled:
	    # 25 % ~ 4-way race, 33 % ~ 3-way race, 40 % ~ 2-way race.
	    scenario_shares = (25, 33, 40)

	    # ── Calculate thresholds ──
	    df['est_turnout'] = (df['total_voters'] * turnout_pct / 100).astype(int)
	    for pct in scenario_shares:
	        df[f'threshold_{pct}'] = (df['total_voters'] * turnout_pct / 100 * pct / 100).astype(int)
	    # method='min': tied constituencies share the best rank instead of a
	    # truncated average rank.
	    df['rank'] = df['total_voters'].rank(ascending=False, method='min').astype(int)

	    # ── Top Metrics ──
	    col1, col2, col3, col4 = st.columns(4)
	    col1.metric("Constituencies", f"{len(df)}")
	    col2.metric("Total Voters", f"{df['total_voters'].sum():,}")
	    col3.metric("Avg Threshold (33%)", f"{df['threshold_33'].mean():,.0f}")
	    col4.metric("Turnout Assumption", f"{turnout_pct}%")

	    st.markdown("---")

	    # ── Province Filter ──
	    provinces = ["All Provinces"] + sorted(df['province_name'].unique().tolist())
	    selected_province = st.selectbox("Filter by Province", provinces)
	    if selected_province != "All Provinces":
	        df_filtered = df[df['province_name'] == selected_province].copy()
	    else:
	        df_filtered = df.copy()

	    # ── Tab Layout ──
	    tab1, tab2, tab3, tab4 = st.tabs([
	        "📊 Threshold Table",
	        "📈 Visual Analysis",
	        "🏆 Rankings",
	        "🗺️ Province Summary"
	    ])

	    with tab1:
	        _render_threshold_table(df_filtered, turnout_pct)

	    with tab2:
	        _render_visual_analysis(df_filtered, turnout_pct)

	    with tab3:
	        _render_rankings(df_filtered)

	    with tab4:
	        # Deliberately the unfiltered frame: the summary always covers every
	        # province, whatever the filter above is set to.
	        _render_province_summary(df)