Spaces:
Paused
Paused
| """ | |
| Constituency Threshold Analysis Page | |
| ===================================== | |
| Displays winning threshold analysis for all 165 federal constituencies. | |
| Shows voter counts, estimated turnout, and votes needed to win under | |
| different race scenarios. | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import json | |
| import os | |
| from constituency_utils import CONSTITUENCY_MAPPING, PROVINCE_NAMES | |
def _match_const_district(admin_name, candidates):
    """Return the constituency-district name that matches *admin_name*, or None.

    An exact name match is preferred. Only when there is none does the looser
    rule apply (substring containment in either direction, or an identical
    4-character prefix), so that an earlier look-alike candidate cannot shadow
    the correct district.
    """
    if not admin_name:
        # An empty string is "contained" in every name; never match on it.
        return None
    for name in candidates:
        if name == admin_name:
            return name
    for name in candidates:
        if (name in admin_name
                or admin_name in name
                or name[:4] == admin_name[:4]):
            return name
    return None


def _split_evenly(items, n_parts):
    """Split *items* into *n_parts* contiguous chunks of near-equal length.

    The first ``len(items) % n_parts`` chunks receive one extra element.
    """
    base, extra = divmod(len(items), n_parts)
    chunks = []
    start = 0
    for i in range(n_parts):
        size = base + (1 if i < extra else 0)
        chunks.append(items[start:start + size])
        start += size
    return chunks


def _sum_voters(municipalities):
    """Aggregate the voter counts of a list of municipality records."""
    return {
        'total_voters': sum(m['total_voters'] for m in municipalities),
        'male_voters': sum(m['male_voters'] for m in municipalities),
        'female_voters': sum(m['female_voters'] for m in municipalities),
        'n_municipalities': len(municipalities),
    }


def load_constituency_data():
    """Build constituency voter data from cached stats and constituency mapping.

    If ``data/constituency_analysis_report.csv`` exists it is read and returned
    as-is. Otherwise the table is derived from
    ``data/cached_stats/new_stats.json`` and
    ``data/admin/web_data/complete_administrative_structure.json``.

    Returns:
        pandas.DataFrame with one row per constituency, sorted by
        ``total_voters`` descending. When built from the JSON sources the
        columns are ``constituency``, ``province_name``, ``district_name``,
        ``province_id``, ``total_voters``, ``male_voters``, ``female_voters``,
        ``n_municipalities`` and ``female_pct``.

    Raises:
        OSError: if the CSV is absent and a JSON source cannot be opened.
        KeyError: if a JSON record lacks one of the expected keys.
    """
    csv_path = "data/constituency_analysis_report.csv"
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)

    # Build from source data. The files hold Nepali text, so the encoding is
    # pinned instead of relying on the platform default.
    with open("data/cached_stats/new_stats.json", encoding="utf-8") as f:
        stats_new = json.load(f)
    with open("data/admin/web_data/complete_administrative_structure.json",
              encoding="utf-8") as f:
        admin_structure = json.load(f)

    # Map administrative district IDs to (province_id, constituency district).
    district_lookup = {}
    for prov_id_str, prov_data in admin_structure.items():
        prov_id = int(prov_id_str)
        if prov_id not in CONSTITUENCY_MAPPING:
            continue
        candidates = CONSTITUENCY_MAPPING[prov_id]
        for dist_id_str, dist_data in prov_data.get('districts', {}).items():
            matched = _match_const_district(dist_data['name_nepali'], candidates)
            if matched is not None:
                district_lookup[int(dist_id_str)] = (prov_id, matched)

    # Group municipalities by district.
    by_district = {}
    for m in stats_new['municipalities']:
        by_district.setdefault(m['district_id'], []).append(m)

    # Assign municipalities to constituencies.
    # NOTE(review): this is a positional split (municipalities ordered by
    # municipality_id, dealt out in near-equal contiguous chunks), not a
    # lookup of real constituency boundaries -- confirm this is intended.
    rows_by_constituency = {}
    for dist_id, municipalities in by_district.items():
        if dist_id not in district_lookup:
            continue
        prov_id, const_dist = district_lookup[dist_id]
        constituencies = CONSTITUENCY_MAPPING[prov_id][const_dist]
        if not constituencies:
            # Nothing to assign to; also avoids dividing by zero below.
            continue
        ordered = sorted(municipalities, key=lambda m: m['municipality_id'])
        chunks = _split_evenly(ordered, len(constituencies))
        province_name = PROVINCE_NAMES.get(prov_id, {}).get('np', 'Unknown')
        for const_name, chunk in zip(constituencies, chunks):
            totals = _sum_voters(chunk)
            # Province and district are known here, so they are recorded
            # directly rather than re-derived by scanning the mapping later.
            rows_by_constituency[const_name] = {
                'constituency': const_name,
                'province_name': province_name,
                'district_name': const_dist,
                'province_id': prov_id,
                'total_voters': totals['total_voters'],
                'male_voters': totals['male_voters'],
                'female_voters': totals['female_voters'],
                'n_municipalities': totals['n_municipalities'],
            }

    # Build DataFrame. Explicit columns keep the schema stable even when no
    # constituency could be built.
    columns = [
        'constituency', 'province_name', 'district_name', 'province_id',
        'total_voters', 'male_voters', 'female_voters', 'n_municipalities',
    ]
    df = pd.DataFrame(list(rows_by_constituency.values()), columns=columns)

    total = df['total_voters'].astype(float)
    female = df['female_voters'].astype(float)
    # A constituency with zero voters gets NaN rather than inf.
    df['female_pct'] = (female / total.where(total > 0) * 100).round(1)
    return df.sort_values('total_voters', ascending=False).reset_index(drop=True)
def _render_threshold_table(df_filtered, turnout_pct):
    """Render the per-constituency threshold table and its CSV download."""
    st.subheader("Constituency-wise Winning Thresholds")
    st.caption(f"Assumed turnout: {turnout_pct}% | Showing {len(df_filtered)} constituencies")

    display_df = df_filtered[[
        'constituency', 'province_name', 'district_name',
        'total_voters', 'male_voters', 'female_voters', 'female_pct',
        'est_turnout', 'threshold_25', 'threshold_33', 'threshold_40'
    ]].rename(columns={
        'constituency': 'Constituency',
        'province_name': 'Province',
        'district_name': 'District',
        'total_voters': 'Total Voters',
        'male_voters': 'Male',
        'female_voters': 'Female',
        'female_pct': 'Female %',
        'est_turnout': 'Est. Turnout',
        'threshold_25': 'Win @ 25%',
        'threshold_33': 'Win @ 33%',
        'threshold_40': 'Win @ 40%',
    })

    st.dataframe(
        display_df.style.format({
            'Total Voters': '{:,.0f}',
            'Male': '{:,.0f}',
            'Female': '{:,.0f}',
            'Female %': '{:.1f}%',
            'Est. Turnout': '{:,.0f}',
            'Win @ 25%': '{:,.0f}',
            'Win @ 33%': '{:,.0f}',
            'Win @ 40%': '{:,.0f}',
        }).background_gradient(subset=['Win @ 33%'], cmap='RdYlGn_r'),
        use_container_width=True,
        height=600,
    )

    # Download
    csv = display_df.to_csv(index=False)
    st.download_button("📥 Download Threshold Data (CSV)", csv,
                       "constituency_thresholds.csv", "text/csv")


def _render_visual_analysis(df_filtered, turnout_pct):
    """Render the threshold bar chart, voter histogram and scatter plot."""
    st.subheader("Threshold Distribution")

    # Bar chart - sorted by threshold
    fig_bar = px.bar(
        df_filtered.sort_values('threshold_33'),
        x='constituency', y='threshold_33',
        color='province_name',
        title=f'Votes Needed to Win (33% share, {turnout_pct}% turnout)',
        labels={'threshold_33': 'Votes Needed', 'constituency': 'Constituency',
                'province_name': 'Province'},
        height=500,
    )
    fig_bar.update_layout(xaxis_tickangle=90, xaxis_tickfont_size=7,
                          showlegend=True, legend_title_text='Province')
    st.plotly_chart(fig_bar, use_container_width=True)

    col1, col2 = st.columns(2)
    with col1:
        # Histogram
        fig_hist = px.histogram(
            df_filtered, x='total_voters', nbins=30, color='province_name',
            title='Voter Count Distribution',
            labels={'total_voters': 'Total Voters', 'count': 'Count'},
        )
        st.plotly_chart(fig_hist, use_container_width=True)
    with col2:
        fig_scatter = px.scatter(
            df_filtered, x='total_voters', y='female_pct',
            color='province_name', size='threshold_33',
            hover_data=['constituency', 'district_name'],
            title='Voters vs Female % (size = threshold)',
            labels={'total_voters': 'Total Voters', 'female_pct': 'Female %'},
        )
        st.plotly_chart(fig_scatter, use_container_width=True)


def _render_rankings(df_filtered):
    """Render the top-10 easiest / hardest / highest-female-share tables."""

    def top_ten(frame, value_col, value_fmt):
        # Re-index from 1 so the index column reads as a rank position.
        table = frame[
            ['constituency', 'province_name', 'total_voters', value_col]
        ].reset_index(drop=True)
        table.index += 1
        st.dataframe(table.style.format({
            'total_voters': '{:,.0f}', value_col: value_fmt
        }), use_container_width=True)

    col1, col2 = st.columns(2)
    with col1:
        st.subheader("🟢 Easiest to Win (lowest threshold)")
        top_ten(df_filtered.nsmallest(10, 'threshold_33'), 'threshold_33', '{:,.0f}')
    with col2:
        st.subheader("🔴 Hardest to Win (highest threshold)")
        top_ten(df_filtered.nlargest(10, 'threshold_33'), 'threshold_33', '{:,.0f}')

    st.markdown("---")
    st.subheader("👩 Highest Female Voter %")
    top_ten(df_filtered.nlargest(10, 'female_pct'), 'female_pct', '{:.1f}%')


def _render_province_summary(df):
    """Render the province-level summary table and comparison chart.

    Uses the unfiltered data so every province is always shown.
    """
    st.subheader("Province-level Threshold Summary")

    prov_summary = df.groupby('province_name').agg(
        constituencies=('constituency', 'count'),
        total_voters=('total_voters', 'sum'),
        avg_threshold=('threshold_33', 'mean'),
        min_threshold=('threshold_33', 'min'),
        max_threshold=('threshold_33', 'max'),
        avg_female_pct=('female_pct', 'mean'),
    ).round({
        # Vote counts are whole numbers; the female share keeps one decimal
        # so the '{:.1f}%' display format below is meaningful.
        'avg_threshold': 0,
        'min_threshold': 0,
        'max_threshold': 0,
        'avg_female_pct': 1,
    }).reset_index()

    prov_summary = prov_summary.rename(columns={
        'province_name': 'Province',
        'constituencies': 'Constituencies',
        'total_voters': 'Total Voters',
        'avg_threshold': 'Avg Threshold (33%)',
        'min_threshold': 'Min Threshold',
        'max_threshold': 'Max Threshold',
        'avg_female_pct': 'Avg Female %',
    })

    st.dataframe(
        prov_summary.style.format({
            'Total Voters': '{:,.0f}',
            'Avg Threshold (33%)': '{:,.0f}',
            'Min Threshold': '{:,.0f}',
            'Max Threshold': '{:,.0f}',
            'Avg Female %': '{:.1f}%',
        }),
        use_container_width=True,
    )

    # Province comparison bar chart
    fig_prov = px.bar(
        prov_summary, x='Province', y='Avg Threshold (33%)',
        color='Province',
        title='Average Winning Threshold by Province',
        text='Constituencies',
    )
    fig_prov.update_traces(textposition='outside')
    st.plotly_chart(fig_prov, use_container_width=True)


def show_constituency_analysis():
    """Main page for constituency threshold analysis.

    Loads the constituency table, reads the assumed turnout from a sidebar
    slider, derives the estimated turnout and the votes needed at 25%, 33%
    and 40% vote share, then renders the headline metrics, a province filter
    and four tabs (threshold table, charts, rankings, province summary).
    """
    st.header("🎯 Constituency Winning Threshold Analysis")
    st.markdown("Estimate votes needed to win each of Nepal's federal constituencies under different race scenarios.")

    # Load data
    df = load_constituency_data()
    if df.empty:
        # Nothing to rank, chart or summarise.
        st.warning("No constituency data available.")
        return

    # -- Configurable Parameters --
    st.sidebar.markdown("---")
    st.sidebar.subheader("⚙️ Threshold Parameters")
    turnout_pct = st.sidebar.slider("Assumed Turnout %", 40, 80, 60, 5)

    # Winning vote shares modelled: 25% (4-way race), 33% (3-way race),
    # 40% (2-way race).
    share_scenarios = (25, 33, 40)

    # -- Calculate thresholds --
    df['est_turnout'] = (df['total_voters'] * turnout_pct / 100).astype(int)
    for pct in share_scenarios:
        df[f'threshold_{pct}'] = (df['total_voters'] * turnout_pct / 100 * pct / 100).astype(int)
    # method='min' yields whole-number ranks for ties; the default 'average'
    # produces x.5 values that astype(int) would silently truncate.
    df['rank'] = df['total_voters'].rank(method='min', ascending=False).astype(int)

    # -- Top Metrics --
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Constituencies", f"{len(df)}")
    col2.metric("Total Voters", f"{df['total_voters'].sum():,}")
    col3.metric("Avg Threshold (33%)", f"{df['threshold_33'].mean():,.0f}")
    col4.metric("Turnout Assumption", f"{turnout_pct}%")
    st.markdown("---")

    # -- Province Filter --
    provinces = ["All Provinces"] + sorted(df['province_name'].unique().tolist())
    selected_province = st.selectbox("Filter by Province", provinces)
    if selected_province != "All Provinces":
        df_filtered = df[df['province_name'] == selected_province].copy()
    else:
        df_filtered = df.copy()

    # -- Tab Layout --
    tab1, tab2, tab3, tab4 = st.tabs([
        "📋 Threshold Table",
        "📊 Visual Analysis",
        "🏆 Rankings",
        "🗺️ Province Summary"
    ])

    with tab1:
        _render_threshold_table(df_filtered, turnout_pct)
    with tab2:
        _render_visual_analysis(df_filtered, turnout_pct)
    with tab3:
        _render_rankings(df_filtered)
    with tab4:
        _render_province_summary(df)