"""Streamlit page for the Partner Pool Assumption Simulator.

The sidebar collects scenario criteria, the criteria are passed to
``estimate_pool`` and ``sensitivity_table``, and the page renders the three
estimates, a per-criterion bar chart, a details table, and the data notes.
"""

from __future__ import annotations

import pandas as pd
import plotly.express as px
import streamlit as st

from src.assumptions import BASELINE, DATA_QUALITY_NOTES
from src.model_pool import Criteria, estimate_pool, sensitivity_table

# Accepted range for the user-editable baseline population.
MIN_BASE_POPULATION = 10_000
MAX_BASE_POPULATION = 50_000_000


def format_count(value: int | float) -> str:
    """Return *value* rounded to a whole number with comma separators."""
    return f"{value:,.0f}"


def parse_count(value: str, fallback: int) -> int:
    """Parse a user-typed count such as ``"10,000,000"`` into an int.

    Commas and every kind of whitespace (plain, tab, non-breaking, thin
    space) are treated as thousands separators and ignored.

    Args:
        value: Raw text from the input widget.
        fallback: Returned when nothing is left after removing separators.

    Returns:
        The parsed integer, or *fallback* for blank input.

    Raises:
        ValueError: If the cleaned text is not a valid integer literal.
    """
    # str.split() without arguments splits on any Unicode whitespace, so
    # re-joining the pieces removes all of it in a single pass.
    cleaned = "".join(value.split()).replace(",", "")
    if not cleaned:
        return fallback
    return int(cleaned)


def title_label(value: str) -> str:
    """Turn an option key such as ``"large_cities"`` into ``"Large Cities"``."""
    return value.replace("_", " ").title()


st.set_page_config(
    page_title="Partner Pool Assumption Simulator",
    page_icon="S7",
    layout="wide",
)

st.title("Partner Pool Assumption Simulator")
st.caption("S7-K · Personal Relationship · transparent demo model")
st.info(
    "Prototype status: current numbers are demo assumptions. Use this app to test model logic, "
    "not to claim a factual count of available partners."
)

with st.sidebar:
    st.header("Scenario")

    default_population = BASELINE.total_reference_population
    default_population_text = format_count(default_population)
    base_population_text = st.text_input(
        "Baseline population",
        value=default_population_text,
        help=(
            "Reference population before filters, formatted with commas. "
            f"Demo default: {default_population_text}."
        ),
    )
    try:
        base_population = parse_count(base_population_text, default_population)
    except ValueError:
        # Unparseable text: tell the user and carry on with the demo default.
        st.warning("Use digits with optional commas, for example 10,000,000.")
        base_population = default_population

    if not MIN_BASE_POPULATION <= base_population <= MAX_BASE_POPULATION:
        st.warning(
            "Baseline population should stay between "
            f"{format_count(MIN_BASE_POPULATION)} and "
            f"{format_count(MAX_BASE_POPULATION)} for this demo."
        )
        # Clamp instead of stopping so the rest of the page still renders.
        base_population = max(
            MIN_BASE_POPULATION, min(base_population, MAX_BASE_POPULATION)
        )

    target_population = st.selectbox(
        "Target population",
        ["all_adults", "women", "men"],
        format_func=title_label,
        help=(
            "Applies a demo sex-share coefficient before the other filters. \n"
            "Women: 53%, Men: 47%, All adults: 100%."
        ),
    )
    age_min, age_max = st.slider(
        "Age range",
        18,
        70,
        (28, 42),
        help="Narrows the pool by the selected age-band overlap.",
    )
    region_scope = st.selectbox(
        "Region scope",
        ["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
        format_func=title_label,
        help="Applies the selected regional scope coefficient.",
    )
    relationship_status = st.selectbox(
        "Relationship status",
        ["any", "not_married", "single_or_divorced"],
        format_func=title_label,
        help="Demo availability proxy. Official marital status is not the same as real availability.",
    )
    min_height = st.slider(
        "Minimum height, cm",
        150,
        205,
        175,
        help="Interpolates a demo height-distribution coefficient.",
    )
    income_level = st.selectbox(
        "Income threshold",
        ["any", "above_median", "top_25", "top_10"],
        format_func=title_label,
        help="Applies an estimated income threshold coefficient.",
    )
    education_level = st.selectbox(
        "Education filter",
        ["any", "higher_education", "graduate_plus"],
        format_func=title_label,
        help="Applies an estimated education-level coefficient.",
    )

criteria = Criteria(
    base_population=base_population,
    target_population=target_population,
    age_min=age_min,
    age_max=age_max,
    region_scope=region_scope,
    relationship_status=relationship_status,
    min_height_cm=min_height,
    income_level=income_level,
    education_level=education_level,
)
estimate = estimate_pool(criteria)
steps = sensitivity_table(criteria)

col_a, col_b, col_c = st.columns(3)
col_a.metric("Conservative estimate", format_count(estimate.conservative))
col_b.metric("Central estimate", format_count(estimate.central))
col_c.metric("Optimistic estimate", format_count(estimate.optimistic))

st.subheader("What narrows the pool")
step_df = pd.DataFrame(steps)
# String-formatted copy for the details table; the chart below keeps using the
# numeric columns of step_df.
display_df = step_df.assign(
    coefficient=step_df["coefficient"].map("{:.4f}".format),
    remaining=step_df["remaining"].map(format_count),
)

fig = px.bar(
    step_df,
    x="factor",
    y="remaining",
    text="remaining",
    custom_data=["coefficient"],
    # NOTE(review): the "\n" is carried over from the original text; Plotly
    # text appears to break only on "<br>" - confirm whether a break is wanted.
    title="Remaining \nestimated pool after each criterion",
)
fig.update_traces(
    texttemplate="%{text:,.0f}",
    textposition="outside",
    # Plotly hover text uses "<br>" as its line separator.
    hovertemplate=(
        "%{x}<br>"
        "Remaining: %{y:,.0f}<br>"
        "Coefficient: %{customdata[0]:.4f}"
    ),
)
fig.update_layout(yaxis_title="Estimated remaining pool", xaxis_title="")
st.plotly_chart(fig, use_container_width=True)

st.subheader("Scenario details")
st.dataframe(display_df, use_container_width=True, hide_index=True)

st.subheader("Data quality notes")
for note in DATA_QUALITY_NOTES:
    st.write(f"- **{note['label']}**: {note['note']}")

st.subheader("Interpretation guardrails")
st.write(
    "This model estimates a demographic scenario, not compatibility, attraction, safety, or relationship success. "
    "A stricter filter can make a pool smaller, but it does not define a person's real-life chances."
)