Spaces:

K-RnD-Lab
/

Partner-Pool-Simulator_05-2026

Running

App Files Files Community

TEZv committed 2 days ago

Commit

ac4e07f

1 Parent(s): 6b77245

Format counts and add target population

Browse files

Files changed (3) hide show

app.py +68 -15
src/assumptions.py +6 -0
src/model_pool.py +3 -0

app.py CHANGED Viewed

@@ -8,6 +8,21 @@ from src.assumptions import BASELINE, DATA_QUALITY_NOTES
 from src.model_pool import Criteria, estimate_pool, sensitivity_table
 st.set_page_config(
     page_title="Partner Pool Assumption Simulator",
     page_icon="S7",
@@ -24,38 +39,68 @@ st.info(
 with st.sidebar:
     st.header("Scenario")
-    base_population = st.number_input(
         "Baseline population",
-        min_value=10_000,
-        max_value=50_000_000,
-        value=BASELINE.total_reference_population,
-        step=50_000,
     )
-    age_min, age_max = st.slider("Age range", 18, 70, (28, 42))
     region_scope = st.selectbox(
         "Region scope",
         ["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
-        format_func=lambda value: value.replace("_", " ").title(),
     )
     relationship_status = st.selectbox(
         "Relationship status",
         ["any", "not_married", "single_or_divorced"],
-        format_func=lambda value: value.replace("_", " ").title(),
     )
-    min_height = st.slider("Minimum height, cm", 150, 205, 175)
     income_level = st.selectbox(
         "Income threshold",
         ["any", "above_median", "top_25", "top_10"],
-        format_func=lambda value: value.replace("_", " ").title(),
     )
     education_level = st.selectbox(
         "Education filter",
         ["any", "higher_education", "graduate_plus"],
-        format_func=lambda value: value.replace("_", " ").title(),
     )
 criteria = Criteria(
     base_population=base_population,
     age_min=age_min,
     age_max=age_max,
     region_scope=region_scope,
@@ -69,25 +114,33 @@ estimate = estimate_pool(criteria)
 steps = sensitivity_table(criteria)
 col_a, col_b, col_c = st.columns(3)
-col_a.metric("Conservative estimate", f"{estimate.conservative:,.0f}")
-col_b.metric("Central estimate", f"{estimate.central:,.0f}")
-col_c.metric("Optimistic estimate", f"{estimate.optimistic:,.0f}")
 st.subheader("What narrows the pool")
 step_df = pd.DataFrame(steps)
 fig = px.bar(
     step_df,
     x="factor",
     y="remaining",
     text="remaining",
     title="Remaining estimated pool after each criterion",
 )
 fig.update_traces(texttemplate="%{text:,.0f}", textposition="outside")
 fig.update_layout(yaxis_title="Estimated remaining pool", xaxis_title="")
 st.plotly_chart(fig, use_container_width=True)
 st.subheader("Scenario details")
-st.dataframe(step_df, use_container_width=True, hide_index=True)
 st.subheader("Data quality notes")
 for note in DATA_QUALITY_NOTES:

 from src.model_pool import Criteria, estimate_pool, sensitivity_table
+def format_count(value: int | float) -> str:
+    return f"{value:,.0f}"
def parse_count(value: str, fallback: int) -> int:
    """Parse a human-typed count such as ``"10,000,000"`` into an int.

    Commas and every kind of whitespace are ignored, including tabs and the
    non-breaking spaces that some locales and spreadsheets use as thousands
    separators.

    Args:
        value: Raw text to parse.
        fallback: Value returned when nothing remains after cleaning.

    Returns:
        The parsed integer, or ``fallback`` for blank input.

    Raises:
        ValueError: If the cleaned text is not a valid integer literal.
    """
    # str.split() with no separator splits on all Unicode whitespace, so
    # re-joining the pieces drops spaces, tabs, NBSP, etc. in a single pass.
    cleaned = "".join(value.replace(",", "").split())
    if not cleaned:
        return fallback
    return int(cleaned)
def title_label(value: str) -> str:
    """Turn a snake_case option key into a title-cased display label."""
    words = value.split("_")
    return " ".join(words).title()
 st.set_page_config(
     page_title="Partner Pool Assumption Simulator",
     page_icon="S7",
 with st.sidebar:
     st.header("Scenario")
+    base_population_text = st.text_input(
         "Baseline population",
+        value=format_count(BASELINE.total_reference_population),
+        help=f"Reference population before filters, formatted with commas. Demo default: {format_count(BASELINE.total_reference_population)}.",
+    )
+    try:
+        base_population = parse_count(base_population_text, BASELINE.total_reference_population)
+    except ValueError:
+        st.warning("Use digits with optional commas, for example 10,000,000.")
+        base_population = BASELINE.total_reference_population
+    if not 10_000 <= base_population <= 50_000_000:
+        st.warning("Baseline population should stay between 10,000 and 50,000,000 for this demo.")
+        base_population = max(10_000, min(base_population, 50_000_000))
+    target_population = st.selectbox(
+        "Target population",
+        ["all_adults", "women", "men"],
+        format_func=title_label,
+        help="Applies a demo sex-share coefficient before the other filters. Women: 53%, Men: 47%, All adults: 100%.",
+    )
+    age_min, age_max = st.slider(
+        "Age range",
+        18,
+        70,
+        (28, 42),
+        help="Narrows the pool by the selected age-band overlap.",
     )
     region_scope = st.selectbox(
         "Region scope",
         ["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
+        format_func=title_label,
+        help="Applies the selected regional scope coefficient.",
     )
     relationship_status = st.selectbox(
         "Relationship status",
         ["any", "not_married", "single_or_divorced"],
+        format_func=title_label,
+        help="Demo availability proxy. Official marital status is not the same as real availability.",
+    )
+    min_height = st.slider(
+        "Minimum height, cm",
+        150,
+        205,
+        175,
+        help="Interpolates a demo height-distribution coefficient.",
     )
     income_level = st.selectbox(
         "Income threshold",
         ["any", "above_median", "top_25", "top_10"],
+        format_func=title_label,
+        help="Applies an estimated income threshold coefficient.",
     )
     education_level = st.selectbox(
         "Education filter",
         ["any", "higher_education", "graduate_plus"],
+        format_func=title_label,
+        help="Applies an estimated education-level coefficient.",
     )
 criteria = Criteria(
     base_population=base_population,
+    target_population=target_population,
     age_min=age_min,
     age_max=age_max,
     region_scope=region_scope,
 steps = sensitivity_table(criteria)
 col_a, col_b, col_c = st.columns(3)
+col_a.metric("Conservative estimate", format_count(estimate.conservative))
+col_b.metric("Central estimate", format_count(estimate.central))
+col_c.metric("Optimistic estimate", format_count(estimate.optimistic))
 st.subheader("What narrows the pool")
 step_df = pd.DataFrame(steps)
+display_df = step_df.assign(
+    coefficient=step_df["coefficient"].map("{:.4f}".format),
+    remaining=step_df["remaining"].map(format_count),
+)
 fig = px.bar(
     step_df,
     x="factor",
     y="remaining",
     text="remaining",
+    custom_data=["coefficient"],
     title="Remaining estimated pool after each criterion",
 )
 fig.update_traces(texttemplate="%{text:,.0f}", textposition="outside")
+fig.update_traces(
+    hovertemplate="<b>%{x}</b><br>Remaining: %{y:,.0f}<br>Coefficient: %{customdata[0]:.4f}<extra></extra>"
+)
 fig.update_layout(yaxis_title="Estimated remaining pool", xaxis_title="")
 st.plotly_chart(fig, use_container_width=True)
 st.subheader("Scenario details")
+st.dataframe(display_df, use_container_width=True, hide_index=True)
 st.subheader("Data quality notes")
 for note in DATA_QUALITY_NOTES:

src/assumptions.py CHANGED Viewed

@@ -27,6 +27,12 @@ REGION_FACTORS = {
     "western_regions": 0.24,
 }
 RELATIONSHIP_STATUS_FACTORS = {
     "any": 1.0,
     "not_married": 0.46,

     "western_regions": 0.24,
 }
+TARGET_POPULATION_FACTORS = {
+    "all_adults": 1.0,
+    "women": 0.53,
+    "men": 0.47,
+}
 RELATIONSHIP_STATUS_FACTORS = {
     "any": 1.0,
     "not_married": 0.46,

src/model_pool.py CHANGED Viewed

@@ -10,12 +10,14 @@ from .assumptions import (
     INCOME_FACTORS,
     REGION_FACTORS,
     RELATIONSHIP_STATUS_FACTORS,
 )
 @dataclass(frozen=True)
 class Criteria:
     base_population: int
     age_min: int
     age_max: int
     region_scope: str
@@ -69,6 +71,7 @@ def height_factor(min_height_cm: int) -> float:
 def model_factors(criteria: Criteria) -> list[tuple[str, float]]:
     return [
         ("Age range", age_factor(criteria.age_min, criteria.age_max)),
         ("Region scope", REGION_FACTORS[criteria.region_scope]),
         ("Relationship status", RELATIONSHIP_STATUS_FACTORS[criteria.relationship_status]),

     INCOME_FACTORS,
     REGION_FACTORS,
     RELATIONSHIP_STATUS_FACTORS,
+    TARGET_POPULATION_FACTORS,
 )
 @dataclass(frozen=True)
 class Criteria:
     base_population: int
+    target_population: str
     age_min: int
     age_max: int
     region_scope: str
 def model_factors(criteria: Criteria) -> list[tuple[str, float]]:
     return [
+        ("Target population", TARGET_POPULATION_FACTORS[criteria.target_population]),
         ("Age range", age_factor(criteria.age_min, criteria.age_max)),
         ("Region scope", REGION_FACTORS[criteria.region_scope]),
         ("Relationship status", RELATIONSHIP_STATUS_FACTORS[criteria.relationship_status]),