Spaces:

K-RnD-Lab
/

Partner-Pool-Simulator_05-2026

Running

Partner-Pool-Simulator_05-2026

File size: 11,578 Bytes

3c0e2ec
 
 
 
 
 
ecf207c
 
 
 
363723c
ecf207c
 
 
3c0e2ec
 
 
ac4e07f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363723c
 
 
 
 
 
ecf207c
 
 
 
 
 
 
 
 
 
 
 
3c0e2ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177b9af
ecf207c
 
 
 
 
 
 
 
 
 
177b9af
 
ecf207c
 
 
363723c
ecf207c
177b9af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5317eb
 
177b9af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5317eb
 
177b9af
363723c
82c668e
363723c
e5317eb
363723c
ecf207c
82c668e
ecf207c
 
363723c
ecf207c
177b9af
82c668e
 
 
 
177b9af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c0e2ec
 
 
ac4e07f
3c0e2ec
 
 
 
 
82c668e
3c0e2ec
177b9af
 
 
 
 
 
 
 
 
3c0e2ec
 
 
 
ecf207c
3c0e2ec
ecf207c
ac4e07f
 
 
ecf207c
3c0e2ec
e5317eb
 
 
 
 
3c0e2ec
 
ac4e07f
 
 
ecf207c
ac4e07f
3c0e2ec
 
 
 
 
ecf207c
3c0e2ec
 
 
ac4e07f
ecf207c
 
 
 
 
 
ac4e07f
3c0e2ec
 
 
 
ac4e07f
3c0e2ec
ecf207c
 
 
 
 
 
 
82c668e
363723c
 
ecf207c
 
3c0e2ec
 
 
 
 
 
 
177b9af
 
3c0e2ec
ecf207c

from __future__ import annotations

import pandas as pd
import plotly.express as px
import streamlit as st

from src.assumptions import (
    BASELINE,
    BASELINE_REFERENCE_OPTIONS,
    DATA_QUALITY_NOTES,
    INCOME_THRESHOLD_OPTIONS_UAH,
    SALARY_ANCHORS_UAH,
    SOURCE_LINKS,
)
from src.model_pool import Criteria, estimate_pool, sensitivity_table


def format_count(value: int | float) -> str:
    """Render a number as a whole count with comma thousands separators.

    The value is formatted with zero decimal places, so a fractional input is
    rounded by the format spec rather than shown with decimals.
    """
    return format(value, ",.0f")


def parse_count(value: str, fallback: int) -> int:
    """Parse a human-formatted whole number such as ``"10,000,000"``.

    Comma separators and every whitespace character are removed before the
    text is converted, so inputs grouped with plain, non-breaking, or narrow
    spaces (``"10 000 000"``) parse the same way as comma-grouped ones.

    Args:
        value: Raw text, for example the content of a text input.
        fallback: Value returned when nothing remains after the separators
            are removed (empty or blank input).

    Returns:
        The parsed integer, or ``fallback`` for blank input.

    Raises:
        ValueError: If the cleaned text is not a valid integer literal.
    """
    # str.split() with no argument splits on any Unicode whitespace, so this
    # also drops tabs and non-breaking / narrow spaces inside the number that
    # a plain replace(" ", "") would leave behind for int() to reject.
    cleaned = "".join(value.split()).replace(",", "")
    if not cleaned:
        return fallback
    return int(cleaned)


def title_label(value: str) -> str:
    """Turn a snake_case option key into a Title Case display label."""
    words = value.split("_")
    return " ".join(words).title()


def income_threshold_label(value: int) -> str:
    """Return the display label for an income threshold option.

    A threshold of 0 is shown as "Any income"; every other value is shown as
    a comma-grouped amount followed by the "UAH" suffix.
    """
    return "Any income" if value == 0 else f"{format_count(value)} UAH"


def format_percent(value: int | float) -> str:
    """Format a percentage with more decimals the smaller the value is.

    Exactly zero prints as "0%", anything below 0.001 collapses to
    "<0.001%", and the remaining ranges use 4, 3, or 2 decimal places.
    """
    if value == 0:
        return "0%"
    if value < 0.001:
        return "<0.001%"
    # (exclusive upper bound, decimal places), checked from smallest to largest.
    for upper_bound, decimals in ((0.01, 4), (1, 3)):
        if value < upper_bound:
            return f"{value:.{decimals}f}%"
    return f"{value:.2f}%"


# Page chrome: the page config is issued before any other Streamlit element,
# followed by the heading and its subtitle.
st.set_page_config(
    layout="wide",
    page_icon="S7",
    page_title="Partner Pool Assumption Simulator",
)

st.title("Partner Pool Assumption Simulator")
st.caption("S7-K · Personal Relationship · transparent demo model")

# Up-front disclaimer so the figures further down are read as demo output.
st.info(
    "Prototype status: current numbers are demo assumptions. "
    "Use this app to test model logic, not to claim a factual count of available partners."
)

# Sidebar: collects every scenario input. Each widget's return value is bound
# to a module-level name that the Criteria(...) construction after this block
# reads, so the variable names here are part of the script's contract.
with st.sidebar:
    st.header("Scenario")
    with st.expander("Core demographics", expanded=True):
        # The preset selects an entry of BASELINE_REFERENCE_OPTIONS; its
        # "label", "value" and "note" fields drive the widgets below.
        baseline_preset = st.selectbox(
            "Baseline preset",
            list(BASELINE_REFERENCE_OPTIONS),
            format_func=lambda value: BASELINE_REFERENCE_OPTIONS[value]["label"],
            help=(
                "Baseline is the starting universe before filters. It is not automatically the whole country; "
                "choose a national reference or a narrower custom pool depending on the scenario."
            ),
        )
        preset = BASELINE_REFERENCE_OPTIONS[baseline_preset]
        # Free-text field pre-filled with the preset value, so the user can
        # type a comma-formatted number of their own.
        base_population_text = st.text_input(
            "Baseline population",
            value=format_count(preset["value"]),
            help=(
                f"{preset['note']} Formatted with commas. "
                "Demo is a fixed synthetic example. Custom means you choose your own starting audience."
            ),
        )
        # parse_count raises ValueError for non-numeric text; warn and continue
        # with the BASELINE default instead of letting the exception propagate.
        # NOTE(review): both the blank-input fallback and the error fallback use
        # BASELINE.total_reference_population, not preset["value"] — confirm
        # that ignoring the selected preset in these cases is intended.
        try:
            base_population = parse_count(base_population_text, BASELINE.total_reference_population)
        except ValueError:
            st.warning("Use digits with optional commas, for example 10,000,000.")
            base_population = BASELINE.total_reference_population
        # Out-of-range values are reported and then clamped into
        # [10_000, 50_000_000] so the rest of the script always gets a
        # positive baseline.
        if not 10_000 <= base_population <= 50_000_000:
            st.warning("Baseline population should stay between 10,000 and 50,000,000 for this demo.")
            base_population = max(10_000, min(base_population, 50_000_000))

        target_population = st.selectbox(
            "Target population",
            ["all_adults", "women", "men"],
            format_func=title_label,
            help="Applies a demo sex-share coefficient before the other filters. Women: 53%, Men: 47%, All adults: 100%.",
        )
        # A tuple default makes the slider return a (low, high) pair, which is
        # unpacked into the two age bounds.
        age_min, age_max = st.slider(
            "Age range",
            18,
            70,
            (18, 70),
            help="Narrows the pool by the selected age-band overlap. The full 18-70 range is treated as no age filter.",
        )
        region_scope = st.selectbox(
            "Region scope",
            ["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
            format_func=title_label,
            help="Applies the selected regional scope coefficient.",
        )
        relationship_status = st.selectbox(
            "Relationship status",
            ["any", "not_married", "single_or_divorced"],
            format_func=title_label,
            help="Demo availability proxy. Official marital status is not the same as real availability.",
        )
        # Default equals the slider minimum (150), which the help text
        # describes as "no height filter".
        min_height = st.slider(
            "Minimum height, cm",
            150,
            205,
            150,
            help="Interpolates a demo height-distribution coefficient. 150 cm is treated as no height filter.",
        )
        # Discrete thresholds come from INCOME_THRESHOLD_OPTIONS_UAH; the
        # default of 0 is labelled "Any income" by income_threshold_label.
        income_min_uah = st.select_slider(
            "Minimum monthly income, UAH",
            options=INCOME_THRESHOLD_OPTIONS_UAH,
            value=0,
            format_func=income_threshold_label,
            help=(
                "Scenario salary threshold. 0 means no income filter. Salary anchors: Work.ua current benchmark is about "
                f"{format_count(SALARY_ANCHORS_UAH['workua_current_average'])} UAH/month; "
                f"KSE cites Work.ua January 2026 median at {format_count(SALARY_ANCHORS_UAH['kse_workua_jan_2026_median'])} UAH/month. "
                "High thresholds up to 1,000,000 UAH/month are scenario stress-test cutoffs, not official maximum salary data."
            ),
        )
        # Echo the chosen threshold as text; note the suffix here is
        # "UAH/month", unlike the slider label's plain "UAH".
        st.caption(
            "Selected income threshold: "
            f"{'Any income' if income_min_uah == 0 else format_count(income_min_uah) + ' UAH/month'}"
        )
        education_level = st.selectbox(
            "Education filter",
            ["any", "higher_education", "graduate_plus"],
            format_func=title_label,
            help="Applies an estimated education-level coefficient.",
        )

    # The remaining expanders hold optional filters; every selectbox lists
    # "any" first, so that is the initially selected option.
    with st.expander("Family context"):
        children_status = st.selectbox(
            "Children status",
            ["any", "no_children", "has_children", "co_parenting_ready"],
            format_func=title_label,
            help="Scenario preference around existing children. These are demo assumptions, not value judgments.",
        )
        future_children = st.selectbox(
            "Future children",
            ["any", "wants_children", "does_not_want_children", "open_or_undecided"],
            format_func=title_label,
            help="Scenario preference around future children.",
        )

    with st.expander("War and mobility"):
        military_status = st.selectbox(
            "Military status",
            ["any", "civilian_or_not_serving", "active_service", "veteran_or_service_history"],
            format_func=title_label,
            help="War-related scenario filter. Active service and veteran/service-history shares are placeholders until sourced.",
        )
        relocation = st.selectbox(
            "Relocation",
            ["any", "same_city_only", "open_to_relocation", "remote_or_long_distance_ok"],
            format_func=title_label,
            help="Mobility and distance preference filter.",
        )

    with st.expander("Lifestyle and compatibility"):
        housing = st.selectbox(
            "Housing",
            ["any", "independent_living", "own_or_stable_housing"],
            format_func=title_label,
            help="Scenario proxy for independent or stable living setup.",
        )
        smoking = st.selectbox(
            "Smoking",
            ["any", "non_smoker", "ok_with_smoking"],
            format_func=title_label,
            help="Lifestyle preference around smoking.",
        )
        alcohol = st.selectbox(
            "Alcohol",
            ["any", "rare_or_none", "moderate_ok"],
            format_func=title_label,
            help="Lifestyle preference around alcohol use.",
        )
        language = st.selectbox(
            "Language comfort",
            ["any", "ukrainian_comfortable", "english_comfortable", "ukrainian_and_english"],
            format_func=title_label,
            help="Communication comfort filter.",
        )
        pets = st.selectbox(
            "Pets",
            ["any", "pet_friendly", "no_pets_preferred"],
            format_func=title_label,
            help="Household compatibility preference around pets.",
        )

# Bundle every sidebar selection into the model's input object.
criteria = Criteria(
    # Core demographics
    base_population=base_population,
    target_population=target_population,
    age_min=age_min,
    age_max=age_max,
    region_scope=region_scope,
    relationship_status=relationship_status,
    min_height_cm=min_height,
    income_min_uah=income_min_uah,
    education_level=education_level,
    # Family context
    children_status=children_status,
    future_children=future_children,
    # War and mobility
    military_status=military_status,
    relocation=relocation,
    # Lifestyle and compatibility
    housing=housing,
    smoking=smoking,
    alcohol=alcohol,
    language=language,
    pets=pets,
)

estimate = estimate_pool(criteria)
steps = sensitivity_table(criteria)
central_percent = (estimate.central / criteria.base_population) * 100

# Headline figures: one metric per column, left to right.
col_a, col_b, col_c, col_d = st.columns(4)
headline_metrics = (
    (col_a, "Conservative estimate", format_count(estimate.conservative)),
    (col_b, "Central estimate", format_count(estimate.central)),
    (col_c, "Optimistic estimate", format_count(estimate.optimistic)),
    (col_d, "Central share", format_percent(central_percent)),
)
for metric_column, metric_label, metric_text in headline_metrics:
    metric_column.metric(metric_label, metric_text)

# Explain the share figure; a share of exactly 100 gets its own wording.
share_caption = (
    "Neutral defaults are active: the central estimate equals 100% of the selected baseline."
    if central_percent == 100
    else "Central share is the central estimate divided by the selected baseline after all active filters."
)
st.caption(share_caption)

st.subheader("What narrows the pool")
step_df = pd.DataFrame(steps)

# String-formatted copy for the details table; the chart keeps using the
# numeric frame so Plotly can scale and format the values itself.
column_formatters = {
    "coefficient": "{:.4f}".format,
    "remaining": format_count,
    "percent_of_baseline": format_percent,
}
display_df = step_df.assign(
    **{
        column_name: step_df[column_name].map(formatter)
        for column_name, formatter in column_formatters.items()
    }
)

fig = px.bar(
    step_df,
    x="factor",
    y="remaining",
    text="remaining",
    custom_data=["coefficient", "percent_of_baseline"],
    title="Remaining estimated pool after each criterion",
)
# customdata[0] is the coefficient and customdata[1] the share of baseline,
# matching the custom_data order passed to px.bar above.
hover_rows = [
    "<b>%{x}</b>",
    "Remaining: %{y:,.0f}",
    "Share of baseline: %{customdata[1]:.4f}%",
    "Coefficient: %{customdata[0]:.4f}<extra></extra>",
]
fig.update_traces(
    texttemplate="%{text:,.0f}",
    textposition="outside",
    hovertemplate="<br>".join(hover_rows),
)
fig.update_layout(xaxis_title="", yaxis_title="Estimated remaining pool")
st.plotly_chart(fig, use_container_width=True)

st.subheader("Scenario details")
st.dataframe(display_df, hide_index=True, use_container_width=True)

st.subheader("Baseline and salary anchors")
# Pull the interpolated figures out first so the prose below stays readable.
sssu_reference = format_count(BASELINE_REFERENCE_OPTIONS["sssu_jan_2022_total"]["value"])
st.write(
    "Baseline population is the starting reference pool before filters. "
    "The default 10,000,000 is a demo working pool, not the population of Ukraine. "
    "For a national pre-invasion reference, use the SSSU January 2022 option "
    f"({sssu_reference})."
)
workua_benchmark = format_count(SALARY_ANCHORS_UAH["workua_current_average"])
st.write(
    f"The income slider uses {workua_benchmark} UAH/month as the current public job-market benchmark. "
    "High values such as 200,000, 500,000, or 1,000,000 UAH/month are supported as scenario stress-test cutoffs. "
    "They are not official salary percentiles or a claimed real maximum."
)

st.subheader("Data quality notes")
# One markdown bullet per note, with the label in bold.
for note in DATA_QUALITY_NOTES:
    note_label, note_text = note["label"], note["note"]
    st.write(f"- **{note_label}**: {note_text}")

st.subheader("Interpretation guardrails")
st.write(
    "This model estimates a demographic scenario, not compatibility, attraction, safety, or relationship success. "
    "A stricter filter can make a pool smaller, but it does not define a person's real-life chances. "
    "War, children, housing, and lifestyle filters are sensitive context variables; "
    "treat them as transparent assumptions."
)

st.subheader("Sources")
# One markdown bullet per source: a link followed by its note.
for source in SOURCE_LINKS:
    source_link = f"[{source['label']}]({source['url']})"
    st.markdown(f"- {source_link} — {source['note']}")