Format counts and add target population
Browse files- app.py +68 -15
- src/assumptions.py +6 -0
- src/model_pool.py +3 -0
app.py
CHANGED
|
@@ -8,6 +8,21 @@ from src.assumptions import BASELINE, DATA_QUALITY_NOTES
|
|
| 8 |
from src.model_pool import Criteria, estimate_pool, sensitivity_table
|
| 9 |
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
st.set_page_config(
|
| 12 |
page_title="Partner Pool Assumption Simulator",
|
| 13 |
page_icon="S7",
|
|
@@ -24,38 +39,68 @@ st.info(
|
|
| 24 |
|
| 25 |
with st.sidebar:
|
| 26 |
st.header("Scenario")
|
| 27 |
-
|
| 28 |
"Baseline population",
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
)
|
| 34 |
-
age_min, age_max = st.slider("Age range", 18, 70, (28, 42))
|
| 35 |
region_scope = st.selectbox(
|
| 36 |
"Region scope",
|
| 37 |
["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
|
| 38 |
-
format_func=
|
|
|
|
| 39 |
)
|
| 40 |
relationship_status = st.selectbox(
|
| 41 |
"Relationship status",
|
| 42 |
["any", "not_married", "single_or_divorced"],
|
| 43 |
-
format_func=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
)
|
| 45 |
-
min_height = st.slider("Minimum height, cm", 150, 205, 175)
|
| 46 |
income_level = st.selectbox(
|
| 47 |
"Income threshold",
|
| 48 |
["any", "above_median", "top_25", "top_10"],
|
| 49 |
-
format_func=
|
|
|
|
| 50 |
)
|
| 51 |
education_level = st.selectbox(
|
| 52 |
"Education filter",
|
| 53 |
["any", "higher_education", "graduate_plus"],
|
| 54 |
-
format_func=
|
|
|
|
| 55 |
)
|
| 56 |
|
| 57 |
criteria = Criteria(
|
| 58 |
base_population=base_population,
|
|
|
|
| 59 |
age_min=age_min,
|
| 60 |
age_max=age_max,
|
| 61 |
region_scope=region_scope,
|
|
@@ -69,25 +114,33 @@ estimate = estimate_pool(criteria)
|
|
| 69 |
steps = sensitivity_table(criteria)
|
| 70 |
|
| 71 |
col_a, col_b, col_c = st.columns(3)
|
| 72 |
-
col_a.metric("Conservative estimate",
|
| 73 |
-
col_b.metric("Central estimate",
|
| 74 |
-
col_c.metric("Optimistic estimate",
|
| 75 |
|
| 76 |
st.subheader("What narrows the pool")
|
| 77 |
step_df = pd.DataFrame(steps)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
fig = px.bar(
|
| 79 |
step_df,
|
| 80 |
x="factor",
|
| 81 |
y="remaining",
|
| 82 |
text="remaining",
|
|
|
|
| 83 |
title="Remaining estimated pool after each criterion",
|
| 84 |
)
|
| 85 |
fig.update_traces(texttemplate="%{text:,.0f}", textposition="outside")
|
|
|
|
|
|
|
|
|
|
| 86 |
fig.update_layout(yaxis_title="Estimated remaining pool", xaxis_title="")
|
| 87 |
st.plotly_chart(fig, use_container_width=True)
|
| 88 |
|
| 89 |
st.subheader("Scenario details")
|
| 90 |
-
st.dataframe(
|
| 91 |
|
| 92 |
st.subheader("Data quality notes")
|
| 93 |
for note in DATA_QUALITY_NOTES:
|
|
|
|
| 8 |
from src.model_pool import Criteria, estimate_pool, sensitivity_table
|
| 9 |
|
| 10 |
|
| 11 |
+
def format_count(value: int | float) -> str:
|
| 12 |
+
return f"{value:,.0f}"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def parse_count(value: str, fallback: int) -> int:
    """Parse a user-typed count such as ``"10,000,000"`` into an ``int``.

    Commas and any whitespace (spaces, tabs, non-breaking or thin spaces that
    often arrive with pasted, locale-formatted numbers) are treated as digit
    group separators and removed before conversion.

    Args:
        value: Raw text entered by the user.
        fallback: Value returned when nothing is left after removing
            separators (empty or separator-only input).

    Returns:
        The parsed integer, or ``fallback`` for blank input.

    Raises:
        ValueError: If the remaining text is not a valid integer literal.
    """
    # str.split() with no argument splits on every whitespace character, so
    # joining the pieces drops all of them, not only the ASCII space.
    cleaned = "".join(value.split()).replace(",", "")
    if not cleaned:
        return fallback
    return int(cleaned)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def title_label(value: str) -> str:
    """Turn a snake_case option key into a human-readable label.

    Underscores become spaces and ``str.title()`` capitalizes each word, so
    ``"single_or_divorced"`` is shown as ``"Single Or Divorced"``.
    """
    return value.replace("_", " ").title()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
st.set_page_config(
|
| 27 |
page_title="Partner Pool Assumption Simulator",
|
| 28 |
page_icon="S7",
|
|
|
|
| 39 |
|
| 40 |
with st.sidebar:
|
| 41 |
st.header("Scenario")
|
| 42 |
+
base_population_text = st.text_input(
|
| 43 |
"Baseline population",
|
| 44 |
+
value=format_count(BASELINE.total_reference_population),
|
| 45 |
+
help=f"Reference population before filters, formatted with commas. Demo default: {format_count(BASELINE.total_reference_population)}.",
|
| 46 |
+
)
|
| 47 |
+
try:
|
| 48 |
+
base_population = parse_count(base_population_text, BASELINE.total_reference_population)
|
| 49 |
+
except ValueError:
|
| 50 |
+
st.warning("Use digits with optional commas, for example 10,000,000.")
|
| 51 |
+
base_population = BASELINE.total_reference_population
|
| 52 |
+
if not 10_000 <= base_population <= 50_000_000:
|
| 53 |
+
st.warning("Baseline population should stay between 10,000 and 50,000,000 for this demo.")
|
| 54 |
+
base_population = max(10_000, min(base_population, 50_000_000))
|
| 55 |
+
|
| 56 |
+
target_population = st.selectbox(
|
| 57 |
+
"Target population",
|
| 58 |
+
["all_adults", "women", "men"],
|
| 59 |
+
format_func=title_label,
|
| 60 |
+
help="Applies a demo sex-share coefficient before the other filters. Women: 53%, Men: 47%, All adults: 100%.",
|
| 61 |
+
)
|
| 62 |
+
age_min, age_max = st.slider(
|
| 63 |
+
"Age range",
|
| 64 |
+
18,
|
| 65 |
+
70,
|
| 66 |
+
(28, 42),
|
| 67 |
+
help="Narrows the pool by the selected age-band overlap.",
|
| 68 |
)
|
|
|
|
| 69 |
region_scope = st.selectbox(
|
| 70 |
"Region scope",
|
| 71 |
["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
|
| 72 |
+
format_func=title_label,
|
| 73 |
+
help="Applies the selected regional scope coefficient.",
|
| 74 |
)
|
| 75 |
relationship_status = st.selectbox(
|
| 76 |
"Relationship status",
|
| 77 |
["any", "not_married", "single_or_divorced"],
|
| 78 |
+
format_func=title_label,
|
| 79 |
+
help="Demo availability proxy. Official marital status is not the same as real availability.",
|
| 80 |
+
)
|
| 81 |
+
min_height = st.slider(
|
| 82 |
+
"Minimum height, cm",
|
| 83 |
+
150,
|
| 84 |
+
205,
|
| 85 |
+
175,
|
| 86 |
+
help="Interpolates a demo height-distribution coefficient.",
|
| 87 |
)
|
|
|
|
| 88 |
income_level = st.selectbox(
|
| 89 |
"Income threshold",
|
| 90 |
["any", "above_median", "top_25", "top_10"],
|
| 91 |
+
format_func=title_label,
|
| 92 |
+
help="Applies an estimated income threshold coefficient.",
|
| 93 |
)
|
| 94 |
education_level = st.selectbox(
|
| 95 |
"Education filter",
|
| 96 |
["any", "higher_education", "graduate_plus"],
|
| 97 |
+
format_func=title_label,
|
| 98 |
+
help="Applies an estimated education-level coefficient.",
|
| 99 |
)
|
| 100 |
|
| 101 |
criteria = Criteria(
|
| 102 |
base_population=base_population,
|
| 103 |
+
target_population=target_population,
|
| 104 |
age_min=age_min,
|
| 105 |
age_max=age_max,
|
| 106 |
region_scope=region_scope,
|
|
|
|
| 114 |
steps = sensitivity_table(criteria)
|
| 115 |
|
| 116 |
# Headline metrics: the three scenario estimates side by side, comma-formatted.
col_a, col_b, col_c = st.columns(3)
col_a.metric("Conservative estimate", format_count(estimate.conservative))
col_b.metric("Central estimate", format_count(estimate.central))
col_c.metric("Optimistic estimate", format_count(estimate.optimistic))

st.subheader("What narrows the pool")
step_df = pd.DataFrame(steps)
# String-formatted copy used only for the table below; the chart keeps the
# numeric columns so Plotly can size the bars and apply its own formats.
display_df = step_df.assign(
    coefficient=step_df["coefficient"].map("{:.4f}".format),
    remaining=step_df["remaining"].map(format_count),
)
fig = px.bar(
    step_df,
    x="factor",
    y="remaining",
    text="remaining",
    # Carried along so the hover template can show the per-step coefficient.
    custom_data=["coefficient"],
    title="Remaining estimated pool after each criterion",
)
# Bar labels and hover text are configured in one call; <extra></extra>
# suppresses the secondary trace-name box in the hover tooltip.
fig.update_traces(
    texttemplate="%{text:,.0f}",
    textposition="outside",
    hovertemplate="<b>%{x}</b><br>Remaining: %{y:,.0f}<br>Coefficient: %{customdata[0]:.4f}<extra></extra>",
)
fig.update_layout(yaxis_title="Estimated remaining pool", xaxis_title="")
st.plotly_chart(fig, use_container_width=True)

st.subheader("Scenario details")
st.dataframe(display_df, use_container_width=True, hide_index=True)
|
| 144 |
|
| 145 |
st.subheader("Data quality notes")
|
| 146 |
for note in DATA_QUALITY_NOTES:
|
src/assumptions.py
CHANGED
|
@@ -27,6 +27,12 @@ REGION_FACTORS = {
|
|
| 27 |
"western_regions": 0.24,
|
| 28 |
}
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
RELATIONSHIP_STATUS_FACTORS = {
|
| 31 |
"any": 1.0,
|
| 32 |
"not_married": 0.46,
|
|
|
|
| 27 |
"western_regions": 0.24,
|
| 28 |
}
|
| 29 |
|
| 30 |
+
# Demo share of the baseline population kept for each target-population
# option: all adults 100%, women 53%, men 47%.
TARGET_POPULATION_FACTORS = {
    "all_adults": 1.0,
    "women": 0.53,
    "men": 0.47,
}
|
| 35 |
+
|
| 36 |
RELATIONSHIP_STATUS_FACTORS = {
|
| 37 |
"any": 1.0,
|
| 38 |
"not_married": 0.46,
|
src/model_pool.py
CHANGED
|
@@ -10,12 +10,14 @@ from .assumptions import (
|
|
| 10 |
INCOME_FACTORS,
|
| 11 |
REGION_FACTORS,
|
| 12 |
RELATIONSHIP_STATUS_FACTORS,
|
|
|
|
| 13 |
)
|
| 14 |
|
| 15 |
|
| 16 |
@dataclass(frozen=True)
|
| 17 |
class Criteria:
|
| 18 |
base_population: int
|
|
|
|
| 19 |
age_min: int
|
| 20 |
age_max: int
|
| 21 |
region_scope: str
|
|
@@ -69,6 +71,7 @@ def height_factor(min_height_cm: int) -> float:
|
|
| 69 |
|
| 70 |
def model_factors(criteria: Criteria) -> list[tuple[str, float]]:
|
| 71 |
return [
|
|
|
|
| 72 |
("Age range", age_factor(criteria.age_min, criteria.age_max)),
|
| 73 |
("Region scope", REGION_FACTORS[criteria.region_scope]),
|
| 74 |
("Relationship status", RELATIONSHIP_STATUS_FACTORS[criteria.relationship_status]),
|
|
|
|
| 10 |
INCOME_FACTORS,
|
| 11 |
REGION_FACTORS,
|
| 12 |
RELATIONSHIP_STATUS_FACTORS,
|
| 13 |
+
TARGET_POPULATION_FACTORS,
|
| 14 |
)
|
| 15 |
|
| 16 |
|
| 17 |
@dataclass(frozen=True)
|
| 18 |
class Criteria:
|
| 19 |
base_population: int
|
| 20 |
+
target_population: str
|
| 21 |
age_min: int
|
| 22 |
age_max: int
|
| 23 |
region_scope: str
|
|
|
|
| 71 |
|
| 72 |
def model_factors(criteria: Criteria) -> list[tuple[str, float]]:
|
| 73 |
return [
|
| 74 |
+
("Target population", TARGET_POPULATION_FACTORS[criteria.target_population]),
|
| 75 |
("Age range", age_factor(criteria.age_min, criteria.age_max)),
|
| 76 |
("Region scope", REGION_FACTORS[criteria.region_scope]),
|
| 77 |
("Relationship status", RELATIONSHIP_STATUS_FACTORS[criteria.relationship_status]),
|