TEZv committed on
Commit
ac4e07f
·
1 Parent(s): 6b77245

Format counts and add target population

Browse files
Files changed (3) hide show
  1. app.py +68 -15
  2. src/assumptions.py +6 -0
  3. src/model_pool.py +3 -0
app.py CHANGED
@@ -8,6 +8,21 @@ from src.assumptions import BASELINE, DATA_QUALITY_NOTES
8
  from src.model_pool import Criteria, estimate_pool, sensitivity_table
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  st.set_page_config(
12
  page_title="Partner Pool Assumption Simulator",
13
  page_icon="S7",
@@ -24,38 +39,68 @@ st.info(
24
 
25
  with st.sidebar:
26
  st.header("Scenario")
27
- base_population = st.number_input(
28
  "Baseline population",
29
- min_value=10_000,
30
- max_value=50_000_000,
31
- value=BASELINE.total_reference_population,
32
- step=50_000,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  )
34
- age_min, age_max = st.slider("Age range", 18, 70, (28, 42))
35
  region_scope = st.selectbox(
36
  "Region scope",
37
  ["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
38
- format_func=lambda value: value.replace("_", " ").title(),
 
39
  )
40
  relationship_status = st.selectbox(
41
  "Relationship status",
42
  ["any", "not_married", "single_or_divorced"],
43
- format_func=lambda value: value.replace("_", " ").title(),
 
 
 
 
 
 
 
 
44
  )
45
- min_height = st.slider("Minimum height, cm", 150, 205, 175)
46
  income_level = st.selectbox(
47
  "Income threshold",
48
  ["any", "above_median", "top_25", "top_10"],
49
- format_func=lambda value: value.replace("_", " ").title(),
 
50
  )
51
  education_level = st.selectbox(
52
  "Education filter",
53
  ["any", "higher_education", "graduate_plus"],
54
- format_func=lambda value: value.replace("_", " ").title(),
 
55
  )
56
 
57
  criteria = Criteria(
58
  base_population=base_population,
 
59
  age_min=age_min,
60
  age_max=age_max,
61
  region_scope=region_scope,
@@ -69,25 +114,33 @@ estimate = estimate_pool(criteria)
69
  steps = sensitivity_table(criteria)
70
 
71
  col_a, col_b, col_c = st.columns(3)
72
- col_a.metric("Conservative estimate", f"{estimate.conservative:,.0f}")
73
- col_b.metric("Central estimate", f"{estimate.central:,.0f}")
74
- col_c.metric("Optimistic estimate", f"{estimate.optimistic:,.0f}")
75
 
76
  st.subheader("What narrows the pool")
77
  step_df = pd.DataFrame(steps)
 
 
 
 
78
  fig = px.bar(
79
  step_df,
80
  x="factor",
81
  y="remaining",
82
  text="remaining",
 
83
  title="Remaining estimated pool after each criterion",
84
  )
85
  fig.update_traces(texttemplate="%{text:,.0f}", textposition="outside")
 
 
 
86
  fig.update_layout(yaxis_title="Estimated remaining pool", xaxis_title="")
87
  st.plotly_chart(fig, use_container_width=True)
88
 
89
  st.subheader("Scenario details")
90
- st.dataframe(step_df, use_container_width=True, hide_index=True)
91
 
92
  st.subheader("Data quality notes")
93
  for note in DATA_QUALITY_NOTES:
 
8
  from src.model_pool import Criteria, estimate_pool, sensitivity_table
9
 
10
 
11
+ def format_count(value: int | float) -> str:
12
+ return f"{value:,.0f}"
13
+
14
+
15
def parse_count(value: str, fallback: int) -> int:
    """Parse a human-formatted whole number such as ``"10,000,000"``.

    Comma separators and every kind of whitespace are ignored, including the
    non-breaking and narrow spaces that many locales and spreadsheets use as
    thousands separators.

    Args:
        value: Raw text entered by the user.
        fallback: Value returned when ``value`` is empty after cleaning.

    Returns:
        The parsed integer, or ``fallback`` for blank input.

    Raises:
        ValueError: If the cleaned text is not a valid integer literal.
    """
    # str.split() with no argument splits on all Unicode whitespace, so
    # rejoining the pieces also removes NBSP / thin-space separators that a
    # plain replace(" ", "") leaves behind (and that make int() fail).
    cleaned = "".join(value.split()).replace(",", "")
    if not cleaned:
        return fallback
    return int(cleaned)
20
+
21
+
22
def title_label(value: str) -> str:
    """Turn a snake_case option key into a title-cased display label."""
    words = value.split("_")
    # Rejoining on a single space mirrors replacing each underscore with a space.
    return " ".join(words).title()
24
+
25
+
26
  st.set_page_config(
27
  page_title="Partner Pool Assumption Simulator",
28
  page_icon="S7",
 
39
 
40
  with st.sidebar:
41
  st.header("Scenario")
42
+ base_population_text = st.text_input(
43
  "Baseline population",
44
+ value=format_count(BASELINE.total_reference_population),
45
+ help=f"Reference population before filters, formatted with commas. Demo default: {format_count(BASELINE.total_reference_population)}.",
46
+ )
47
+ try:
48
+ base_population = parse_count(base_population_text, BASELINE.total_reference_population)
49
+ except ValueError:
50
+ st.warning("Use digits with optional commas, for example 10,000,000.")
51
+ base_population = BASELINE.total_reference_population
52
+ if not 10_000 <= base_population <= 50_000_000:
53
+ st.warning("Baseline population should stay between 10,000 and 50,000,000 for this demo.")
54
+ base_population = max(10_000, min(base_population, 50_000_000))
55
+
56
+ target_population = st.selectbox(
57
+ "Target population",
58
+ ["all_adults", "women", "men"],
59
+ format_func=title_label,
60
+ help="Applies a demo sex-share coefficient before the other filters. Women: 53%, Men: 47%, All adults: 100%.",
61
+ )
62
+ age_min, age_max = st.slider(
63
+ "Age range",
64
+ 18,
65
+ 70,
66
+ (28, 42),
67
+ help="Narrows the pool by the selected age-band overlap.",
68
  )
 
69
  region_scope = st.selectbox(
70
  "Region scope",
71
  ["all_ukraine", "large_cities", "kyiv_region", "western_regions"],
72
+ format_func=title_label,
73
+ help="Applies the selected regional scope coefficient.",
74
  )
75
  relationship_status = st.selectbox(
76
  "Relationship status",
77
  ["any", "not_married", "single_or_divorced"],
78
+ format_func=title_label,
79
+ help="Demo availability proxy. Official marital status is not the same as real availability.",
80
+ )
81
+ min_height = st.slider(
82
+ "Minimum height, cm",
83
+ 150,
84
+ 205,
85
+ 175,
86
+ help="Interpolates a demo height-distribution coefficient.",
87
  )
 
88
  income_level = st.selectbox(
89
  "Income threshold",
90
  ["any", "above_median", "top_25", "top_10"],
91
+ format_func=title_label,
92
+ help="Applies an estimated income threshold coefficient.",
93
  )
94
  education_level = st.selectbox(
95
  "Education filter",
96
  ["any", "higher_education", "graduate_plus"],
97
+ format_func=title_label,
98
+ help="Applies an estimated education-level coefficient.",
99
  )
100
 
101
  criteria = Criteria(
102
  base_population=base_population,
103
+ target_population=target_population,
104
  age_min=age_min,
105
  age_max=age_max,
106
  region_scope=region_scope,
 
114
  steps = sensitivity_table(criteria)
115
 
116
  col_a, col_b, col_c = st.columns(3)
117
+ col_a.metric("Conservative estimate", format_count(estimate.conservative))
118
+ col_b.metric("Central estimate", format_count(estimate.central))
119
+ col_c.metric("Optimistic estimate", format_count(estimate.optimistic))
120
 
121
  st.subheader("What narrows the pool")
122
  step_df = pd.DataFrame(steps)
123
+ display_df = step_df.assign(
124
+ coefficient=step_df["coefficient"].map("{:.4f}".format),
125
+ remaining=step_df["remaining"].map(format_count),
126
+ )
127
  fig = px.bar(
128
  step_df,
129
  x="factor",
130
  y="remaining",
131
  text="remaining",
132
+ custom_data=["coefficient"],
133
  title="Remaining estimated pool after each criterion",
134
  )
135
  fig.update_traces(texttemplate="%{text:,.0f}", textposition="outside")
136
+ fig.update_traces(
137
+ hovertemplate="<b>%{x}</b><br>Remaining: %{y:,.0f}<br>Coefficient: %{customdata[0]:.4f}<extra></extra>"
138
+ )
139
  fig.update_layout(yaxis_title="Estimated remaining pool", xaxis_title="")
140
  st.plotly_chart(fig, use_container_width=True)
141
 
142
  st.subheader("Scenario details")
143
+ st.dataframe(display_df, use_container_width=True, hide_index=True)
144
 
145
  st.subheader("Data quality notes")
146
  for note in DATA_QUALITY_NOTES:
src/assumptions.py CHANGED
@@ -27,6 +27,12 @@ REGION_FACTORS = {
27
  "western_regions": 0.24,
28
  }
29
 
 
 
 
 
 
 
30
  RELATIONSHIP_STATUS_FACTORS = {
31
  "any": 1.0,
32
  "not_married": 0.46,
 
27
  "western_regions": 0.24,
28
  }
29
 
30
+ TARGET_POPULATION_FACTORS = {
31
+ "all_adults": 1.0,
32
+ "women": 0.53,
33
+ "men": 0.47,
34
+ }
35
+
36
  RELATIONSHIP_STATUS_FACTORS = {
37
  "any": 1.0,
38
  "not_married": 0.46,
src/model_pool.py CHANGED
@@ -10,12 +10,14 @@ from .assumptions import (
10
  INCOME_FACTORS,
11
  REGION_FACTORS,
12
  RELATIONSHIP_STATUS_FACTORS,
 
13
  )
14
 
15
 
16
  @dataclass(frozen=True)
17
  class Criteria:
18
  base_population: int
 
19
  age_min: int
20
  age_max: int
21
  region_scope: str
@@ -69,6 +71,7 @@ def height_factor(min_height_cm: int) -> float:
69
 
70
  def model_factors(criteria: Criteria) -> list[tuple[str, float]]:
71
  return [
 
72
  ("Age range", age_factor(criteria.age_min, criteria.age_max)),
73
  ("Region scope", REGION_FACTORS[criteria.region_scope]),
74
  ("Relationship status", RELATIONSHIP_STATUS_FACTORS[criteria.relationship_status]),
 
10
  INCOME_FACTORS,
11
  REGION_FACTORS,
12
  RELATIONSHIP_STATUS_FACTORS,
13
+ TARGET_POPULATION_FACTORS,
14
  )
15
 
16
 
17
  @dataclass(frozen=True)
18
  class Criteria:
19
  base_population: int
20
+ target_population: str
21
  age_min: int
22
  age_max: int
23
  region_scope: str
 
71
 
72
  def model_factors(criteria: Criteria) -> list[tuple[str, float]]:
73
  return [
74
+ ("Target population", TARGET_POPULATION_FACTORS[criteria.target_population]),
75
  ("Age range", age_factor(criteria.age_min, criteria.age_max)),
76
  ("Region scope", REGION_FACTORS[criteria.region_scope]),
77
  ("Relationship status", RELATIONSHIP_STATUS_FACTORS[criteria.relationship_status]),