PranavSharma commited on
Commit
5d5de4c
·
verified ·
1 Parent(s): dc96c2e

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ *.egg-info/
7
+ dist/
8
+ build/
9
+ .venv/
10
+ venv/
11
+ .env
12
+
13
+ # Streamlit
14
+ .streamlit/
15
+
16
+ # Data (keep repo clean)
17
+ data/**/*.csv
18
+ data/**/*.xlsx
19
+ data/**/*.xls
20
+ data/**/*.parquet
21
+ data/**/*.json
22
+
23
+ # OS
24
+ .DS_Store
25
+ Thumbs.db
app.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import streamlit as st
4
+ import plotly.graph_objects as go
5
+ from pathlib import Path
6
+
7
+ from pricing_engine.data import generate_synthetic_sku
8
+ from pricing_engine.data_uci import make_sku_week_panel, eligible_skus
9
+ from pricing_engine.core import (
10
+ estimate_loglog_elasticity,
11
+ bootstrap_optimal_price,
12
+ robust_optimal_price,
13
+ profit_distribution_at_price,
14
+ )
15
+
16
+ # ============================
17
+ # Config
18
+ # ============================
19
+ RAW_UCI_PATH = Path("data/Online Retail.xlsx")
20
+ PANEL_PARQUET_PATH = Path("data/processed/uci_sku_week.parquet")
21
+
22
+ DEFAULT_BOOT_SEED = 42
23
+ DEFAULT_N_BOOT = 300
24
+
25
+ DEFAULT_MAX_MOVE_FRAC = 0.10 # ±10% around median price
26
+ DEFAULT_LEVERAGE_TH = 0.05 # <5% movement => HOLD
27
+
28
+ # Absolute downside risk cap as a fraction of baseline median profit (prevents ratio blow-ups)
29
+ DEFAULT_ABS_DOWNSIDE_CAP_FRAC = 0.25 # 25% of baseline median profit (tune via advanced)
30
+
31
+ # ============================
32
+ # Data utilities
33
+ # ============================
34
@st.cache_data(show_spinner=False)
def load_uci_panel() -> pd.DataFrame:
    """Load the UCI SKU-week panel, preferring the processed parquet.

    If the parquet cache is absent, rebuild the panel from the raw Excel
    export and persist it so later runs skip the slow Excel parse.

    Returns:
        The SKU-week panel DataFrame.

    Raises:
        FileNotFoundError: if neither the parquet cache nor the raw
            Excel file exists on disk.
    """
    if PANEL_PARQUET_PATH.exists():
        return pd.read_parquet(PANEL_PARQUET_PATH)

    if not RAW_UCI_PATH.exists():
        raise FileNotFoundError(
            f"Missing both parquet and raw file.\n"
            f"Expected parquet: {PANEL_PARQUET_PATH}\n"
            f"Fallback raw: {RAW_UCI_PATH}"
        )

    raw_transactions = pd.read_excel(RAW_UCI_PATH)
    weekly_panel = make_sku_week_panel(raw_transactions)

    # Persist the processed panel so subsequent loads hit the fast path.
    PANEL_PARQUET_PATH.parent.mkdir(parents=True, exist_ok=True)
    weekly_panel.to_parquet(PANEL_PARQUET_PATH, index=False)
    return weekly_panel
52
+
53
+
54
def standardize_engine_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a two-column copy with the canonical names 'price' and 'qty'.

    The quantity column may arrive as either 'qty' or 'demand';
    'qty' wins when both are present.

    Raises:
        KeyError: if 'price' is missing, or neither quantity column exists.
    """
    if "price" not in df.columns:
        raise KeyError("df must contain 'price'.")
    for candidate in ("qty", "demand"):
        if candidate in df.columns:
            return df[["price", candidate]].copy().rename(columns={candidate: "qty"})
    raise KeyError("df must contain 'qty' or 'demand'.")
64
+
65
+
66
+ def _week_col(panel: pd.DataFrame) -> str:
67
+ if "week" in panel.columns:
68
+ return "week"
69
+ if "Week" in panel.columns:
70
+ return "Week"
71
+ raise KeyError("panel must contain 'week' or 'Week'.")
72
+
73
+ # ============================
74
+ # Decision engine
75
+ # ============================
76
def compute_bundle(
    df_engine: pd.DataFrame,
    cost: float,
    n_boot: int,
    risk_lambda: float,
    downside_q: float,
    seed: int = DEFAULT_BOOT_SEED,
    n_grid: int = 250,
    max_move_frac: float = DEFAULT_MAX_MOVE_FRAC,
    leverage_threshold: float = DEFAULT_LEVERAGE_TH,
    # relative cap
    max_downside_frac: float = 0.05,
    # absolute cap (fraction of baseline median profit)
    abs_downside_cap_frac: float = DEFAULT_ABS_DOWNSIDE_CAP_FRAC,
) -> dict:
    """Run the full pricing decision pipeline for one SKU.

    Fits a log-log demand model, bootstraps parameter uncertainty, evaluates
    profit distributions over a constrained price grid, and applies the
    governance rules (feasibility, leverage, downside caps) to produce an
    OPTIMIZE / HOLD / NO-GO decision.

    Args:
        df_engine: frame with 'price' and 'qty' columns (one row per period).
        cost: unit cost used in the profit computation.
        n_boot: number of bootstrap draws.
        risk_lambda: risk-aversion weight; 0.0 means risk-neutral.
        downside_q: lower quantile used for downside profit (e.g. 0.05).
        seed: bootstrap RNG seed.
        n_grid: number of candidate prices on the grid.
        max_move_frac: max allowed fractional move around the median price.
        leverage_threshold: below this profit-leverage fraction -> HOLD.
        max_downside_frac: relative downside cap (vs baseline median profit).
        abs_downside_cap_frac: absolute downside cap as a fraction of the
            baseline median profit magnitude.

    Returns:
        dict with the price grid, profit bands, naive/robust/baseline/
        recommended solutions, bootstrap elasticity summary, and a 'kpis'
        sub-dict carrying the decision, tone, rationale and diagnostics.
    """
    # Point estimate exists but will not be shown (bootstrap summary will be shown instead)
    _a_hat, b_hat = estimate_loglog_elasticity(df_engine)

    boot = bootstrap_optimal_price(df_engine, cost=float(cost), n_boot=int(n_boot), seed=int(seed))
    params = boot[["intercept", "elasticity"]]

    # price grid around median within allowed move window and observed min/max
    # (median lies inside [min, max], so p_min <= p_center <= p_max always holds)
    p_center = float(df_engine["price"].median())
    p_min = max(float(df_engine["price"].min()), p_center * (1 - max_move_frac))
    p_max = min(float(df_engine["price"].max()), p_center * (1 + max_move_frac))
    p_grid = np.linspace(p_min, p_max, int(n_grid)).astype(float)

    def solve(lmbda: float) -> dict:
        # NOTE(review): semantics of robust_optimal_price /
        # profit_distribution_at_price live in pricing_engine.core —
        # assumed here to return {'price': ...} and a stats dict with
        # 'median_profit', 'q_down_profit', 'downside_risk'; confirm there.
        sol = robust_optimal_price(
            boot_params=params,
            cost=float(cost),
            price_grid=p_grid,
            risk_lambda=float(lmbda),
            downside_quantile=float(downside_q),
        )
        price = float(sol["price"])
        stats = profit_distribution_at_price(params, float(cost), price, q=float(downside_q))
        return {"price": price, "stats": stats}

    # lambda=0 reproduces the risk-neutral optimum, so reuse it when possible
    naive = solve(0.0)
    rob = naive if risk_lambda == 0.0 else solve(risk_lambda)

    # profit bands across grid (for proof plot)
    # Each bootstrap draw implies demand q = A * p^beta; profit_mat is
    # (n_boot, n_grid) of per-draw profit at each candidate price.
    A = np.exp(params["intercept"].values)
    beta = params["elasticity"].values
    profit_mat = (p_grid[None, :] - float(cost)) * (A[:, None] * (p_grid[None, :] ** beta[:, None]))
    med_profit = np.median(profit_mat, axis=0)
    q_low = np.quantile(profit_mat, float(downside_q), axis=0)
    q_high = np.quantile(profit_mat, 1.0 - float(downside_q), axis=0)

    # Feasibility: existence of at least one price with positive median AND positive downside
    feasible_mask = (med_profit > 0) & (q_low > 0)
    any_feasible = bool(np.any(feasible_mask))

    # Leverage: how much median profit moves across the grid (scale-safe)
    profit_range = float(np.max(med_profit) - np.min(med_profit))
    profit_scale = float(max(np.max(np.abs(med_profit)), 1e-9))
    leverage_frac = profit_range / profit_scale

    # Baseline = status quo (median observed price)
    baseline_price = p_center
    baseline_stats = profit_distribution_at_price(params, float(cost), float(baseline_price), q=float(downside_q))
    baseline_med = float(baseline_stats["median_profit"])

    # Downside definitions: relative + absolute
    downside_risk = float(rob["stats"]["downside_risk"])
    # Relative downside normalized by baseline (not recommended) to avoid blow-ups near 0
    downside_fraction = float(downside_risk / max(abs(baseline_med), 1e-9))
    # Absolute downside cap tied to baseline median profit magnitude (>=0)
    abs_downside_cap = float(abs_downside_cap_frac * max(abs(baseline_med), 0.0))

    # Decide — rules are evaluated in strict priority order:
    # feasibility > risk-neutral shortcut > leverage > downside caps.
    if not any_feasible:
        decision = "NO-GO"
        rec_price, rec_stats = rob["price"], rob["stats"]
        rationale = "No feasible price yields positive median and positive downside profit (q-down) under uncertainty. Do not deploy."
        tone = "error"

    elif risk_lambda == 0.0:
        decision = "OPTIMIZE"
        rec_price, rec_stats = naive["price"], naive["stats"]
        rationale = "Risk-neutral optimization. Deploy profit-maximizing price."
        tone = "success"

    elif leverage_frac < leverage_threshold:
        decision = "HOLD"
        rec_price, rec_stats = baseline_price, baseline_stats
        rationale = "Profit is positive but not sensitive to price. Deploy baseline (status quo)."
        tone = "warning"

    elif (downside_fraction > max_downside_frac) or (downside_risk > abs_downside_cap):
        decision = "HOLD"
        rec_price, rec_stats = baseline_price, baseline_stats
        rationale = "Downside variability is too high for a price change (relative/absolute cap). Deploy baseline (status quo)."
        tone = "warning"

    else:
        decision = "OPTIMIZE"
        rec_price, rec_stats = rob["price"], rob["stats"]
        rationale = "Price materially affects outcomes with acceptable downside risk. Deploy robust price."
        tone = "success"

    rec_med = float(rec_stats["median_profit"])
    naive_med = float(naive["stats"]["median_profit"])
    # Percentage uplift (or sacrifice) of the recommended price vs the
    # risk-neutral optimum; denominator guarded against zero.
    med_delta_pct = 100.0 * (rec_med - naive_med) / max(abs(naive_med), 1e-9)

    # Bootstrap elasticity summary (use this in UI)
    beta_med = float(np.median(beta))
    beta_p10, beta_p90 = (float(x) for x in np.quantile(beta, [0.10, 0.90]))

    return {
        "b_hat": float(b_hat),  # kept for reference; UI should prefer bootstrap summary
        "beta_boot": {"median": beta_med, "p10": beta_p10, "p90": beta_p90},
        "grid": p_grid,
        "bands": {"median": med_profit, "q_low": q_low, "q_high": q_high},
        "rob": rob,
        "naive": naive,
        "baseline": {"price": float(baseline_price), "stats": baseline_stats},
        "recommended": {"price": float(rec_price), "stats": rec_stats},
        "kpis": {
            "decision": decision,
            "tone": tone,
            "rationale": rationale,
            "downside_risk": float(downside_risk),
            "downside_fraction": float(downside_fraction),
            "max_downside_frac": float(max_downside_frac),
            "abs_downside_cap": float(abs_downside_cap),
            "abs_downside_cap_frac": float(abs_downside_cap_frac),
            "leverage_frac": float(leverage_frac),
            "median_profit": float(rec_stats["median_profit"]),
            "q_down_profit": float(rec_stats["q_down_profit"]),
            "median_delta_pct": float(med_delta_pct),
        },
    }
210
+
211
+
212
def render_plot(bundle: dict, downside_q: float) -> go.Figure:
    """Build the profit-vs-price proof chart for a decision bundle.

    Draws the median profit curve with its downside/upside quantile bands,
    then marks the recommended (deploy) price in green and the naive
    risk-neutral optimum in dashed gray.
    """
    price_grid = bundle["grid"]
    bands = bundle["bands"]
    deploy_price = bundle["recommended"]["price"]
    naive_opt_price = bundle["naive"]["price"]

    fig = go.Figure()
    # Band traces: median first so it leads the legend.
    fig.add_trace(go.Scatter(x=price_grid, y=bands["median"], mode="lines", name="Median profit"))
    fig.add_trace(go.Scatter(x=price_grid, y=bands["q_low"], mode="lines", name=f"Downside (q{int(downside_q*100)})"))
    fig.add_trace(go.Scatter(x=price_grid, y=bands["q_high"], mode="lines", name=f"Upside (q{int((1-downside_q)*100)})"))

    # Recommended price marker, annotated near the top of the median band.
    fig.add_vline(x=deploy_price, line=dict(color="green", width=3))
    fig.add_annotation(
        x=deploy_price,
        y=float(np.max(bands["median"])),
        text=f"Deploy €{deploy_price:.2f}",
        showarrow=True,
        arrowhead=2,
        ax=40,
        ay=-40,
        font=dict(color="green"),
    )

    # Naive optimum marker, annotated near the bottom of the median band.
    fig.add_vline(x=naive_opt_price, line=dict(color="gray", width=2, dash="dash"))
    fig.add_annotation(
        x=naive_opt_price,
        y=float(np.min(bands["median"])),
        text=f"Naïve-opt €{naive_opt_price:.2f}",
        showarrow=True,
        arrowhead=2,
        ax=-40,
        ay=40,
        font=dict(color="gray"),
    )

    fig.update_layout(
        xaxis_title="Price",
        yaxis_title="Profit",
        hovermode="x unified",
        margin=dict(l=10, r=10, t=10, b=10),
    )
    return fig
254
+
255
+
256
+ # ============================
257
+ # UI
258
+ # ============================
259
# Page chrome: wide layout, single-purpose title and tagline.
st.set_page_config(page_title="Pricing Decision — Robust Price", layout="wide")
st.title("Pricing Decision")
st.caption("One output: deploy price. One proof: profit vs price under uncertainty.")

# Attempt to load UCI panel (optional)
panel_uci = None
try:
    panel_uci = load_uci_panel()
except Exception:
    # Best-effort: any load failure simply disables the UCI mode below.
    panel_uci = None

# If parquet isn't available, hide UCI mode entirely (public-repo safe)
st.sidebar.header("1) Choose data")
dataset_choices = ["Synthetic"] if panel_uci is None else ["Synthetic", "UCI Online Retail"]
dataset_mode = st.sidebar.radio("Dataset", dataset_choices, index=0)

st.sidebar.header("2) Economics")
if dataset_mode == "Synthetic":
    cost = st.sidebar.slider("Unit cost", 0.0, 20.0, 4.0, 0.1)
else:
    # UCI mode: cost is derived later from the selected SKU's median price.
    cost_frac = st.sidebar.slider("Cost as % of current median price", 0.30, 0.80, 0.50, 0.05)

st.sidebar.header("3) Risk appetite")
risk_profile = st.sidebar.radio("Risk appetite", ["Risk-neutral", "Risk-averse"], index=0)

# Per-profile governance parameters: risk weight, downside quantile,
# and the relative/absolute downside caps fed into compute_bundle.
profile_map = {
    "Risk-neutral": {"lambda": 0.0, "q": 0.20, "max_downside_frac": 0.20, "abs_cap_frac": 0.50},
    "Risk-averse": {"lambda": 1.5, "q": 0.05, "max_downside_frac": 0.03, "abs_cap_frac": 0.20},
}
risk_lambda = float(profile_map[risk_profile]["lambda"])
downside_q = float(profile_map[risk_profile]["q"])
max_downside_frac = float(profile_map[risk_profile]["max_downside_frac"])
abs_cap_frac = float(profile_map[risk_profile]["abs_cap_frac"])

show_advanced = st.sidebar.checkbox("Show advanced controls", value=False)
if show_advanced:
    n_boot = st.sidebar.slider("Bootstrap draws", 100, 800, DEFAULT_N_BOOT, 50)
    max_move_frac = st.sidebar.slider("Allowed price move (±)", 0.05, 0.25, DEFAULT_MAX_MOVE_FRAC, 0.01)
    leverage_th = st.sidebar.slider("Leverage threshold", 0.01, 0.15, DEFAULT_LEVERAGE_TH, 0.01)
    # NOTE(review): this slider value is never used — both compute_bundle
    # calls below pass profile_map[...]["abs_cap_frac"] instead, so the
    # advanced cap control has no effect. Confirm intent.
    abs_cap_frac = st.sidebar.slider("Abs downside cap (% of baseline median)", 0.05, 1.00, abs_cap_frac, 0.05)
else:
    n_boot = DEFAULT_N_BOOT
    max_move_frac = DEFAULT_MAX_MOVE_FRAC
    leverage_th = DEFAULT_LEVERAGE_TH

# Build df_engine
if dataset_mode == "Synthetic":
    st.sidebar.header("4) Synthetic (optional)")
    seed = int(st.sidebar.number_input("Seed", min_value=0, value=42, step=1))
    noise_std = st.sidebar.slider("Demand noise", 0.01, 0.40, 0.15, 0.01)
    true_elasticity = st.sidebar.slider("True elasticity", -3.0, -0.2, -1.5, 0.1)

    df_raw = generate_synthetic_sku(elasticity=true_elasticity, noise_std=noise_std, seed=seed)
    df_engine = standardize_engine_df(df_raw)
    source_label = "Synthetic"
    time_unit = "per period (synthetic)"
else:
    st.sidebar.header("4) Pick SKU")
    # Governance thresholds for which SKUs are statistically usable.
    min_weeks = st.sidebar.slider("Min weeks", 10, 52, 26, 1)
    min_price_points = st.sidebar.slider("Min distinct prices", 3, 20, 8, 1)
    min_total_qty = st.sidebar.slider("Min total qty", 50, 2000, 200, 50)

    skus = eligible_skus(panel_uci, min_weeks=min_weeks, min_price_points=min_price_points, min_total_qty=min_total_qty)
    if not skus:
        st.error("No SKUs pass governance thresholds. Lower thresholds or use Synthetic.")
        st.stop()

    sku_selected = st.sidebar.selectbox("SKU (StockCode)", skus, index=0)
    wk = _week_col(panel_uci)
    sku_panel = panel_uci.loc[panel_uci["StockCode"] == sku_selected].sort_values(wk)

    df_engine = standardize_engine_df(sku_panel[["avg_price", "qty"]].rename(columns={"avg_price": "price"}))
    median_price = float(df_engine["price"].median())
    cost = float(cost_frac * median_price)

    seed = DEFAULT_BOOT_SEED
    source_label = f"UCI — {sku_selected}"
    time_unit = "per week"

# ============================
# Risk sensitivity check (compute both profiles every run)
# ============================
bundle_rn = compute_bundle(
    df_engine=df_engine,
    cost=float(cost),
    n_boot=int(n_boot),
    risk_lambda=float(profile_map["Risk-neutral"]["lambda"]),
    downside_q=float(profile_map["Risk-neutral"]["q"]),
    seed=int(seed),
    max_move_frac=float(max_move_frac),
    leverage_threshold=float(leverage_th),
    max_downside_frac=float(profile_map["Risk-neutral"]["max_downside_frac"]),
    abs_downside_cap_frac=float(profile_map["Risk-neutral"]["abs_cap_frac"]),
)

bundle_ra = compute_bundle(
    df_engine=df_engine,
    cost=float(cost),
    n_boot=int(n_boot),
    risk_lambda=float(profile_map["Risk-averse"]["lambda"]),
    downside_q=float(profile_map["Risk-averse"]["q"]),
    seed=int(seed),
    max_move_frac=float(max_move_frac),
    leverage_threshold=float(leverage_th),
    max_downside_frac=float(profile_map["Risk-averse"]["max_downside_frac"]),
    abs_downside_cap_frac=float(profile_map["Risk-averse"]["abs_cap_frac"]),
)

# The run is "risk sensitive" when the two profiles disagree on either the
# recommended price (beyond a float tolerance) or the decision state.
PRICE_EPS = 1e-6
same_price = abs(bundle_rn["recommended"]["price"] - bundle_ra["recommended"]["price"]) <= PRICE_EPS
same_decision = bundle_rn["kpis"]["decision"] == bundle_ra["kpis"]["decision"]
risk_sensitive = (not same_price) or (not same_decision)

bundle = bundle_rn if risk_profile == "Risk-neutral" else bundle_ra

k = bundle["kpis"]
rec_price = bundle["recommended"]["price"]
naive_price = bundle["naive"]["price"]

# Risk message
if risk_sensitive:
    st.warning(
        f"Risk appetite changes the recommendation: "
        f"Risk-neutral {bundle_rn['kpis']['decision']} @ €{bundle_rn['recommended']['price']:.2f} | "
        f"Risk-averse {bundle_ra['kpis']['decision']} @ €{bundle_ra['recommended']['price']:.2f}"
    )
else:
    st.info("Risk appetite does not change the recommendation for this dataset/SKU.")

# Time-unit clarification (pre-empts “model units” criticism)
st.caption(f"Source: {source_label} | Risk profile: {risk_profile} | Profit shown: **{time_unit}**")

# Decision banner
left, right = st.columns([2, 1], vertical_alignment="center")
with left:
    st.markdown(f"### Deploy **€{rec_price:.2f}**")
    st.write(k["rationale"])
with right:
    st.metric("Decision", k["decision"])

# KPI tiles
beta_boot = bundle["beta_boot"]
c1, c2, c3, c4 = st.columns(4)
c1.metric("Elasticity (bootstrap median)", f"{beta_boot['median']:.3f}")
c2.metric("Median profit", f"{k['median_profit']:.2f}")
c3.metric(f"Downside profit (q{int(downside_q*100)})", f"{k['q_down_profit']:.2f}")
c4.metric("Median vs Naïve-opt", f"{k['median_delta_pct']:+.1f}%")

st.caption(f"Elasticity uncertainty: p10={beta_boot['p10']:.3f}, p90={beta_boot['p90']:.3f}")

# HOLD explanation — mirrors the branch order inside compute_bundle:
# low leverage is reported first, otherwise the downside caps triggered.
if k["decision"] == "HOLD":
    if k["leverage_frac"] < leverage_th:
        st.caption(
            f"HOLD triggered: low leverage {k['leverage_frac']*100:.1f}% "
            f"< threshold {leverage_th*100:.1f}% (price barely changes profit)."
        )
    else:
        st.caption(
            f"HOLD triggered: downside risk too high. "
            f"Relative={k['downside_fraction']*100:.1f}% (cap {k['max_downside_frac']*100:.1f}%), "
            f"Absolute={k['downside_risk']:.2f} (cap {k['abs_downside_cap']:.2f})."
        )

# Tone message
if k["tone"] == "success":
    st.success(f"DECISION: {k['decision']} — deploy €{rec_price:.2f}")
elif k["tone"] == "warning":
    st.warning(f"DECISION: {k['decision']} — deploy baseline €{rec_price:.2f}")
else:
    st.error(f"DECISION: {k['decision']} — no deployment recommended")

# Proof chart
st.subheader("Proof: profit under uncertainty across feasible prices")
st.caption(
    "This is an observational pricing decision demo (not a causal price elasticity estimate)."
)

fig = render_plot(bundle, downside_q=downside_q)
st.plotly_chart(fig, use_container_width=True)

# Decision card
baseline_price = bundle["baseline"]["price"]
baseline_med = bundle["baseline"]["stats"]["median_profit"]
baseline_q = bundle["baseline"]["stats"]["q_down_profit"]

st.subheader("Decision Card (copy/paste)")
st.code(
    f"""Decision: Deploy price €{rec_price:.2f}

Source: {source_label}
Risk profile: {risk_profile}
Profit unit: {time_unit}

Why:
- Elasticity (bootstrap median): {beta_boot['median']:.3f} (p10 {beta_boot['p10']:.3f}, p90 {beta_boot['p90']:.3f})
- Median profit (recommended): {k['median_profit']:.2f}
- Downside profit (q{int(downside_q*100)}): {k['q_down_profit']:.2f}
- Downside risk (absolute): {k['downside_risk']:.2f} (cap {k['abs_downside_cap']:.2f} = {k['abs_downside_cap_frac']*100:.0f}% of baseline median)
- Downside fraction (vs baseline median): {k['downside_fraction']*100:.1f}% (cap {k['max_downside_frac']*100:.1f}%)
- Leverage: {k['leverage_frac']*100:.1f}%

Baseline (current median price): €{baseline_price:.2f}
- Baseline median profit: {baseline_med:.2f}
- Baseline downside profit (q{int(downside_q*100)}): {baseline_q:.2f}

Naïve-opt (risk-neutral optimizer): €{naive_price:.2f}
Rationale: {k['rationale']}
""",
    language="text",
)

with st.expander("Data preview"):
    st.dataframe(df_engine.head(30), use_container_width=True)
data/processed/uci_sku_week.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f0258efb9e8373fae8f237f1902854aa8e4ed02e63b57a7c5c8024672868b25
3
+ size 908235
docs/Appendix.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # **Appendix — Methodological Notes**
2
+
3
+ ---
4
+
5
+ ## **Why Elasticity Is Observational**
6
+
7
+ Retail prices are not randomized.
8
+ Observed price–quantity relationships reflect correlation, not causal response.
9
+
10
+ This system does not attempt causal identification.
11
+ It focuses on robust decision-making given observed behavior.
12
+
13
+ ---
14
+
15
+ ## **Why Bootstrap Is Used**
16
+
17
+ Closed-form uncertainty assumptions are fragile in pricing contexts.
18
+
19
+ Bootstrap resampling:
20
+
21
+ * captures parameter uncertainty
22
+ * avoids distributional assumptions
23
+ * supports downside-aware evaluation
24
+
25
+ ---
26
+
27
+ ## **Why No Machine Learning Models Are Used**
28
+
29
+ The pricing decision is low-dimensional.
30
+
31
+ Additional model complexity:
32
+
33
+ * increases opacity
34
+ * complicates governance
35
+ * does not improve decision quality at this stage
36
+
37
+ ML pricing belongs to later integration phases.
38
+
39
+ ---
40
+
41
+ ## **Out-of-Scope Extensions**
42
+
43
+ The following are intentionally excluded:
44
+
45
+ * causal pricing experiments
46
+ * promotion-response modeling
47
+ * multi-SKU or portfolio pricing
48
+ * inventory-constrained pricing
49
+ * dynamic or reinforcement learning pricing
50
+
51
+ These extensions require additional data and governance structures.
52
+
53
+ ---
54
+
55
+ ## **Closing Note**
56
+
57
+ The system is designed to answer one question well:
58
+
59
+ > **What price can be deployed with confidence under uncertainty?**
60
+
61
+ Everything else is deliberately deferred.
62
+
63
+ ---
docs/Executive_Brief.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # **Executive Brief — Robust Pricing Decisions Under Uncertainty**
2
+
3
+ ---
4
+
5
+ ## **Why This Matters**
6
+
7
+ Pricing decisions are often made using point estimates of demand or profit.
8
+ While convenient, this approach ignores a critical reality:
9
+
10
+ * profit outcomes are uncertain
11
+ * downside risk is asymmetric
12
+ * naïve optimization frequently selects fragile prices
13
+
14
+ When a deployed price performs poorly, teams are forced into:
15
+
16
+ * reactive discounting
17
+ * margin erosion
18
+ * post-hoc justification
19
+ * loss of confidence in pricing governance
20
+
21
+ This system was designed to answer a single operational question:
22
+
23
+ > **What price should be deployed when profit is uncertain and downside risk matters?**
24
+
25
+ ---
26
+
27
+ ## **What the System Evaluates**
28
+
29
+ For each feasible price, the system evaluates:
30
+
31
+ * **Median profit** — expected outcome
32
+ * **Downside profit** — exposure to adverse scenarios
33
+ * **Governance thresholds** — leverage and risk limits
34
+
35
+ These elements are combined into a **deploy / hold / reject** decision framework.
36
+
37
+ ---
38
+
39
+ ## **The Resulting Decisions**
40
+
41
+ The system outputs one of three outcomes:
42
+
43
+ | Decision | Interpretation |
44
+ |-------|----------------|
45
+ | **OPTIMIZE** | Deploy the recommended price |
46
+ | **HOLD** | Maintain the current (baseline) price |
47
+ | **NO-GO** | No price change is viable under uncertainty |
48
+
49
+ Each decision is accompanied by a **justification card** suitable for approval or review.
50
+
51
+ ---
52
+
53
+ ## **What This Is**
54
+
55
+ * A pricing **decision-support system**
56
+ * Designed for **risk-aware governance**
57
+ * Built to produce **defensible deployment choices**
58
+
59
+ ---
60
+
61
+ ## **What This Is Not**
62
+
63
+ * Not a causal elasticity estimator
64
+ * Not a promotion optimization engine
65
+ * Not an experimentation framework
66
+
67
+ ---
68
+
69
+ ## **Leadership Takeaway**
70
+
71
+ **Robust pricing decisions outperform naïve profit maximization when uncertainty is material.**
72
+
73
+ By explicitly incorporating downside risk and governance thresholds, this system produces prices that are not only profitable, but **operationally defensible**.
74
+
75
+ ---
docs/Technical_Brief.md ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # **Pricing Decision — Technical Summary**
2
+
3
+ ---
4
+
5
+ ## **1. Purpose**
6
+
7
+ The difficulty of pricing under uncertainty is not estimating demand,
8
+ but deciding **which price can be safely deployed**.
9
+
10
+ Prices that maximize expected profit often expose unacceptable downside risk,
11
+ leading to reversals, overrides, and erosion of trust in pricing decisions.
12
+
13
+ This system addresses the technical question:
14
+
15
+ > **How should prices be selected when profit distributions—not point estimates—matter?**
16
+
17
+ ---
18
+
19
+ ## **2. Data Basis**
20
+
21
+ Two operating modes are supported:
22
+
23
+ ### **Synthetic Mode**
24
+ * controlled elasticity parameter
25
+ * additive demand noise
26
+ * known cost structure
27
+ * fully reproducible
28
+
29
+ Used to demonstrate idealized pricing behavior.
30
+
31
+ ### **Observational Retail Mode (UCI, local only)**
32
+ * transactional retail data
33
+ * time-varying prices
34
+ * non-randomized price changes
35
+ * aggregated per period
36
+
37
+ Elasticity estimates in this mode are **observational, not causal**.
38
+
39
+ ---
40
+
41
+ ## **3. Model Structure**
42
+
43
+ Demand is modeled using a log–log specification.
44
+ Parameter uncertainty is captured via bootstrap resampling.
45
+
46
+ The objective is **distributional robustness**, not causal identification.
47
+
48
+ ---
49
+
50
+ ## **4. Profit Evaluation**
51
+
52
+ For each candidate price:
53
+
54
+ * profit distributions are computed
55
+ * median profit represents expected outcome
56
+ * downside quantiles (q10 or q5) represent risk exposure
57
+
58
+ All profit values are expressed **per aggregation period**.
59
+
60
+ ---
61
+
62
+ ## **5. Decision Logic**
63
+
64
+ Candidate prices are evaluated within a constrained grid around the current median price.
65
+
66
+ Decisions follow explicit governance rules:
67
+
68
+ * **Feasibility:** at least one price must yield positive median and downside profit
69
+ * **Leverage:** price must materially affect profit
70
+ * **Risk:** downside exposure must remain within relative and absolute caps
71
+
72
+ Violations trigger HOLD or NO-GO outcomes.
73
+
74
+ ---
75
+
76
+ ## **6. Output**
77
+
78
+ The system produces:
79
+
80
+ * decision state (OPTIMIZE / HOLD / NO-GO)
81
+ * recommended deploy price
82
+ * profit distribution diagnostics
83
+ * traceable justification metrics
84
+
85
+ This design prioritizes **auditability, explainability, and governance** over model complexity.
86
+
87
+ ---
88
+
89
+ ## **Closing Position**
90
+
91
+ Pricing under uncertainty is a decision problem, not a curve-fitting exercise.
92
+
93
+ This system converts uncertain demand response into **deployable pricing actions**
94
+ that remain defensible under review, volatility, and downside exposure.
95
+
96
+ ---
pricing_engine/__init__.py ADDED
File without changes
pricing_engine/core.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pricing Decision Core — Robust Optimization under Elasticity Uncertainty
3
+
4
+ Purpose:
5
+ Select a price that maximizes risk-adjusted profit by propagating
6
+ uncertainty in demand elasticity into profit distributions and penalizing
7
+ downside fragility.
8
+
9
+ Core Assumptions:
10
+ - Demand follows a power-law response to price: q = A * p^beta
11
+ - Elasticity uncertainty is captured via bootstrap resampling
12
+ - Decisions are evaluated using median profit and downside risk
13
+
14
+ What this module DOES:
15
+ - Estimates elasticity
16
+ - Propagates uncertainty to profit
17
+ - Selects robust prices
18
+
19
+ What this module DOES NOT do:
20
+ - Forecast demand over time
21
+ - Handle multiple SKUs
22
+ - Perform MLOps or deployment
23
+ """
24
+
25
+
26
+ import numpy as np
27
+ import pandas as pd
28
+ from typing import Tuple
29
+
30
+
31
+ def _qty_col(df: pd.DataFrame) -> str:
32
+ if "qty" in df.columns:
33
+ return "qty"
34
+ if "demand" in df.columns:
35
+ return "demand"
36
+ raise KeyError("Input df must contain 'qty' or 'demand' column.")
37
+
38
+
39
def estimate_loglog_elasticity(df: pd.DataFrame) -> Tuple[float, float]:
    """Fit the log-log demand model log(q) = a + b*log(p) by OLS.

    Rows with non-positive price or quantity are dropped before the log
    transform.

    Returns:
        (a, b) where a = log(A) is the intercept and b is the elasticity.

    Raises:
        ValueError: if fewer than 3 usable observations remain.
    """
    frame = df.copy()
    quantity_col = _qty_col(frame)

    frame = frame[(frame["price"] > 0) & (frame[quantity_col] > 0)].copy()
    if len(frame) < 3:
        raise ValueError("Need at least 3 valid observations to fit elasticity.")

    log_price = np.log(frame["price"].astype(float).values)
    log_qty = np.log(frame[quantity_col].astype(float).values)

    # Design matrix [1, log(p)]; lstsq is numerically stabler than
    # forming the normal-equation inverse explicitly.
    design = np.column_stack([np.ones(len(log_price)), log_price])
    coeffs, *_ = np.linalg.lstsq(design, log_qty, rcond=None)

    return float(coeffs[0]), float(coeffs[1])
60
+
61
+
62
+
63
def profit_curve(
    prices: np.ndarray,
    intercept: float,
    elasticity: float,
    cost: float,
) -> pd.DataFrame:
    """Evaluate power-law demand and profit over a grid of prices.

    Demand model: q = A * p**elasticity with A = exp(intercept),
    since the log-log fit returns intercept = log(A).

    Returns:
        DataFrame with columns 'price', 'demand', 'profit'.
    """
    scale = np.exp(intercept)  # A recovered from log-space intercept
    qty = scale * np.power(prices, elasticity)
    margin = prices - cost
    return pd.DataFrame({"price": prices, "demand": qty, "profit": margin * qty})
85
+
86
+
87
+
88
def optimal_price(
    curve: pd.DataFrame,
) -> dict:
    """Pick the row of `curve` with maximal profit.

    Returns:
        dict with 'price', 'profit', 'demand' of the best row, as floats.
    """
    best = curve.loc[curve["profit"].idxmax()]
    return {key: float(best[key]) for key in ("price", "profit", "demand")}
102
+
103
+
104
def bootstrap_optimal_price(
    df: pd.DataFrame,
    cost: float,
    n_boot: int = 200,
    n_grid: int = 200,
    seed: int = 42,
) -> pd.DataFrame:
    """
    Quantify decision uncertainty by bootstrapping the elasticity fit.

    Each draw resamples rows (time periods) with replacement, refits the
    log-log model, and records the profit-maximizing price on a grid kept
    inside the observed price range (no extrapolation).

    Returns:
        DataFrame with one row per bootstrap draw:
        [intercept, elasticity, opt_price, opt_profit, opt_demand].

    Raises:
        ValueError: if fewer than 10 usable observations remain.
    """
    rng = np.random.default_rng(seed)

    # Drop rows that would break the log transform, then normalize the
    # quantity column name to 'qty' for downstream fits.
    qty_name = _qty_col(df)
    clean = df[(df["price"] > 0) & (df[qty_name] > 0)].copy()
    clean = clean.rename(columns={_qty_col(clean): "qty"})

    n_obs = len(clean)
    if n_obs < 10:
        raise ValueError("Need at least 10 observations for bootstrap stability.")

    # Candidate prices stay inside the observed range.
    lo, hi = float(clean["price"].min()), float(clean["price"].max())
    grid = np.linspace(lo, hi, n_grid)

    records = []
    for _ in range(n_boot):
        # Resample time periods with replacement.
        draw = clean.iloc[rng.integers(0, n_obs, size=n_obs)]
        a_hat, b_hat = estimate_loglog_elasticity(draw)
        decision = optimal_price(profit_curve(grid, a_hat, b_hat, cost))
        records.append(
            {
                "intercept": a_hat,
                "elasticity": b_hat,
                "opt_price": decision["price"],
                "opt_profit": decision["profit"],
                "opt_demand": decision["demand"],
            }
        )

    return pd.DataFrame(records)
152
+
153
+
154
def decision_stability_summary(boot: pd.DataFrame) -> dict:
    """
    Summarize how stable the bootstrap optimal-price decision is.

    Reports the median and q10/q90 quantiles of the optimal price (plus the
    q90-q10 spread) and the same quantiles for the elasticity draws.
    """
    lo, med, hi = (float(v) for v in boot["opt_price"].quantile([0.1, 0.5, 0.9]).tolist())
    elast = boot["elasticity"]

    return {
        "opt_price_median": med,
        "opt_price_q10": lo,
        "opt_price_q90": hi,
        "opt_price_spread_q90_q10": hi - lo,
        "elasticity_median": float(elast.median()),
        "elasticity_q10": float(elast.quantile(0.1)),
        "elasticity_q90": float(elast.quantile(0.9)),
    }
170
+
171
+
172
def stability_flag(summary: dict, max_spread_frac: float = 0.15) -> dict:
    """
    Classify the decision as stable when the q90-q10 optimal-price spread is
    at most max_spread_frac of the median optimal price.
    """
    # Guard against a zero/degenerate median price in the denominator.
    spread_frac = summary["opt_price_spread_q90_q10"] / max(summary["opt_price_median"], 1e-9)
    return {
        "stable": bool(spread_frac <= max_spread_frac),
        "spread_fraction_of_median": float(spread_frac),
        "threshold": float(max_spread_frac),
    }
183
+
184
+
185
def robust_optimal_price(
    boot_params: pd.DataFrame,
    cost: float,
    price_grid: np.ndarray,
    risk_lambda: float = 0.5,
    downside_quantile: float = 0.1,
) -> dict:
    """
    Select a price maximizing: median(profit) - risk_lambda * downside_risk.

    downside_risk(p) = median(profit at p) - q_downside(profit at p), where
    the profit distribution at each candidate price is taken over the
    bootstrap (intercept, elasticity) draws.

    Args:
        boot_params: DataFrame with columns ["intercept", "elasticity"].
        cost: unit cost.
        price_grid: candidate prices to score.
        risk_lambda: downside penalty weight (0 = median-only).
        downside_quantile: lower quantile used for the risk term (e.g. 0.1).

    Returns:
        dict with the chosen price plus score/profit/risk diagnostics.

    Raises:
        ValueError: if boot_params has no rows.
    """
    if boot_params.empty:
        raise ValueError("boot_params is empty.")

    scale = np.exp(boot_params["intercept"].values)  # A per draw, shape (B,)
    elast = boot_params["elasticity"].values         # shape (B,)
    grid = price_grid.astype(float)                  # shape (P,)

    # Profit matrix over (draw, price):
    #   demand[b, p] = A_b * p**beta_b ; profit[b, p] = (p - cost) * demand
    qty = scale[:, None] * np.power(grid[None, :], elast[:, None])
    profit_mat = (grid[None, :] - float(cost)) * qty

    med_profit = np.median(profit_mat, axis=0)                       # (P,)
    low_profit = np.quantile(profit_mat, downside_quantile, axis=0)  # (P,)
    risk = med_profit - low_profit
    objective = med_profit - risk_lambda * risk

    winner = int(np.argmax(objective))
    return {
        "price": float(grid[winner]),
        "score": float(objective[winner]),
        "median_profit": float(med_profit[winner]),
        "q_down_profit": float(low_profit[winner]),
        "downside_risk": float(risk[winner]),
        "risk_lambda": float(risk_lambda),
        "downside_quantile": float(downside_quantile),
    }
239
+
240
+
241
def profit_distribution_at_price(
    boot_params: pd.DataFrame,
    cost: float,
    price: float,
    q: float = 0.1,
) -> dict:
    """
    Profit distribution at one fixed price across bootstrap draws.

    Returns the median, the q and 1-q quantiles, downside risk
    (median - lower quantile) and upside spread (upper quantile - median).
    """
    p = float(price)
    qty = np.exp(boot_params["intercept"].values) * np.power(p, boot_params["elasticity"].values)
    profit = (p - float(cost)) * qty

    center = float(np.median(profit))
    lower = float(np.quantile(profit, q))
    upper = float(np.quantile(profit, 1 - q))

    return {
        "price": p,
        "median_profit": center,
        "q_down_profit": lower,
        "q_up_profit": upper,
        "downside_risk": center - lower,
        "upside_spread": upper - center,
    }
265
+
266
+
267
def decision_justification_card(
    robust_stats: dict,
    naive_stats: dict,
    decision_status: dict,
) -> dict:
    """
    Build a business-facing card comparing the robust price to the naive
    profit-maximizing price: percentage deltas plus a plain-language
    rationale keyed off the decision status.
    """
    # Denominators guarded against zero; deltas are relative to the naive plan.
    base_profit = max(abs(float(naive_stats["median_profit"])), 1e-9)
    base_risk = max(abs(float(naive_stats["downside_risk"])), 1e-9)

    profit_delta_pct = 100.0 * (robust_stats["median_profit"] - naive_stats["median_profit"]) / base_profit
    risk_gain_pct = 100.0 * (naive_stats["downside_risk"] - robust_stats["downside_risk"]) / base_risk

    if decision_status["status"] == "ROBUST":
        rationale = (
            "The selected price sacrifices negligible median profit to materially reduce downside risk "
            "across plausible demand elasticities, producing a more stable and defensible pricing decision under uncertainty."
        )
    else:
        rationale = (
            "The price decision shows excessive downside variability relative to expected payoff and should not be deployed "
            "without further constraints or additional data."
        )

    return {
        "recommended_price": round(float(robust_stats["price"]), 2),
        "naive_price": round(float(naive_stats["price"]), 2),
        "median_profit_delta_pct": round(float(profit_delta_pct), 2),
        "downside_risk_improvement_pct": round(float(risk_gain_pct), 2),
        "decision_status": decision_status["status"],
        "rationale": rationale,
    }
295
+
296
+
297
+
298
+
299
def decision_status(
    stats: dict,
    max_downside_frac: float = 0.05,
) -> dict:
    """
    Label a profit distribution ROBUST when downside risk is at most
    max_downside_frac of median profit, otherwise FRAGILE.
    """
    # Denominator guarded against a zero/degenerate median profit.
    ratio = stats["downside_risk"] / max(stats["median_profit"], 1e-9)
    verdict = "ROBUST" if ratio <= max_downside_frac else "FRAGILE"
    return {
        "status": verdict,
        "downside_fraction": round(ratio, 3),
        "threshold": max_downside_frac,
    }
310
+
311
+
312
def sensitivity_table_at_price(
    boot_params: pd.DataFrame,
    base_cost: float,
    price: float,
    q: float = 0.1,
    elasticity_scales: Tuple[float, ...] = (0.9, 1.0, 1.1),
    cost_scales: Tuple[float, ...] = (0.9, 1.0, 1.1),
) -> pd.DataFrame:
    """
    Stress-test profit at a fixed price under multiplicative perturbations.

    For every (elasticity_scale, cost_scale) pair, the elasticity draws and
    the unit cost are scaled, and the resulting profit distribution at the
    fixed price is summarized.

    Returns:
        DataFrame with one row per perturbation pair:
        [elasticity_scale, cost_scale, median_profit, q<NN>_profit, downside_risk].
    """
    scale = np.exp(boot_params["intercept"].values)
    base_beta = boot_params["elasticity"].values
    p = float(price)
    q_label = f"q{int(q * 100)}_profit"

    records = []
    for e_scale in elasticity_scales:
        # Demand per draw under the scaled elasticity (shared across costs).
        qty = scale * np.power(p, base_beta * float(e_scale))

        for c_scale in cost_scales:
            profit = (p - float(base_cost) * float(c_scale)) * qty
            center = float(np.median(profit))
            lower = float(np.quantile(profit, q))
            records.append(
                {
                    "elasticity_scale": float(e_scale),
                    "cost_scale": float(c_scale),
                    "median_profit": center,
                    q_label: lower,
                    "downside_risk": center - lower,
                }
            )
    return pd.DataFrame(records)
pricing_engine/data.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+
5
def generate_synthetic_sku(
    n_periods: int = 52,
    base_price: float = 10.0,
    base_demand: float = 120.0,
    elasticity: float = -1.5,
    noise_std: float = 0.1,
    seed: int = 42,
) -> pd.DataFrame:
    """
    Simulate one SKU's price-demand time series with known ground truth.

    Demand follows q = base_demand * price**elasticity * lognormal noise;
    prices are jittered around base_price and clipped to [0.6, 1.4] x base.

    Returns:
        DataFrame with columns [t, price, qty], one row per period.
    """
    rng = np.random.default_rng(seed)

    # Jitter prices around the base, then clip to a plausible band.
    # NOTE: draw order matters for seed reproducibility (prices first).
    jitter = rng.normal(0, 0.15, size=n_periods)
    prices = np.clip(base_price * (1 + jitter), base_price * 0.6, base_price * 1.4)

    # Multiplicative lognormal noise on the power-law demand curve.
    shocks = np.exp(rng.normal(0, noise_std, size=n_periods))
    qty = base_demand * np.power(prices, elasticity) * shocks

    return pd.DataFrame(
        {
            "t": np.arange(n_periods),
            "price": prices,
            "qty": qty,
        }
    )
pricing_engine/data_uci.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
def make_sku_week_panel(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Process raw UCI Online Retail data into SKU-week panel format.

    Cleans the transaction log (drops cancellations, non-positive
    quantities/prices, and rows with missing keys), restricts to the
    United Kingdom when a Country column is present, then aggregates to
    one row per (StockCode, Week).

    :param df: Input raw data; NOT mutated by this function.
    :type df: pd.DataFrame
    :return: Processed SKU-week panel data with columns
             [StockCode, Week, qty, price, avg_price, n_txn]
    :rtype: pd.DataFrame
    '''
    # BUG FIX: the previous version copied df into df_panel but then kept
    # assigning columns on `df` itself, mutating the caller's DataFrame
    # (InvoiceDate/StockCode dtype conversions). All work now happens on
    # a private copy.
    data = df.copy()

    data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'], errors='coerce')
    data['StockCode'] = data['StockCode'].astype(str)

    # Hard governance filters
    data = data[~data['InvoiceNo'].str.startswith('C', na=False)]  # Remove cancellations
    data = data[data['Quantity'] > 0]   # Keep only positive quantities
    data = data[data['UnitPrice'] > 0]  # Keep only positive prices
    data = data.dropna(subset=['InvoiceDate', 'StockCode', 'Quantity', 'UnitPrice'])

    # Select one country for simplicity
    if 'Country' in data.columns:
        data = data[data['Country'] == 'United Kingdom']

    # Explicit copy so the 'Week' assignment below cannot warn about
    # writing into a filtered view.
    data = data.copy()

    # Week = start of the calendar week containing the invoice date
    data['Week'] = data['InvoiceDate'].dt.to_period('W').apply(lambda r: r.start_time)

    # Aggregate to SKU-Week level:
    #   qty       - total units sold
    #   price     - unweighted mean unit price
    #   avg_price - quantity-weighted mean unit price
    #   n_txn     - number of transaction lines
    df_panel = (
        data.groupby(['StockCode', 'Week'])
        .apply(
            lambda x: pd.Series(
                {
                    'qty': x['Quantity'].sum(),
                    'price': x['UnitPrice'].mean(),
                    'avg_price': np.average(x['UnitPrice'], weights=x['Quantity']),
                    'n_txn': len(x),
                }
            )
        )
        .reset_index()
    )

    return df_panel
43
+
44
+
45
def eligible_skus(df_panel: pd.DataFrame, min_weeks: int = 26, min_price_points: int = 10, min_total_qty: int = 200) -> list[str]:
    '''
    Identify SKUs eligible for analysis based on data sufficiency criteria.

    A SKU qualifies when it has enough distinct weeks of history, enough
    distinct (quantity-weighted) price points, and enough total volume.

    :param df_panel: Processed SKU-week panel data
    :type df_panel: pd.DataFrame
    :param min_weeks: Minimum number of weeks of data required
    :type min_weeks: int
    :param min_price_points: Minimum number of distinct price points required
    :type min_price_points: int
    :param min_total_qty: Minimum total quantity required
    :type min_total_qty: int
    :return: List of eligible SKU codes
    :rtype: list[str]
    '''
    per_sku = df_panel.groupby('StockCode').agg(
        n_weeks=('Week', 'nunique'),
        n_price_points=('avg_price', 'nunique'),
        total_qty=('qty', 'sum'),
    )
    mask = (
        (per_sku['n_weeks'] >= min_weeks)
        & (per_sku['total_qty'] >= min_total_qty)
        & (per_sku['n_price_points'] >= min_price_points)
    )
    return per_sku.index[mask].tolist()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ numpy>=1.23
2
+ pandas>=1.5
3
+ streamlit>=1.30
4
+ plotly>=5.15
5
+ scikit-learn>=1.2
6
+ scipy>=1.9
7
+ pyarrow>=12.0
8
+ openpyxl>=3.1
tools/build_uci_panel.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import pandas as pd
3
+
4
+ from pricing_engine.data_uci import make_sku_week_panel
5
+
6
+ RAW_PATH = Path("data/Online Retail.xlsx")
7
+ OUT_PATH = Path("data/processed/uci_sku_week.parquet")
8
+
9
def main():
    """Build the processed UCI SKU-week parquet panel from the raw workbook.

    Fails fast if the raw Excel export is missing; creates the output
    directory on demand.
    """
    if not RAW_PATH.exists():
        raise FileNotFoundError(f"Missing raw file: {RAW_PATH}")

    print("Loading raw UCI Online Retail…")
    raw_df = pd.read_excel(RAW_PATH)

    print("Building SKU-week panel…")
    panel = make_sku_week_panel(raw_df)

    # Ensure data/processed/ exists before writing.
    OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    print(f"Writing parquet → {OUT_PATH}")
    panel.to_parquet(OUT_PATH, index=False)

    print("Done.")


if __name__ == "__main__":
    main()