Spaces:
Sleeping
Sleeping
docs(chatbot): add ML evaluation notebook with Wave 4 results and weight sensitivity analysis
"""Sensitivity analysis of BudgetOptimizer heuristic weights.

Scientifically tests whether the hand-set BUDGET_PRIORITY_DEFAULTS weights are
"load-bearing" for the system's user-facing decisions, by:

  (A) Showing that the tier-feasibility decision (check_budget_feasibility) is
      INVARIANT to weight perturbation (it reads empirical per-destination
      cost_tiers data, not the weights).
  (B) Monte-Carlo perturbing the weights (each default x U[0.7, 1.3]) and
      measuring how much the optimize_with_priorities outputs move, and whether
      the recommended accommodation/food tiers flip.

Run: python -m scripts.sensitivity_analysis  (from chatbot-ml-service/)
"""
| import json | |
| import os | |
| import random | |
| import sys | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from app.models.budget_optimizer.optimizer import BudgetOptimizer # noqa: E402 | |
# Directory two levels above this file; the knowledge base lives beneath it.
_SERVICE_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
KB_PATH = os.path.join(
    _SERVICE_ROOT, "app", "data", "knowledge_base", "destinations.json"
)

# Destinations exercised by both parts of the analysis.
DEST_IDS = ["da-nang", "phu-quoc", "hoi-an"]

# Each scenario is (budget_vnd, days, adults, children, month); the last
# scenario passes None as its month.
SCENARIOS = [
    (5_000_000, 3, 2, 1, 7),
    (8_000_000, 4, 2, 0, 3),
    (15_000_000, 5, 2, 2, 12),
    (3_000_000, 3, 1, 0, None),
]

# Priorities handed to optimize_with_priorities in part (B).
USER_PRIORITIES = {
    "accommodation": "high",
    "food": "low",
    "activities": "medium",
}

# Monte-Carlo settings: number of draws, and the multiplicative noise band
# applied independently to every weight (+/-30%).
N_DRAWS = 500
PERTURB_LOW = 0.70
PERTURB_HIGH = 1.30
| def _perturbed_weights(base: dict, rng: random.Random) -> dict: | |
| return {k: v * rng.uniform(PERTURB_LOW, PERTURB_HIGH) for k, v in base.items()} | |
| def _max_rel_delta(a: dict, b: dict) -> float: | |
| out = 0.0 | |
| for k in a: | |
| denom = max(1.0, abs(a[k])) | |
| out = max(out, abs(a[k] - b[k]) / denom) | |
| return out | |
# Width of the first table column. "destination" is 11 characters, so the
# column must be wider than that for headers and rows to line up.
_DEST_COL = 12


def _tier_of(result: dict) -> str:
    """Return the result's ``recommended_tier``, or "infeasible" if absent/empty."""
    return result.get("recommended_tier") or "infeasible"


def _report_feasibility_invariance(opt: "BudgetOptimizer", base: dict,
                                   rng: random.Random) -> int:
    """Run part (A) and print its table.

    For every destination/scenario pair, compares the tier recommended by
    ``check_budget_feasibility`` under the base weights with the tier
    recommended under ``N_DRAWS`` randomly perturbed weight sets.

    Args:
        opt: Optimizer instance whose weights are temporarily overridden.
        base: Unperturbed weights; restored on the instance after each case.
        rng: Random source used for the perturbations.

    Returns:
        Total number of draws, over all cases, whose tier differed from the
        base tier.
    """
    print("\n[A] check_budget_feasibility() - core user-facing decision")
    print(f"{'destination':<{_DEST_COL}}{'budget':>11}{'days':>5}{'pax':>5}"
          f"{'tier(base)':>13}{'tier flips/' + str(N_DRAWS):>16}")
    total_flips = 0
    for did in DEST_IDS:
        dest = opt.get_destination(did)
        for budget, days, ad, ch, month in SCENARIOS:
            args = (dest, budget, "VND", days, ad, ch, month)
            base_tier = _tier_of(opt.check_budget_feasibility(*args))
            flips = 0
            try:
                for _ in range(N_DRAWS):
                    # Instance-level override of the weights.
                    # NOTE(review): this only has an effect if the optimizer
                    # reads the weights through the instance - confirm.
                    opt.BUDGET_PRIORITY_DEFAULTS = _perturbed_weights(base, rng)
                    if _tier_of(opt.check_budget_feasibility(*args)) != base_tier:
                        flips += 1
            finally:
                # Always put the base weights back, even if a call raised.
                opt.BUDGET_PRIORITY_DEFAULTS = base
            total_flips += flips
            print(f"{did:<{_DEST_COL}}{budget:>11,}{days:>5}{ad + ch:>5}"
                  f"{base_tier:>13}{flips:>16}")
    print(f" => tier-feasibility flips across all cases: {total_flips} "
          f"(expected 0 - decision uses empirical cost_tiers, NOT weights)")
    return total_flips


def _report_allocation_sensitivity(opt: "BudgetOptimizer", base: dict,
                                   rng: random.Random) -> tuple:
    """Run part (B) and print its table.

    For every destination/scenario pair, compares the output of
    ``optimize_with_priorities`` under the base weights with its output under
    ``N_DRAWS`` randomly perturbed weight sets: how often the recommended
    accommodation and food tiers change, and how far the allocation moves
    (via ``_max_rel_delta``).

    Args:
        opt: Optimizer instance whose weights are temporarily overridden.
        base: Unperturbed weights; restored on the instance after each case.
        rng: Random source used for the perturbations.

    Returns:
        ``(total_flips, worst_delta)``: the summed accommodation + food tier
        flips over all cases, and the largest allocation delta seen in any draw.
    """
    print("\n[B] optimize_with_priorities() - advisory allocation (DOES use weights)")
    print(f"user priorities: {USER_PRIORITIES}")
    print(f"{'destination':<{_DEST_COL}}{'budget':>11}{'days':>5}"
          f"{'accomTier flips':>16}{'foodTier flips':>16}{'maxRelDelta(mean)':>20}")
    worst_delta = 0.0
    total_flips = 0
    for did in DEST_IDS:
        dest = opt.get_destination(did)
        for budget, days, ad, ch, month in SCENARIOS:
            args = (dest, budget, "VND", days, ad, ch, USER_PRIORITIES, month)
            base_res = opt.optimize_with_priorities(*args)
            base_alloc = base_res["allocation"]
            base_tiers = base_res["recommended_tiers"]
            a_flip = fd_flip = 0
            deltas = []
            try:
                for _ in range(N_DRAWS):
                    opt.BUDGET_PRIORITY_DEFAULTS = _perturbed_weights(base, rng)
                    res = opt.optimize_with_priorities(*args)
                    tiers = res["recommended_tiers"]
                    if tiers["accommodation"] != base_tiers["accommodation"]:
                        a_flip += 1
                    if tiers["food"] != base_tiers["food"]:
                        fd_flip += 1
                    deltas.append(_max_rel_delta(base_alloc, res["allocation"]))
            finally:
                # Always put the base weights back, even if a call raised.
                opt.BUDGET_PRIORITY_DEFAULTS = base
            # Guard the aggregates so that N_DRAWS == 0 reports zeros instead
            # of raising on an empty list.
            mean_delta = sum(deltas) / len(deltas) if deltas else 0.0
            worst_delta = max(worst_delta, max(deltas, default=0.0))
            total_flips += a_flip + fd_flip
            print(f"{did:<{_DEST_COL}}{budget:>11,}{days:>5}"
                  f"{a_flip:>16}{fd_flip:>16}{mean_delta:>20.1%}")
    print(f" => recommended-tier flips (accom+food): {total_flips} / "
          f"{len(DEST_IDS) * len(SCENARIOS) * N_DRAWS * 2}")
    print(f" => worst-case max allocation rel-delta under +/-30% weight noise: "
          f"{worst_delta:.1%}")
    return total_flips, worst_delta


def main() -> None:
    """Load the knowledge base, run parts (A) and (B), and print a verdict.

    Reads the destinations JSON at ``KB_PATH``, builds a ``BudgetOptimizer``
    from it, and uses a ``random.Random`` seeded with 42 so that repeated runs
    draw the same perturbations. All output goes to stdout.
    """
    # utf-8-sig decodes the file whether or not it starts with a BOM.
    with open(KB_PATH, encoding="utf-8-sig") as f:
        dests = json.load(f)["destinations"]
    opt = BudgetOptimizer(dests)
    # Private copy of the class-level defaults; used as the perturbation base
    # and as the value restored onto the instance after each case.
    base = dict(BudgetOptimizer.BUDGET_PRIORITY_DEFAULTS)
    rng = random.Random(42)

    rule = "=" * 78
    print(rule)
    print("SENSITIVITY ANALYSIS - BudgetOptimizer heuristic weights")
    print(f"base weights: {base}")
    print(f"Monte-Carlo: {N_DRAWS} draws, each weight x U[{PERTURB_LOW},{PERTURB_HIGH}]")
    print(rule)

    # (A) runs before (B) on the same rng, so the draw sequence is fixed.
    total_flips_a = _report_feasibility_invariance(opt, base, rng)
    total_flips_b, _worst_delta = _report_allocation_sensitivity(opt, base, rng)

    print("\n" + rule)
    print("VERDICT")
    print(" (A) Core decision (which tier is affordable / feasible) is "
          f"{'INVARIANT' if total_flips_a == 0 else 'SENSITIVE'} to weights")
    # NOTE(review): only the flip count below is measured; the remainder of the
    # (B) statement is fixed text and is not checked against the worst-case
    # delta printed in part (B).
    print(" (B) Advisory allocation tier choices flip in "
          f"{total_flips_b} cases; allocation amounts move <= ~weight noise,")
    print(" bounded by normalization (Sum=1) -> weights are tunable")
    print(" priors, not load-bearing constants.")
    print(rule)
if __name__ == "__main__":
    # Run the analysis only when this module is executed as a script.
    main()