"""Sensitivity analysis of BudgetOptimizer heuristic weights. Scientifically tests whether the hand-set BUDGET_PRIORITY_DEFAULTS weights are "load-bearing" for the system's user-facing decisions, by: (A) Showing the tier-feasibility decision (check_budget_feasibility) is INVARIANT to weight perturbation (it reads empirical per-destination cost_tiers data, not the weights). (B) Monte-Carlo perturbing the weights (each default x U[0.7, 1.3]) and measuring how much optimize_with_priorities outputs move + whether the recommended accommodation/food tiers flip. Run: python -m scripts.sensitivity_analysis (from chatbot-ml-service/) """ import json import os import random import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from app.models.budget_optimizer.optimizer import BudgetOptimizer # noqa: E402 KB_PATH = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "app", "data", "knowledge_base", "destinations.json", ) DEST_IDS = ["da-nang", "phu-quoc", "hoi-an"] SCENARIOS = [ # (budget_vnd, days, adults, children, month) (5_000_000, 3, 2, 1, 7), (8_000_000, 4, 2, 0, 3), (15_000_000, 5, 2, 2, 12), (3_000_000, 3, 1, 0, None), ] USER_PRIORITIES = {"accommodation": "high", "food": "low", "activities": "medium"} N_DRAWS = 500 PERTURB_LOW, PERTURB_HIGH = 0.70, 1.30 # +/-30% per-category noise def _perturbed_weights(base: dict, rng: random.Random) -> dict: return {k: v * rng.uniform(PERTURB_LOW, PERTURB_HIGH) for k, v in base.items()} def _max_rel_delta(a: dict, b: dict) -> float: out = 0.0 for k in a: denom = max(1.0, abs(a[k])) out = max(out, abs(a[k] - b[k]) / denom) return out def main() -> None: with open(KB_PATH, encoding="utf-8-sig") as f: dests = json.load(f)["destinations"] opt = BudgetOptimizer(dests) base = dict(BudgetOptimizer.BUDGET_PRIORITY_DEFAULTS) rng = random.Random(42) print("=" * 78) print("SENSITIVITY ANALYSIS - BudgetOptimizer heuristic weights") print(f"base weights: {base}") print(f"Monte-Carlo: {N_DRAWS} draws, each weight x U[{PERTURB_LOW},{PERTURB_HIGH}]") print("=" * 78) # ---- (A) tier-feasibility invariance ------------------------------------ print("\n[A] check_budget_feasibility() - core user-facing decision") print(f"{'destination':<10}{'budget':>11}{'days':>5}{'pax':>5}" f"{'tier(base)':>13}{'tier flips/' + str(N_DRAWS):>16}") total_flips_A = 0 for did in DEST_IDS: dest = opt.get_destination(did) for budget, days, ad, ch, month in SCENARIOS: base_res = opt.check_budget_feasibility(dest, budget, "VND", days, ad, ch, month) base_tier = base_res.get("recommended_tier") or "infeasible" flips = 0 for _ in range(N_DRAWS): opt.BUDGET_PRIORITY_DEFAULTS = _perturbed_weights(base, rng) r = opt.check_budget_feasibility(dest, budget, "VND", days, ad, ch, month) if (r.get("recommended_tier") or "infeasible") != base_tier: flips += 1 opt.BUDGET_PRIORITY_DEFAULTS = base total_flips_A += flips print(f"{did:<10}{budget:>11,}{days:>5}{ad + ch:>5}" f"{base_tier:>13}{flips:>16}") print(f" => tier-feasibility flips across all cases: {total_flips_A} " f"(expected 0 - decision uses empirical cost_tiers, NOT weights)") # ---- (B) optimize_with_priorities allocation sensitivity ---------------- print("\n[B] optimize_with_priorities() - advisory allocation (DOES use weights)") print(f"user priorities: {USER_PRIORITIES}") print(f"{'destination':<10}{'budget':>11}{'days':>5}" f"{'accomTier flips':>16}{'foodTier flips':>16}{'maxRelDelta(mean)':>20}") worst_delta = 0.0 total_flips_B = 0 for did in DEST_IDS: dest = opt.get_destination(did) for budget, days, ad, ch, month in SCENARIOS: base_res = opt.optimize_with_priorities( dest, budget, "VND", days, ad, ch, USER_PRIORITIES, month) base_alloc = base_res["allocation"] base_tiers = base_res["recommended_tiers"] a_flip = fd_flip = 0 deltas = [] for _ in range(N_DRAWS): opt.BUDGET_PRIORITY_DEFAULTS = _perturbed_weights(base, rng) r = opt.optimize_with_priorities( dest, budget, "VND", days, ad, ch, USER_PRIORITIES, month) if r["recommended_tiers"]["accommodation"] != base_tiers["accommodation"]: a_flip += 1 if r["recommended_tiers"]["food"] != base_tiers["food"]: fd_flip += 1 deltas.append(_max_rel_delta(base_alloc, r["allocation"])) opt.BUDGET_PRIORITY_DEFAULTS = base mean_delta = sum(deltas) / len(deltas) worst_delta = max(worst_delta, max(deltas)) total_flips_B += a_flip + fd_flip print(f"{did:<10}{budget:>11,}{days:>5}" f"{a_flip:>16}{fd_flip:>16}{mean_delta:>19.1%}") print(f" => recommended-tier flips (accom+food): {total_flips_B} / " f"{len(DEST_IDS) * len(SCENARIOS) * N_DRAWS * 2}") print(f" => worst-case max allocation rel-delta under +/-30% weight noise: " f"{worst_delta:.1%}") print("\n" + "=" * 78) print("VERDICT") print(" (A) Core decision (which tier is affordable / feasible) is " f"{'INVARIANT' if total_flips_A == 0 else 'SENSITIVE'} to weights") print(" (B) Advisory allocation tier choices flip in " f"{total_flips_B} cases; allocation amounts move <= ~weight noise,") print(" bounded by normalization (Sum=1) -> weights are tunable") print(" priors, not load-bearing constants.") print("=" * 78) if __name__ == "__main__": main()