wanderlust-chatbot / scripts /sensitivity_analysis.py
Kiriten892's picture
docs(chatbot): add ML evaluation notebook with Wave 4 results and weight sensitivity analysis
c7041fa
Raw
History Blame Contribute Delete
5.97 kB
"""Sensitivity analysis of BudgetOptimizer heuristic weights.
Scientifically tests whether the hand-set BUDGET_PRIORITY_DEFAULTS weights are
"load-bearing" for the system's user-facing decisions, by:
(A) Showing the tier-feasibility decision (check_budget_feasibility) is
INVARIANT to weight perturbation (it reads empirical per-destination
cost_tiers data, not the weights).
(B) Monte-Carlo perturbing the weights (each default x U[0.7, 1.3]) and
measuring how much optimize_with_priorities outputs move + whether the
recommended accommodation/food tiers flip.
Run: python -m scripts.sensitivity_analysis (from chatbot-ml-service/)
"""
import json
import os
import random
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.models.budget_optimizer.optimizer import BudgetOptimizer # noqa: E402
KB_PATH = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"app", "data", "knowledge_base", "destinations.json",
)
DEST_IDS = ["da-nang", "phu-quoc", "hoi-an"]
SCENARIOS = [
# (budget_vnd, days, adults, children, month)
(5_000_000, 3, 2, 1, 7),
(8_000_000, 4, 2, 0, 3),
(15_000_000, 5, 2, 2, 12),
(3_000_000, 3, 1, 0, None),
]
USER_PRIORITIES = {"accommodation": "high", "food": "low", "activities": "medium"}
N_DRAWS = 500
PERTURB_LOW, PERTURB_HIGH = 0.70, 1.30 # +/-30% per-category noise
def _perturbed_weights(base: dict, rng: random.Random) -> dict:
return {k: v * rng.uniform(PERTURB_LOW, PERTURB_HIGH) for k, v in base.items()}
def _max_rel_delta(a: dict, b: dict) -> float:
out = 0.0
for k in a:
denom = max(1.0, abs(a[k]))
out = max(out, abs(a[k] - b[k]) / denom)
return out
def main() -> None:
with open(KB_PATH, encoding="utf-8-sig") as f:
dests = json.load(f)["destinations"]
opt = BudgetOptimizer(dests)
base = dict(BudgetOptimizer.BUDGET_PRIORITY_DEFAULTS)
rng = random.Random(42)
print("=" * 78)
print("SENSITIVITY ANALYSIS - BudgetOptimizer heuristic weights")
print(f"base weights: {base}")
print(f"Monte-Carlo: {N_DRAWS} draws, each weight x U[{PERTURB_LOW},{PERTURB_HIGH}]")
print("=" * 78)
# ---- (A) tier-feasibility invariance ------------------------------------
print("\n[A] check_budget_feasibility() - core user-facing decision")
print(f"{'destination':<10}{'budget':>11}{'days':>5}{'pax':>5}"
f"{'tier(base)':>13}{'tier flips/' + str(N_DRAWS):>16}")
total_flips_A = 0
for did in DEST_IDS:
dest = opt.get_destination(did)
for budget, days, ad, ch, month in SCENARIOS:
base_res = opt.check_budget_feasibility(dest, budget, "VND", days, ad, ch, month)
base_tier = base_res.get("recommended_tier") or "infeasible"
flips = 0
for _ in range(N_DRAWS):
opt.BUDGET_PRIORITY_DEFAULTS = _perturbed_weights(base, rng)
r = opt.check_budget_feasibility(dest, budget, "VND", days, ad, ch, month)
if (r.get("recommended_tier") or "infeasible") != base_tier:
flips += 1
opt.BUDGET_PRIORITY_DEFAULTS = base
total_flips_A += flips
print(f"{did:<10}{budget:>11,}{days:>5}{ad + ch:>5}"
f"{base_tier:>13}{flips:>16}")
print(f" => tier-feasibility flips across all cases: {total_flips_A} "
f"(expected 0 - decision uses empirical cost_tiers, NOT weights)")
# ---- (B) optimize_with_priorities allocation sensitivity ----------------
print("\n[B] optimize_with_priorities() - advisory allocation (DOES use weights)")
print(f"user priorities: {USER_PRIORITIES}")
print(f"{'destination':<10}{'budget':>11}{'days':>5}"
f"{'accomTier flips':>16}{'foodTier flips':>16}{'maxRelDelta(mean)':>20}")
worst_delta = 0.0
total_flips_B = 0
for did in DEST_IDS:
dest = opt.get_destination(did)
for budget, days, ad, ch, month in SCENARIOS:
base_res = opt.optimize_with_priorities(
dest, budget, "VND", days, ad, ch, USER_PRIORITIES, month)
base_alloc = base_res["allocation"]
base_tiers = base_res["recommended_tiers"]
a_flip = fd_flip = 0
deltas = []
for _ in range(N_DRAWS):
opt.BUDGET_PRIORITY_DEFAULTS = _perturbed_weights(base, rng)
r = opt.optimize_with_priorities(
dest, budget, "VND", days, ad, ch, USER_PRIORITIES, month)
if r["recommended_tiers"]["accommodation"] != base_tiers["accommodation"]:
a_flip += 1
if r["recommended_tiers"]["food"] != base_tiers["food"]:
fd_flip += 1
deltas.append(_max_rel_delta(base_alloc, r["allocation"]))
opt.BUDGET_PRIORITY_DEFAULTS = base
mean_delta = sum(deltas) / len(deltas)
worst_delta = max(worst_delta, max(deltas))
total_flips_B += a_flip + fd_flip
print(f"{did:<10}{budget:>11,}{days:>5}"
f"{a_flip:>16}{fd_flip:>16}{mean_delta:>19.1%}")
print(f" => recommended-tier flips (accom+food): {total_flips_B} / "
f"{len(DEST_IDS) * len(SCENARIOS) * N_DRAWS * 2}")
print(f" => worst-case max allocation rel-delta under +/-30% weight noise: "
f"{worst_delta:.1%}")
print("\n" + "=" * 78)
print("VERDICT")
print(" (A) Core decision (which tier is affordable / feasible) is "
f"{'INVARIANT' if total_flips_A == 0 else 'SENSITIVE'} to weights")
print(" (B) Advisory allocation tier choices flip in "
f"{total_flips_B} cases; allocation amounts move <= ~weight noise,")
print(" bounded by normalization (Sum=1) -> weights are tunable")
print(" priors, not load-bearing constants.")
print("=" * 78)
if __name__ == "__main__":
main()