Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # test_calibration.py | |
| import sys | |
| sys.path.insert(0, ".") | |
| from server.Procure_RL_environment import ProcureRLEnvironment | |
| from models import NegotiationAction | |
| import random | |
def run_random_agent(task_id, seed=42):
    """Play one episode with uniformly random offers and return its reward.

    A fresh environment is reset with ``seed`` and ``task_id``. For at most
    15 steps an offer is drawn uniformly, per issue, from the hard-coded
    bounds for that task (RNG seeded with ``seed + 1``). If a step reports
    ``done`` its reward is returned; otherwise a final ``accept`` move is
    sent and that reward is returned. A falsy reward is reported as ``0.0``.

    Raises ``KeyError`` if ``task_id`` is not one of the configured tasks.
    """
    env = ProcureRLEnvironment()
    obs = env.reset(seed=seed, task_id=task_id)
    rng = random.Random(seed + 1)

    # Per-task sampling bounds: issue name -> (low, high).
    bounds_by_task = {
        "single_issue": {"price": (38000, 52000)},
        "multi_issue": {"price": (40000, 58000), "payment_days": (30, 90)},
        "adversarial": {
            "price": (80000, 120000),
            "payment_days": (30, 90),
            "support_hours": (80, 200),
        },
    }
    bounds = bounds_by_task[task_id]

    for _ in range(15):
        offer = {issue: rng.uniform(lo, hi) for issue, (lo, hi) in bounds.items()}
        obs = env.step(
            NegotiationAction(
                move_type="make_offer", terms=offer, message="Here is my offer."
            )
        )
        if obs.done:
            break
    else:
        # Episode still open after every offer: force an accept to end it.
        obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))

    return obs.reward or 0.0
def run_good_agent(task_id, seed=42):
    """Play one episode with a fixed, floor-aware offer and return its reward.

    The target price is a discount off the opponent's opening price (22% for
    ``single_issue``, 20% otherwise), but never less than 5% above the
    opponent's price floor. The same offer is repeated for at most 10 steps
    with a collaborative message. If a step reports ``done`` its reward is
    returned; otherwise a final ``accept`` move is sent and that reward is
    returned. A falsy reward is reported as ``0.0``.
    """
    env = ProcureRLEnvironment()
    obs = env.reset(seed=seed, task_id=task_id)

    # The opponent's opening offer anchors our target price.
    opening_price = obs.current_offer.get("price", 52000)

    # Read the opponent's floor when it is exposed; otherwise estimate it as
    # 80% of the opening price.
    opponent = env._opponent
    if hasattr(opponent, "price_floor"):
        floor = opponent.price_floor
    else:
        floor = opening_price * 0.80

    # Discount off the opening price, clamped to stay above the floor.
    discount = 0.78 if task_id == "single_issue" else 0.80
    targets = {"price": max(opening_price * discount, floor * 1.05)}
    if task_id == "multi_issue":
        targets["payment_days"] = 45
    elif task_id != "single_issue":
        # Any other task id is treated as the adversarial scenario.
        targets["payment_days"] = 50
        targets["support_hours"] = 160

    pitch = (
        "I value our partnership and believe this offer reflects fair market "
        "value for both parties. I'm flexible and want to find a solution "
        "that works for us both."
    )

    for _ in range(10):
        obs = env.step(
            NegotiationAction(move_type="make_offer", terms=targets, message=pitch)
        )
        if obs.done:
            break
    else:
        # Episode still open after every offer: force an accept to end it.
        obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))

    return obs.reward or 0.0
# Calibration report: for each task, run both agents on seeds 0-4 and compare
# their average rewards. A healthy environment should reward the strategic
# agent noticeably more than the random one.
print("=== Score Spread Calibration ===")
for task in ("single_issue", "multi_issue", "adversarial"):
    random_scores = [run_random_agent(task, seed=n) for n in range(5)]
    good_scores = [run_good_agent(task, seed=n) for n in range(5)]

    random_avg = sum(random_scores) / len(random_scores)
    good_avg = sum(good_scores) / len(good_scores)
    spread = good_avg - random_avg

    random_rounded = [round(score, 3) for score in random_scores]
    good_rounded = [round(score, 3) for score in good_scores]

    print(f"\n{task}:")
    print(f" Random agent: {random_rounded} avg={random_avg:.3f}")
    print(f" Strategic agent: {good_rounded} avg={good_avg:.3f}")
    print(f" Spread: {spread:.3f}")

    # The spread check takes priority over the absolute-score check.
    if spread < 0.05:
        verdict = " ⚠️ WARNING: spread too small — environment may be trivial or broken"
    elif good_avg < 0.10:
        verdict = " ⚠️ WARNING: even good agent scores very low — too hard"
    else:
        verdict = " ✅ Score spread looks healthy"
    print(verdict)