#!/usr/bin/env python3
# test_calibration.py
"""Score-spread calibration check for the ProcureRL negotiation environment.

Runs two scripted agents against each task over a handful of seeds and
prints how far apart their average rewards are:

* a random agent that samples every offer term uniformly from a fixed range;
* a "good" agent that repeats one fixed, floor-aware offer with a
  collaborative message.

The report only runs when the file is executed as a script, so importing the
module (for example during pytest collection) does not start any episodes.
"""
import random
import sys

# Make the project root importable when the script is launched from it.
# This must run before the project imports below.
sys.path.insert(0, ".")

from server.Procure_RL_environment import ProcureRLEnvironment  # noqa: E402
from models import NegotiationAction  # noqa: E402

# Tasks exercised by the calibration report, in print order.
TASKS = ("single_issue", "multi_issue", "adversarial")

# Per-task (low, high) sampling range for every term the random agent offers.
RANDOM_OFFER_RANGES = {
    "single_issue": {"price": (38000, 52000)},
    "multi_issue": {"price": (40000, 58000), "payment_days": (30, 90)},
    "adversarial": {
        "price": (80000, 120000),
        "payment_days": (30, 90),
        "support_hours": (80, 200),
    },
}

RANDOM_AGENT_STEPS = 15  # offers the random agent makes before accepting
GOOD_AGENT_STEPS = 10  # offers the good agent makes before accepting
NUM_SEEDS = 5  # episodes per agent per task (seeds 0..NUM_SEEDS-1)
MIN_HEALTHY_SPREAD = 0.05  # below this the two agents are indistinguishable
MIN_GOOD_AVERAGE = 0.10  # below this even the good agent is failing

GOOD_AGENT_MESSAGE = "I value our partnership and believe this offer reflects fair market value for both parties. I'm flexible and want to find a solution that works for us both."


def _accept_and_score(env) -> float:
    """Send a bare ``accept`` move and return the resulting reward (0.0 if unset)."""
    obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
    return obs.reward or 0.0


def run_random_agent(task_id: str, seed: int = 42) -> float:
    """Simulate a dumb agent that makes random offers.

    Args:
        task_id: Key into ``RANDOM_OFFER_RANGES``; an unknown task raises
            ``KeyError``.
        seed: Seed passed to ``env.reset``. The agent's own RNG uses
            ``seed + 1`` so it is seeded differently from the environment.

    Returns:
        The reward of the observation that ended the episode, or of the
        forced ``accept`` after ``RANDOM_AGENT_STEPS`` offers. A missing
        (falsy) reward is reported as 0.0.
    """
    env = ProcureRLEnvironment()
    env.reset(seed=seed, task_id=task_id)
    rng = random.Random(seed + 1)
    ranges = RANDOM_OFFER_RANGES[task_id]

    for _ in range(RANDOM_AGENT_STEPS):
        terms = {issue: rng.uniform(lo, hi) for issue, (lo, hi) in ranges.items()}
        action = NegotiationAction(
            move_type="make_offer", terms=terms, message="Here is my offer."
        )
        obs = env.step(action)
        if obs.done:
            return obs.reward or 0.0

    # Force accept at end
    return _accept_and_score(env)


def run_good_agent(task_id: str, seed: int = 42) -> float:
    """Simulate a smart agent with collaborative language and adaptive pricing.

    The agent derives a single target offer from the opponent's opening price
    and price floor, then repeats that offer with a collaborative message.

    NOTE: the floor is read from the private ``env._opponent.price_floor``
    when that attribute exists, so this agent uses information that is not
    part of the observation.

    Args:
        task_id: ``"single_issue"``, ``"multi_issue"``, or anything else,
            which is treated as the adversarial task.
        seed: Seed passed to ``env.reset``.

    Returns:
        The reward of the observation that ended the episode, or of the
        forced ``accept`` after ``GOOD_AGENT_STEPS`` offers. A missing
        (falsy) reward is reported as 0.0.
    """
    env = ProcureRLEnvironment()
    obs = env.reset(seed=seed, task_id=task_id)

    # Get opponent's opening to adapt our target
    opening_price = obs.current_offer.get("price", 52000)

    # Get opponent's floor (never go below floor or opponent won't accept);
    # fall back to 80% of the opening when the opponent does not expose one.
    floor = getattr(env._opponent, "price_floor", opening_price * 0.80)
    min_acceptable = floor * 1.05  # keep a 5% margin above the floor

    # Adaptive targets that stay above floor
    if task_id == "single_issue":
        # Target 22% below opening but MUST be above floor
        targets = {"price": max(opening_price * 0.78, min_acceptable)}
    elif task_id == "multi_issue":
        # Target 20% below opening, above floor
        targets = {
            "price": max(opening_price * 0.80, min_acceptable),
            "payment_days": 45,
        }
    else:  # adversarial
        # Target 20% below opening, above floor
        targets = {
            "price": max(opening_price * 0.80, min_acceptable),
            "payment_days": 50,
            "support_hours": 160,
        }

    for _ in range(GOOD_AGENT_STEPS):
        action = NegotiationAction(
            move_type="make_offer",
            # Fresh copy per step so nothing downstream can alter our targets
            # through a shared dict.
            terms=dict(targets),
            message=GOOD_AGENT_MESSAGE,
        )
        obs = env.step(action)
        if obs.done:
            return obs.reward or 0.0

    return _accept_and_score(env)


def main() -> None:
    """Print per-task scores for both agents and a verdict on their spread."""
    print("=== Score Spread Calibration ===")

    for task in TASKS:
        random_scores = [run_random_agent(task, seed=i) for i in range(NUM_SEEDS)]
        good_scores = [run_good_agent(task, seed=i) for i in range(NUM_SEEDS)]

        random_avg = sum(random_scores) / len(random_scores)
        good_avg = sum(good_scores) / len(good_scores)
        spread = good_avg - random_avg

        print(f"\n{task}:")
        print(
            f" Random agent: {[round(s, 3) for s in random_scores]} avg={random_avg:.3f}"
        )
        print(
            f" Strategic agent: {[round(s, 3) for s in good_scores]} avg={good_avg:.3f}"
        )
        print(f" Spread: {spread:.3f}")

        # The spread check comes first: a tiny spread is reported even when
        # the good agent's average is also low.
        if spread < MIN_HEALTHY_SPREAD:
            print(" ⚠️ WARNING: spread too small — environment may be trivial or broken")
        elif good_avg < MIN_GOOD_AVERAGE:
            print(" ⚠️ WARNING: even good agent scores very low — too hard")
        else:
            print(" ✅ Score spread looks healthy")


if __name__ == "__main__":
    main()