procure-rl / test_calibration.py
akshaypulla's picture
Upload folder using huggingface_hub
c1be7c3 verified
#!/usr/bin/env python3
# test_calibration.py
import sys
sys.path.insert(0, ".")
from server.Procure_RL_environment import ProcureRLEnvironment
from models import NegotiationAction
import random
def run_random_agent(task_id, seed=42):
    """Play one episode with a baseline agent that bids uniformly at random.

    On each of up to 15 turns the agent draws every negotiable term for the
    task from a fixed ``(low, high)`` range and submits the result as a
    ``make_offer`` action. If the episode is still open afterwards, a final
    ``accept`` action is sent.

    Args:
        task_id: Key selecting the sampling ranges; one of "single_issue",
            "multi_issue" or "adversarial".
        seed: Forwarded to ``env.reset``. The agent's own random generator
            is seeded with ``seed + 1``.

    Returns:
        The reward on the last observation, or 0.0 when that reward is falsy.
    """
    # Per-task sampling bounds for each term the agent may offer.
    sampling_ranges = {
        "single_issue": {"price": (38000, 52000)},
        "multi_issue": {"price": (40000, 58000), "payment_days": (30, 90)},
        "adversarial": {
            "price": (80000, 120000),
            "payment_days": (30, 90),
            "support_hours": (80, 200),
        },
    }

    env = ProcureRLEnvironment()
    env.reset(seed=seed, task_id=task_id)
    rng = random.Random(seed + 1)
    bounds = sampling_ranges[task_id]

    for _ in range(15):
        offer = {
            issue: rng.uniform(low, high)
            for issue, (low, high) in bounds.items()
        }
        result = env.step(
            NegotiationAction(
                move_type="make_offer",
                terms=offer,
                message="Here is my offer.",
            )
        )
        if result.done:
            return result.reward or 0.0

    # Out of turns without a deal: accept whatever is on the table.
    closing_move = NegotiationAction(move_type="accept", terms={}, message="")
    result = env.step(closing_move)
    return result.reward or 0.0
def _opponent_price_floor(env, fallback):
    """Return the opponent's price floor if ``env`` exposes one, else ``fallback``."""
    # ``_opponent`` is a private attribute of the environment, so neither it
    # nor ``price_floor`` is guaranteed to exist (or to be non-None).
    opponent = getattr(env, "_opponent", None)
    floor = getattr(opponent, "price_floor", None)
    return fallback if floor is None else floor


def run_good_agent(task_id, seed=42):
    """Play one episode with a fixed-target agent using collaborative language.

    The agent reads the opening price from the first observation, picks a
    target price a fixed fraction below it (but at least 5% above the
    opponent's price floor), and repeats that same offer for up to 10 turns.
    If the episode is still open afterwards, a final ``accept`` action is
    sent.

    Args:
        task_id: "single_issue", "multi_issue" or "adversarial". Any other
            value is treated like "adversarial".
        seed: Forwarded to ``env.reset``.

    Returns:
        The reward on the last observation, or 0.0 when that reward is falsy.
    """
    # task -> (fallback opening price, fraction of the opening price to
    # target, fixed non-price terms). The fallback prices are the upper
    # bounds of the price ranges the random baseline in this file samples
    # for each task -- NOTE(review): confirm they match the environment's
    # real opening offers.
    plans = {
        "single_issue": (52000, 0.78, {}),
        "multi_issue": (58000, 0.80, {"payment_days": 45}),
        "adversarial": (
            120000,
            0.80,
            {"payment_days": 50, "support_hours": 160},
        ),
    }
    fallback_opening, target_fraction, fixed_terms = plans.get(
        task_id, plans["adversarial"]
    )

    env = ProcureRLEnvironment()
    obs = env.reset(seed=seed, task_id=task_id)

    # Use the opening offer to adapt the target; tolerate a missing offer
    # or a missing/None price instead of crashing.
    opening_price = (obs.current_offer or {}).get("price")
    if opening_price is None:
        opening_price = fallback_opening

    # Never target below the opponent's floor (assumed to be the lowest
    # price it will accept); without a known floor, assume 80% of opening.
    floor = _opponent_price_floor(env, opening_price * 0.80)
    target_price = max(opening_price * target_fraction, floor * 1.05)
    targets = {"price": target_price, **fixed_terms}

    for _ in range(10):
        action = NegotiationAction(
            move_type="make_offer",
            # Fresh copy each turn so nothing downstream can mutate the
            # agent's targets between offers.
            terms=dict(targets),
            message="I value our partnership and believe this offer reflects fair market value for both parties. I'm flexible and want to find a solution that works for us both.",
        )
        obs = env.step(action)
        if obs.done:
            return obs.reward or 0.0

    # Out of turns without a deal: accept whatever is on the table.
    obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
    return obs.reward or 0.0
# Calibration report: for each task, run five seeded episodes per agent and
# compare average rewards. A healthy environment should separate the
# strategic agent from the random baseline.
print("=== Score Spread Calibration ===")

for task in ("single_issue", "multi_issue", "adversarial"):
    # All random-agent episodes run before any strategic-agent episode.
    random_scores = [run_random_agent(task, seed=seed) for seed in range(5)]
    good_scores = [run_good_agent(task, seed=seed) for seed in range(5)]

    random_avg = sum(random_scores) / len(random_scores)
    good_avg = sum(good_scores) / len(good_scores)
    spread = good_avg - random_avg

    random_rounded = [round(score, 3) for score in random_scores]
    good_rounded = [round(score, 3) for score in good_scores]

    # The spread check takes priority over the absolute-score check.
    if spread < 0.05:
        verdict = " ⚠️ WARNING: spread too small — environment may be trivial or broken"
    elif good_avg < 0.10:
        verdict = " ⚠️ WARNING: even good agent scores very low — too hard"
    else:
        verdict = " ✅ Score spread looks healthy"

    report_lines = (
        f"\n{task}:",
        f" Random agent: {random_rounded} avg={random_avg:.3f}",
        f" Strategic agent: {good_rounded} avg={good_avg:.3f}",
        f" Spread: {spread:.3f}",
        verdict,
    )
    for line in report_lines:
        print(line)