Spaces:

akshaypulla
/

procure-rl

Sleeping

App Files Files Community

procure-rl / test_calibration.py

akshaypulla

Upload folder using huggingface_hub

c1be7c3 verified about 2 months ago

raw

history blame contribute delete

3.88 kB

	#!/usr/bin/env python3
	# test_calibration.py
	import sys

	sys.path.insert(0, ".")

	from server.Procure_RL_environment import ProcureRLEnvironment
	from models import NegotiationAction
	import random


	def run_random_agent(task_id, seed=42):
	    """Play one episode with a baseline agent that offers random terms.

	    A fresh ``ProcureRLEnvironment`` is reset with ``seed`` and ``task_id``.
	    The agent then submits up to 15 ``make_offer`` actions whose terms are
	    drawn uniformly from fixed per-task ranges, using a private RNG seeded
	    with ``seed + 1``. If the episode is still open after those offers, a
	    final ``accept`` action is sent.

	    Args:
	        task_id: One of "single_issue", "multi_issue" or "adversarial";
	            any other value raises ``KeyError`` after the environment reset.
	        seed: Seed passed to ``env.reset``; the offer RNG uses ``seed + 1``.

	    Returns:
	        The reward of the last observation, or ``0.0`` when it is falsy.
	    """
	    env = ProcureRLEnvironment()
	    obs = env.reset(seed=seed, task_id=task_id)
	    rng = random.Random(seed + 1)

	    # (low, high) sampling bounds for every issue, keyed by task.
	    config = {
	        "single_issue": {"price": (38000, 52000)},
	        "multi_issue": {"price": (40000, 58000), "payment_days": (30, 90)},
	        "adversarial": {
	            "price": (80000, 120000),
	            "payment_days": (30, 90),
	            "support_hours": (80, 200),
	        },
	    }
	    bounds = config[task_id]

	    for _ in range(15):
	        # One uniform draw per issue, in the dict's insertion order.
	        terms = {
	            issue: rng.uniform(low, high)
	            for issue, (low, high) in bounds.items()
	        }
	        offer = NegotiationAction(
	            move_type="make_offer", terms=terms, message="Here is my offer."
	        )
	        obs = env.step(offer)
	        if obs.done:
	            return obs.reward or 0.0

	    # Offer budget exhausted: close the episode by accepting.
	    closing = NegotiationAction(move_type="accept", terms={}, message="")
	    obs = env.step(closing)
	    return obs.reward or 0.0


	def run_good_agent(task_id, seed=42):
	    """Play one episode with a fixed-target agent using collaborative wording.

	    The agent reads the opening price from the first observation, picks a
	    price target below it (never less than 5% above the opponent's floor),
	    and repeats that same offer up to 10 times before sending ``accept``.

	    Args:
	        task_id: "single_issue" or "multi_issue"; any other value is treated
	            as the adversarial task.
	        seed: Seed passed to ``env.reset``.

	    Returns:
	        The reward of the last observation, or ``0.0`` when it is falsy.
	    """
	    env = ProcureRLEnvironment()
	    obs = env.reset(seed=seed, task_id=task_id)

	    # Opponent's opening price anchors the target (52000 if none is offered).
	    opening_price = obs.current_offer.get("price", 52000)

	    # Peek at the opponent's floor when the environment exposes one. Nested
	    # getattr keeps the fallback reachable even if `_opponent` itself is
	    # missing or the floor is unset, instead of raising.
	    opponent = getattr(env, "_opponent", None)
	    floor = getattr(opponent, "price_floor", None)
	    if floor is None:
	        floor = opening_price * 0.80

	    # single_issue aims 22% below the opening, the other tasks 20% below;
	    # in every case the target stays at least 5% above the floor.
	    discount = 0.78 if task_id == "single_issue" else 0.80
	    target_price = max(opening_price * discount, floor * 1.05)

	    targets = {"price": target_price}
	    if task_id == "multi_issue":
	        targets["payment_days"] = 45
	    elif task_id != "single_issue":  # adversarial
	        targets["payment_days"] = 50
	        targets["support_hours"] = 160

	    message = (
	        "I value our partnership and believe this offer reflects fair "
	        "market value for both parties. I'm flexible and want to find "
	        "a solution that works for us both."
	    )

	    for _ in range(10):
	        action = NegotiationAction(
	            move_type="make_offer",
	            terms=targets,
	            message=message,
	        )
	        obs = env.step(action)
	        if obs.done:
	            return obs.reward or 0.0

	    # No deal within the offer budget: close the episode by accepting.
	    obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
	    return obs.reward or 0.0


	# Calibration report: for each task, run both agents on seeds 0-4 and
	# compare their average rewards.
	print("=== Score Spread Calibration ===")
	for task in ("single_issue", "multi_issue", "adversarial"):
	    random_scores = [run_random_agent(task, seed=i) for i in range(5)]
	    good_scores = [run_good_agent(task, seed=i) for i in range(5)]

	    random_avg = sum(random_scores) / len(random_scores)
	    good_avg = sum(good_scores) / len(good_scores)
	    spread = good_avg - random_avg

	    random_rounded = [round(s, 3) for s in random_scores]
	    good_rounded = [round(s, 3) for s in good_scores]

	    print(f"\n{task}:")
	    print(f" Random agent: {random_rounded} avg={random_avg:.3f}")
	    print(f" Strategic agent: {good_rounded} avg={good_avg:.3f}")
	    print(f" Spread: {spread:.3f}")

	    # A small spread is checked first; only then is the absolute level
	    # of the strategic agent considered.
	    if spread < 0.05:
	        verdict = " ⚠️ WARNING: spread too small — environment may be trivial or broken"
	    elif good_avg < 0.10:
	        verdict = " ⚠️ WARNING: even good agent scores very low — too hard"
	    else:
	        verdict = " ✅ Score spread looks healthy"
	    print(verdict)