Spaces:

akshaypulla
/

procure-rl

Sleeping

App Files Files Community

procure-rl / test_rl_properties.py

akshaypulla

Upload folder using huggingface_hub

c1be7c3 verified about 1 month ago

raw

history blame contribute delete

4.07 kB

	#!/usr/bin/env python3
	# test_rl_properties.py
	import sys

	sys.path.insert(0, ".")

	from server.Procure_RL_environment import ProcureRLEnvironment
	from models import NegotiationAction

	# Test 1: two independently constructed environments that are reset with
	# the same seed and task must produce the same opening supplier message.
	print("=== Test 1: Reproducibility ===")
	same_seed = {"seed": 42, "task_id": "single_issue"}

	env1 = ProcureRLEnvironment()
	obs1 = env1.reset(**same_seed)  # obs1 is read again by the Test 2 section

	env2 = ProcureRLEnvironment()
	obs2 = env2.reset(**same_seed)

	first_opening = obs1.supplier_message
	second_opening = obs2.supplier_message
	assert first_opening == second_opening, "FAIL: same seed gives different opening"
	print("Same seed = same opening message: PASS")
	# Only the first 80 characters are echoed to keep the log line short.
	print(f"Opening: {obs1.supplier_message[:80]}...")

	# Test 2: informational only. Resets a fresh environment with seed 99 and
	# prints its opening offer next to the seed-42 offer held in obs1.
	# NOTE(review): nothing is asserted here, so this section cannot fail on
	# identical offers; confirm whether a difference is actually guaranteed.
	print("\n=== Test 2: Different seeds give different behavior ===")
	env3 = ProcureRLEnvironment()
	obs3 = env3.reset(seed=99, task_id="single_issue")
	for report_line in (
	    f"Seed 42 opening price: {obs1.current_offer}",
	    f"Seed 99 opening price: {obs3.current_offer}",
	):
	    print(report_line)

	# Test 3: the same 40000 offer is sent twice from identical starting
	# states (seed 42, "single_issue"); only the message wording differs.
	# The collaborative wording must leave a strictly higher rapport score.
	print("\n=== Test 3: Rapport affects opponent ===")


	def _rapport_after_offer(message):
	    """Return ``state.rapport_score`` after one offer sent with *message*.

	    A fresh environment is built and reset for every call, then a single
	    ``make_offer`` action with price 40000 is stepped through it.
	    """
	    negotiation = ProcureRLEnvironment()
	    negotiation.reset(seed=42, task_id="single_issue")
	    offer = NegotiationAction(
	        move_type="make_offer",
	        terms={"price": 40000},
	        message=message,
	    )
	    negotiation.step(offer)  # the returned observation is not needed here
	    return negotiation.state.rapport_score


	# Agent with collaborative language
	rapport_collab = _rapport_after_offer(
	    "I genuinely value a long-term partnership and believe this price reflects our mutual interests."
	)

	# Agent with aggressive language
	rapport_aggro = _rapport_after_offer(
	    "This is my final offer. Non-negotiable. Take it or leave it."
	)

	print(f"Collaborative rapport: {rapport_collab:.3f}")
	print(f"Aggressive rapport: {rapport_aggro:.3f}")
	assert rapport_collab > rapport_aggro, "FAIL: rapport not sensitive to language"
	print("Language affects rapport: PASS")

	# Test 4: steps up to three offers with a price that rises by 1000 each
	# round and logs the environment's consecutive-concession counter and the
	# step reward. Nothing is asserted; the section passes if no call raises.
	print("\n=== Test 4: Sequential decisions matter ===")
	env = ProcureRLEnvironment()
	obs = env.reset(seed=42, task_id="single_issue")
	print(f"Round 0: {obs.current_offer}")
	# Make 3 consecutive concessions
	for i in range(3):
	    offered_price = 40000 + i * 1000
	    obs = env.step(
	        NegotiationAction(
	            move_type="make_offer",
	            terms={"price": offered_price},
	            message="We can move slightly on price.",
	        )
	    )
	    round_report = f"Round {i + 1}: consecutive_concessions={env.state.consecutive_concessions}, reward={obs.reward}"
	    print(round_report)
	    # Stop early once the environment reports the episode as finished.
	    if obs.done:
	        break
	print("Sequential state tracking: PASS")

	# Test 5: repeats the same 41000 offer for at most five steps, collecting
	# the reward of every step, and stops early when the episode is done.
	print("\n=== Test 5: Delayed reward ===")
	env = ProcureRLEnvironment()
	env.reset(seed=42, task_id="single_issue")
	rewards = []
	for _ in range(5):
	    # A new action object is built for every step, as in each round above.
	    obs = env.step(
	        NegotiationAction(
	            move_type="make_offer",
	            terms={"price": 41000},
	            message="I think this is a fair price for both parties.",
	        )
	    )
	    rewards.append(obs.reward)
	    if obs.done:
	        break

	print(f"Intermediate rewards: {rewards[:-1]}")
	print(f"Final reward: {rewards[-1]}")
	# NOTE(review): because of the `or`, this passes whenever the last reward
	# is positive, whatever the intermediate rewards were; confirm that this
	# is the intended strength of the check.
	intermediates_all_zero = all(r == 0.0 for r in rewards[:-1])
	assert intermediates_all_zero or rewards[-1] > 0, "Reward structure check"
	print("Reward is delayed to episode end: PASS")

	# Test 6: after one opening offer, an "accept" move must end the episode
	# and yield a non-negative reward.
	# `env` stays bound at module level because the Test 7 section resets
	# this same, already-finished environment.
	print("\n=== Test 6: Accept terminates correctly ===")
	env = ProcureRLEnvironment()
	env.reset(seed=42, task_id="single_issue")
	# First make an offer
	env.step(
	    NegotiationAction(
	        move_type="make_offer", terms={"price": 43000}, message="Reasonable offer."
	    )
	)
	# Then accept current terms
	obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
	print(f"Accept: done={obs.done}, reward={obs.reward:.4f}")
	# Truthiness check instead of `== True` (PEP 8, E712).
	assert obs.done, "FAIL: accept should terminate episode"
	assert obs.reward >= 0.0, "FAIL: reward should be non-negative on accept"
	print("Accept terminates episode: PASS")

	# Test 7: resetting the environment left over from the previous section
	# (now on the "multi_issue" task) must clear the per-episode state fields.
	# Each assert carries a "FAIL: ..." message, like the other checks in this
	# script, so a failure names the field that was not reset.
	print("\n=== Test 7: Reset produces clean state ===")
	env.reset(seed=42, task_id="multi_issue")
	assert env.state.round_number == 0, "FAIL: reset should set round_number to 0"
	# `not x` instead of `x == False` (PEP 8, E712).
	assert not env.state.deal_reached, "FAIL: reset should clear deal_reached"
	assert env.state.cumulative_reward == 0.0, (
	    "FAIL: reset should zero cumulative_reward"
	)
	print("Reset produces clean state: PASS")

	print("\n=== All RL property tests passed ===")