#!/usr/bin/env python3
# test_rl_properties.py
"""Smoke tests for RL-relevant properties of ``ProcureRLEnvironment``.

Run as a plain script from the directory that contains the ``server``
package and ``models`` module (the current directory is put on ``sys.path``).
Each numbered section prints what it observed and uses ``assert`` to fail
fast; the final banner is only reached when every assertion held.
"""
import sys

# Must run before the project imports below so they resolve relative to the
# current working directory.
sys.path.insert(0, ".")

from server.Procure_RL_environment import ProcureRLEnvironment
from models import NegotiationAction

print("=== Test 1: Reproducibility ===")
# Two independent environments reset with the same seed and task must open
# with the same supplier message.
env1 = ProcureRLEnvironment()
obs1 = env1.reset(seed=42, task_id="single_issue")
env2 = ProcureRLEnvironment()
obs2 = env2.reset(seed=42, task_id="single_issue")
assert obs1.supplier_message == obs2.supplier_message, (
    "FAIL: same seed gives different opening"
)
print("Same seed = same opening message: PASS")
print(f"Opening: {obs1.supplier_message[:80]}...")

print("\n=== Test 2: Different seeds give different behavior ===")
# Informational only: both opening offers are printed for manual comparison;
# nothing is asserted in this section.
env3 = ProcureRLEnvironment()
obs3 = env3.reset(seed=99, task_id="single_issue")
print(f"Seed 42 opening price: {obs1.current_offer}")
print(f"Seed 99 opening price: {obs3.current_offer}")

print("\n=== Test 3: Rapport affects opponent ===")
# Same seed, same move and same price in both runs; only the message text
# differs, so any rapport difference comes from the language used.

# Agent with collaborative language
env_collab = ProcureRLEnvironment()
env_collab.reset(seed=42, task_id="single_issue")
action_collab = NegotiationAction(
    move_type="make_offer",
    terms={"price": 40000},
    message="I genuinely value a long-term partnership and believe this price reflects our mutual interests.",
)
env_collab.step(action_collab)
rapport_collab = env_collab.state.rapport_score

# Agent with aggressive language
env_aggro = ProcureRLEnvironment()
env_aggro.reset(seed=42, task_id="single_issue")
action_aggro = NegotiationAction(
    move_type="make_offer",
    terms={"price": 40000},
    message="This is my final offer. Non-negotiable. Take it or leave it.",
)
env_aggro.step(action_aggro)
rapport_aggro = env_aggro.state.rapport_score

print(f"Collaborative rapport: {rapport_collab:.3f}")
print(f"Aggressive rapport: {rapport_aggro:.3f}")
assert rapport_collab > rapport_aggro, "FAIL: rapport not sensitive to language"
print("Language affects rapport: PASS")

print("\n=== Test 4: Sequential decisions matter ===")
env = ProcureRLEnvironment()
obs = env.reset(seed=42, task_id="single_issue")
print(f"Round 0: {obs.current_offer}")
# Make 3 consecutive concessions (the offered price rises by 1000 each round).
# This section only prints the tracked state; it asserts nothing.
for i in range(3):
    action = NegotiationAction(
        move_type="make_offer",
        terms={"price": 40000 + i * 1000},
        message="We can move slightly on price.",
    )
    obs = env.step(action)
    print(
        f"Round {i + 1}: consecutive_concessions={env.state.consecutive_concessions}, reward={obs.reward}"
    )
    if obs.done:
        break
print("Sequential state tracking: PASS")

print("\n=== Test 5: Delayed reward ===")
env = ProcureRLEnvironment()
env.reset(seed=42, task_id="single_issue")
rewards = []
# Repeat the same offer for up to 5 steps, stopping early if the episode ends.
for _ in range(5):
    action = NegotiationAction(
        move_type="make_offer",
        terms={"price": 41000},
        message="I think this is a fair price for both parties.",
    )
    obs = env.step(action)
    rewards.append(obs.reward)
    if obs.done:
        break
print(f"Intermediate rewards: {rewards[:-1]}")
print(f"Final reward: {rewards[-1]}")
# Passes when EITHER every non-final reward is exactly 0.0 OR the last
# recorded reward is positive.
assert all(r == 0.0 for r in rewards[:-1]) or rewards[-1] > 0, "Reward structure check"
print("Reward is delayed to episode end: PASS")

print("\n=== Test 6: Accept terminates correctly ===")
env = ProcureRLEnvironment()
env.reset(seed=42, task_id="single_issue")
# First make an offer
env.step(
    NegotiationAction(
        move_type="make_offer", terms={"price": 43000}, message="Reasonable offer."
    )
)
# Then accept current terms
obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
print(f"Accept: done={obs.done}, reward={obs.reward:.4f}")
assert obs.done, "FAIL: accept should terminate episode"
assert obs.reward >= 0.0, "FAIL: reward should be non-negative on accept"
print("Accept terminates episode: PASS")

print("\n=== Test 7: Reset produces clean state ===")
# Deliberately reuses the environment from Test 6 (already stepped through an
# accept) so the reset is exercised on a used instance rather than a fresh one.
env.reset(seed=42, task_id="multi_issue")
assert env.state.round_number == 0, "FAIL: round_number not cleared by reset"
assert not env.state.deal_reached, "FAIL: deal_reached not cleared by reset"
assert env.state.cumulative_reward == 0.0, (
    "FAIL: cumulative_reward not cleared by reset"
)
print("Reset produces clean state: PASS")

print("\n=== All RL property tests passed ===")