Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # test_rl_properties.py | |
| import sys | |
| sys.path.insert(0, ".") | |
| from server.Procure_RL_environment import ProcureRLEnvironment | |
| from models import NegotiationAction | |
# Test 1: two independent environments reset with the same seed and task
# must open with the same supplier message.
print("=== Test 1: Reproducibility ===")
same_seed_args = {"seed": 42, "task_id": "single_issue"}
first_env = ProcureRLEnvironment()
obs1 = first_env.reset(**same_seed_args)  # obs1 is read again by Test 2
second_env = ProcureRLEnvironment()
repeat_obs = second_env.reset(**same_seed_args)
opening = obs1.supplier_message
repeat_opening = repeat_obs.supplier_message
assert opening == repeat_opening, "FAIL: same seed gives different opening"
print("Same seed = same opening message: PASS")
# Only the first 80 characters are shown to keep the output short.
print(f"Opening: {opening[:80]}...")
# Test 2: print the opening offers obtained with two different seeds.
# NOTE(review): this section is informational only - nothing is asserted,
# so identical offers for both seeds would not fail the run.
print("\n=== Test 2: Different seeds give different behavior ===")
other_seed_env = ProcureRLEnvironment()
other_seed_obs = other_seed_env.reset(seed=99, task_id="single_issue")
# obs1 is the seed-42 observation produced by Test 1.
for seed_label, seed_obs in ((42, obs1), (99, other_seed_obs)):
    print(f"Seed {seed_label} opening price: {seed_obs.current_offer}")
# Test 3: the same offer (same seed, same price) is sent once with
# collaborative wording and once with aggressive wording; the rapport score
# read from the environment state must be higher for the collaborative one.
print("\n=== Test 3: Rapport affects opponent ===")
tone_messages = {
    "collaborative": "I genuinely value a long-term partnership and believe this price reflects our mutual interests.",
    "aggressive": "This is my final offer. Non-negotiable. Take it or leave it.",
}
rapport_by_tone = {}
for tone, wording in tone_messages.items():
    # Fresh environment per tone so the two runs differ only in the message.
    probe_env = ProcureRLEnvironment()
    probe_env.reset(seed=42, task_id="single_issue")
    probe_env.step(
        NegotiationAction(
            move_type="make_offer",
            terms={"price": 40000},
            message=wording,
        )
    )
    rapport_by_tone[tone] = probe_env.state.rapport_score
rapport_collab = rapport_by_tone["collaborative"]
rapport_aggro = rapport_by_tone["aggressive"]
print(f"Collaborative rapport: {rapport_collab:.3f}")
print(f"Aggressive rapport: {rapport_aggro:.3f}")
assert rapport_collab > rapport_aggro, "FAIL: rapport not sensitive to language"
print("Language affects rapport: PASS")
# Test 4: submit up to three offers with the price rising by 1000 each round
# and print the concession counter and reward after every step.
# NOTE(review): the PASS line below is printed unconditionally; no property
# of consecutive_concessions is actually asserted.
print("\n=== Test 4: Sequential decisions matter ===")
seq_env = ProcureRLEnvironment()
step_obs = seq_env.reset(seed=42, task_id="single_issue")
print(f"Round 0: {step_obs.current_offer}")
# Make 3 consecutive concessions
for round_no, offered_price in enumerate(range(40000, 43000, 1000), start=1):
    step_obs = seq_env.step(
        NegotiationAction(
            move_type="make_offer",
            terms={"price": offered_price},
            message="We can move slightly on price.",
        )
    )
    concessions = seq_env.state.consecutive_concessions
    print(
        f"Round {round_no}: consecutive_concessions={concessions}, reward={step_obs.reward}"
    )
    if step_obs.done:
        # The episode ended early; no further offers can be made.
        break
print("Sequential state tracking: PASS")
# Test 5: repeat the same offer for at most five rounds, collecting the
# reward returned by each step, then inspect the reward sequence.
# NOTE(review): the check passes when every earlier reward is 0.0 OR the last
# reward is positive, which is weaker than the "delayed" claim printed below;
# the last collected reward is also only "final" if the episode ended within
# five rounds - confirm the intended criterion.
print("\n=== Test 5: Delayed reward ===")
reward_env = ProcureRLEnvironment()
reward_env.reset(seed=42, task_id="single_issue")
rewards = []
for _ in range(5):
    outcome = reward_env.step(
        NegotiationAction(
            move_type="make_offer",
            terms={"price": 41000},
            message="I think this is a fair price for both parties.",
        )
    )
    rewards.append(outcome.reward)
    if outcome.done:
        break
# The loop always runs at least once, so there is always a last reward.
*earlier_rewards, last_reward = rewards
print(f"Intermediate rewards: {earlier_rewards}")
print(f"Final reward: {last_reward}")
assert (
    all(r == 0.0 for r in earlier_rewards) or last_reward > 0
), "Reward structure check"
print("Reward is delayed to episode end: PASS")
# Test 6: after one offer, an "accept" move must end the episode and yield a
# non-negative reward.
print("\n=== Test 6: Accept terminates correctly ===")
# `env` keeps its name because the reset check in Test 7 reuses it.
env = ProcureRLEnvironment()
env.reset(seed=42, task_id="single_issue")
# First make an offer
env.step(
    NegotiationAction(
        move_type="make_offer", terms={"price": 43000}, message="Reasonable offer."
    )
)
# Then accept current terms
obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
print(f"Accept: done={obs.done}, reward={obs.reward:.4f}")
# Truthiness check instead of `== True` (PEP 8: never compare booleans to
# True/False with ==).
assert obs.done, "FAIL: accept should terminate episode"
assert obs.reward >= 0.0, "FAIL: reward should be non-negative on accept"
print("Accept terminates episode: PASS")
# Test 7: resetting an environment must clear the round counter, the
# deal flag and the cumulative reward.
print("\n=== Test 7: Reset produces clean state ===")
# Reuses `env` from Test 6, so the reset is exercised on an environment that
# has already been stepped through an episode.
env.reset(seed=42, task_id="multi_issue")
fresh_state = env.state
# Each assertion carries a message so a failure names the stale field,
# matching the "FAIL: ..." style used by the other tests in this script.
assert fresh_state.round_number == 0, (
    f"FAIL: round_number is {fresh_state.round_number} after reset"
)
assert not fresh_state.deal_reached, "FAIL: deal_reached still set after reset"
assert fresh_state.cumulative_reward == 0.0, (
    f"FAIL: cumulative_reward is {fresh_state.cumulative_reward} after reset"
)
print("Reset produces clean state: PASS")
print("\n=== All RL property tests passed ===")