#!/usr/bin/env python3
# test_rl_properties.py
import sys

sys.path.insert(0, ".")

from server.Procure_RL_environment import ProcureRLEnvironment
from models import NegotiationAction

# Test 1: reset two independent environments with the same seed and task, then
# require that both report the same opening supplier message.
print("=== Test 1: Reproducibility ===")
env1 = ProcureRLEnvironment()
obs1 = env1.reset(task_id="single_issue", seed=42)

env2 = ProcureRLEnvironment()
obs2 = env2.reset(task_id="single_issue", seed=42)

opening_first = obs1.supplier_message
opening_second = obs2.supplier_message
assert opening_first == opening_second, "FAIL: same seed gives different opening"
print("Same seed = same opening message: PASS")
# Only the first 80 characters are echoed to keep the log line short.
print(f"Opening: {opening_first[:80]}...")

# Test 2: reset a fresh environment with a different seed and show both
# opening offers side by side. `obs1` is the seed-42 observation from Test 1.
print("\n=== Test 2: Different seeds give different behavior ===")
env3 = ProcureRLEnvironment()
obs3 = env3.reset(seed=99, task_id="single_issue")
offer_seed_42 = f"{obs1.current_offer}"
offer_seed_99 = f"{obs3.current_offer}"
print(f"Seed 42 opening price: {offer_seed_42}")
print(f"Seed 99 opening price: {offer_seed_99}")
# Report whether the two openings actually differ instead of leaving the
# heading unverified. The rendered offers are compared (rather than the raw
# values) because that formatting is already known to work for this type.
# This is a report, not an assert: two seeds may legitimately coincide.
openings_differ = (
    offer_seed_42 != offer_seed_99
    or obs1.supplier_message != obs3.supplier_message
)
if openings_differ:
    print("Different seeds = different opening: PASS")
else:
    print("WARNING: seeds 42 and 99 produced identical openings")

# Test 3: two environments start from the same seed and receive the same
# 40000 offer; only the wording of the accompanying message differs. The
# rapport score read from each environment's state after one step is compared.
print("\n=== Test 3: Rapport affects opponent ===")
collaborative_text = "I genuinely value a long-term partnership and believe this price reflects our mutual interests."
aggressive_text = "This is my final offer. Non-negotiable. Take it or leave it."

# Collaborative wording first.
env_collab = ProcureRLEnvironment()
env_collab.reset(task_id="single_issue", seed=42)
env_collab.step(
    NegotiationAction(
        move_type="make_offer",
        terms={"price": 40000},
        message=collaborative_text,
    )
)
rapport_collab = env_collab.state.rapport_score

# Aggressive wording second, on a separate environment.
env_aggro = ProcureRLEnvironment()
env_aggro.reset(task_id="single_issue", seed=42)
env_aggro.step(
    NegotiationAction(
        move_type="make_offer",
        terms={"price": 40000},
        message=aggressive_text,
    )
)
rapport_aggro = env_aggro.state.rapport_score

print(f"Collaborative rapport: {rapport_collab:.3f}")
print(f"Aggressive rapport: {rapport_aggro:.3f}")
assert rapport_collab > rapport_aggro, "FAIL: rapport not sensitive to language"
print("Language affects rapport: PASS")

# Test 4: submit up to three offers whose price rises by 1000 each round
# (40000, 41000, 42000) and log the environment's consecutive-concession
# counter and the step reward after each one. Stops early if the episode ends.
print("\n=== Test 4: Sequential decisions matter ===")
env = ProcureRLEnvironment()
obs = env.reset(task_id="single_issue", seed=42)
print(f"Round 0: {obs.current_offer}")
for round_no, offered_price in enumerate(range(40000, 43000, 1000), start=1):
    obs = env.step(
        NegotiationAction(
            move_type="make_offer",
            terms={"price": offered_price},
            message="We can move slightly on price.",
        )
    )
    streak = env.state.consecutive_concessions
    print(f"Round {round_no}: consecutive_concessions={streak}, reward={obs.reward}")
    if obs.done:
        break
# NOTE(review): nothing is asserted in this section; the PASS line is printed
# whenever the loop completes without raising.
print("Sequential state tracking: PASS")

# Test 5: repeat the same 41000 offer for at most five steps, collecting the
# reward of every step, then inspect how the rewards are distributed.
print("\n=== Test 5: Delayed reward ===")
env = ProcureRLEnvironment()
env.reset(task_id="single_issue", seed=42)
rewards = []
for _ in range(5):
    obs = env.step(
        NegotiationAction(
            move_type="make_offer",
            terms={"price": 41000},
            message="I think this is a fair price for both parties.",
        )
    )
    rewards.append(obs.reward)
    if obs.done:
        break

# The loop always runs at least once, so there is always a last element.
*intermediate, final = rewards
print(f"Intermediate rewards: {intermediate}")
print(f"Final reward: {final}")
# NOTE(review): this check passes if every earlier reward is exactly 0.0 OR
# the last reward is positive, so non-zero intermediate rewards are accepted
# whenever the last one is > 0. Also, if the episode has not ended after five
# steps, "final" is simply the fifth step's reward. Confirm this is intended.
intermediates_all_zero = all(r == 0.0 for r in intermediate)
assert intermediates_all_zero or final > 0, "Reward structure check"
print("Reward is delayed to episode end: PASS")

# Test 6: make one offer, then send an "accept" action and require that the
# resulting observation is marked done with a non-negative reward.
print("\n=== Test 6: Accept terminates correctly ===")
env = ProcureRLEnvironment()
env.reset(seed=42, task_id="single_issue")
# First make an offer
env.step(
    NegotiationAction(
        move_type="make_offer", terms={"price": 43000}, message="Reasonable offer."
    )
)
# Then accept current terms
obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
# Printed before the assertions so the values are visible if one of them fails.
print(f"Accept: done={obs.done}, reward={obs.reward:.4f}")
# Truthiness check instead of `== True` (PEP 8: don't compare booleans with ==).
assert obs.done, "FAIL: accept should terminate episode"
assert obs.reward >= 0.0, "FAIL: reward should be non-negative on accept"
print("Accept terminates episode: PASS")

# Test 7: reset an environment that has already been stepped and check that
# the round counter, deal flag and cumulative reward are back to their
# initial values. Reuses `env` from Test 6 rather than constructing a new
# one, and switches to the "multi_issue" task.
print("\n=== Test 7: Reset produces clean state ===")
env.reset(seed=42, task_id="multi_issue")
# Each assertion carries a message, matching the "FAIL: ..." style of the
# earlier tests, so a failure identifies which field was not reset.
assert env.state.round_number == 0, "FAIL: round_number not reset to 0"
assert not env.state.deal_reached, "FAIL: deal_reached still set after reset"
assert env.state.cumulative_reward == 0.0, (
    "FAIL: cumulative_reward not reset to 0.0"
)
print("Reset produces clean state: PASS")

# Reached only if every assertion above held.
print("\n=== All RL property tests passed ===")