#!/usr/bin/env python3
# test_calibration.py
import sys

sys.path.insert(0, ".")

from server.Procure_RL_environment import ProcureRLEnvironment
from models import NegotiationAction
import random


def run_random_agent(task_id, seed=42):
    """Play one episode with uniformly random offers and return its final reward.

    Args:
        task_id: Scenario key: "single_issue", "multi_issue" or "adversarial".
            Any other value raises KeyError at the range lookup (unless
            ``env.reset`` rejects it first).
        seed: Forwarded to ``env.reset``; ``seed + 1`` seeds the agent's own
            random generator.

    Returns:
        ``obs.reward`` of the last observation, or 0.0 when that reward is falsy.
    """
    env = ProcureRLEnvironment()
    env.reset(seed=seed, task_id=task_id)
    rng = random.Random(seed + 1)

    # Per-task (low, high) sampling bounds for every negotiable issue.
    offer_ranges = {
        "single_issue": {"price": (38000, 52000)},
        "multi_issue": {"price": (40000, 58000), "payment_days": (30, 90)},
        "adversarial": {
            "price": (80000, 120000),
            "payment_days": (30, 90),
            "support_hours": (80, 200),
        },
    }
    bounds = offer_ranges[task_id]

    for _ in range(15):
        # One uniform draw per issue, in the dict's declaration order.
        terms = {issue: rng.uniform(lo, hi) for issue, (lo, hi) in bounds.items()}
        obs = env.step(
            NegotiationAction(
                move_type="make_offer", terms=terms, message="Here is my offer."
            )
        )
        if obs.done:
            break
    else:
        # Episode still open after 15 offers: send an explicit accept.
        obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))

    return obs.reward or 0.0


def run_good_agent(task_id, seed=42):
    """Play one episode with a fixed, floor-aware offer and return its final reward.

    The agent derives a single target offer from the opening price in the
    first observation, repeats that offer (with a collaborative message) for
    up to 10 steps, and sends an explicit accept if the episode is still open.

    Args:
        task_id: Scenario key. "single_issue" and "multi_issue" have their own
            targets; every other value uses the adversarial targets.
        seed: Forwarded to ``env.reset``.

    Returns:
        ``obs.reward`` of the last observation, or 0.0 when that reward is falsy.
    """
    env = ProcureRLEnvironment()
    obs = env.reset(seed=seed, task_id=task_id)

    # Opening price taken from the first observation; 52000 is the fallback
    # when the current offer carries no "price" entry.
    opening_price = obs.current_offer.get("price", 52000)

    # Peek at the opponent's price floor when the environment exposes one.
    # Fall back to 80% of the opening price if the opponent object, the
    # attribute, or its value is missing, rather than raising.
    opponent = getattr(env, "_opponent", None)
    floor = getattr(opponent, "price_floor", None)
    if floor is None:
        floor = opening_price * 0.80

    # Per-task discount off the opening price plus fixed non-price targets.
    if task_id == "single_issue":
        discount, extra_terms = 0.78, {}  # 22% below opening
    elif task_id == "multi_issue":
        discount, extra_terms = 0.80, {"payment_days": 45}
    else:  # adversarial
        discount, extra_terms = 0.80, {"payment_days": 50, "support_hours": 160}

    # Never target less than 5% above the floor (presumably the opponent
    # rejects anything below its floor; confirm against the environment).
    target_price = max(opening_price * discount, floor * 1.05)
    targets = {"price": target_price, **extra_terms}

    for _ in range(10):
        action = NegotiationAction(
            move_type="make_offer",
            terms=targets,
            message="I value our partnership and believe this offer reflects fair market value for both parties. I'm flexible and want to find a solution that works for us both.",
        )
        obs = env.step(action)
        if obs.done:
            return obs.reward or 0.0

    # Episode still open after 10 identical offers: send an explicit accept.
    obs = env.step(NegotiationAction(move_type="accept", terms={}, message=""))
    return obs.reward or 0.0


# Calibration report: for each task, run both agents on seeds 0-4, print the
# per-seed scores and averages, and flag a suspicious spread.
# NOTE: this runs at import time; there is no ``__main__`` guard.
print("=== Score Spread Calibration ===")
for task in ("single_issue", "multi_issue", "adversarial"):
    # All random-agent episodes run before any strategic-agent episode.
    random_scores = [run_random_agent(task, seed=i) for i in range(5)]
    good_scores = [run_good_agent(task, seed=i) for i in range(5)]

    random_avg = sum(random_scores) / len(random_scores)
    good_avg = sum(good_scores) / len(good_scores)
    spread = good_avg - random_avg

    print(f"\n{task}:")
    random_rounded = [round(score, 3) for score in random_scores]
    print(f"  Random agent:      {random_rounded} avg={random_avg:.3f}")
    good_rounded = [round(score, 3) for score in good_scores]
    print(f"  Strategic agent:   {good_rounded} avg={good_avg:.3f}")
    print(f"  Spread:            {spread:.3f}")

    # The spread check takes priority over the absolute-score check.
    if spread < 0.05:
        verdict = "  ⚠️  WARNING: spread too small — environment may be trivial or broken"
    elif good_avg < 0.10:
        verdict = "  ⚠️  WARNING: even good agent scores very low — too hard"
    else:
        verdict = "  ✅ Score spread looks healthy"
    print(verdict)