import random
import unittest

from dispatch_arena.models import Config, Mode, OrderStatus, VerifierVerdict
from dispatch_arena.server.env import DispatchArenaEnvironment


def heuristic_action(obs):
    """Choose a greedy baseline action for the given observation.

    When ``obs.state.mode`` equals ``Mode.NORMAL`` the result is an action
    dict: an ``assign`` pairing the first courier that is ``"idle"`` with no
    load and the first order that is ``"queued"`` or ``"ready"`` with no
    assigned courier, or a ``hold`` when no such pair exists.

    In any other mode the result is the first name from a fixed preference
    list that is present in ``obs.legal_actions``; if none is present, the
    first legal action is returned.
    """
    state = obs.state
    in_normal_mode = state.mode == Mode.NORMAL

    if not in_normal_mode:
        legal = obs.legal_actions
        # Preference order: finish work in hand before moving, wait last.
        preferred = ("pickup", "dropoff", "go_pickup", "go_dropoff", "wait")
        match = next((name for name in preferred if name in legal), None)
        return legal[0] if match is None else match

    # First courier that is idle and not carrying anything.
    free_courier = None
    for candidate in state.couriers:
        if candidate.status == "idle" and candidate.load is None:
            free_courier = candidate
            break

    # First order still waiting for a courier.
    open_statuses = {"queued", "ready"}
    open_order = None
    for candidate in state.orders:
        if candidate.status in open_statuses and candidate.assigned_courier_id is None:
            open_order = candidate
            break

    if not (free_courier and open_order):
        return {"action_type": "hold"}
    return {
        "action_type": "assign",
        "courier_id": free_courier.id,
        "order_id": open_order.id,
    }


def play_episode(env: DispatchArenaEnvironment, seed: int = 1):
    """Run one full episode driven by ``heuristic_action``.

    Resets ``env`` with ``seed``, then keeps stepping with the heuristic's
    chosen action until an observation reports ``done``. That final
    observation is returned.
    """
    current = env.reset(seed=seed)
    while True:
        if current.done:
            return current
        current = env.step(heuristic_action(current))


class DispatchArenaEnvironmentTests(unittest.TestCase):
    """Behavioral tests for ``DispatchArenaEnvironment`` in MINI and NORMAL modes."""

    # Action names used by the MINI-mode tests, in the order the action-mask
    # test pairs them with ``obs.action_mask``.
    _MINI_ACTIONS = ("wait", "go_pickup", "go_dropoff", "pickup", "dropoff")

    def test_mini_golden_successful_trajectory(self):
        """The heuristic finishes a MINI episode with a successful delivery."""
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=12))
        obs = play_episode(env, seed=1)

        self.assertTrue(obs.done)
        self.assertFalse(obs.truncated)
        self.assertEqual(obs.verifier_status, VerifierVerdict.DELIVERED_SUCCESSFULLY)
        self.assertEqual(obs.state.orders[0].status, OrderStatus.DELIVERED)
        self.assertGreater(env.get_episode_summary()["total_reward"], 0.0)

    def test_mini_reward_components_are_returned(self):
        """A ``go_pickup`` step reports step cost, progress reward and a matching total."""
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=12))
        obs = env.reset(seed=1)
        self.assertIn("go_pickup", obs.legal_actions)

        obs = env.step("go_pickup")

        self.assertEqual(obs.reward_breakdown.step_cost, -0.1)
        self.assertGreater(obs.reward_breakdown.progress_reward, 0.0)
        self.assertEqual(obs.reward, obs.reward_breakdown.total_reward)

    def test_invalid_action_penalty_without_mutation(self):
        """An invalid action is flagged and penalized but only the tick advances."""
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=12))
        env.reset(seed=1)
        before = env.state

        obs = env.step("dropoff")
        after = env.state

        self.assertTrue(obs.info["invalid_action"])
        self.assertLess(obs.reward_breakdown.invalid_penalty, 0.0)
        # Courier position and order status must be untouched; the tick still moves.
        self.assertEqual(after.couriers[0].node_id, before.couriers[0].node_id)
        self.assertEqual(after.orders[0].status, before.orders[0].status)
        self.assertEqual(after.tick, before.tick + 1)

    def test_timeout_trajectory_is_negative(self):
        """With ``max_ticks=1`` a single ``wait`` ends the episode as a timeout failure."""
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=1))
        env.reset(seed=1)

        obs = env.step("wait")

        self.assertTrue(obs.done)
        self.assertTrue(obs.truncated)
        self.assertEqual(obs.verifier_status, VerifierVerdict.TIMEOUT_FAILURE)
        self.assertLess(obs.reward_breakdown.timeout_penalty, 0.0)

    def test_hidden_prep_remaining_never_appears_publicly(self):
        """With ``visible_prep=False`` no public serialization mentions ``prep_remaining``.

        Both the reset observation and the observation returned by the
        following ``wait`` step are inspected, together with the environment
        state and the episode summary.
        """
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=12, visible_prep=False))
        reset_obs = env.reset(seed=3)
        # Keep the post-step observation: a leak could appear only after a tick.
        step_obs = env.step("wait")

        public_blob = " ".join(
            [
                str(reset_obs.to_dict()),
                reset_obs.summary_text,
                str(step_obs.to_dict()),
                step_obs.summary_text,
                str(env.state.to_dict()),
                str(env.get_episode_summary()),
            ]
        )

        self.assertNotIn("prep_remaining", public_blob)

    def test_visible_mode_can_expose_ready_now_and_prep(self):
        """With ``visible_prep=True`` the reset observation exposes prep fields."""
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=12, visible_prep=True))
        obs = env.reset(seed=3)

        data = obs.to_dict()

        self.assertIn("prep_remaining", str(data))
        self.assertIn("ready_now", str(data))

    def test_action_mask_matches_legal_action_list(self):
        """Each action-mask entry is 1 exactly when its action is listed as legal."""
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=12))
        obs = env.reset(seed=4)

        for action, mask_value in zip(self._MINI_ACTIONS, obs.action_mask):
            self.assertEqual(mask_value, 1 if action in obs.legal_actions else 0)

    def test_seeded_reset_is_reproducible(self):
        """Two environments with the same config and seed produce identical traces."""
        config = Config(mode=Mode.MINI, max_ticks=12)
        env1 = DispatchArenaEnvironment(config)
        env2 = DispatchArenaEnvironment(config)
        obs1 = env1.reset(seed=42)
        obs2 = env2.reset(seed=42)

        self.assertEqual(obs1.to_dict(), obs2.to_dict())

        actions = ["go_pickup", "wait", "pickup"]
        trace1 = [env1.step(action).to_dict() for action in actions]
        trace2 = [env2.step(action).to_dict() for action in actions]
        self.assertEqual(trace1, trace2)

    def test_random_rollout_never_breaks_invariants(self):
        """Random MINI actions never yield a delivered order with a loaded courier or an overrun tick."""
        env = DispatchArenaEnvironment(Config(mode=Mode.MINI, max_ticks=12))
        obs = env.reset(seed=9)
        rng = random.Random(9)

        while not obs.done:
            action = rng.choice(self._MINI_ACTIONS)
            obs = env.step(action)
            delivered = obs.state.orders[0].status == OrderStatus.DELIVERED
            self.assertFalse(delivered and obs.state.couriers[0].load)
            self.assertLessEqual(obs.state.tick, obs.state.max_ticks)

    def test_normal_heuristic_rollout_delivers_some_orders(self):
        """The heuristic delivers at least one order in NORMAL mode, each with a courier recorded."""
        config = Config(mode=Mode.NORMAL, max_ticks=18, num_couriers=3, num_orders=5)
        env = DispatchArenaEnvironment(config)
        obs = play_episode(env, seed=5)

        self.assertTrue(obs.done)
        self.assertGreaterEqual(env.get_episode_summary()["delivered_orders"], 1)
        for order in obs.state.orders:
            self.assertFalse(order.status == OrderStatus.DELIVERED and order.assigned_courier_id is None)

    def test_normal_invalid_duplicate_assignment_is_penalized(self):
        """Assigning an already-assigned order to a second courier is invalid and penalized."""
        config = Config(mode=Mode.NORMAL, max_ticks=18, num_couriers=2, num_orders=3)
        env = DispatchArenaEnvironment(config)
        env.reset(seed=5)
        env.step({"action_type": "assign", "courier_id": "courier_0", "order_id": "order_0"})

        obs = env.step({"action_type": "assign", "courier_id": "courier_1", "order_id": "order_0"})

        self.assertTrue(obs.info["invalid_action"])
        self.assertLess(obs.reward_breakdown.invalid_penalty, 0.0)

    def test_rolling_arrivals_appear_over_time(self):
        """With rolling arrivals, fewer than all orders are visible at reset and all by the end."""
        config = Config(
            mode=Mode.NORMAL,
            max_ticks=20,
            num_couriers=3,
            num_orders=5,
            scenario_bucket="easy",
            rolling_arrivals=True,
        )
        env = DispatchArenaEnvironment(config)
        obs = env.reset(seed=11)

        initial_visible = len(obs.state.orders)
        # With rolling enabled and num_orders=5, fewer than all orders should
        # be visible at t=0 — the rest are scheduled to arrive later.
        self.assertLess(initial_visible, 5)

        # Run a heuristic loop; by the end, every order should have arrived
        # (i.e., num_orders worth of orders should appear in state.orders).
        while not obs.done:
            obs = env.step(heuristic_action(obs))
        self.assertEqual(len(obs.state.orders), 5)

    def test_pending_arrivals_never_leak_in_public_state(self):
        """Orders still pending arrival are absent from the reset-time public serializations."""
        config = Config(
            mode=Mode.NORMAL,
            max_ticks=20,
            num_couriers=3,
            num_orders=6,
            rolling_arrivals=True,
        )
        env = DispatchArenaEnvironment(config)
        obs = env.reset(seed=13)

        # The env has pending arrivals scheduled for future ticks; none of
        # them may share an id with an order that is already visible.
        self.assertGreater(len(env._pending_arrivals), 0)
        visible_ids = {order.id for order in obs.state.orders}
        pending_ids = {order.id for order in env._pending_arrivals}
        self.assertEqual(visible_ids & pending_ids, set())

        # And no pending order id should appear in any public serialization.
        public_blob = str(obs.to_dict()) + str(env.state.to_dict()) + obs.summary_text
        for pending_id in pending_ids:
            self.assertNotIn(pending_id, public_blob)

    def test_traffic_noise_is_deterministic_and_extends_eta(self):
        """Traffic multipliers are seed-deterministic, at least 1.0, and empty without noise.

        NOTE(review): the ETA effect named in the test title is not asserted
        here; only the multipliers themselves are checked.
        """
        config_no_traffic = Config(
            mode=Mode.NORMAL,
            max_ticks=20,
            num_couriers=2,
            num_orders=3,
            traffic_noise=0.0,
        )
        config_with_traffic = Config(
            mode=Mode.NORMAL,
            max_ticks=20,
            num_couriers=2,
            num_orders=3,
            traffic_noise=1.0,
        )

        env_a = DispatchArenaEnvironment(config_with_traffic)
        env_b = DispatchArenaEnvironment(config_with_traffic)
        env_a.reset(seed=21)
        env_b.reset(seed=21)
        # Same seed → identical traffic multipliers.
        self.assertEqual(env_a._traffic_multipliers, env_b._traffic_multipliers)
        # Multipliers must be >= 1.0 (uniform 1.0 .. 1.0+noise).
        self.assertTrue(all(m >= 1.0 for m in env_a._traffic_multipliers.values()))

        env_clean = DispatchArenaEnvironment(config_no_traffic)
        env_clean.reset(seed=21)
        self.assertEqual(env_clean._traffic_multipliers, {})

    def test_traffic_multipliers_never_appear_in_observation(self):
        """Neither the reset observation nor the state serialization names the traffic multipliers."""
        config = Config(
            mode=Mode.NORMAL,
            max_ticks=20,
            num_couriers=2,
            num_orders=3,
            traffic_noise=0.8,
        )
        env = DispatchArenaEnvironment(config)
        obs = env.reset(seed=33)

        public_blob = str(obs.to_dict()) + str(env.state.to_dict())
        self.assertNotIn("traffic_multiplier", public_blob)
        self.assertNotIn("_traffic", public_blob)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()