File size: 3,192 Bytes
9bbba46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""Tests for per-episode randomization — seed determinism, fee/budget jitter."""

import sys
import os

# Put the parent of this file's directory at the front of sys.path before the
# import below runs — presumably so the `server` package resolves when the
# tests are launched from inside this directory (confirm against repo layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from server.permit_env_environment import PermitEnvironment, TASKS


def test_same_seed_same_result():
    """Resetting twice with one seed must give matching budget, permit order and fees."""
    environment = PermitEnvironment()
    reset_kwargs = {"seed": 42, "task_name": "medium_cafe"}

    # Both resets are performed before anything is compared.
    first = environment.reset(**reset_kwargs)
    second = environment.reset(**reset_kwargs)

    assert first.budget_remaining == second.budget_remaining
    assert list(first.permits.keys()) == list(second.permits.keys())
    # Fees are compared permit by permit, keyed on the first observation's ids.
    for permit_id, permit in first.permits.items():
        assert permit["fee"] == second.permits[permit_id]["fee"]


def test_different_seed_different_fees():
    """Resets with seeds 1 and 2 must not report identical fee tables."""

    def fee_table(observation):
        # Map each permit id to the fee carried by that observation.
        return {
            permit_id: permit["fee"]
            for permit_id, permit in observation.permits.items()
        }

    environment = PermitEnvironment()
    first = environment.reset(seed=1, task_name="easy_foodtruck")
    second = environment.reset(seed=2, task_name="easy_foodtruck")

    # A single differing fee is enough; every fee matching across two seeds
    # is expected to be vanishingly unlikely.
    assert fee_table(first) != fee_table(second), "Fees should differ between seeds"


def test_different_seed_different_budget():
    """Seeds 10 and 20 on the same task must not report the same remaining budget."""
    environment = PermitEnvironment()

    # Run both resets first, then compare the budgets they reported.
    first, second = [
        environment.reset(seed=seed, task_name="hard_restaurant")
        for seed in (10, 20)
    ]

    assert first.budget_remaining != second.budget_remaining


def test_permit_order_shuffled():
    """Across seeds 0-9 the permit key order must take at least two distinct forms."""
    environment = PermitEnvironment()

    # Record each seed's key order as a tuple; the set keeps only distinct orders.
    distinct_orders = {
        tuple(environment.reset(seed=seed, task_name="hard_restaurant").permits.keys())
        for seed in range(10)
    }

    assert len(distinct_orders) >= 2, "Permit order should vary across seeds"


def test_fee_jitter_within_bounds():
    """Every observed fee must lie within +/-20% of the fee configured in TASKS.

    Checked for seeds 0-19 on the ``easy_foodtruck`` task, with a 0.01
    allowance on each side of the band.
    """
    environment = PermitEnvironment()

    permit_configs = TASKS["easy_foodtruck"]["permits"]
    reference_fees = {}
    for permit_id, config in permit_configs.items():
        reference_fees[permit_id] = config["fee"]

    for seed in range(20):
        observation = environment.reset(seed=seed, task_name="easy_foodtruck")
        for pid, p in observation.permits.items():
            reference = reference_fees[pid]
            # The 0.01 slack absorbs float rounding at the band edges.
            low = reference * 0.80 - 0.01
            high = reference * 1.20 + 0.01
            assert low <= p["fee"] <= high, (
                f"seed={seed} {pid} fee={p['fee']} outside [{low:.2f}, {high:.2f}]"
            )


def test_budget_jitter_within_bounds():
    """Remaining budget after reset must lie within +/-10% of the TASKS budget.

    Checked for seeds 0-19 on the ``medium_cafe`` task, with a 0.01
    allowance on each side of the band.
    """
    configured_budget = TASKS["medium_cafe"]["budget"]
    # The band depends only on the configured budget, so compute it once.
    low = configured_budget * 0.90 - 0.01
    high = configured_budget * 1.10 + 0.01

    environment = PermitEnvironment()
    for seed in range(20):
        obs = environment.reset(seed=seed, task_name="medium_cafe")
        assert low <= obs.budget_remaining <= high, (
            f"seed={seed} budget={obs.budget_remaining} outside [{low:.2f}, {high:.2f}]"
        )