Spaces:
Sleeping
Sleeping
File size: 6,769 Bytes
13b4881 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 | import math
from carrom_env.env import CarromEnv
from carrom_env.models import Action, CoinInfo
from carrom_env.constants import MAX_COINS
from carrom_env.green_agent import GreenCarromAgent, Task
def test_reset_shapes():
    """The observation from reset() carries 1 + MAX_COINS entries per array."""
    expected_len = 1 + MAX_COINS
    initial_obs = CarromEnv(seed=42).reset()
    # Checked in the same order as before: positions, velocities, pocketed.
    for field_name in ("positions", "velocities", "pocketed"):
        assert len(getattr(initial_obs, field_name)) == expected_len
def test_step_determinism():
    """Two envs built with the same seed agree after stepping the same action."""
    shot = Action(placement_x=0.0, angle=0.0, force=0.5, spin=0.0)

    def play_once():
        # Fresh environment, same seed, one step with the shared action.
        fresh_env = CarromEnv(seed=123)
        fresh_env.reset()
        return fresh_env.step(shot)

    first_obs, first_reward, first_term, first_trunc, _ = play_once()
    second_obs, second_reward, second_term, second_trunc, _ = play_once()

    assert first_reward == second_reward
    assert first_term == second_term
    assert first_trunc == second_trunc
    assert first_obs.remaining_coins == second_obs.remaining_coins
def test_invalid_action_penalty():
    """Stepping with a negative force still yields a finite reward."""
    env = CarromEnv(seed=0)
    env.reset()
    negative_force_shot = Action(
        placement_x=0.0,
        angle=0.0,
        force=-1.0,
        spin=0.0,
    )
    observation, reward, _terminated, _truncated, _info = env.step(
        negative_force_shot
    )
    assert math.isfinite(reward)
    assert observation.remaining_coins >= 0
def test_opponent_turn():
    """After one step, info and observation both report a known player."""
    valid_players = {"agent", "opponent"}
    env = CarromEnv(seed=1)
    env.reset()
    soft_shot = Action(placement_x=0.0, angle=0.0, force=0.1, spin=0.0)
    observation, _, _, _, step_info = env.step(soft_shot)
    assert step_info["current_player"] in valid_players
    assert observation.current_player in valid_players
def test_pot_detection():
    """A coin placed on a pocket centre lowers remaining_coins after a step."""
    env = CarromEnv(seed=2)
    env.reset()
    # Put the first not-yet-pocketed coin on pocket index 0, then step with
    # zero force.
    target_pocket = env._pocket_centers()[0]
    for candidate in env.coins:
        if not candidate.pocketed:
            candidate.body.position = target_pocket
            break
    else:
        # Same failure mode as next() on an exhausted generator.
        raise StopIteration
    idle_shot = Action(placement_x=0.0, angle=0.0, force=0.0, spin=0.0)
    observation, _, _, _, _ = env.step(idle_shot)
    assert observation.remaining_coins <= MAX_COINS - 1
# --- New tests for v0.2.0 features ---
def test_text_action_aim_at_coin():
    """A text action that aims at a named coin steps with a finite reward."""
    env = CarromEnv(seed=10)
    env.reset()
    command = "aim at queen_0 with strong force from center"
    observation, reward, _terminated, _truncated, _info = env.step(
        Action(action_type="text", text=command)
    )
    assert math.isfinite(reward)
    assert observation.remaining_coins >= 0
def test_text_action_key_value():
    """A key=value style text action steps with a finite reward."""
    env = CarromEnv(seed=11)
    env.reset()
    command = "placement_x=0.1 angle=0.3 force=0.7"
    _obs, reward, _terminated, _truncated, _info = env.step(
        Action(action_type="text", text=command)
    )
    assert math.isfinite(reward)
def test_text_action_pocket_target():
    """A text action that names a pocket as target steps with a finite reward."""
    env = CarromEnv(seed=12)
    env.reset()
    command = "shoot toward top-left pocket with medium force"
    _obs, reward, _terminated, _truncated, _info = env.step(
        Action(action_type="text", text=command)
    )
    assert math.isfinite(reward)
def test_observation_has_coin_info():
    """The reset observation lists coins as CoinInfo with pocket details."""
    pocket_names = {"bottom-left", "bottom-right", "top-left", "top-right"}
    observation = CarromEnv(seed=42).reset()
    assert len(observation.coins) > 0
    first_coin = observation.coins[0]
    assert isinstance(first_coin, CoinInfo)
    assert first_coin.nearest_pocket in pocket_names
    assert first_coin.pocket_distance > 0
def test_observation_text_summary_rich():
    """The text summary of the reset observation contains the expected markers."""
    expected_fragments = (
        "Carrom Board State",
        "Active coins:",
        "queen_0",
        "nearest pocket:",
    )
    summary = CarromEnv(seed=42).reset().text_summary
    for fragment in expected_fragments:
        assert fragment in summary
def test_observation_turn_info():
    """The reset observation reports turn 0 and the configured max_turns."""
    turn_cap = 100
    observation = CarromEnv(seed=42, max_turns=turn_cap).reset()
    assert observation.turn_number == 0
    assert observation.max_turns == turn_cap
def test_reward_win_bonus():
    """Potting the last coin while leading should pay a bonus or end the game.

    NOTE(review): the final assertion also passes when the episode merely
    terminates, so it does not by itself prove a bonus was paid.
    """
    env = CarromEnv(seed=0)
    env.reset()

    # Mark every coin as pocketed and take its body out of the physics space.
    for piece in env.coins:
        piece.pocketed = True
        if piece.body not in env.space.bodies:
            continue
        env.space.remove(piece.body, piece.shape)

    # Bring the final coin back, sitting on pocket index 0, so that the next
    # step is the one that pots it.
    final_coin = env.coins[-1]
    final_coin.pocketed = False
    pocket_centre = env._pocket_centers()[0]
    final_coin.body.position = pocket_centre
    env.space.add(final_coin.body, final_coin.shape)

    # Give the agent a lead before the shot.
    env.agent_score = 10
    env.opponent_score = 2

    idle_shot = Action(placement_x=0.0, angle=0.0, force=0.0)
    _obs, reward, terminated, _truncated, _info = env.step(idle_shot)

    exceeds_threshold = reward > 1.0
    assert exceeds_threshold or terminated
def test_green_agent_default_suite():
    """A default GreenCarromAgent has at least three tasks covering all tiers."""
    required_tiers = {"easy", "standard", "hard"}
    default_tasks = GreenCarromAgent().tasks
    assert len(default_tasks) >= 3
    seen_tiers = set(task.tier for task in default_tasks)
    assert required_tiers <= seen_tiers
def test_green_agent_evaluates_purple():
    """evaluate() on a two-task suite yields a summary with the expected keys."""

    def purple_agent(obs):
        # Fixed policy: the observation is ignored.
        return Action(placement_x=0.0, angle=0.0, force=0.5)

    # Two short tasks keep the run fast.
    compact_suite = [
        Task(task_id="t_easy", seed=0, max_turns=5, tier="easy"),
        Task(task_id="t_standard", seed=1, max_turns=5, tier="standard"),
    ]
    summary = GreenCarromAgent(tasks=compact_suite).evaluate(purple_agent).summary()

    assert summary["n_tasks"] == 2
    for metric in ("avg_reward", "win_rate", "icf_compliance", "efficiency_score"):
        assert metric in summary
    assert summary["total_sim_steps"] > 0
def test_green_agent_single_task():
    """evaluate_task() echoes the task id and tier and reports bounded metrics."""

    def purple(obs):
        # Fixed policy: the observation is ignored.
        return Action(placement_x=0.0, angle=0.0, force=0.3)

    unit_task = Task(task_id="unit", seed=7, max_turns=3, tier="easy")
    outcome = GreenCarromAgent(tasks=[unit_task]).evaluate_task(purple, unit_task)

    assert outcome.task_id == "unit"
    assert outcome.tier == "easy"
    assert 0.0 <= outcome.icf_compliance <= 1.0
    assert outcome.total_sim_steps > 0
def test_green_agent_by_tier():
    """by_tier() has one entry per tier, each counting one task."""

    def purple(obs):
        # Fixed policy: the observation is ignored.
        return Action(placement_x=0.0, angle=0.0, force=0.5)

    suite = [
        Task(task_id="e1", seed=0, max_turns=3, tier="easy"),
        Task(task_id="s1", seed=1, max_turns=3, tier="standard"),
    ]
    grouped = GreenCarromAgent(tasks=suite).evaluate(purple).by_tier()

    tiers_under_test = ("easy", "standard")
    # Presence is checked for both tiers before any count is read.
    for tier_name in tiers_under_test:
        assert tier_name in grouped
    for tier_name in tiers_under_test:
        assert grouped[tier_name]["n"] == 1
|