from __future__ import annotations
import unittest
from environment import SentinelEnv
class EnvironmentTests(unittest.TestCase):
    """Checks of ``SentinelEnv`` reset/step bookkeeping and reward reporting."""

    # Safety cap for loops that run an episode to completion, so a regression
    # that never sets ``done`` fails the test instead of hanging the suite.
    # NOTE(review): assumed to be far above any real episode length -- confirm.
    _MAX_EPISODE_STEPS = 1000

    def _public_slot_for(self, env: "SentinelEnv", internal_id: str) -> str:
        """Return the public specialist id that ``env`` maps to ``internal_id``.

        Looks the id up in ``env._pool.internal_profile()``, which is iterated
        as ``public_id -> internal_id`` pairs. Fails the calling test with an
        explicit message when no public slot maps to ``internal_id``.
        """
        profile = env._pool.internal_profile()
        for public_id, mapped_id in profile.items():
            if mapped_id == internal_id:
                return public_id
        self.fail(
            f"no public slot maps to internal specialist {internal_id!r}; "
            f"profile={profile!r}"
        )

    def test_reset_observation_has_integer_subtask_index(self) -> None:
        """A fresh reset starts at subtask 0 with score 0.0 and is not done."""
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=3)

        self.assertEqual(result["observation"]["subtask_index"], 0)
        self.assertEqual(result["info"]["score"], 0.0)
        self.assertFalse(result["done"])

    def test_accurate_slow_public_slot_costs_two_steps(self) -> None:
        """Delegating to the slot backed by internal ``S0`` reports step_count 2."""
        env = SentinelEnv()
        reset_result = env.reset(task_type="task1", seed=11)
        # Per the test name, internal "S0" is the accurate-but-slow specialist;
        # resolve whichever public slot it occupies for this seed.
        slow_slot = self._public_slot_for(env, "S0")

        step_result = env.step({
            "session_id": reset_result["observation"]["session_id"],
            "task_type": "task1",
            "action_type": "delegate",
            "specialist_id": slow_slot,
        })

        self.assertEqual(step_result["info"]["step_count"], 2)

    def test_verify_accurate_slow_costs_specialist_plus_verify_step(self) -> None:
        """Verifying via the slot backed by internal ``S0`` reports step_count 3."""
        env = SentinelEnv()
        reset_result = env.reset(task_type="task1", seed=11)
        slow_slot = self._public_slot_for(env, "S0")

        step_result = env.step({
            "session_id": reset_result["observation"]["session_id"],
            "task_type": "task1",
            "action_type": "verify",
            "specialist_id": slow_slot,
        })

        self.assertEqual(step_result["info"]["step_count"], 3)

    def test_self_solve_finishes_long_task_with_normalized_score(self) -> None:
        """Solving every subtask independently completes the episode.

        Steps with ``solve_independently`` until the environment reports
        ``done``, then checks completion rate, step count, that the score lies
        in ``[0.0, 1.0]``, and that a reward report with events is attached.
        """
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=5)

        steps_taken = 0
        while not result["done"]:
            if steps_taken >= self._MAX_EPISODE_STEPS:
                self.fail(
                    f"episode not done after {self._MAX_EPISODE_STEPS} "
                    "solve_independently steps"
                )
            obs = result["observation"]
            result = env.step({
                "session_id": obs["session_id"],
                "task_type": obs["task_type"],
                "action_type": "solve_independently",
                "subtask_response": "SELF_SOLVED",
            })
            steps_taken += 1

        info = result["info"]
        self.assertEqual(info["completion_rate"], 1.0)
        self.assertGreater(info["step_count"], 2)
        self.assertGreaterEqual(info["score"], 0.0)
        self.assertLessEqual(info["score"], 1.0)
        self.assertIn("reward_report", info)
        self.assertGreater(info["reward_report"]["reward_events"], 0)

    def test_reward_report_tracks_process_components(self) -> None:
        """One delegate step yields one reward event with component averages."""
        env = SentinelEnv()
        obs = env.reset(task_type="task3", seed=42)["observation"]

        # NOTE(review): "S0" is passed directly as the specialist id here,
        # whereas the step-cost tests resolve a public slot from the internal
        # profile first -- confirm this is intentional.
        env.step({
            "session_id": obs["session_id"],
            "task_type": "task3",
            "action_type": "delegate",
            "specialist_id": "S0",
        })

        report = env.reward_report()
        self.assertEqual(report["reward_events"], 1)
        self.assertIn("confidence_alignment", report["component_averages"])
        self.assertIn("domain_routing", report["component_averages"])
        self.assertEqual(report["events"][0]["action_type"], "delegate")
# Run the suite through unittest's CLI runner when executed as a script.
if __name__ == "__main__":
    unittest.main()
|