File size: 3,275 Bytes
136ea72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aad7819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136ea72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3b9bbd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136ea72
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from __future__ import annotations

import unittest

from environment import SentinelEnv


class EnvironmentTests(unittest.TestCase):
    """Checks of ``SentinelEnv`` reset/step bookkeeping and reward reporting."""

    # Upper bound on steps for run-to-completion loops, so an environment that
    # never reports ``done`` fails the test instead of hanging the suite.
    # NOTE(review): assumed to be far above any real episode length -- confirm.
    _MAX_EPISODE_STEPS = 10_000

    def _public_slot_for(self, env, internal_id: str):
        """Return the public specialist id that ``env`` maps to ``internal_id``.

        Iterates ``env._pool.internal_profile()`` as a mapping of
        public id -> internal id and returns the first public id whose value
        equals ``internal_id``.  If nothing matches, the calling test fails
        with a descriptive message rather than a bare ``StopIteration``.
        """
        profile = env._pool.internal_profile()
        for public_id, mapped_id in profile.items():
            if mapped_id == internal_id:
                return public_id
        self.fail(
            f"no public slot maps to internal specialist {internal_id!r}; "
            f"profile was {profile!r}"
        )

    def test_reset_observation_has_integer_subtask_index(self) -> None:
        """A fresh episode starts at integer subtask 0, score 0.0, not done."""
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=3)

        subtask_index = result["observation"]["subtask_index"]
        # ``assertEqual(x, 0)`` alone also accepts 0.0 and False, so pin the
        # type that the test name promises before comparing the value.
        self.assertIsInstance(subtask_index, int)
        self.assertNotIsInstance(subtask_index, bool)
        self.assertEqual(subtask_index, 0)
        self.assertEqual(result["info"]["score"], 0.0)
        self.assertFalse(result["done"])

    def test_accurate_slow_public_slot_costs_two_steps(self) -> None:
        """Delegating to the slot backed by internal ``S0`` yields step_count 2."""
        env = SentinelEnv()
        result = env.reset(task_type="task1", seed=11)
        slow_slot = self._public_slot_for(env, "S0")

        result = env.step({
            "session_id": result["observation"]["session_id"],
            "task_type": "task1",
            "action_type": "delegate",
            "specialist_id": slow_slot,
        })

        self.assertEqual(result["info"]["step_count"], 2)

    def test_verify_accurate_slow_costs_specialist_plus_verify_step(self) -> None:
        """Verifying via the slot backed by internal ``S0`` yields step_count 3."""
        env = SentinelEnv()
        result = env.reset(task_type="task1", seed=11)
        slow_slot = self._public_slot_for(env, "S0")

        result = env.step({
            "session_id": result["observation"]["session_id"],
            "task_type": "task1",
            "action_type": "verify",
            "specialist_id": slow_slot,
        })

        self.assertEqual(result["info"]["step_count"], 3)

    def test_self_solve_finishes_long_task_with_normalized_score(self) -> None:
        """Solving every subtask independently completes the episode.

        The final ``info`` must report full completion, more than two steps,
        a score within [0.0, 1.0], and a reward report with at least one event.
        """
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=5)

        steps_taken = 0
        while not result["done"]:
            if steps_taken >= self._MAX_EPISODE_STEPS:
                self.fail(
                    f"episode still not done after {steps_taken} "
                    "solve_independently steps"
                )
            obs = result["observation"]
            result = env.step({
                "session_id": obs["session_id"],
                "task_type": obs["task_type"],
                "action_type": "solve_independently",
                "subtask_response": "SELF_SOLVED",
            })
            steps_taken += 1

        info = result["info"]
        self.assertEqual(info["completion_rate"], 1.0)
        self.assertGreater(info["step_count"], 2)
        self.assertGreaterEqual(info["score"], 0.0)
        self.assertLessEqual(info["score"], 1.0)
        self.assertIn("reward_report", info)
        self.assertGreater(info["reward_report"]["reward_events"], 0)

    def test_reward_report_tracks_process_components(self) -> None:
        """One delegate step produces one reward event with process components."""
        env = SentinelEnv()
        result = env.reset(task_type="task3", seed=42)
        obs = result["observation"]

        # Only the side effect on the reward report matters here, so the
        # step result itself is intentionally discarded.
        env.step({
            "session_id": obs["session_id"],
            "task_type": "task3",
            "action_type": "delegate",
            "specialist_id": "S0",
        })

        report = env.reward_report()

        self.assertEqual(report["reward_events"], 1)
        self.assertIn("confidence_alignment", report["component_averages"])
        self.assertIn("domain_routing", report["component_averages"])
        self.assertEqual(report["events"][0]["action_type"], "delegate")


# Allow running this file directly as a script: unittest.main() loads and
# runs the TestCase classes defined in this module.
if __name__ == "__main__":
    unittest.main()