File size: 9,908 Bytes
02f4a63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"""Tests for llm_agent helper functions."""

import pytest

from openra_env.agent import _bench_export_policy, _format_llm_api_error, _sanitize_messages
from openra_env.config import LLMConfig


class TestSanitizeMessages:
    """Exercise _sanitize_messages on small chat histories.

    The cases pin down three behaviours: runs of same-role messages collapse
    into a single message, the input list keeps its content and length, and
    an assistant "bridge" message separates a tool result from a user turn.
    """

    @staticmethod
    def _roles(messages):
        """Return the role of every message, in order."""
        return [message["role"] for message in messages]

    def test_empty(self):
        """An empty history comes back as an empty list."""
        sanitized = _sanitize_messages([])
        assert sanitized == []

    def test_no_merge_needed(self):
        """Alternating roles pass through with the same count and order."""
        history = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        sanitized = _sanitize_messages(history)
        assert len(sanitized) == 3
        assert self._roles(sanitized) == ["system", "user", "assistant"]

    def test_consecutive_user_merged(self):
        """Two back-to-back user messages become one that keeps both texts."""
        history = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "first"},
            {"role": "user", "content": "second"},
        ]
        sanitized = _sanitize_messages(history)
        assert len(sanitized) == 2
        merged = sanitized[1]
        assert merged["role"] == "user"
        assert "first" in merged["content"]
        assert "second" in merged["content"]

    def test_three_consecutive_user_merged(self):
        """Three user messages merge, joined by blank lines."""
        history = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "a"},
            {"role": "user", "content": "b"},
            {"role": "user", "content": "c"},
        ]
        sanitized = _sanitize_messages(history)
        assert len(sanitized) == 2
        merged_content = sanitized[1]["content"]
        assert merged_content == "a\n\nb\n\nc"

    def test_does_not_mutate_original(self):
        """The caller's messages keep their content and count after the call."""
        history = [
            {"role": "user", "content": "first"},
            {"role": "user", "content": "second"},
        ]
        _sanitize_messages(history)
        # The return value is deliberately ignored; only the input matters here.
        assert history[0]["content"] == "first"
        assert history[1]["content"] == "second"
        assert len(history) == 2

    def test_mixed_roles_preserved(self):
        """Only the adjacent user pair merges; other messages keep their place."""
        history = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "a1"},
            {"role": "user", "content": "u2"},
            {"role": "user", "content": "u3"},
            {"role": "assistant", "content": "a2"},
        ]
        sanitized = _sanitize_messages(history)
        expected_roles = ["system", "user", "assistant", "user", "assistant"]
        assert self._roles(sanitized) == expected_roles
        assert sanitized[3]["content"] == "u2\n\nu3"

    def test_tool_then_user_gets_bridge_assistant(self):
        """Mistral requires tool → assistant → user, so a bridge is inserted."""
        history = [
            {"role": "assistant", "content": "", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result1", "tool_call_id": "1"},
            {"role": "user", "content": "briefing"},
        ]
        sanitized = _sanitize_messages(history)
        assert len(sanitized) == 4
        assert self._roles(sanitized) == ["assistant", "tool", "assistant", "user"]
        bridge = sanitized[2]
        assert bridge["content"]  # the inserted bridge must carry some text

    def test_tool_then_assistant_no_extra_bridge(self):
        """A tool result already followed by an assistant needs no bridge."""
        history = [
            {"role": "assistant", "content": "", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result1", "tool_call_id": "1"},
            {"role": "assistant", "content": "Got the result."},
        ]
        sanitized = _sanitize_messages(history)
        assert len(sanitized) == 3
        assert self._roles(sanitized) == ["assistant", "tool", "assistant"]

    def test_real_world_scenario(self):
        """A nudge (user) followed by the next turn briefing (user) merges."""
        history = [
            {"role": "system", "content": "You are playing Red Alert."},
            {"role": "user", "content": "STRATEGIC BRIEFING: ..."},
            {"role": "assistant", "content": "I will deploy the MCV."},
            {"role": "user", "content": "Continue playing. Use game tools."},
            {"role": "user", "content": "TURN BRIEFING: Funds 5000, ..."},
        ]
        sanitized = _sanitize_messages(history)
        assert len(sanitized) == 4
        assert self._roles(sanitized) == ["system", "user", "assistant", "user"]
        combined = sanitized[3]["content"]
        assert "Continue playing" in combined
        assert "TURN BRIEFING" in combined

    def test_game_loop_tool_then_briefing(self):
        """Tool results from turn N, then the user briefing for turn N+1."""
        history = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "initial briefing"},
            {"role": "assistant", "content": "", "tool_calls": [{"id": "c1"}]},
            {"role": "tool", "content": '{"ok": true}', "tool_call_id": "c1"},
            {"role": "user", "content": "TURN BRIEFING: tick 500"},
        ]
        sanitized = _sanitize_messages(history)
        expected_roles = ["system", "user", "assistant", "tool", "assistant", "user"]
        assert self._roles(sanitized) == expected_roles
        assert sanitized[4]["role"] == "assistant"  # the inserted bridge
        assert sanitized[5]["content"] == "TURN BRIEFING: tick 500"


class TestFormatLLMApiError:
    """Check how _format_llm_api_error words provider failures."""

    def test_openrouter_tool_route_error_has_actionable_hint(self):
        """A 404 "no tool-use endpoints" body yields tool-calling guidance."""
        provider_body = (
            '{"error":{"message":"No endpoints found that support tool use.'
            ' To learn more about provider routing","code":404}}'
        )
        llm_config = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
        )

        message = _format_llm_api_error(404, provider_body, llm_config)

        # Each fragment must show up somewhere in the formatted message.
        for expected_fragment in (
            "supports tool calling",
            "OpenRA-RL requires tool-calling models",
            "not ':free'",
        ):
            assert expected_fragment in message

    def test_auth_error_message_preserved(self):
        """A 401 response is reported as an authentication failure."""
        llm_config = LLMConfig(model="foo/bar")
        message = _format_llm_api_error(401, "unauthorized", llm_config)
        assert "Authentication failed (401)" in message


class TestToolCallingPreflight:
    """Check _preflight_tool_calling_support with chat_completion stubbed out."""

    @pytest.mark.asyncio
    async def test_openrouter_unsupported_tools_is_blocked(self, monkeypatch):
        """An OpenRouter route whose completion call raises is reported as failing."""
        from openra_env import agent as agent_module

        async def _raising_chat_completion(*args, **kwargs):
            raise RuntimeError("No endpoints found that support tool use.")

        monkeypatch.setattr(agent_module, "chat_completion", _raising_chat_completion)

        llm_config = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
        )
        supported, reason = await agent_module._preflight_tool_calling_support(llm_config)

        assert supported is False
        assert "support tool use" in reason.lower()

    @pytest.mark.asyncio
    async def test_non_openrouter_skips_preflight_call(self, monkeypatch):
        """A localhost endpoint passes without chat_completion being called."""
        from openra_env import agent as agent_module

        # Every invocation of the stub is recorded; the list must stay empty.
        invocations = []

        async def _recording_chat_completion(*args, **kwargs):
            invocations.append((args, kwargs))
            return {}

        monkeypatch.setattr(agent_module, "chat_completion", _recording_chat_completion)

        llm_config = LLMConfig(
            base_url="http://localhost:11434/v1/chat/completions",
            model="qwen3:4b",
        )
        supported, reason = await agent_module._preflight_tool_calling_support(llm_config)

        assert supported is True
        assert reason == ""
        assert invocations == []


class TestBenchExportPolicy:
    """Check the (export, upload, reason) triple from _bench_export_policy."""

    def test_always_exports_locally_even_on_error(self):
        """With an agent error: export stays on, upload is off, reason names it."""
        policy = _bench_export_policy(encountered_agent_error=True)
        export_allowed, upload_allowed, explanation = policy
        assert export_allowed is True
        assert upload_allowed is False
        assert "runtime [error]" in explanation.lower()

    def test_allow_export_and_upload_when_no_runtime_error(self):
        """Without an agent error: export and upload are on, reason is empty."""
        policy = _bench_export_policy(encountered_agent_error=False)
        export_allowed, upload_allowed, explanation = policy
        assert export_allowed is True
        assert upload_allowed is True
        assert explanation == ""


class TestRunAgentPreflightAbort:
    """Regression coverage for run_agent stopping when the preflight fails."""

    @pytest.mark.asyncio
    async def test_openrouter_tool_capability_failure_aborts_before_reset(self, monkeypatch, capsys):
        """A failed preflight prints the abort notice and never builds the client."""
        from types import SimpleNamespace
        from openra_env import agent as agent_module

        agent_settings = SimpleNamespace(
            server_url="http://localhost:8000",
            max_turns=0,
            max_time_s=1800,
        )
        llm_settings = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
            request_timeout_s=120.0,
        )
        config = SimpleNamespace(agent=agent_settings, llm=llm_settings)

        # Any attempt to build the client is recorded here before it raises.
        constructions = []

        class _ExplodingClient:
            def __init__(self, *args, **kwargs):
                constructions.append(True)
                raise AssertionError("OpenRAMCPClient should not be constructed on preflight failure")

        async def _failing_preflight(_llm_config):
            return False, "No endpoints found that support tool use."

        monkeypatch.setattr(agent_module, "OpenRAMCPClient", _ExplodingClient)
        monkeypatch.setattr(agent_module, "_preflight_tool_calling_support", _failing_preflight)

        await agent_module.run_agent(config, verbose=False)

        captured_stdout = capsys.readouterr().out
        assert "Checking model route for tool-calling support..." in captured_stdout
        assert "Aborting before game launch (no match started)." in captured_stdout
        assert "Resetting environment (launching OpenRA)..." not in captured_stdout
        assert constructions == []