| """Tests for llm_agent helper functions.""" |
|
|
| import pytest |
|
|
| from openra_env.agent import _bench_export_policy, _format_llm_api_error, _sanitize_messages |
| from openra_env.config import LLMConfig |
|
|
|
|
class TestSanitizeMessages:
    """Checks for _sanitize_messages, which merges consecutive same-role messages."""

    def test_empty(self):
        """An empty conversation is returned as an empty list."""
        sanitized = _sanitize_messages([])
        assert sanitized == []

    def test_no_merge_needed(self):
        """Already-alternating roles keep their count and order."""
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        sanitized = _sanitize_messages(conversation)
        assert len(sanitized) == 3
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["system", "user", "assistant"]

    def test_consecutive_user_merged(self):
        """Two back-to-back user messages become one that keeps both texts."""
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "first"},
            {"role": "user", "content": "second"},
        ]
        sanitized = _sanitize_messages(conversation)
        assert len(sanitized) == 2
        merged = sanitized[1]
        assert merged["role"] == "user"
        for fragment in ("first", "second"):
            assert fragment in merged["content"]

    def test_three_consecutive_user_merged(self):
        """Three user messages in a row are joined with blank-line separators."""
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "a"},
            {"role": "user", "content": "b"},
            {"role": "user", "content": "c"},
        ]
        sanitized = _sanitize_messages(conversation)
        assert len(sanitized) == 2
        merged_text = sanitized[1]["content"]
        assert merged_text == "a\n\nb\n\nc"

    def test_does_not_mutate_original(self):
        """The caller's list and message dicts are left exactly as passed in."""
        originals = [
            {"role": "user", "content": "first"},
            {"role": "user", "content": "second"},
        ]
        _sanitize_messages(originals)

        # Both entries must still be present with their own, unmerged content.
        first_msg, second_msg = originals[0], originals[1]
        assert first_msg["content"] == "first"
        assert second_msg["content"] == "second"
        assert len(originals) == 2

    def test_mixed_roles_preserved(self):
        """Only the adjacent user pair merges; every other message keeps its slot."""
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "a1"},
            {"role": "user", "content": "u2"},
            {"role": "user", "content": "u3"},
            {"role": "assistant", "content": "a2"},
        ]
        sanitized = _sanitize_messages(conversation)
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["system", "user", "assistant", "user", "assistant"]
        merged_text = sanitized[3]["content"]
        assert merged_text == "u2\n\nu3"

    def test_tool_then_user_gets_bridge_assistant(self):
        """Mistral requires tool → assistant → user, not tool → user."""
        conversation = [
            {"role": "assistant", "content": "", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result1", "tool_call_id": "1"},
            {"role": "user", "content": "briefing"},
        ]
        sanitized = _sanitize_messages(conversation)
        assert len(sanitized) == 4
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["assistant", "tool", "assistant", "user"]
        # The inserted bridge message must carry non-empty content.
        bridge = sanitized[2]
        assert bridge["content"]

    def test_tool_then_assistant_no_extra_bridge(self):
        """When tool → assistant already exists, no bridge is inserted."""
        conversation = [
            {"role": "assistant", "content": "", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result1", "tool_call_id": "1"},
            {"role": "assistant", "content": "Got the result."},
        ]
        sanitized = _sanitize_messages(conversation)
        assert len(sanitized) == 3
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["assistant", "tool", "assistant"]

    def test_real_world_scenario(self):
        """Simulates: nudge (user) → next turn briefing (user) → should merge."""
        conversation = [
            {"role": "system", "content": "You are playing Red Alert."},
            {"role": "user", "content": "STRATEGIC BRIEFING: ..."},
            {"role": "assistant", "content": "I will deploy the MCV."},
            {"role": "user", "content": "Continue playing. Use game tools."},
            {"role": "user", "content": "TURN BRIEFING: Funds 5000, ..."},
        ]
        sanitized = _sanitize_messages(conversation)
        assert len(sanitized) == 4
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["system", "user", "assistant", "user"]
        merged_text = sanitized[3]["content"]
        for fragment in ("Continue playing", "TURN BRIEFING"):
            assert fragment in merged_text

    def test_game_loop_tool_then_briefing(self):
        """Real scenario: tool results from turn N, then briefing user msg for turn N+1."""
        conversation = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "initial briefing"},
            {"role": "assistant", "content": "", "tool_calls": [{"id": "c1"}]},
            {"role": "tool", "content": '{"ok": true}', "tool_call_id": "c1"},
            {"role": "user", "content": "TURN BRIEFING: tick 500"},
        ]
        sanitized = _sanitize_messages(conversation)
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["system", "user", "assistant", "tool", "assistant", "user"]
        bridge, briefing = sanitized[4], sanitized[5]
        assert bridge["role"] == "assistant"
        assert briefing["content"] == "TURN BRIEFING: tick 500"
|
|
|
|
class TestFormatLLMApiError:
    """Checks for _format_llm_api_error, the provider error mapping helper."""

    def test_openrouter_tool_route_error_has_actionable_hint(self):
        """An OpenRouter 404 'no tool endpoints' body yields tool-calling guidance."""
        llm_config = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
        )
        response_body = (
            '{"error":{"message":"No endpoints found that support tool use.'
            ' To learn more about provider routing","code":404}}'
        )
        formatted = _format_llm_api_error(404, response_body, llm_config)
        expected_fragments = (
            "supports tool calling",
            "OpenRA-RL requires tool-calling models",
            "not ':free'",
        )
        for fragment in expected_fragments:
            assert fragment in formatted

    def test_auth_error_message_preserved(self):
        """A 401 response is reported as an authentication failure."""
        llm_config = LLMConfig(model="foo/bar")
        formatted = _format_llm_api_error(401, "unauthorized", llm_config)
        assert "Authentication failed (401)" in formatted
|
|
|
|
class TestToolCallingPreflight:
    """Checks for _preflight_tool_calling_support, the startup capability probe."""

    @pytest.mark.asyncio
    async def test_openrouter_unsupported_tools_is_blocked(self, monkeypatch):
        """An OpenRouter route whose chat call raises is reported as unsupported."""
        from openra_env import agent as agent_mod

        llm_config = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
        )

        async def _raise_no_tool_endpoints(*args, **kwargs):
            raise RuntimeError("No endpoints found that support tool use.")

        monkeypatch.setattr(agent_mod, "chat_completion", _raise_no_tool_endpoints)
        outcome = await agent_mod._preflight_tool_calling_support(llm_config)
        supported, detail = outcome
        assert supported is False
        assert "support tool use" in detail.lower()

    @pytest.mark.asyncio
    async def test_non_openrouter_skips_preflight_call(self, monkeypatch):
        """A non-OpenRouter base URL passes without ever calling chat_completion."""
        from openra_env import agent as agent_mod

        llm_config = LLMConfig(
            base_url="http://localhost:11434/v1/chat/completions",
            model="qwen3:4b",
        )
        # Every invocation of the stub is recorded here; it must stay empty.
        recorded_calls = []

        async def _recording_chat_completion(*args, **kwargs):
            recorded_calls.append((args, kwargs))
            return {}

        monkeypatch.setattr(agent_mod, "chat_completion", _recording_chat_completion)
        outcome = await agent_mod._preflight_tool_calling_support(llm_config)
        supported, detail = outcome
        assert supported is True
        assert detail == ""
        assert recorded_calls == []
|
|
|
|
class TestBenchExportPolicy:
    """Checks for _bench_export_policy: when bench export and upload are allowed."""

    def test_always_exports_locally_even_on_error(self):
        """After an agent error, local export stays on but upload is refused."""
        policy = _bench_export_policy(encountered_agent_error=True)
        export_allowed, upload_allowed, explanation = policy
        assert export_allowed is True
        assert upload_allowed is False
        assert "runtime [error]" in explanation.lower()

    def test_allow_export_and_upload_when_no_runtime_error(self):
        """Without an agent error, export and upload are both allowed, with no reason text."""
        policy = _bench_export_policy(encountered_agent_error=False)
        export_allowed, upload_allowed, explanation = policy
        assert export_allowed is True
        assert upload_allowed is True
        assert explanation == ""
|
|
|
|
class TestRunAgentPreflightAbort:
    """Regression checks for the abort path taken when the tool-capability preflight fails."""

    @pytest.mark.asyncio
    async def test_openrouter_tool_capability_failure_aborts_before_reset(self, monkeypatch, capsys):
        """A failed preflight ends run_agent before any client is built or reset is announced."""
        from types import SimpleNamespace
        from openra_env import agent as agent_mod

        agent_settings = SimpleNamespace(
            server_url="http://localhost:8000",
            max_turns=0,
            max_time_s=1800,
        )
        llm_settings = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
            request_timeout_s=120.0,
        )
        run_config = SimpleNamespace(agent=agent_settings, llm=llm_settings)

        # Filled in only if run_agent tries to build the MCP client.
        construction_attempts = []

        class _FailIfConstructedClient:
            def __init__(self, *args, **kwargs):
                construction_attempts.append((args, kwargs))
                raise AssertionError("OpenRAMCPClient should not be constructed on preflight failure")

        async def _fake_preflight(_llm_config):
            return False, "No endpoints found that support tool use."

        monkeypatch.setattr(agent_mod, "_preflight_tool_calling_support", _fake_preflight)
        monkeypatch.setattr(agent_mod, "OpenRAMCPClient", _FailIfConstructedClient)

        await agent_mod.run_agent(run_config, verbose=False)

        captured = capsys.readouterr()
        stdout_text = captured.out
        assert "Checking model route for tool-calling support..." in stdout_text
        assert "Aborting before game launch (no match started)." in stdout_text
        assert "Resetting environment (launching OpenRA)..." not in stdout_text
        assert construction_attempts == []
|
|