Spaces:

qpluslab
/

openra-rl

Paused

File size: 9,908 Bytes

02f4a63

"""Tests for llm_agent helper functions."""

import pytest

from openra_env.agent import _bench_export_policy, _format_llm_api_error, _sanitize_messages
from openra_env.config import LLMConfig


class TestSanitizeMessages:
    """Tests for _sanitize_messages — merges consecutive same-role messages."""

    def test_empty(self):
        assert _sanitize_messages([]) == []

    def test_no_merge_needed(self):
        msgs = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        result = _sanitize_messages(msgs)
        assert len(result) == 3
        assert [m["role"] for m in result] == ["system", "user", "assistant"]

    def test_consecutive_user_merged(self):
        msgs = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "first"},
            {"role": "user", "content": "second"},
        ]
        result = _sanitize_messages(msgs)
        assert len(result) == 2
        assert result[1]["role"] == "user"
        assert "first" in result[1]["content"]
        assert "second" in result[1]["content"]

    def test_three_consecutive_user_merged(self):
        msgs = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "a"},
            {"role": "user", "content": "b"},
            {"role": "user", "content": "c"},
        ]
        result = _sanitize_messages(msgs)
        assert len(result) == 2
        assert result[1]["content"] == "a\n\nb\n\nc"

    def test_does_not_mutate_original(self):
        msgs = [
            {"role": "user", "content": "first"},
            {"role": "user", "content": "second"},
        ]
        _sanitize_messages(msgs)
        # Original messages should be untouched
        assert msgs[0]["content"] == "first"
        assert msgs[1]["content"] == "second"
        assert len(msgs) == 2

    def test_mixed_roles_preserved(self):
        msgs = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "a1"},
            {"role": "user", "content": "u2"},
            {"role": "user", "content": "u3"},
            {"role": "assistant", "content": "a2"},
        ]
        result = _sanitize_messages(msgs)
        assert [m["role"] for m in result] == ["system", "user", "assistant", "user", "assistant"]
        assert result[3]["content"] == "u2\n\nu3"

    def test_tool_then_user_gets_bridge_assistant(self):
        """Mistral requires tool → assistant → user, not tool → user."""
        msgs = [
            {"role": "assistant", "content": "", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result1", "tool_call_id": "1"},
            {"role": "user", "content": "briefing"},
        ]
        result = _sanitize_messages(msgs)
        assert len(result) == 4
        assert [m["role"] for m in result] == ["assistant", "tool", "assistant", "user"]
        assert result[2]["content"]  # bridge message is non-empty

    def test_tool_then_assistant_no_extra_bridge(self):
        """When tool → assistant already exists, no bridge is inserted."""
        msgs = [
            {"role": "assistant", "content": "", "tool_calls": [{"id": "1"}]},
            {"role": "tool", "content": "result1", "tool_call_id": "1"},
            {"role": "assistant", "content": "Got the result."},
        ]
        result = _sanitize_messages(msgs)
        assert len(result) == 3
        assert [m["role"] for m in result] == ["assistant", "tool", "assistant"]

    def test_real_world_scenario(self):
        """Simulates: nudge (user) → next turn briefing (user) → should merge."""
        msgs = [
            {"role": "system", "content": "You are playing Red Alert."},
            {"role": "user", "content": "STRATEGIC BRIEFING: ..."},
            {"role": "assistant", "content": "I will deploy the MCV."},
            {"role": "user", "content": "Continue playing. Use game tools."},
            {"role": "user", "content": "TURN BRIEFING: Funds 5000, ..."},
        ]
        result = _sanitize_messages(msgs)
        assert len(result) == 4
        roles = [m["role"] for m in result]
        assert roles == ["system", "user", "assistant", "user"]
        assert "Continue playing" in result[3]["content"]
        assert "TURN BRIEFING" in result[3]["content"]

    def test_game_loop_tool_then_briefing(self):
        """Real scenario: tool results from turn N, then briefing user msg for turn N+1."""
        msgs = [
            {"role": "system", "content": "sys"},
            {"role": "user", "content": "initial briefing"},
            {"role": "assistant", "content": "", "tool_calls": [{"id": "c1"}]},
            {"role": "tool", "content": '{"ok": true}', "tool_call_id": "c1"},
            {"role": "user", "content": "TURN BRIEFING: tick 500"},
        ]
        result = _sanitize_messages(msgs)
        roles = [m["role"] for m in result]
        assert roles == ["system", "user", "assistant", "tool", "assistant", "user"]
        assert result[4]["role"] == "assistant"  # bridge
        assert result[5]["content"] == "TURN BRIEFING: tick 500"


class TestFormatLLMApiError:
    """Tests for provider error mapping helper."""

    def test_openrouter_tool_route_error_has_actionable_hint(self):
        cfg = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
        )
        msg = _format_llm_api_error(
            404,
            (
                '{"error":{"message":"No endpoints found that support tool use.'
                ' To learn more about provider routing","code":404}}'
            ),
            cfg,
        )
        assert "supports tool calling" in msg
        assert "OpenRA-RL requires tool-calling models" in msg
        assert "not ':free'" in msg

    def test_auth_error_message_preserved(self):
        cfg = LLMConfig(model="foo/bar")
        msg = _format_llm_api_error(401, "unauthorized", cfg)
        assert "Authentication failed (401)" in msg


class TestToolCallingPreflight:
    """Tests for startup preflight capability checks."""

    @pytest.mark.asyncio
    async def test_openrouter_unsupported_tools_is_blocked(self, monkeypatch):
        from openra_env import agent as agent_mod

        cfg = LLMConfig(
            base_url="https://openrouter.ai/api/v1/chat/completions",
            model="liquid/lfm-2.5-1.2b-thinking:free",
        )

        async def _fake_chat_completion(*args, **kwargs):
            raise RuntimeError("No endpoints found that support tool use.")

        monkeypatch.setattr(agent_mod, "chat_completion", _fake_chat_completion)
        ok, err = await agent_mod._preflight_tool_calling_support(cfg)
        assert ok is False
        assert "support tool use" in err.lower()

    @pytest.mark.asyncio
    async def test_non_openrouter_skips_preflight_call(self, monkeypatch):
        from openra_env import agent as agent_mod

        cfg = LLMConfig(
            base_url="http://localhost:11434/v1/chat/completions",
            model="qwen3:4b",
        )
        called = False

        async def _fake_chat_completion(*args, **kwargs):
            nonlocal called
            called = True
            return {}

        monkeypatch.setattr(agent_mod, "chat_completion", _fake_chat_completion)
        ok, err = await agent_mod._preflight_tool_calling_support(cfg)
        assert ok is True
        assert err == ""
        assert called is False


class TestBenchExportPolicy:
    """Tests for when bench export/upload is allowed."""

    def test_always_exports_locally_even_on_error(self):
        should_export, should_upload, reason = _bench_export_policy(encountered_agent_error=True)
        assert should_export is True
        assert should_upload is False
        assert "runtime [error]" in reason.lower()

    def test_allow_export_and_upload_when_no_runtime_error(self):
        should_export, should_upload, reason = _bench_export_policy(encountered_agent_error=False)
        assert should_export is True
        assert should_upload is True
        assert reason == ""


class TestRunAgentPreflightAbort:
    """Regression tests for tool-capability preflight abort path."""

    @pytest.mark.asyncio
    async def test_openrouter_tool_capability_failure_aborts_before_reset(self, monkeypatch, capsys):
        from types import SimpleNamespace
        from openra_env import agent as agent_mod

        cfg = SimpleNamespace(
            agent=SimpleNamespace(server_url="http://localhost:8000", max_turns=0, max_time_s=1800),
            llm=LLMConfig(
                base_url="https://openrouter.ai/api/v1/chat/completions",
                model="liquid/lfm-2.5-1.2b-thinking:free",
                request_timeout_s=120.0,
            ),
        )

        client_constructed = False

        class _FailIfConstructedClient:
            def __init__(self, *args, **kwargs):
                nonlocal client_constructed
                client_constructed = True
                raise AssertionError("OpenRAMCPClient should not be constructed on preflight failure")

        async def _fake_preflight(_llm_config):
            return False, "No endpoints found that support tool use."

        monkeypatch.setattr(agent_mod, "_preflight_tool_calling_support", _fake_preflight)
        monkeypatch.setattr(agent_mod, "OpenRAMCPClient", _FailIfConstructedClient)

        await agent_mod.run_agent(cfg, verbose=False)

        out = capsys.readouterr().out
        assert "Checking model route for tool-calling support..." in out
        assert "Aborting before game launch (no match started)." in out
        assert "Resetting environment (launching OpenRA)..." not in out
        assert client_constructed is False