Spaces:

minhtudragon
/

headroom

Running

File size: 13,995 Bytes

"""Tests for CLI proxy env variable handling and backend validation.

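Verifies that:
1. OPENAI_TARGET_API_URL and GEMINI_TARGET_API_URL env vars are read by `headroom proxy`
2. HEADROOM_* env vars (host, port, budget, scaling, anyllm provider) are read by
   `headroom proxy`, and CLI flags take precedence over the matching env vars
3. litellm-* backends are accepted by both CLI and argparse paths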
"""

import os
from unittest.mock import patch

import pytest

# Skip every test in this module when click or fastapi cannot be imported.
click = pytest.importorskip("click")
pytest.importorskip("fastapi")

from click.testing import CliRunner  # noqa: E402

from headroom.cli.main import main  # noqa: E402


@pytest.fixture
def runner():
    """Provide a Click ``CliRunner`` for invoking the CLI in a test."""
    cli_runner = CliRunner()
    return cli_runner


class TestCLIProxyEnvVars:
    """Test that the CLI proxy command maps env vars and CLI flags onto its config."""

    @staticmethod
    def _invoke_proxy(runner, args=(), env=None, *, capture_kwargs=False):
        """Run ``headroom proxy`` with ``run_server`` replaced by a recorder.

        Args:
            runner: The Click test runner used to invoke the CLI.
            args: Extra command-line arguments appended after ``proxy``.
            env: Environment overrides handed to ``runner.invoke``.
            capture_kwargs: When true, the stand-in also accepts and records
                keyword arguments. Otherwise it accepts only the config, so
                any additional argument raises ``TypeError`` and fails the test.

        Returns:
            A dict with the recorded ``"config"`` (plus ``"kwargs"`` when
            ``capture_kwargs`` is set). It is empty if ``run_server`` was
            never called, so indexing it fails the test with ``KeyError``.
        """
        recorded = {}

        if capture_kwargs:

            def fake_run_server(config, **kwargs):
                recorded["config"] = config
                recorded["kwargs"] = kwargs

        else:

            def fake_run_server(config):
                recorded["config"] = config

        with patch("headroom.proxy.server.run_server", fake_run_server):
            result = runner.invoke(
                main,
                ["proxy", *args],
                env=env,
                catch_exceptions=False,
            )

        # Show the CLI output on failure so a bad exit is easy to diagnose.
        assert result.exit_code == 0, result.output
        return recorded

    def test_headroom_host_from_env(self, runner):
        """HEADROOM_HOST env var should be passed to ProxyConfig."""
        recorded = self._invoke_proxy(runner, env={"HEADROOM_HOST": "0.0.0.0"})

        assert recorded["config"].host == "0.0.0.0"

    def test_headroom_port_from_env(self, runner):
        """HEADROOM_PORT env var should be passed to ProxyConfig."""
        recorded = self._invoke_proxy(runner, env={"HEADROOM_PORT": "9797"})

        assert recorded["config"].port == 9797

    def test_headroom_budget_from_env(self, runner):
        """HEADROOM_BUDGET env var should be passed to ProxyConfig."""
        recorded = self._invoke_proxy(runner, env={"HEADROOM_BUDGET": "100.5"})

        assert recorded["config"].budget_limit_usd == 100.5

    def test_openai_target_api_url_from_env(self, runner):
        """OPENAI_TARGET_API_URL env var should be passed to ProxyConfig."""
        recorded = self._invoke_proxy(
            runner,
            env={"OPENAI_TARGET_API_URL": "http://my-vllm:4000"},
        )

        assert recorded["config"].openai_api_url == "http://my-vllm:4000"

    def test_gemini_target_api_url_from_env(self, runner):
        """GEMINI_TARGET_API_URL env var should be passed to ProxyConfig."""
        recorded = self._invoke_proxy(
            runner,
            env={"GEMINI_TARGET_API_URL": "http://my-gemini:5000"},
        )

        assert recorded["config"].gemini_api_url == "http://my-gemini:5000"

    def test_openai_api_url_cli_flag(self, runner):
        """--openai-api-url CLI flag should set openai_api_url on the config."""
        recorded = self._invoke_proxy(
            runner,
            ["--openai-api-url", "http://from-cli:4000"],
        )

        assert recorded["config"].openai_api_url == "http://from-cli:4000"

    def test_cli_flag_overrides_env_var(self, runner):
        """CLI flag should take precedence over env var."""
        recorded = self._invoke_proxy(
            runner,
            ["--openai-api-url", "http://from-cli:4000"],
            env={"OPENAI_TARGET_API_URL": "http://from-env:4000"},
        )

        assert recorded["config"].openai_api_url == "http://from-cli:4000"

    def test_no_env_var_defaults_to_none(self, runner):
        """Without env var or flag, openai_api_url should be None."""
        # Ensure the env var is not set, even if the ambient environment has it.
        scrubbed_env = {
            key: value
            for key, value in os.environ.items()
            if key != "OPENAI_TARGET_API_URL"
        }

        with patch.dict(os.environ, scrubbed_env, clear=True):
            recorded = self._invoke_proxy(runner)

        assert recorded["config"].openai_api_url is None

    def test_both_api_urls_from_env(self, runner):
        """Both OPENAI and GEMINI target URLs can be set via env."""
        recorded = self._invoke_proxy(
            runner,
            env={
                "OPENAI_TARGET_API_URL": "http://my-vllm:4000",
                "GEMINI_TARGET_API_URL": "http://my-gemini:5000",
            },
        )

        assert recorded["config"].openai_api_url == "http://my-vllm:4000"
        assert recorded["config"].gemini_api_url == "http://my-gemini:5000"

    def test_retry_and_connect_timeout_cli_flags(self, runner):
        """Fast-fail CLI flags should map into ProxyConfig."""
        recorded = self._invoke_proxy(
            runner,
            [
                "--retry-max-attempts",
                "1",
                "--connect-timeout-seconds",
                "3",
            ],
        )

        assert recorded["config"].retry_max_attempts == 1
        assert recorded["config"].connect_timeout_seconds == 3

    def test_production_scaling_env_vars(self, runner):
        """Scaling env vars should reach the config and the run_server kwargs."""
        recorded = self._invoke_proxy(
            runner,
            env={
                "HEADROOM_WORKERS": "4",
                "HEADROOM_LIMIT_CONCURRENCY": "250",
                "HEADROOM_MAX_CONNECTIONS": "200",
                "HEADROOM_MAX_KEEPALIVE": "50",
            },
            capture_kwargs=True,
        )

        assert recorded["config"].max_connections == 200
        assert recorded["config"].max_keepalive_connections == 50
        assert recorded["kwargs"] == {"workers": 4, "limit_concurrency": 250}

    def test_production_scaling_cli_flags_override_env_vars(self, runner):
        """Scaling CLI flags should take precedence over the matching env vars."""
        recorded = self._invoke_proxy(
            runner,
            [
                "--workers",
                "3",
                "--limit-concurrency",
                "125",
                "--max-connections",
                "150",
                "--max-keepalive",
                "25",
            ],
            env={
                "HEADROOM_WORKERS": "4",
                "HEADROOM_LIMIT_CONCURRENCY": "250",
                "HEADROOM_MAX_CONNECTIONS": "200",
                "HEADROOM_MAX_KEEPALIVE": "50",
            },
            capture_kwargs=True,
        )

        assert recorded["config"].max_connections == 150
        assert recorded["config"].max_keepalive_connections == 25
        assert recorded["kwargs"] == {"workers": 3, "limit_concurrency": 125}


class TestCLIProxyBackend:
    """Check that the CLI accepts litellm-* values for --backend."""

    @staticmethod
    def _config_for(runner, *proxy_args):
        """Invoke ``proxy`` with *proxy_args* and return the config given to run_server."""
        seen = {}

        def fake_run_server(config):
            seen["config"] = config

        with patch("headroom.proxy.server.run_server", fake_run_server):
            outcome = runner.invoke(
                main,
                ["proxy", *proxy_args],
                catch_exceptions=False,
            )

        assert outcome.exit_code == 0, outcome.output
        return seen["config"]

    def test_litellm_hosted_vllm_backend_accepted(self, runner):
        """Passing --backend litellm-hosted_vllm must succeed, not be rejected."""
        config = self._config_for(runner, "--backend", "litellm-hosted_vllm")

        assert config.backend == "litellm-hosted_vllm"

    def test_litellm_vertex_backend_accepted(self, runner):
        """Passing --backend litellm-vertex must succeed."""
        config = self._config_for(runner, "--backend", "litellm-vertex")

        assert config.backend == "litellm-vertex"

    def test_litellm_backend_with_openai_url(self, runner):
        """Full vLLM setup: a litellm backend combined with --openai-api-url."""
        config = self._config_for(
            runner,
            "--backend",
            "litellm-hosted_vllm",
            "--openai-api-url",
            "http://my-vllm:4000",
        )

        assert config.backend == "litellm-hosted_vllm"
        assert config.openai_api_url == "http://my-vllm:4000"


class TestCLIAnyllmProviderEnv:
    """Check that the CLI picks up the anyllm provider from env var or flag."""

    @staticmethod
    def _provider_from_cli(runner, cli_args, env=None):
        """Invoke the CLI and return the ``anyllm_provider`` of the config it builds."""
        holder = {}

        def record_config(config):
            holder["config"] = config

        with patch("headroom.proxy.server.run_server", record_config):
            outcome = runner.invoke(main, cli_args, env=env, catch_exceptions=False)

        assert outcome.exit_code == 0, outcome.output
        return holder["config"].anyllm_provider

    def test_anyllm_provider_from_env(self, runner):
        """HEADROOM_ANYLLM_PROVIDER should set the provider when no flag is given."""
        provider = self._provider_from_cli(
            runner,
            ["proxy", "--backend", "anyllm"],
            env={"HEADROOM_ANYLLM_PROVIDER": "llamacpp"},
        )

        assert provider == "llamacpp"

    def test_anyllm_provider_cli_flag_works(self, runner):
        """The --anyllm-provider flag should still set the provider."""
        provider = self._provider_from_cli(
            runner,
            ["proxy", "--backend", "anyllm", "--anyllm-provider", "groq"],
        )

        assert provider == "groq"


class TestArgparseBackendValidation:
    """Check that the argparse path (python -m headroom.proxy.server) accepts litellm-* backends."""

    def test_argparse_accepts_litellm_backend(self):
        """A --backend option declared without ``choices`` should accept litellm-hosted_vllm."""
        from argparse import ArgumentParser

        backend_name = "litellm-hosted_vllm"

        # Stand-in parser recreating the --backend option of server.py's main();
        # the point is only that argparse does not reject litellm-* values.
        cli_parser = ArgumentParser()
        cli_parser.add_argument("--backend", default="anthropic")

        parsed = cli_parser.parse_args(["--backend", backend_name])
        assert parsed.backend == backend_name