headroom / tests /test_cli /test_cli_proxy_env.py
tudragon154203
fix: route count_tokens to api.anthropic.com, not proxy base_url
0adb431
"""Tests for CLI proxy env variable handling and backend validation.
Verifies that:
1. OPENAI_TARGET_API_URL and GEMINI_TARGET_API_URL env vars are read by `headroom proxy`
2. litellm-* backends are accepted by both CLI and argparse paths
"""
import os
from unittest.mock import patch
import pytest
click = pytest.importorskip("click")
pytest.importorskip("fastapi")
from click.testing import CliRunner # noqa: E402
from headroom.cli.main import main # noqa: E402
@pytest.fixture
def runner():
    """Provide a Click ``CliRunner`` for invoking the ``headroom`` CLI in tests."""
    cli_runner = CliRunner()
    return cli_runner
class TestCLIProxyEnvVars:
    """Test that `headroom proxy` maps env vars and CLI flags onto the server config."""

    @staticmethod
    def _invoke_proxy(runner, extra_args=(), env=None):
        """Invoke ``headroom proxy`` with ``run_server`` replaced by a recording stub.

        Args:
            runner: The Click ``CliRunner`` used to invoke the CLI.
            extra_args: Command-line arguments appended after ``proxy``.
            env: Environment overrides forwarded to ``runner.invoke``.

        Returns:
            A ``(result, captured)`` tuple: the Click invocation result, and a
            dict holding the ``config`` object and the keyword ``kwargs`` that
            the CLI passed to ``run_server`` (empty if it was never called).
        """
        captured = {}

        # The stub accepts **kwargs in every test: the scaling tests below show
        # the CLI forwarding options such as ``workers`` as keyword arguments,
        # so a config-only stub could raise TypeError if e.g. HEADROOM_WORKERS
        # is exported in the ambient environment.
        def fake_run_server(config, **kwargs):
            captured["config"] = config
            captured["kwargs"] = kwargs

        with patch("headroom.proxy.server.run_server", fake_run_server):
            result = runner.invoke(
                main,
                ["proxy", *extra_args],
                env=env,
                # Let unexpected exceptions propagate so failures show a real traceback.
                catch_exceptions=False,
            )
        return result, captured

    def test_headroom_host_from_env(self, runner):
        """HEADROOM_HOST env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(runner, env={"HEADROOM_HOST": "0.0.0.0"})

        assert result.exit_code == 0, result.output
        assert captured["config"].host == "0.0.0.0"

    def test_headroom_port_from_env(self, runner):
        """HEADROOM_PORT env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(runner, env={"HEADROOM_PORT": "9797"})

        assert result.exit_code == 0, result.output
        assert captured["config"].port == 9797

    def test_headroom_budget_from_env(self, runner):
        """HEADROOM_BUDGET env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(runner, env={"HEADROOM_BUDGET": "100.5"})

        assert result.exit_code == 0, result.output
        assert captured["config"].budget_limit_usd == 100.5

    def test_openai_target_api_url_from_env(self, runner):
        """OPENAI_TARGET_API_URL env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner,
            env={"OPENAI_TARGET_API_URL": "http://my-vllm:4000"},
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://my-vllm:4000"

    def test_gemini_target_api_url_from_env(self, runner):
        """GEMINI_TARGET_API_URL env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner,
            env={"GEMINI_TARGET_API_URL": "http://my-gemini:5000"},
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].gemini_api_url == "http://my-gemini:5000"

    def test_openai_api_url_cli_flag(self, runner):
        """--openai-api-url CLI flag should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner,
            ["--openai-api-url", "http://from-cli:4000"],
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://from-cli:4000"

    def test_cli_flag_overrides_env_var(self, runner):
        """CLI flag should take precedence over env var."""
        result, captured = self._invoke_proxy(
            runner,
            ["--openai-api-url", "http://from-cli:4000"],
            env={"OPENAI_TARGET_API_URL": "http://from-env:4000"},
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://from-cli:4000"

    def test_no_env_var_defaults_to_none(self, runner):
        """Without env var or flag, openai_api_url should be None."""
        # Ensure the env var is not set, even if the developer's shell exports it.
        env_without_url = {
            key: value for key, value in os.environ.items() if key != "OPENAI_TARGET_API_URL"
        }
        with patch.dict(os.environ, env_without_url, clear=True):
            result, captured = self._invoke_proxy(runner)

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url is None

    def test_both_api_urls_from_env(self, runner):
        """Both OPENAI and GEMINI target URLs can be set via env."""
        result, captured = self._invoke_proxy(
            runner,
            env={
                "OPENAI_TARGET_API_URL": "http://my-vllm:4000",
                "GEMINI_TARGET_API_URL": "http://my-gemini:5000",
            },
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://my-vllm:4000"
        assert captured["config"].gemini_api_url == "http://my-gemini:5000"

    def test_retry_and_connect_timeout_cli_flags(self, runner):
        """Fast-fail CLI flags should map into ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner,
            [
                "--retry-max-attempts",
                "1",
                "--connect-timeout-seconds",
                "3",
            ],
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].retry_max_attempts == 1
        assert captured["config"].connect_timeout_seconds == 3

    def test_production_scaling_env_vars(self, runner):
        """Scaling env vars should reach the config and the run_server keyword arguments."""
        result, captured = self._invoke_proxy(
            runner,
            env={
                "HEADROOM_WORKERS": "4",
                "HEADROOM_LIMIT_CONCURRENCY": "250",
                "HEADROOM_MAX_CONNECTIONS": "200",
                "HEADROOM_MAX_KEEPALIVE": "50",
            },
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].max_connections == 200
        assert captured["config"].max_keepalive_connections == 50
        assert captured["kwargs"] == {"workers": 4, "limit_concurrency": 250}

    def test_production_scaling_cli_flags_override_env_vars(self, runner):
        """Scaling CLI flags should take precedence over the matching env vars."""
        result, captured = self._invoke_proxy(
            runner,
            [
                "--workers",
                "3",
                "--limit-concurrency",
                "125",
                "--max-connections",
                "150",
                "--max-keepalive",
                "25",
            ],
            env={
                "HEADROOM_WORKERS": "4",
                "HEADROOM_LIMIT_CONCURRENCY": "250",
                "HEADROOM_MAX_CONNECTIONS": "200",
                "HEADROOM_MAX_KEEPALIVE": "50",
            },
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].max_connections == 150
        assert captured["config"].max_keepalive_connections == 25
        assert captured["kwargs"] == {"workers": 3, "limit_concurrency": 125}
class TestCLIProxyBackend:
    """Test that litellm-* backends are accepted by the CLI."""

    @staticmethod
    def _invoke_proxy(runner, extra_args=()):
        """Invoke ``headroom proxy`` with ``run_server`` replaced by a recording stub.

        Args:
            runner: The Click ``CliRunner`` used to invoke the CLI.
            extra_args: Command-line arguments appended after ``proxy``.

        Returns:
            A ``(result, captured)`` tuple: the Click invocation result, and a
            dict whose ``"config"`` entry is the object the CLI passed to
            ``run_server`` (absent if it was never called).
        """
        captured = {}

        # Tolerate extra keyword arguments so the stub keeps working if the CLI
        # forwards additional options to run_server alongside the config.
        def fake_run_server(config, **kwargs):
            captured["config"] = config

        with patch("headroom.proxy.server.run_server", fake_run_server):
            result = runner.invoke(
                main,
                ["proxy", *extra_args],
                # Let unexpected exceptions propagate so failures show a real traceback.
                catch_exceptions=False,
            )
        return result, captured

    def test_litellm_hosted_vllm_backend_accepted(self, runner):
        """--backend litellm-hosted_vllm should be accepted (not rejected)."""
        result, captured = self._invoke_proxy(runner, ["--backend", "litellm-hosted_vllm"])

        assert result.exit_code == 0, result.output
        assert captured["config"].backend == "litellm-hosted_vllm"

    def test_litellm_vertex_backend_accepted(self, runner):
        """--backend litellm-vertex should be accepted."""
        result, captured = self._invoke_proxy(runner, ["--backend", "litellm-vertex"])

        assert result.exit_code == 0, result.output
        assert captured["config"].backend == "litellm-vertex"

    def test_litellm_backend_with_openai_url(self, runner):
        """Full vLLM setup: litellm backend + --openai-api-url flag."""
        result, captured = self._invoke_proxy(
            runner,
            [
                "--backend",
                "litellm-hosted_vllm",
                "--openai-api-url",
                "http://my-vllm:4000",
            ],
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].backend == "litellm-hosted_vllm"
        assert captured["config"].openai_api_url == "http://my-vllm:4000"
class TestCLIAnyllmProviderEnv:
    """Test that HEADROOM_ANYLLM_PROVIDER env var is read by the CLI."""

    @staticmethod
    def _invoke_proxy(runner, extra_args=(), env=None):
        """Invoke ``headroom proxy`` with ``run_server`` replaced by a recording stub.

        Args:
            runner: The Click ``CliRunner`` used to invoke the CLI.
            extra_args: Command-line arguments appended after ``proxy``.
            env: Environment overrides forwarded to ``runner.invoke``.

        Returns:
            A ``(result, captured)`` tuple: the Click invocation result, and a
            dict whose ``"config"`` entry is the object the CLI passed to
            ``run_server`` (absent if it was never called).
        """
        captured = {}

        # Tolerate extra keyword arguments so the stub keeps working if the CLI
        # forwards additional options to run_server alongside the config.
        def fake_run_server(config, **kwargs):
            captured["config"] = config

        with patch("headroom.proxy.server.run_server", fake_run_server):
            result = runner.invoke(
                main,
                ["proxy", *extra_args],
                env=env,
                # Let unexpected exceptions propagate so failures show a real traceback.
                catch_exceptions=False,
            )
        return result, captured

    def test_anyllm_provider_from_env(self, runner):
        """HEADROOM_ANYLLM_PROVIDER env var should override the default."""
        result, captured = self._invoke_proxy(
            runner,
            ["--backend", "anyllm"],
            env={"HEADROOM_ANYLLM_PROVIDER": "llamacpp"},
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].anyllm_provider == "llamacpp"

    def test_anyllm_provider_cli_flag_works(self, runner):
        """--anyllm-provider flag should still work."""
        result, captured = self._invoke_proxy(
            runner,
            ["--backend", "anyllm", "--anyllm-provider", "groq"],
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].anyllm_provider == "groq"
class TestArgparseBackendValidation:
    """Check the argparse path (python -m headroom.proxy.server) for litellm-* backends."""

    def test_argparse_accepts_litellm_backend(self):
        """A --backend option declared without ``choices`` should accept litellm-hosted_vllm."""
        import argparse

        # NOTE(review): this builds a stand-in parser meant to match the --backend
        # option in server.py's main(); it does not import the real parser, so it
        # only checks that argparse itself does not reject litellm-* values.
        stand_in = argparse.ArgumentParser()
        stand_in.add_argument("--backend", default="anthropic")

        parsed = stand_in.parse_args(["--backend", "litellm-hosted_vllm"])

        assert parsed.backend == "litellm-hosted_vllm"