Spaces:
Running
Running
"""Tests for CLI proxy env variable handling and backend validation.

Verifies that:
1. OPENAI_TARGET_API_URL and GEMINI_TARGET_API_URL env vars are read by `headroom proxy`
2. HEADROOM_* env vars (host, port, budget, scaling, any-llm provider) are read by `headroom proxy`
3. CLI flags take precedence over the corresponding env vars
4. litellm-* backends are accepted by both CLI and argparse paths
"""
| import os | |
| from unittest.mock import patch | |
| import pytest | |
| click = pytest.importorskip("click") | |
| pytest.importorskip("fastapi") | |
| from click.testing import CliRunner # noqa: E402 | |
| from headroom.cli.main import main # noqa: E402 | |
@pytest.fixture
def runner():
    """Provide a fresh Click ``CliRunner`` to each test that requests it.

    The tests in this module take ``runner`` as an argument, so this
    function must be registered as a pytest fixture to be injected.
    """
    return CliRunner()
class TestCLIProxyEnvVars:
    """Test that `headroom proxy` maps env vars and CLI flags into its config."""

    @staticmethod
    def _invoke_proxy(runner, args=(), env=None):
        """Run `headroom proxy` with ``run_server`` replaced by a recording stub.

        Args:
            runner: The ``CliRunner`` used to invoke the CLI.
            args: Extra command-line arguments placed after ``proxy``.
            env: Environment overrides forwarded to ``runner.invoke``.

        Returns:
            A ``(result, captured)`` tuple. ``captured`` maps ``"config"`` to
            the positional argument and ``"kwargs"`` to the keyword arguments
            the stub received; it is empty if ``run_server`` was never called.
        """
        captured = {}

        # Accept **kwargs so config-focused tests do not break merely because
        # run_server is also handed keyword arguments (e.g. workers).
        def mock_run_server(config, **kwargs):
            captured["config"] = config
            captured["kwargs"] = kwargs

        with patch("headroom.proxy.server.run_server", mock_run_server):
            result = runner.invoke(
                main,
                ["proxy", *args],
                env=env,
                catch_exceptions=False,
            )
        return result, captured

    def test_headroom_host_from_env(self, runner):
        """HEADROOM_HOST env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner, env={"HEADROOM_HOST": "0.0.0.0"}
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].host == "0.0.0.0"

    def test_headroom_port_from_env(self, runner):
        """HEADROOM_PORT env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner, env={"HEADROOM_PORT": "9797"}
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].port == 9797

    def test_headroom_budget_from_env(self, runner):
        """HEADROOM_BUDGET env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner, env={"HEADROOM_BUDGET": "100.5"}
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].budget_limit_usd == 100.5

    def test_openai_target_api_url_from_env(self, runner):
        """OPENAI_TARGET_API_URL env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner, env={"OPENAI_TARGET_API_URL": "http://my-vllm:4000"}
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://my-vllm:4000"

    def test_gemini_target_api_url_from_env(self, runner):
        """GEMINI_TARGET_API_URL env var should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner, env={"GEMINI_TARGET_API_URL": "http://my-gemini:5000"}
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].gemini_api_url == "http://my-gemini:5000"

    def test_openai_api_url_cli_flag(self, runner):
        """--openai-api-url CLI flag should be passed to ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner, ["--openai-api-url", "http://from-cli:4000"]
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://from-cli:4000"

    def test_cli_flag_overrides_env_var(self, runner):
        """CLI flag should take precedence over env var."""
        result, captured = self._invoke_proxy(
            runner,
            ["--openai-api-url", "http://from-cli:4000"],
            env={"OPENAI_TARGET_API_URL": "http://from-env:4000"},
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://from-cli:4000"

    def test_no_env_var_defaults_to_none(self, runner):
        """Without env var or flag, openai_api_url should be None."""
        # Ensure the env var is not set, even if the ambient environment has it.
        env = {k: v for k, v in os.environ.items() if k != "OPENAI_TARGET_API_URL"}
        with patch.dict(os.environ, env, clear=True):
            result, captured = self._invoke_proxy(runner)

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url is None

    def test_both_api_urls_from_env(self, runner):
        """Both OPENAI and GEMINI target URLs can be set via env."""
        result, captured = self._invoke_proxy(
            runner,
            env={
                "OPENAI_TARGET_API_URL": "http://my-vllm:4000",
                "GEMINI_TARGET_API_URL": "http://my-gemini:5000",
            },
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].openai_api_url == "http://my-vllm:4000"
        assert captured["config"].gemini_api_url == "http://my-gemini:5000"

    def test_retry_and_connect_timeout_cli_flags(self, runner):
        """Fast-fail CLI flags should map into ProxyConfig."""
        result, captured = self._invoke_proxy(
            runner,
            [
                "--retry-max-attempts",
                "1",
                "--connect-timeout-seconds",
                "3",
            ],
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].retry_max_attempts == 1
        assert captured["config"].connect_timeout_seconds == 3

    def test_production_scaling_env_vars(self, runner):
        """Scaling env vars should reach the config and the run_server kwargs."""
        result, captured = self._invoke_proxy(
            runner,
            env={
                "HEADROOM_WORKERS": "4",
                "HEADROOM_LIMIT_CONCURRENCY": "250",
                "HEADROOM_MAX_CONNECTIONS": "200",
                "HEADROOM_MAX_KEEPALIVE": "50",
            },
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].max_connections == 200
        assert captured["config"].max_keepalive_connections == 50
        assert captured["kwargs"] == {"workers": 4, "limit_concurrency": 250}

    def test_production_scaling_cli_flags_override_env_vars(self, runner):
        """Scaling CLI flags should take precedence over the scaling env vars."""
        result, captured = self._invoke_proxy(
            runner,
            [
                "--workers",
                "3",
                "--limit-concurrency",
                "125",
                "--max-connections",
                "150",
                "--max-keepalive",
                "25",
            ],
            env={
                "HEADROOM_WORKERS": "4",
                "HEADROOM_LIMIT_CONCURRENCY": "250",
                "HEADROOM_MAX_CONNECTIONS": "200",
                "HEADROOM_MAX_KEEPALIVE": "50",
            },
        )

        assert result.exit_code == 0, result.output
        assert captured["config"].max_connections == 150
        assert captured["config"].max_keepalive_connections == 25
        assert captured["kwargs"] == {"workers": 3, "limit_concurrency": 125}
class TestCLIProxyBackend:
    """Check that the CLI accepts ``litellm-*`` values for ``--backend``."""

    def test_litellm_hosted_vllm_backend_accepted(self, runner):
        """Passing ``--backend litellm-hosted_vllm`` must succeed and be stored."""
        seen = {}

        def fake_run_server(config):
            seen.update(config=config)

        cli_args = ["proxy", "--backend", "litellm-hosted_vllm"]
        with patch("headroom.proxy.server.run_server", new=fake_run_server):
            outcome = runner.invoke(main, cli_args, catch_exceptions=False)

        assert outcome.exit_code == 0, outcome.output
        assert seen["config"].backend == "litellm-hosted_vllm"

    def test_litellm_vertex_backend_accepted(self, runner):
        """Passing ``--backend litellm-vertex`` must succeed and be stored."""
        seen = {}

        def fake_run_server(config):
            seen.update(config=config)

        cli_args = ["proxy", "--backend", "litellm-vertex"]
        with patch("headroom.proxy.server.run_server", new=fake_run_server):
            outcome = runner.invoke(main, cli_args, catch_exceptions=False)

        assert outcome.exit_code == 0, outcome.output
        assert seen["config"].backend == "litellm-vertex"

    def test_litellm_backend_with_openai_url(self, runner):
        """A litellm backend combined with ``--openai-api-url`` keeps both values."""
        seen = {}

        def fake_run_server(config):
            seen.update(config=config)

        cli_args = ["proxy"]
        cli_args += ["--backend", "litellm-hosted_vllm"]
        cli_args += ["--openai-api-url", "http://my-vllm:4000"]
        with patch("headroom.proxy.server.run_server", new=fake_run_server):
            outcome = runner.invoke(main, cli_args, catch_exceptions=False)

        assert outcome.exit_code == 0, outcome.output
        received = seen["config"]
        assert received.backend == "litellm-hosted_vllm"
        assert received.openai_api_url == "http://my-vllm:4000"
class TestCLIAnyllmProviderEnv:
    """Check how the CLI picks up the any-llm provider setting."""

    def test_anyllm_provider_from_env(self, runner):
        """HEADROOM_ANYLLM_PROVIDER in the environment should land on the config."""
        received = {}

        def stub_run_server(config):
            received["config"] = config

        environment = {"HEADROOM_ANYLLM_PROVIDER": "llamacpp"}
        with patch("headroom.proxy.server.run_server", new=stub_run_server):
            outcome = runner.invoke(
                main,
                ["proxy", "--backend", "anyllm"],
                env=environment,
                catch_exceptions=False,
            )

        assert outcome.exit_code == 0, outcome.output
        assert received["config"].anyllm_provider == "llamacpp"

    def test_anyllm_provider_cli_flag_works(self, runner):
        """The ``--anyllm-provider`` flag should land on the config."""
        received = {}

        def stub_run_server(config):
            received["config"] = config

        argv = ["proxy", "--backend", "anyllm", "--anyllm-provider", "groq"]
        with patch("headroom.proxy.server.run_server", new=stub_run_server):
            outcome = runner.invoke(main, argv, catch_exceptions=False)

        assert outcome.exit_code == 0, outcome.output
        assert received["config"].anyllm_provider == "groq"
class TestArgparseBackendValidation:
    """Check that an argparse ``--backend`` option without choices takes litellm-* values."""

    def test_argparse_accepts_litellm_backend(self):
        """An unrestricted ``--backend`` argument should parse ``litellm-hosted_vllm``."""
        from argparse import ArgumentParser

        # NOTE: this builds a local stand-in parser rather than importing the
        # one from server.py, so it only shows that argparse itself does not
        # reject litellm-* values when no ``choices`` restriction is given.
        backend_name = "litellm-hosted_vllm"
        stand_in = ArgumentParser()
        stand_in.add_argument("--backend", default="anthropic")

        namespace = stand_in.parse_args(["--backend", backend_name])

        assert namespace.backend == backend_name