Spaces:

minhtudragon
/

headroom

Running

headroom / tests / test_cli / test_cli_proxy_env.py

tudragon154203

fix: route count_tokens to api.anthropic.com, not proxy base_url

0adb431 24 days ago

14 kB

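	"""Tests for CLI proxy env variable handling and backend validation.

	Verifies that:
	1. HEADROOM_* settings and the OPENAI_TARGET_API_URL / GEMINI_TARGET_API_URL
	   env vars are read by `headroom proxy`, with CLI flags taking precedence
	   over the env vars in the cases covered here
	2. litellm-* backends are accepted by both CLI and argparse paths
	3. HEADROOM_ANYLLM_PROVIDER is read by `headroom proxy --backend anyllm`
	"""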

	import os
	from unittest.mock import patch

	import pytest

	click = pytest.importorskip("click")
	pytest.importorskip("fastapi")

	from click.testing import CliRunner # noqa: E402

	from headroom.cli.main import main # noqa: E402


	@pytest.fixture
	def runner():
	    """Provide a fresh Click ``CliRunner`` for invoking the CLI in-process."""
	    cli_runner = CliRunner()
	    return cli_runner


	class TestCLIProxyEnvVars:
	    """Test that the CLI proxy command reads its settings from env vars and flags.

	    Every test replaces ``headroom.proxy.server.run_server`` with a recorder,
	    so the assertions inspect the config object (and any keyword arguments)
	    that the CLI handed to ``run_server``.
	    """

	    @staticmethod
	    def _invoke_proxy(runner, args=(), env=None):
	        """Run ``headroom proxy`` with ``run_server`` replaced by a recorder.

	        Args:
	            runner: The Click ``CliRunner`` used to invoke the CLI.
	            args: Extra command-line arguments placed after ``proxy``.
	            env: Environment overrides forwarded to ``CliRunner.invoke``.

	        Returns:
	            A ``(result, captured)`` pair. ``captured`` maps ``"config"`` to
	            the config argument and ``"kwargs"`` to the keyword arguments
	            that ``run_server`` was called with; it stays empty if
	            ``run_server`` was never called.
	        """
	        captured = {}

	        # Always accept keyword arguments: the scaling tests below show that
	        # run_server receives workers/limit_concurrency when those settings
	        # are present, and a recorder that rejected them would fail
	        # unrelated tests whenever the ambient environment sets them.
	        def record_run_server(config, **kwargs):
	            captured["config"] = config
	            captured["kwargs"] = kwargs

	        with patch("headroom.proxy.server.run_server", record_run_server):
	            result = runner.invoke(
	                main,
	                ["proxy", *args],
	                env=env,
	                catch_exceptions=False,
	            )
	        return result, captured

	    def test_headroom_host_from_env(self, runner):
	        """HEADROOM_HOST env var should be passed to ProxyConfig."""
	        result, captured = self._invoke_proxy(runner, env={"HEADROOM_HOST": "0.0.0.0"})

	        assert result.exit_code == 0, result.output
	        assert captured["config"].host == "0.0.0.0"

	    def test_headroom_port_from_env(self, runner):
	        """HEADROOM_PORT env var should be passed to ProxyConfig."""
	        result, captured = self._invoke_proxy(runner, env={"HEADROOM_PORT": "9797"})

	        assert result.exit_code == 0, result.output
	        assert captured["config"].port == 9797

	    def test_headroom_budget_from_env(self, runner):
	        """HEADROOM_BUDGET env var should be passed to ProxyConfig."""
	        result, captured = self._invoke_proxy(runner, env={"HEADROOM_BUDGET": "100.5"})

	        assert result.exit_code == 0, result.output
	        assert captured["config"].budget_limit_usd == 100.5

	    def test_openai_target_api_url_from_env(self, runner):
	        """OPENAI_TARGET_API_URL env var should be passed to ProxyConfig."""
	        result, captured = self._invoke_proxy(
	            runner,
	            env={"OPENAI_TARGET_API_URL": "http://my-vllm:4000"},
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].openai_api_url == "http://my-vllm:4000"

	    def test_gemini_target_api_url_from_env(self, runner):
	        """GEMINI_TARGET_API_URL env var should be passed to ProxyConfig."""
	        result, captured = self._invoke_proxy(
	            runner,
	            env={"GEMINI_TARGET_API_URL": "http://my-gemini:5000"},
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].gemini_api_url == "http://my-gemini:5000"

	    def test_openai_api_url_cli_flag(self, runner):
	        """--openai-api-url CLI flag should take precedence."""
	        result, captured = self._invoke_proxy(
	            runner,
	            args=["--openai-api-url", "http://from-cli:4000"],
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].openai_api_url == "http://from-cli:4000"

	    def test_cli_flag_overrides_env_var(self, runner):
	        """CLI flag should take precedence over env var."""
	        result, captured = self._invoke_proxy(
	            runner,
	            args=["--openai-api-url", "http://from-cli:4000"],
	            env={"OPENAI_TARGET_API_URL": "http://from-env:4000"},
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].openai_api_url == "http://from-cli:4000"

	    def test_no_env_var_defaults_to_none(self, runner):
	        """Without env var or flag, openai_api_url should be None."""
	        # Ensure the env var is not set. patch.dict restores os.environ when
	        # the block exits, so the removal cannot leak into other tests.
	        with patch.dict(os.environ):
	            os.environ.pop("OPENAI_TARGET_API_URL", None)
	            result, captured = self._invoke_proxy(runner)

	        assert result.exit_code == 0, result.output
	        assert captured["config"].openai_api_url is None

	    def test_both_api_urls_from_env(self, runner):
	        """Both OPENAI and GEMINI target URLs can be set via env."""
	        result, captured = self._invoke_proxy(
	            runner,
	            env={
	                "OPENAI_TARGET_API_URL": "http://my-vllm:4000",
	                "GEMINI_TARGET_API_URL": "http://my-gemini:5000",
	            },
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].openai_api_url == "http://my-vllm:4000"
	        assert captured["config"].gemini_api_url == "http://my-gemini:5000"

	    def test_retry_and_connect_timeout_cli_flags(self, runner):
	        """Fast-fail CLI flags should map into ProxyConfig."""
	        result, captured = self._invoke_proxy(
	            runner,
	            args=[
	                "--retry-max-attempts",
	                "1",
	                "--connect-timeout-seconds",
	                "3",
	            ],
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].retry_max_attempts == 1
	        assert captured["config"].connect_timeout_seconds == 3

	    def test_production_scaling_env_vars(self, runner):
	        """Scaling env vars should reach the config and run_server's kwargs."""
	        result, captured = self._invoke_proxy(
	            runner,
	            env={
	                "HEADROOM_WORKERS": "4",
	                "HEADROOM_LIMIT_CONCURRENCY": "250",
	                "HEADROOM_MAX_CONNECTIONS": "200",
	                "HEADROOM_MAX_KEEPALIVE": "50",
	            },
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].max_connections == 200
	        assert captured["config"].max_keepalive_connections == 50
	        assert captured["kwargs"] == {"workers": 4, "limit_concurrency": 250}

	    def test_production_scaling_cli_flags_override_env_vars(self, runner):
	        """Scaling CLI flags should take precedence over the matching env vars."""
	        result, captured = self._invoke_proxy(
	            runner,
	            args=[
	                "--workers",
	                "3",
	                "--limit-concurrency",
	                "125",
	                "--max-connections",
	                "150",
	                "--max-keepalive",
	                "25",
	            ],
	            env={
	                "HEADROOM_WORKERS": "4",
	                "HEADROOM_LIMIT_CONCURRENCY": "250",
	                "HEADROOM_MAX_CONNECTIONS": "200",
	                "HEADROOM_MAX_KEEPALIVE": "50",
	            },
	        )

	        assert result.exit_code == 0, result.output
	        assert captured["config"].max_connections == 150
	        assert captured["config"].max_keepalive_connections == 25
	        assert captured["kwargs"] == {"workers": 3, "limit_concurrency": 125}


	class TestCLIProxyBackend:
	    """Check that ``headroom proxy --backend`` lets litellm-* values through."""

	    def test_litellm_hosted_vllm_backend_accepted(self, runner):
	        """``--backend litellm-hosted_vllm`` must be accepted, not rejected."""
	        calls = []

	        def fake_run_server(config):
	            calls.append(config)

	        cli_args = ["proxy", "--backend", "litellm-hosted_vllm"]
	        with patch("headroom.proxy.server.run_server", new=fake_run_server):
	            outcome = runner.invoke(main, cli_args, catch_exceptions=False)

	        assert outcome.exit_code == 0, outcome.output
	        assert calls[-1].backend == "litellm-hosted_vllm"

	    def test_litellm_vertex_backend_accepted(self, runner):
	        """``--backend litellm-vertex`` must be accepted as well."""
	        calls = []

	        def fake_run_server(config):
	            calls.append(config)

	        cli_args = ["proxy", "--backend", "litellm-vertex"]
	        with patch("headroom.proxy.server.run_server", new=fake_run_server):
	            outcome = runner.invoke(main, cli_args, catch_exceptions=False)

	        assert outcome.exit_code == 0, outcome.output
	        assert calls[-1].backend == "litellm-vertex"

	    def test_litellm_backend_with_openai_url(self, runner):
	        """A litellm backend combined with ``--openai-api-url`` keeps both values."""
	        calls = []

	        def fake_run_server(config):
	            calls.append(config)

	        cli_args = [
	            "proxy",
	            "--backend",
	            "litellm-hosted_vllm",
	            "--openai-api-url",
	            "http://my-vllm:4000",
	        ]
	        with patch("headroom.proxy.server.run_server", new=fake_run_server):
	            outcome = runner.invoke(main, cli_args, catch_exceptions=False)

	        assert outcome.exit_code == 0, outcome.output
	        received = calls[-1]
	        assert received.backend == "litellm-hosted_vllm"
	        assert received.openai_api_url == "http://my-vllm:4000"


	class TestCLIAnyllmProviderEnv:
	    """Check how ``headroom proxy --backend anyllm`` picks its provider."""

	    def test_anyllm_provider_from_env(self, runner):
	        """A HEADROOM_ANYLLM_PROVIDER value should end up on the config."""
	        calls = []

	        def fake_run_server(config):
	            calls.append(config)

	        cli_args = ["proxy", "--backend", "anyllm"]
	        overrides = {"HEADROOM_ANYLLM_PROVIDER": "llamacpp"}
	        with patch("headroom.proxy.server.run_server", new=fake_run_server):
	            outcome = runner.invoke(
	                main,
	                cli_args,
	                env=overrides,
	                catch_exceptions=False,
	            )

	        assert outcome.exit_code == 0, outcome.output
	        assert calls[-1].anyllm_provider == "llamacpp"

	    def test_anyllm_provider_cli_flag_works(self, runner):
	        """The ``--anyllm-provider`` flag should end up on the config."""
	        calls = []

	        def fake_run_server(config):
	            calls.append(config)

	        cli_args = ["proxy", "--backend", "anyllm", "--anyllm-provider", "groq"]
	        with patch("headroom.proxy.server.run_server", new=fake_run_server):
	            outcome = runner.invoke(main, cli_args, catch_exceptions=False)

	        assert outcome.exit_code == 0, outcome.output
	        assert calls[-1].anyllm_provider == "groq"


	class TestArgparseBackendValidation:
	    """Check that an argparse ``--backend`` option accepts litellm-* values."""

	    def test_argparse_accepts_litellm_backend(self):
	        """A ``--backend`` option declared without ``choices`` keeps litellm-hosted_vllm."""
	        from argparse import ArgumentParser

	        # Stand-in parser declaring --backend as a free-form string with no
	        # ``choices`` restriction, which is all this check relies on.
	        # NOTE(review): the real parser in server.py is not imported here, so
	        # this cannot catch a regression there - confirm that is intended.
	        backend_value = "litellm-hosted_vllm"
	        stand_in = ArgumentParser()
	        stand_in.add_argument("--backend", default="anthropic")
	        parsed = stand_in.parse_args(["--backend", backend_value])
	        assert parsed.backend == backend_value