ml-intern

Sleeping

App Files Files Community

ml-intern / tests /unit /test_llm_params.py

lewtun HF Staff

Add CLI local model support (#228)

4668dbd unverified 27 days ago

raw

history blame contribute delete

6.63 kB

	import pytest

	from agent.core.hf_tokens import resolve_hf_request_token
	from agent.core.llm_params import (
	UnsupportedEffortError,
	_resolve_hf_router_token,
	_resolve_llm_params,
	)


	def test_openai_xhigh_effort_is_forwarded():
	    """Strict resolution of an OpenAI model keeps the ``xhigh`` effort."""
	    resolved = _resolve_llm_params("openai/gpt-5.5", reasoning_effort="xhigh", strict=True)

	    # Both the model id and the requested effort must come back unchanged.
	    assert resolved["model"] == "openai/gpt-5.5"
	    assert resolved["reasoning_effort"] == "xhigh"


	def test_openai_max_effort_is_still_rejected():
	    """Strict resolution of an OpenAI model with ``max`` effort must raise.

	    Uses ``pytest.raises`` like the other rejection tests in this module
	    instead of a hand-rolled try/except/else, so a missing exception is
	    reported by pytest itself.
	    """
	    with pytest.raises(UnsupportedEffortError) as excinfo:
	        _resolve_llm_params(
	            "openai/gpt-5.4",
	            reasoning_effort="max",
	            strict=True,
	        )

	    # Keep the original literal-substring check rather than a regex ``match=``.
	    assert "OpenAI doesn't accept effort='max'" in str(excinfo.value)


	def test_resolve_ollama_params_adds_v1_and_uses_default_key(monkeypatch):
	    """An ollama model id resolves to an OpenAI-style model, a ``/v1`` base and the default key.

	    The expected result is the exact dict below, so no API key may leak in
	    from the environment the test happens to run in.
	    """
	    monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
	    # The shared fallback test in this module shows LOCAL_LLM_API_KEY being
	    # used when the provider-specific key is unset; presumably that applies
	    # to ollama too, so clear it to keep the default-key assertion hermetic.
	    monkeypatch.delenv("LOCAL_LLM_API_KEY", raising=False)
	    monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434")

	    params = _resolve_llm_params("ollama/llama3.1:8b")

	    assert params == {
	        "model": "openai/llama3.1:8b",
	        "api_base": "http://localhost:11434/v1",
	        "api_key": "sk-local-no-key-required",
	    }


	def test_resolve_vllm_params_keeps_existing_v1_and_trims_slash(monkeypatch):
	    """A vllm base URL already ending in ``/v1/`` keeps ``/v1`` and loses the trailing slash.

	    Also checks the provider prefix is rewritten to ``openai/`` and that the
	    default placeholder key is used when no key is configured.
	    """
	    monkeypatch.delenv("VLLM_API_KEY", raising=False)
	    # The shared fallback test in this module shows vllm picking up
	    # LOCAL_LLM_API_KEY when VLLM_API_KEY is unset; clear it so an ambient
	    # value cannot break the default-key assertion below.
	    monkeypatch.delenv("LOCAL_LLM_API_KEY", raising=False)
	    monkeypatch.setenv("VLLM_BASE_URL", "http://localhost:8000/v1/")

	    params = _resolve_llm_params("vllm/meta-llama/Llama-3.1-8B-Instruct")

	    assert params["model"] == "openai/meta-llama/Llama-3.1-8B-Instruct"
	    assert params["api_base"] == "http://localhost:8000/v1"
	    assert params["api_key"] == "sk-local-no-key-required"


	def test_resolve_lm_studio_params_uses_api_key_override(monkeypatch):
	    """LM Studio specific env vars win over the shared ``LOCAL_LLM_*`` ones."""
	    env = {
	        "LMSTUDIO_BASE_URL": "http://127.0.0.1:1234",
	        "LMSTUDIO_API_KEY": "local-secret",
	        "LOCAL_LLM_BASE_URL": "http://localhost:9999",
	        "LOCAL_LLM_API_KEY": "shared-secret",
	    }
	    for name, value in env.items():
	        monkeypatch.setenv(name, value)

	    resolved = _resolve_llm_params("lm_studio/google/gemma-3-4b")

	    # Base URL and key must both come from the LMSTUDIO_* variables.
	    assert resolved["model"] == "openai/google/gemma-3-4b"
	    assert resolved["api_base"] == "http://127.0.0.1:1234/v1"
	    assert resolved["api_key"] == "local-secret"


	def test_resolve_local_params_uses_shared_fallback_env(monkeypatch):
	    """Without ``VLLM_*`` variables, the shared ``LOCAL_LLM_*`` values are used."""
	    for provider_var in ("VLLM_BASE_URL", "VLLM_API_KEY"):
	        monkeypatch.delenv(provider_var, raising=False)
	    monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://localhost:9000/v1/")
	    monkeypatch.setenv("LOCAL_LLM_API_KEY", "shared-local-secret")

	    resolved = _resolve_llm_params("vllm/custom-model")

	    # The trailing slash of the shared base URL is expected to be trimmed.
	    assert resolved["model"] == "openai/custom-model"
	    assert resolved["api_base"] == "http://localhost:9000/v1"
	    assert resolved["api_key"] == "shared-local-secret"


	def test_resolve_llamacpp_params_strips_provider_prefix(monkeypatch):
	    """The ``llamacpp/`` prefix is replaced by ``openai/`` and ``/v1`` is appended."""
	    monkeypatch.delenv("LLAMACPP_API_KEY", raising=False)
	    monkeypatch.setenv("LLAMACPP_BASE_URL", "http://localhost:8080")

	    resolved = _resolve_llm_params("llamacpp/unsloth/Qwen3.5-2B")

	    assert resolved["model"] == "openai/unsloth/Qwen3.5-2B"
	    assert resolved["api_base"] == "http://localhost:8080/v1"


	def test_local_params_reject_reasoning_effort_in_strict_mode():
	    """Strict mode raises when a local model is given a reasoning effort."""
	    expected_failure = pytest.raises(UnsupportedEffortError, match="reasoning_effort")
	    with expected_failure:
	        _resolve_llm_params(
	            "ollama/llama3.1",
	            reasoning_effort="high",
	            strict=True,
	        )


	def test_local_params_drop_reasoning_effort_in_non_strict_mode():
	    """Non-strict mode silently omits the reasoning effort for a local model."""
	    resolved = _resolve_llm_params("ollama/llama3.1", reasoning_effort="high", strict=False)

	    assert resolved["model"] == "openai/llama3.1"
	    # Neither the effort itself nor an ``extra_body`` may appear in the result.
	    for absent_key in ("reasoning_effort", "extra_body"):
	        assert absent_key not in resolved


	def test_openai_compat_prefix_is_not_a_local_escape_hatch():
	    """An ``openai-compat/`` model id is rejected with a ``ValueError``."""
	    rejection = pytest.raises(ValueError, match="Unsupported local model id")
	    with rejection:
	        _resolve_llm_params("openai-compat/custom-model")


	def test_empty_local_model_id_is_not_treated_as_hf_router():
	    """A provider prefix with no model name (``ollama/``) raises ``ValueError``."""
	    rejection = pytest.raises(ValueError, match="Unsupported local model id")
	    with rejection:
	        _resolve_llm_params("ollama/")


	def test_hf_router_token_prefers_inference_token(monkeypatch):
	    """``INFERENCE_TOKEN`` beats the session token and ``HF_TOKEN``, and is stripped."""
	    monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
	    monkeypatch.setenv("HF_TOKEN", "hf-token")

	    chosen = _resolve_hf_router_token("session-token")

	    assert chosen == "inference-token"


	def test_hf_router_token_prefers_session_over_hf_cache(monkeypatch):
	    """With no ``INFERENCE_TOKEN``, the stripped session token beats ``HF_TOKEN``."""
	    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
	    monkeypatch.setenv("HF_TOKEN", "hf-token")

	    chosen = _resolve_hf_router_token(" session-token ")

	    assert chosen == "session-token"


	def test_hf_router_token_uses_hf_token_env_via_huggingface_hub(monkeypatch):
	    """With no ``INFERENCE_TOKEN`` or session token, the stripped ``HF_TOKEN`` is returned."""
	    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
	    monkeypatch.setenv("HF_TOKEN", " hf-token ")

	    chosen = _resolve_hf_router_token(None)

	    assert chosen == "hf-token"


	def test_hf_router_token_uses_huggingface_hub_cache(monkeypatch):
	    """With no token env vars, the value from ``huggingface_hub.get_token`` is returned."""
	    import huggingface_hub

	    def cached_token():
	        return "cached-token"

	    for env_name in ("INFERENCE_TOKEN", "HF_TOKEN"):
	        monkeypatch.delenv(env_name, raising=False)
	    monkeypatch.setattr(huggingface_hub, "get_token", cached_token)

	    chosen = _resolve_hf_router_token(None)

	    assert chosen == "cached-token"


	def test_hf_router_token_swallows_huggingface_hub_errors(monkeypatch):
	    """A ``RuntimeError`` from ``huggingface_hub.get_token`` yields ``None``, not an exception."""
	    import huggingface_hub

	    def broken_get_token():
	        raise RuntimeError("cache unavailable")

	    for env_name in ("INFERENCE_TOKEN", "HF_TOKEN"):
	        monkeypatch.delenv(env_name, raising=False)
	    monkeypatch.setattr(huggingface_hub, "get_token", broken_get_token)

	    chosen = _resolve_hf_router_token(None)

	    assert chosen is None


	def test_hf_router_params_set_bill_to_only_for_inference_token(monkeypatch):
	    """With ``INFERENCE_TOKEN`` and ``HF_BILL_TO`` set, the params carry the billing header."""
	    monkeypatch.setenv("HF_BILL_TO", "test-org")
	    monkeypatch.setenv("INFERENCE_TOKEN", "inference-token")

	    resolved = _resolve_llm_params("moonshotai/Kimi-K2.6")

	    expected_headers = {"X-HF-Bill-To": "test-org"}
	    assert resolved["api_key"] == "inference-token"
	    assert resolved["extra_headers"] == expected_headers


	def test_hf_request_token_keeps_browser_user_precedence(monkeypatch):
	    """The bearer token from the request header beats the cookie and ``HF_TOKEN``."""
	    monkeypatch.setenv("HF_TOKEN", "server-token")

	    # Minimal stand-in exposing only the attributes set below.
	    class Request:
	        headers = {"Authorization": "Bearer browser-token"}
	        cookies = {"hf_access_token": "cookie-token"}

	    chosen = resolve_hf_request_token(Request())

	    assert chosen == "browser-token"


	def test_hf_request_token_does_not_use_cached_login(monkeypatch):
	    """A request with no credentials resolves to ``None`` even if a cached token exists."""
	    import huggingface_hub

	    def cached_token():
	        return "cached-token"

	    # Request stand-in with empty headers and cookies.
	    class Request:
	        headers = {}
	        cookies = {}

	    monkeypatch.delenv("HF_TOKEN", raising=False)
	    monkeypatch.setattr(huggingface_hub, "get_token", cached_token)

	    chosen = resolve_hf_request_token(Request())

	    assert chosen is None