import pytest

from agent.core.hf_tokens import resolve_hf_request_token
from agent.core.llm_params import (
    UnsupportedEffortError,
    _resolve_hf_router_token,
    _resolve_llm_params,
)


def test_openai_xhigh_effort_is_forwarded():
    """In strict mode, ``xhigh`` effort on an OpenAI model is passed through unchanged."""
    params = _resolve_llm_params(
        "openai/gpt-5.5",
        reasoning_effort="xhigh",
        strict=True,
    )

    assert params["model"] == "openai/gpt-5.5"
    assert params["reasoning_effort"] == "xhigh"


def test_openai_max_effort_is_still_rejected():
    """In strict mode, ``max`` effort on an OpenAI model raises UnsupportedEffortError."""
    # pytest.raises fails the test when nothing is raised, matching the other
    # rejection tests in this module. The message is checked by substring
    # rather than ``match=`` so it is not interpreted as a regex.
    with pytest.raises(UnsupportedEffortError) as exc_info:
        _resolve_llm_params(
            "openai/gpt-5.4",
            reasoning_effort="max",
            strict=True,
        )

    assert "OpenAI doesn't accept effort='max'" in str(exc_info.value)


def test_resolve_ollama_params_adds_v1_and_uses_default_key(monkeypatch):
    """An Ollama base URL without ``/v1`` gets it appended; with no API key set, the placeholder key is used."""
    monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
    monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434")

    params = _resolve_llm_params("ollama/llama3.1:8b")

    # Exact equality: no extra keys are expected in the resolved params.
    assert params == {
        "model": "openai/llama3.1:8b",
        "api_base": "http://localhost:11434/v1",
        "api_key": "sk-local-no-key-required",
    }


def test_resolve_vllm_params_keeps_existing_v1_and_trims_slash(monkeypatch):
    """A vLLM base URL already ending in ``/v1/`` is not doubled up; only the trailing slash is removed."""
    monkeypatch.delenv("VLLM_API_KEY", raising=False)
    monkeypatch.setenv("VLLM_BASE_URL", "http://localhost:8000/v1/")

    params = _resolve_llm_params("vllm/meta-llama/Llama-3.1-8B-Instruct")

    assert params["model"] == "openai/meta-llama/Llama-3.1-8B-Instruct"
    assert params["api_base"] == "http://localhost:8000/v1"
    assert params["api_key"] == "sk-local-no-key-required"


def test_resolve_lm_studio_params_uses_api_key_override(monkeypatch):
    """LM Studio-specific env vars win over the shared ``LOCAL_LLM_*`` ones when both are set."""
    monkeypatch.setenv("LMSTUDIO_BASE_URL", "http://127.0.0.1:1234")
    monkeypatch.setenv("LMSTUDIO_API_KEY", "local-secret")
    monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://localhost:9999")
    monkeypatch.setenv("LOCAL_LLM_API_KEY", "shared-secret")

    params = _resolve_llm_params("lm_studio/google/gemma-3-4b")

    assert params["model"] == "openai/google/gemma-3-4b"
    assert params["api_base"] == "http://127.0.0.1:1234/v1"
    assert params["api_key"] == "local-secret"


def test_resolve_local_params_uses_shared_fallback_env(monkeypatch):
    """With the vLLM-specific env vars unset, the shared ``LOCAL_LLM_*`` values are used."""
    monkeypatch.delenv("VLLM_BASE_URL", raising=False)
    monkeypatch.delenv("VLLM_API_KEY", raising=False)
    monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://localhost:9000/v1/")
    monkeypatch.setenv("LOCAL_LLM_API_KEY", "shared-local-secret")

    params = _resolve_llm_params("vllm/custom-model")

    assert params["model"] == "openai/custom-model"
    assert params["api_base"] == "http://localhost:9000/v1"
    assert params["api_key"] == "shared-local-secret"


def test_resolve_llamacpp_params_strips_provider_prefix(monkeypatch):
    """The ``llamacpp/`` prefix is replaced by ``openai/`` and ``/v1`` is appended to the base URL."""
    monkeypatch.delenv("LLAMACPP_API_KEY", raising=False)
    monkeypatch.setenv("LLAMACPP_BASE_URL", "http://localhost:8080")

    params = _resolve_llm_params("llamacpp/unsloth/Qwen3.5-2B")

    assert params["model"] == "openai/unsloth/Qwen3.5-2B"
    assert params["api_base"] == "http://localhost:8080/v1"


def test_local_params_reject_reasoning_effort_in_strict_mode():
    """In strict mode, passing ``reasoning_effort`` for a local model raises UnsupportedEffortError."""
    with pytest.raises(UnsupportedEffortError, match="reasoning_effort"):
        _resolve_llm_params("ollama/llama3.1", reasoning_effort="high", strict=True)


def test_local_params_drop_reasoning_effort_in_non_strict_mode():
    """In non-strict mode, ``reasoning_effort`` is dropped for a local model instead of raising."""
    params = _resolve_llm_params(
        "ollama/llama3.1",
        reasoning_effort="high",
        strict=False,
    )

    assert params["model"] == "openai/llama3.1"
    assert "reasoning_effort" not in params
    assert "extra_body" not in params


def test_openai_compat_prefix_is_not_a_local_escape_hatch():
    """An ``openai-compat/`` model id is rejected with a ValueError."""
    with pytest.raises(ValueError, match="Unsupported local model id"):
        _resolve_llm_params("openai-compat/custom-model")


def test_empty_local_model_id_is_not_treated_as_hf_router():
    """A local prefix with no model name after it is rejected with a ValueError."""
    with pytest.raises(ValueError, match="Unsupported local model id"):
        _resolve_llm_params("ollama/")


def test_hf_router_token_prefers_inference_token(monkeypatch):
    """``INFERENCE_TOKEN`` wins over both the session token and ``HF_TOKEN``, and is stripped."""
    monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
    monkeypatch.setenv("HF_TOKEN", "hf-token")

    assert _resolve_hf_router_token("session-token") == "inference-token"


def test_hf_router_token_prefers_session_over_hf_cache(monkeypatch):
    """Without ``INFERENCE_TOKEN``, the session token wins over ``HF_TOKEN`` and is stripped."""
    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.setenv("HF_TOKEN", "hf-token")

    assert _resolve_hf_router_token(" session-token ") == "session-token"


def test_hf_router_token_uses_hf_token_env_via_huggingface_hub(monkeypatch):
    """With no ``INFERENCE_TOKEN`` and no session token, the stripped ``HF_TOKEN`` value is returned."""
    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.setenv("HF_TOKEN", " hf-token ")

    assert _resolve_hf_router_token(None) == "hf-token"


def test_hf_router_token_uses_huggingface_hub_cache(monkeypatch):
    """With no env tokens and no session token, the value from ``huggingface_hub.get_token`` is returned."""
    import huggingface_hub

    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.delenv("HF_TOKEN", raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", lambda: "cached-token")

    assert _resolve_hf_router_token(None) == "cached-token"


def test_hf_router_token_swallows_huggingface_hub_errors(monkeypatch):
    """If ``huggingface_hub.get_token`` raises, the resolver returns ``None`` instead of propagating."""
    import huggingface_hub

    def fail():
        raise RuntimeError("cache unavailable")

    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.delenv("HF_TOKEN", raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", fail)

    assert _resolve_hf_router_token(None) is None


def test_hf_router_params_set_bill_to_only_for_inference_token(monkeypatch):
    """With ``INFERENCE_TOKEN`` and ``HF_BILL_TO`` set, the params carry the ``X-HF-Bill-To`` header."""
    # NOTE(review): only the positive case is asserted here; the "only" in the
    # test name (no header without INFERENCE_TOKEN) is not covered by this test.
    monkeypatch.setenv("INFERENCE_TOKEN", "inference-token")
    monkeypatch.setenv("HF_BILL_TO", "test-org")

    params = _resolve_llm_params("moonshotai/Kimi-K2.6")

    assert params["api_key"] == "inference-token"
    assert params["extra_headers"] == {"X-HF-Bill-To": "test-org"}


def test_hf_request_token_keeps_browser_user_precedence(monkeypatch):
    """The Authorization bearer token wins over the cookie token and the ``HF_TOKEN`` env var."""

    class Request:
        # Minimal stand-in exposing only the attributes this test sets up.
        headers = {"Authorization": "Bearer browser-token"}
        cookies = {"hf_access_token": "cookie-token"}

    monkeypatch.setenv("HF_TOKEN", "server-token")

    assert resolve_hf_request_token(Request()) == "browser-token"


def test_hf_request_token_does_not_use_cached_login(monkeypatch):
    """With no header, cookie or ``HF_TOKEN``, the result is ``None`` even if ``get_token`` has a value."""
    import huggingface_hub

    class Request:
        # Empty request: no Authorization header and no token cookie.
        headers = {}
        cookies = {}

    monkeypatch.delenv("HF_TOKEN", raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", lambda: "cached-token")

    assert resolve_hf_request_token(Request()) is None