Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| import pytest | |
| from agent.core.hf_tokens import resolve_hf_request_token | |
| from agent.core.llm_params import ( | |
| UnsupportedEffortError, | |
| _resolve_hf_router_token, | |
| _resolve_llm_params, | |
| ) | |
def test_openai_xhigh_effort_is_forwarded():
    """An ``xhigh`` effort on an OpenAI model id is passed through in strict mode."""
    resolved = _resolve_llm_params("openai/gpt-5.5", reasoning_effort="xhigh", strict=True)

    expected = {"model": "openai/gpt-5.5", "reasoning_effort": "xhigh"}
    # Only these two keys are checked; any other keys in the result are ignored.
    for key in ("model", "reasoning_effort"):
        assert resolved[key] == expected[key]
def test_openai_max_effort_is_still_rejected():
    """Strict resolution rejects ``max`` effort for an OpenAI model id.

    Uses ``pytest.raises`` like the other rejection tests in this module.
    ``match`` is applied with ``re.search`` to ``str(exc)``; the pattern below
    contains no regex metacharacters, so it behaves as a plain substring check.
    """
    with pytest.raises(
        UnsupportedEffortError,
        match="OpenAI doesn't accept effort='max'",
    ):
        _resolve_llm_params(
            "openai/gpt-5.4",
            reasoning_effort="max",
            strict=True,
        )
def test_resolve_ollama_params_adds_v1_and_uses_default_key(monkeypatch):
    """Ollama ids resolve to an ``openai/`` model with ``/v1`` appended and the default key.

    Args:
        monkeypatch: pytest fixture used to control the environment for this test.
    """
    monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
    # The shared LOCAL_LLM_API_KEY is used as a fallback for local providers
    # (exercised for vllm elsewhere in this module; presumably the same applies
    # to ollama). Clear it so an ambient value cannot replace the default key
    # asserted below.
    monkeypatch.delenv("LOCAL_LLM_API_KEY", raising=False)
    monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434")

    params = _resolve_llm_params("ollama/llama3.1:8b")

    assert params == {
        "model": "openai/llama3.1:8b",
        "api_base": "http://localhost:11434/v1",
        "api_key": "sk-local-no-key-required",
    }
def test_resolve_vllm_params_keeps_existing_v1_and_trims_slash(monkeypatch):
    """A vLLM base URL already ending in ``/v1/`` keeps ``/v1`` and loses the trailing slash.

    Args:
        monkeypatch: pytest fixture used to control the environment for this test.
    """
    monkeypatch.delenv("VLLM_API_KEY", raising=False)
    # LOCAL_LLM_API_KEY is the shared fallback when VLLM_API_KEY is unset; clear
    # it so an ambient value cannot replace the default key asserted below.
    monkeypatch.delenv("LOCAL_LLM_API_KEY", raising=False)
    monkeypatch.setenv("VLLM_BASE_URL", "http://localhost:8000/v1/")

    params = _resolve_llm_params("vllm/meta-llama/Llama-3.1-8B-Instruct")

    assert params["model"] == "openai/meta-llama/Llama-3.1-8B-Instruct"
    assert params["api_base"] == "http://localhost:8000/v1"
    assert params["api_key"] == "sk-local-no-key-required"
def test_resolve_lm_studio_params_uses_api_key_override(monkeypatch):
    """LM Studio-specific env vars win over the shared ``LOCAL_LLM_*`` ones."""
    environment = {
        "LMSTUDIO_BASE_URL": "http://127.0.0.1:1234",
        "LMSTUDIO_API_KEY": "local-secret",
        "LOCAL_LLM_BASE_URL": "http://localhost:9999",
        "LOCAL_LLM_API_KEY": "shared-secret",
    }
    for name, value in environment.items():
        monkeypatch.setenv(name, value)

    resolved = _resolve_llm_params("lm_studio/google/gemma-3-4b")

    assert resolved["model"] == "openai/google/gemma-3-4b"
    assert resolved["api_base"] == "http://127.0.0.1:1234/v1"
    assert resolved["api_key"] == "local-secret"
def test_resolve_local_params_uses_shared_fallback_env(monkeypatch):
    """With no ``VLLM_*`` vars set, the shared ``LOCAL_LLM_*`` values are used."""
    for unset_name in ("VLLM_BASE_URL", "VLLM_API_KEY"):
        monkeypatch.delenv(unset_name, raising=False)
    monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://localhost:9000/v1/")
    monkeypatch.setenv("LOCAL_LLM_API_KEY", "shared-local-secret")

    resolved = _resolve_llm_params("vllm/custom-model")

    assert resolved["model"] == "openai/custom-model"
    # The trailing slash of the shared base URL is expected to be trimmed.
    assert resolved["api_base"] == "http://localhost:9000/v1"
    assert resolved["api_key"] == "shared-local-secret"
def test_resolve_llamacpp_params_strips_provider_prefix(monkeypatch):
    """The ``llamacpp/`` prefix is swapped for ``openai/`` and ``/v1`` is appended."""
    monkeypatch.delenv("LLAMACPP_API_KEY", raising=False)
    monkeypatch.setenv("LLAMACPP_BASE_URL", "http://localhost:8080")

    resolved = _resolve_llm_params("llamacpp/unsloth/Qwen3.5-2B")

    expected_model = "openai/unsloth/Qwen3.5-2B"
    expected_base = "http://localhost:8080/v1"
    assert resolved["model"] == expected_model
    assert resolved["api_base"] == expected_base
def test_local_params_reject_reasoning_effort_in_strict_mode():
    """Strict mode raises when a reasoning effort is requested for a local model."""
    expect_rejection = pytest.raises(UnsupportedEffortError, match="reasoning_effort")
    with expect_rejection:
        _resolve_llm_params(
            "ollama/llama3.1",
            reasoning_effort="high",
            strict=True,
        )
def test_local_params_drop_reasoning_effort_in_non_strict_mode():
    """Non-strict mode omits the requested effort from the params for a local model."""
    resolved = _resolve_llm_params("ollama/llama3.1", reasoning_effort="high", strict=False)

    assert resolved["model"] == "openai/llama3.1"
    # Neither the effort itself nor an extra_body payload may appear in the result.
    for absent_key in ("reasoning_effort", "extra_body"):
        assert absent_key not in resolved
def test_openai_compat_prefix_is_not_a_local_escape_hatch():
    """An ``openai-compat/`` model id is rejected as an unsupported local model id."""
    expect_rejection = pytest.raises(ValueError, match="Unsupported local model id")
    with expect_rejection:
        _resolve_llm_params("openai-compat/custom-model")
def test_empty_local_model_id_is_not_treated_as_hf_router():
    """A local provider prefix with no model name after it raises ``ValueError``."""
    expect_rejection = pytest.raises(ValueError, match="Unsupported local model id")
    with expect_rejection:
        _resolve_llm_params("ollama/")
def test_hf_router_token_prefers_inference_token(monkeypatch):
    """``INFERENCE_TOKEN`` wins over the session token and ``HF_TOKEN``, stripped of whitespace."""
    monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
    monkeypatch.setenv("HF_TOKEN", "hf-token")

    chosen = _resolve_hf_router_token("session-token")

    assert chosen == "inference-token"
def test_hf_router_token_prefers_session_over_hf_cache(monkeypatch):
    """Without ``INFERENCE_TOKEN``, the stripped session token wins over ``HF_TOKEN``."""
    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.setenv("HF_TOKEN", "hf-token")

    chosen = _resolve_hf_router_token(" session-token ")

    assert chosen == "session-token"
def test_hf_router_token_uses_hf_token_env_via_huggingface_hub(monkeypatch):
    """With no inference or session token, the stripped ``HF_TOKEN`` value is returned."""
    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.setenv("HF_TOKEN", " hf-token ")

    chosen = _resolve_hf_router_token(None)

    assert chosen == "hf-token"
def test_hf_router_token_uses_huggingface_hub_cache(monkeypatch):
    """With no env or session token, the value from ``huggingface_hub.get_token`` is used."""
    import huggingface_hub

    def cached_token():
        return "cached-token"

    for unset_name in ("INFERENCE_TOKEN", "HF_TOKEN"):
        monkeypatch.delenv(unset_name, raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", cached_token)

    chosen = _resolve_hf_router_token(None)

    assert chosen == "cached-token"
def test_hf_router_token_swallows_huggingface_hub_errors(monkeypatch):
    """A ``RuntimeError`` from ``huggingface_hub.get_token`` results in ``None``, not a raise."""
    import huggingface_hub

    for unset_name in ("INFERENCE_TOKEN", "HF_TOKEN"):
        monkeypatch.delenv(unset_name, raising=False)

    def broken_get_token():
        raise RuntimeError("cache unavailable")

    monkeypatch.setattr(huggingface_hub, "get_token", broken_get_token)

    chosen = _resolve_hf_router_token(None)

    assert chosen is None
def test_hf_router_params_set_bill_to_only_for_inference_token(monkeypatch):
    """With ``INFERENCE_TOKEN`` and ``HF_BILL_TO`` set, the params carry the bill-to header.

    NOTE(review): only the positive case is asserted here; the "only for
    inference token" half of the name is not exercised by this test.
    """
    monkeypatch.setenv("INFERENCE_TOKEN", "inference-token")
    monkeypatch.setenv("HF_BILL_TO", "test-org")

    resolved = _resolve_llm_params("moonshotai/Kimi-K2.6")

    expected_headers = {"X-HF-Bill-To": "test-org"}
    assert resolved["api_key"] == "inference-token"
    assert resolved["extra_headers"] == expected_headers
def test_hf_request_token_keeps_browser_user_precedence(monkeypatch):
    """The bearer token in the request header wins over the cookie and ``HF_TOKEN``."""
    monkeypatch.setenv("HF_TOKEN", "server-token")

    class Request:
        # Minimal stand-in exposing only the two attributes set here.
        headers = {"Authorization": "Bearer browser-token"}
        cookies = {"hf_access_token": "cookie-token"}

    incoming = Request()
    chosen = resolve_hf_request_token(incoming)

    assert chosen == "browser-token"
def test_hf_request_token_does_not_use_cached_login(monkeypatch):
    """A request with no credentials yields ``None`` even if ``get_token`` has a value."""
    import huggingface_hub

    def cached_token():
        return "cached-token"

    monkeypatch.delenv("HF_TOKEN", raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", cached_token)

    class Request:
        # Minimal stand-in with empty headers and cookies.
        headers = {}
        cookies = {}

    incoming = Request()
    chosen = resolve_hf_request_token(incoming)

    assert chosen is None