File size: 6,632 Bytes
4668dbd 7a76ad1 0545e40 7a76ad1 4668dbd 7a76ad1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | import pytest
from agent.core.hf_tokens import resolve_hf_request_token
from agent.core.llm_params import (
UnsupportedEffortError,
_resolve_hf_router_token,
_resolve_llm_params,
)
def test_openai_xhigh_effort_is_forwarded():
    """An OpenAI model resolved with ``xhigh`` effort keeps that effort."""
    model_id = "openai/gpt-5.5"
    resolved = _resolve_llm_params(model_id, reasoning_effort="xhigh", strict=True)

    # Both the model id and the requested effort must come back untouched.
    assert resolved["model"] == "openai/gpt-5.5"
    assert resolved["reasoning_effort"] == "xhigh"
def test_openai_max_effort_is_still_rejected():
    """Strict resolution of an OpenAI model with ``max`` effort must raise.

    Uses ``pytest.raises`` (as the other rejection tests in this module do)
    instead of a hand-written try/except/else: the context manager already
    fails the test when no exception is raised.
    """
    with pytest.raises(UnsupportedEffortError) as excinfo:
        _resolve_llm_params(
            "openai/gpt-5.4",
            reasoning_effort="max",
            strict=True,
        )

    # Plain substring check rather than ``match=``: ``match`` is a regex and
    # this message is meant to be compared literally.
    assert "OpenAI doesn't accept effort='max'" in str(excinfo.value)
def test_resolve_ollama_params_adds_v1_and_uses_default_key(monkeypatch):
    """Ollama ids resolve to an OpenAI-style model with ``/v1`` and a stub key.

    With no API key configured, the resolved params must contain exactly the
    rewritten model id, the base URL with ``/v1`` appended, and the
    placeholder key.
    """
    monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
    # The shared LOCAL_LLM_* variables are used as a fallback for local
    # providers (exercised by the shared-fallback test in this module), so an
    # ambient value on the developer/CI machine would replace the default
    # key. Clear them to keep this test hermetic.
    monkeypatch.delenv("LOCAL_LLM_API_KEY", raising=False)
    monkeypatch.delenv("LOCAL_LLM_BASE_URL", raising=False)
    monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434")

    params = _resolve_llm_params("ollama/llama3.1:8b")

    assert params == {
        "model": "openai/llama3.1:8b",
        "api_base": "http://localhost:11434/v1",
        "api_key": "sk-local-no-key-required",
    }
def test_resolve_vllm_params_keeps_existing_v1_and_trims_slash(monkeypatch):
    """A vLLM base URL already ending in ``/v1/`` is kept, minus the slash.

    Also checks that the provider prefix is rewritten to ``openai/`` and that
    the placeholder key is used when no API key is configured.
    """
    monkeypatch.delenv("VLLM_API_KEY", raising=False)
    # LOCAL_LLM_API_KEY is the shared fallback when VLLM_API_KEY is unset
    # (see the shared-fallback test in this module); an ambient value would
    # break the default-key assertion below, so clear the shared variables.
    monkeypatch.delenv("LOCAL_LLM_API_KEY", raising=False)
    monkeypatch.delenv("LOCAL_LLM_BASE_URL", raising=False)
    monkeypatch.setenv("VLLM_BASE_URL", "http://localhost:8000/v1/")

    params = _resolve_llm_params("vllm/meta-llama/Llama-3.1-8B-Instruct")

    assert params["model"] == "openai/meta-llama/Llama-3.1-8B-Instruct"
    assert params["api_base"] == "http://localhost:8000/v1"
    assert params["api_key"] == "sk-local-no-key-required"
def test_resolve_lm_studio_params_uses_api_key_override(monkeypatch):
    """LM Studio specific variables win over the shared LOCAL_LLM_* ones."""
    environment = {
        "LMSTUDIO_BASE_URL": "http://127.0.0.1:1234",
        "LMSTUDIO_API_KEY": "local-secret",
        "LOCAL_LLM_BASE_URL": "http://localhost:9999",
        "LOCAL_LLM_API_KEY": "shared-secret",
    }
    for variable, value in environment.items():
        monkeypatch.setenv(variable, value)

    resolved = _resolve_llm_params("lm_studio/google/gemma-3-4b")

    assert resolved["model"] == "openai/google/gemma-3-4b"
    # The provider-specific URL and key are used, not the shared ones.
    assert resolved["api_base"] == "http://127.0.0.1:1234/v1"
    assert resolved["api_key"] == "local-secret"
def test_resolve_local_params_uses_shared_fallback_env(monkeypatch):
    """Without VLLM_* variables, the shared LOCAL_LLM_* values are used."""
    # Remove the provider-specific settings so only the shared ones remain.
    for variable in ("VLLM_BASE_URL", "VLLM_API_KEY"):
        monkeypatch.delenv(variable, raising=False)
    monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://localhost:9000/v1/")
    monkeypatch.setenv("LOCAL_LLM_API_KEY", "shared-local-secret")

    resolved = _resolve_llm_params("vllm/custom-model")

    assert resolved["model"] == "openai/custom-model"
    assert resolved["api_base"] == "http://localhost:9000/v1"
    assert resolved["api_key"] == "shared-local-secret"
def test_resolve_llamacpp_params_strips_provider_prefix(monkeypatch):
    """The ``llamacpp/`` prefix is swapped for ``openai/`` and ``/v1`` added."""
    monkeypatch.delenv("LLAMACPP_API_KEY", raising=False)
    monkeypatch.setenv("LLAMACPP_BASE_URL", "http://localhost:8080")

    resolved = _resolve_llm_params("llamacpp/unsloth/Qwen3.5-2B")

    assert resolved["model"] == "openai/unsloth/Qwen3.5-2B"
    assert resolved["api_base"] == "http://localhost:8080/v1"
def test_local_params_reject_reasoning_effort_in_strict_mode():
    """Strict mode raises when a local model is given a reasoning effort."""
    expected_failure = pytest.raises(UnsupportedEffortError, match="reasoning_effort")
    with expected_failure:
        _resolve_llm_params(
            "ollama/llama3.1",
            reasoning_effort="high",
            strict=True,
        )
def test_local_params_drop_reasoning_effort_in_non_strict_mode():
    """Non-strict mode omits the effort settings for a local model."""
    resolved = _resolve_llm_params("ollama/llama3.1", reasoning_effort="high", strict=False)

    assert resolved["model"] == "openai/llama3.1"
    # Neither effort-related key may appear in the resolved params.
    for dropped_key in ("reasoning_effort", "extra_body"):
        assert dropped_key not in resolved
def test_openai_compat_prefix_is_not_a_local_escape_hatch():
    """An ``openai-compat/`` model id is rejected with a ValueError."""
    model_id = "openai-compat/custom-model"
    with pytest.raises(ValueError, match="Unsupported local model id"):
        _resolve_llm_params(model_id)
def test_empty_local_model_id_is_not_treated_as_hf_router():
    """A local provider prefix with no model name is rejected."""
    model_id = "ollama/"
    with pytest.raises(ValueError, match="Unsupported local model id"):
        _resolve_llm_params(model_id)
def test_hf_router_token_prefers_inference_token(monkeypatch):
    """INFERENCE_TOKEN beats both the session token and HF_TOKEN."""
    monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
    monkeypatch.setenv("HF_TOKEN", "hf-token")

    chosen = _resolve_hf_router_token("session-token")

    # The surrounding whitespace from the environment value is stripped.
    assert chosen == "inference-token"
def test_hf_router_token_prefers_session_over_hf_cache(monkeypatch):
    """Without INFERENCE_TOKEN, the session token wins over HF_TOKEN."""
    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.setenv("HF_TOKEN", "hf-token")

    chosen = _resolve_hf_router_token(" session-token ")

    # The session token is returned with its padding removed.
    assert chosen == "session-token"
def test_hf_router_token_uses_hf_token_env_via_huggingface_hub(monkeypatch):
    """With no inference or session token, HF_TOKEN is returned stripped."""
    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.setenv("HF_TOKEN", " hf-token ")

    chosen = _resolve_hf_router_token(None)

    assert chosen == "hf-token"
def test_hf_router_token_uses_huggingface_hub_cache(monkeypatch):
    """With no token in the environment, ``huggingface_hub.get_token`` is used."""
    import huggingface_hub

    def cached_token():
        return "cached-token"

    for variable in ("INFERENCE_TOKEN", "HF_TOKEN"):
        monkeypatch.delenv(variable, raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", cached_token)

    chosen = _resolve_hf_router_token(None)

    assert chosen == "cached-token"
def test_hf_router_token_swallows_huggingface_hub_errors(monkeypatch):
    """A failing ``huggingface_hub.get_token`` yields ``None``, not an error."""
    import huggingface_hub

    def broken_get_token():
        raise RuntimeError("cache unavailable")

    for variable in ("INFERENCE_TOKEN", "HF_TOKEN"):
        monkeypatch.delenv(variable, raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", broken_get_token)

    chosen = _resolve_hf_router_token(None)

    assert chosen is None
def test_hf_router_params_set_bill_to_only_for_inference_token(monkeypatch):
    """INFERENCE_TOKEN plus HF_BILL_TO produce the key and billing header."""
    for variable, value in (
        ("INFERENCE_TOKEN", "inference-token"),
        ("HF_BILL_TO", "test-org"),
    ):
        monkeypatch.setenv(variable, value)

    resolved = _resolve_llm_params("moonshotai/Kimi-K2.6")

    assert resolved["api_key"] == "inference-token"
    assert resolved["extra_headers"] == {"X-HF-Bill-To": "test-org"}
def test_hf_request_token_keeps_browser_user_precedence(monkeypatch):
    """The Authorization bearer token wins over the cookie and HF_TOKEN."""

    class Request:
        # Minimal stand-in exposing only ``headers`` and ``cookies``.
        headers = {"Authorization": "Bearer browser-token"}
        cookies = {"hf_access_token": "cookie-token"}

    monkeypatch.setenv("HF_TOKEN", "server-token")
    incoming = Request()

    chosen = resolve_hf_request_token(incoming)

    assert chosen == "browser-token"
def test_hf_request_token_does_not_use_cached_login(monkeypatch):
    """A request with no credentials resolves to ``None``.

    ``huggingface_hub.get_token`` is patched to return a token so the test
    shows that value is not picked up for request resolution.
    """
    import huggingface_hub

    class Request:
        # Minimal stand-in with no auth header and no cookies.
        headers = {}
        cookies = {}

    def cached_token():
        return "cached-token"

    monkeypatch.delenv("HF_TOKEN", raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", cached_token)
    incoming = Request()

    chosen = resolve_hf_request_token(incoming)

    assert chosen is None
|