abidlabs HF Staff committed on
Commit
092df19
·
1 Parent(s): 5038080

Fail fast on Router-ineligible API tokens

Browse files
Files changed (1) hide show
  1. backend/routes/v1_responses.py +106 -0
backend/routes/v1_responses.py CHANGED
@@ -14,11 +14,13 @@ pauses with ``status="incomplete"`` until /approvals is called.
14
  """
15
 
16
  import asyncio
 
17
  import logging
18
  import time
19
  from datetime import UTC, datetime
20
  from typing import Any
21
 
 
22
  from dependencies import get_api_user
23
  from fastapi import APIRouter, Depends, Request
24
  from fastapi.responses import JSONResponse, StreamingResponse
@@ -49,6 +51,7 @@ from session_manager import (
49
  from usage import build_usage_response
50
 
51
  from agent.core.hf_tokens import resolve_hf_request_token
 
52
 
53
  logger = logging.getLogger(__name__)
54
 
@@ -56,7 +59,10 @@ router = APIRouter(prefix="/v1", tags=["v1"])
56
 
57
  _SSE_KEEPALIVE_SECONDS = 15
58
  _COLLECTION_FETCH_TIMEOUT_SECONDS = 10.0
 
 
59
  _background_v1_tasks: set[asyncio.Task] = set()
 
60
 
61
 
62
  def _spawn(coro) -> None:
@@ -73,6 +79,105 @@ def _now() -> datetime:
73
  return datetime.now(UTC)
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # ---------------------------------------------------------------------------
77
  # Shared helpers
78
  # ---------------------------------------------------------------------------
@@ -487,6 +592,7 @@ async def _resolve_session_for_create(
487
  except Exception:
488
  raise V1APIError(400, f"Unknown model: {body.model}", code="model_not_found")
489
  model = body.model or _default_model_for_user(user)
 
490
  try:
491
  session_id = await session_manager.create_session(
492
  user_id=user["user_id"],
 
14
  """
15
 
16
  import asyncio
17
+ import hashlib
18
  import logging
19
  import time
20
  from datetime import UTC, datetime
21
  from typing import Any
22
 
23
+ import httpx
24
  from dependencies import get_api_user
25
  from fastapi import APIRouter, Depends, Request
26
  from fastapi.responses import JSONResponse, StreamingResponse
 
51
  from usage import build_usage_response
52
 
53
  from agent.core.hf_tokens import resolve_hf_request_token
54
+ from agent.core.model_ids import HF_ROUTER_BASE_URL, strip_huggingface_model_prefix
55
 
56
  logger = logging.getLogger(__name__)
57
 
 
59
 
60
  _SSE_KEEPALIVE_SECONDS = 15
61
  _COLLECTION_FETCH_TIMEOUT_SECONDS = 10.0
62
+ _ROUTER_PREFLIGHT_TIMEOUT_SECONDS = 20.0
63
+ _ROUTER_PREFLIGHT_CACHE_TTL_SECONDS = 300.0
64
  _background_v1_tasks: set[asyncio.Task] = set()
65
+ _router_preflight_cache: dict[tuple[str, str], float] = {}
66
 
67
 
68
  def _spawn(coro) -> None:
 
79
  return datetime.now(UTC)
80
 
81
 
82
+ def _token_fingerprint(token: str) -> str:
83
+ return hashlib.sha256(token.encode("utf-8")).hexdigest()[:16]
84
+
85
+
86
def _router_preflight_cache_key(token: str, model: str) -> tuple[str, str]:
    """Build the preflight cache key for a ``(token, model)`` pair.

    The key pairs the token fingerprint with the model id after
    ``strip_huggingface_model_prefix`` has been applied; when that helper
    returns a falsy value the model id is used unchanged.
    """
    stripped = strip_huggingface_model_prefix(model)
    model_part = stripped if stripped else model
    fingerprint = _token_fingerprint(token)
    return fingerprint, model_part
89
+
90
+
91
def _router_error_detail(response: httpx.Response) -> tuple[str, str]:
    """Extract ``(display_message, match_text)`` from a Router error response.

    ``display_message`` is what gets surfaced to the API client;
    ``match_text`` is the full stringified error detail used for keyword
    matching. The two differ only when the body nests the message, e.g.
    ``{"error": {"message": "..."}}``.
    """
    fallback = "HF Router rejected the token."
    try:
        error_body = response.json()
    except ValueError:
        error_body = None

    # A JSON body is not guaranteed to be an object (it may be a list, a
    # string, ...); only dicts can be queried for "error"/"message".
    detail = None
    if isinstance(error_body, dict):
        detail = error_body.get("error") or error_body.get("message")

    if not detail:
        text = str(response.text or fallback)
        return text, text

    match_text = str(detail)
    if isinstance(detail, dict):
        nested = detail.get("message")
        if isinstance(nested, str) and nested:
            # Show the human-readable message instead of a stringified dict.
            return nested, match_text
    return match_text, match_text


async def _preflight_hf_router_access(model: str, hf_token: str | None) -> None:
    """Fail fast when a /v1 Bearer token cannot call HF Router.

    ``whoami-v2`` accepts plain user access tokens, but tokens without the
    Inference Providers permission still fail later inside the agent loop with
    a generic auth message. A one-token probe gives API clients an immediate,
    actionable 403 before we create a response/session.

    Successful probes are cached per (token fingerprint, model) for
    ``_ROUTER_PREFLIGHT_CACHE_TTL_SECONDS``. Transport errors and 5xx
    responses are treated as inconclusive: they are logged and the request
    is allowed to proceed.

    Args:
        model: Model id the request will use.
        hf_token: Bearer token supplied by the caller, if any.

    Raises:
        V1APIError: 401 when no token is supplied; 403 when the Router
            reports a permission/authentication problem; otherwise the
            Router's own 4xx status for any other client error.
    """
    if not hf_token:
        raise V1APIError(
            401,
            "Missing Hugging Face token. Pass 'Authorization: Bearer hf_...'.",
            code="invalid_api_key",
            error_type="authentication_error",
        )

    cache_key = _router_preflight_cache_key(hf_token, model)
    now = time.monotonic()
    cached_until = _router_preflight_cache.get(cache_key)
    if cached_until is not None:
        if cached_until > now:
            return
        _router_preflight_cache.pop(cache_key, None)

    normalized_model = cache_key[1]
    # NOTE(review): the ``openai/`` prefix is sent verbatim to the Router;
    # confirm the Router accepts prefixed model ids on direct HTTP calls.
    payload = {
        "model": f"openai/{normalized_model}",
        "messages": [{"role": "user", "content": "Reply with OK."}],
        "max_tokens": 1,
    }
    try:
        async with httpx.AsyncClient(
            timeout=_ROUTER_PREFLIGHT_TIMEOUT_SECONDS
        ) as client:
            response = await client.post(
                f"{HF_ROUTER_BASE_URL.rstrip('/')}/chat/completions",
                headers={"Authorization": f"Bearer {hf_token}"},
                json=payload,
            )
    except httpx.HTTPError as e:
        # Best effort: a flaky probe must not block the real request.
        logger.warning("HF Router preflight skipped for %s: %s", normalized_model, e)
        return

    status = response.status_code

    if status < 400:
        stored_at = time.monotonic()
        # Evict expired entries on insert so keys for tokens that are never
        # seen again do not accumulate for the lifetime of the process.
        expired_keys = [
            key
            for key, until in _router_preflight_cache.items()
            if until <= stored_at
        ]
        for key in expired_keys:
            _router_preflight_cache.pop(key, None)
        _router_preflight_cache[cache_key] = (
            stored_at + _ROUTER_PREFLIGHT_CACHE_TTL_SECONDS
        )
        return

    if status >= 500:
        # Server-side failure says nothing about the token; let the request
        # proceed, but leave a trace instead of falling through silently.
        logger.warning(
            "HF Router preflight inconclusive for %s: HTTP %s",
            normalized_model,
            status,
        )
        return

    raw_message, match_text = _router_error_detail(response)
    err_lower = match_text.lower()

    if status in {401, 403}:
        if (
            "insufficient permissions" in err_lower
            or "authentication" in err_lower
            or "unauthorized" in err_lower
        ):
            raise V1APIError(
                403,
                (
                    "Your Hugging Face token is valid, but it cannot call "
                    "Inference Providers through HF Router. Create or use a user "
                    "access token with Inference Providers permission, then retry."
                ),
                code="inference_provider_permission_required",
                error_type="authentication_error",
            )
        raise V1APIError(
            status,
            raw_message,
            code="router_auth_failed",
            error_type="authentication_error",
        )

    # Only other 4xx statuses reach this point.
    raise V1APIError(
        status,
        raw_message,
        code="router_preflight_failed",
    )
179
+
180
+
181
  # ---------------------------------------------------------------------------
182
  # Shared helpers
183
  # ---------------------------------------------------------------------------
 
592
  except Exception:
593
  raise V1APIError(400, f"Unknown model: {body.model}", code="model_not_found")
594
  model = body.model or _default_model_for_user(user)
595
+ await _preflight_hf_router_access(model, hf_token)
596
  try:
597
  session_id = await session_manager.create_session(
598
  user_id=user["user_id"],