Commit ·
0bce5fd
1
Parent(s): 126d8cd
Fix HF fallback to catch all 4xx errors; add model_id override to _call_openrouter
Browse files
- HF fallback now catches any status_code < 500 (not just 402/404/503)
- 400 "model not valid/not on serverless" was crashing instead of falling back
- _call_openrouter() accepts optional model_id param so HF fallback passes
CODEMINE_MODEL_ID (OpenRouter format) instead of HF_MODEL_ID (hub format)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- model_client.py +13 -8
model_client.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
# ---- Changelog ----
|
| 2 |
# [2026-04-16] Claude (Sonnet 4.6) — Add HuggingFace Inference API as primary provider
|
| 3 |
-
# What: "huggingface" provider added; auto-fallback to OpenRouter on 402
|
| 4 |
# Why: Leverage HF more; OpenRouter stays as backup. Explicit user request.
|
| 5 |
-
# How: Same OpenAI-compat path as OpenRouter. _call_huggingface() catches 402 and
|
| 6 |
-
# via _call_openrouter(). HF_MODEL_ID env var
|
| 7 |
# [2026-03-29] Switchblade (TQB / Block E) — Anthropic model client
|
| 8 |
# What: Claude API client with retry logic, replacing HuggingFace InferenceClient
|
| 9 |
# Why: PRD Block E — swap from Kimi K2.5 (HF) to Claude (Anthropic SDK)
|
|
@@ -185,16 +185,19 @@ def _convert_messages_to_openai(messages: list) -> list:
|
|
| 185 |
return converted
|
| 186 |
|
| 187 |
|
| 188 |
-
def _call_openrouter(client, system_prompt, messages, tools, max_retries, max_tokens):
|
| 189 |
"""OpenRouter call via OpenAI-compatible SDK.
|
| 190 |
|
| 191 |
OpenRouter supports tool_use for Claude and other models via the
|
| 192 |
standard OpenAI tools format. We convert Anthropic-style tool defs
|
| 193 |
to OpenAI format and wrap the response to match Anthropic's structure.
|
|
|
|
|
|
|
| 194 |
"""
|
| 195 |
from openai import APITimeoutError, APIConnectionError, APIStatusError
|
| 196 |
|
| 197 |
-
model_id
|
|
|
|
| 198 |
last_error = None
|
| 199 |
|
| 200 |
# Convert Anthropic tool format to OpenAI format
|
|
@@ -262,15 +265,17 @@ def _call_huggingface(client, system_prompt, messages, tools, max_retries, max_t
|
|
| 262 |
return _wrap_openai_response(response)
|
| 263 |
|
| 264 |
except APIStatusError as e:
|
| 265 |
-
if e.status_code
|
| 266 |
logger.warning(
|
| 267 |
-
"HF Inference API
|
|
|
|
| 268 |
)
|
| 269 |
or_client = OpenAI(
|
| 270 |
base_url="https://openrouter.ai/api/v1",
|
| 271 |
api_key=os.getenv("OPENROUTER_API_KEY"),
|
| 272 |
)
|
| 273 |
-
|
|
|
|
| 274 |
elif e.status_code >= 500:
|
| 275 |
last_error = e
|
| 276 |
logger.warning("HF %d error on attempt %d/%d: %s", e.status_code, attempt + 1, max_retries, e)
|
|
|
|
| 1 |
# ---- Changelog ----
|
| 2 |
# [2026-04-16] Claude (Sonnet 4.6) — Add HuggingFace Inference API as primary provider
|
| 3 |
+
# What: "huggingface" provider added; auto-fallback to OpenRouter on any HF status < 500 (e.g. 400/402/404)
|
| 4 |
# Why: Leverage HF more; OpenRouter stays as backup. Explicit user request.
|
| 5 |
+
# How: Same OpenAI-compat path as OpenRouter. _call_huggingface() catches any status < 500 and
|
| 6 |
+
# retries via _call_openrouter() using CODEMINE_MODEL_ID. HF_MODEL_ID env var uses hub format (differs from OpenRouter).
|
| 7 |
# [2026-03-29] Switchblade (TQB / Block E) — Anthropic model client
|
| 8 |
# What: Claude API client with retry logic, replacing HuggingFace InferenceClient
|
| 9 |
# Why: PRD Block E — swap from Kimi K2.5 (HF) to Claude (Anthropic SDK)
|
|
|
|
| 185 |
return converted
|
| 186 |
|
| 187 |
|
| 188 |
+
def _call_openrouter(client, system_prompt, messages, tools, max_retries, max_tokens, model_id=None):
|
| 189 |
"""OpenRouter call via OpenAI-compatible SDK.
|
| 190 |
|
| 191 |
OpenRouter supports tool_use for Claude and other models via the
|
| 192 |
standard OpenAI tools format. We convert Anthropic-style tool defs
|
| 193 |
to OpenAI format and wrap the response to match Anthropic's structure.
|
| 194 |
+
|
| 195 |
+
model_id: override for fallback callers (e.g. HF fallback needs OR model format).
|
| 196 |
"""
|
| 197 |
from openai import APITimeoutError, APIConnectionError, APIStatusError
|
| 198 |
|
| 199 |
+
if model_id is None:
|
| 200 |
+
model_id = get_model_id()
|
| 201 |
last_error = None
|
| 202 |
|
| 203 |
# Convert Anthropic tool format to OpenAI format
|
|
|
|
| 265 |
return _wrap_openai_response(response)
|
| 266 |
|
| 267 |
except APIStatusError as e:
|
| 268 |
+
if e.status_code < 500:
|
| 269 |
logger.warning(
|
| 270 |
+
"HF Inference API %d (model unavailable/not on serverless/credits). Falling back to OpenRouter.",
|
| 271 |
+
e.status_code,
|
| 272 |
)
|
| 273 |
or_client = OpenAI(
|
| 274 |
base_url="https://openrouter.ai/api/v1",
|
| 275 |
api_key=os.getenv("OPENROUTER_API_KEY"),
|
| 276 |
)
|
| 277 |
+
or_model = os.getenv("CODEMINE_MODEL_ID", "qwen/qwen3-coder")
|
| 278 |
+
return _call_openrouter(or_client, system_prompt, messages, tools, max_retries, max_tokens, model_id=or_model)
|
| 279 |
elif e.status_code >= 500:
|
| 280 |
last_error = e
|
| 281 |
logger.warning("HF %d error on attempt %d/%d: %s", e.status_code, attempt + 1, max_retries, e)
|