Executor-Tyrant-Framework Claude Sonnet 4.6 committed on
Commit
0bce5fd
·
1 Parent(s): 126d8cd

Fix HF fallback to catch all 4xx errors; add model_id override to _call_openrouter

Browse files

- HF fallback now triggers on any status_code < 500 (previously only 402); 5xx responses are still retried
- 400 "model not valid/not on serverless" was crashing instead of falling back
- _call_openrouter() accepts optional model_id param so HF fallback passes
CODEMINE_MODEL_ID (OpenRouter format) instead of HF_MODEL_ID (hub format)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. model_client.py +13 -8
model_client.py CHANGED
@@ -1,9 +1,9 @@
1
  # ---- Changelog ----
2
  # [2026-04-16] Claude (Sonnet 4.6) — Add HuggingFace Inference API as primary provider
3
- # What: "huggingface" provider added; auto-fallback to OpenRouter on 402 (credits exhausted)
4
  # Why: Leverage HF more; OpenRouter stays as backup. Explicit user request.
5
- # How: Same OpenAI-compat path as OpenRouter. _call_huggingface() catches 402 and retries
6
- # via _call_openrouter(). HF_MODEL_ID env var for HF model name (format differs from OR).
7
  # [2026-03-29] Switchblade (TQB / Block E) — Anthropic model client
8
  # What: Claude API client with retry logic, replacing HuggingFace InferenceClient
9
  # Why: PRD Block E — swap from Kimi K2.5 (HF) to Claude (Anthropic SDK)
@@ -185,16 +185,19 @@ def _convert_messages_to_openai(messages: list) -> list:
185
  return converted
186
 
187
 
188
- def _call_openrouter(client, system_prompt, messages, tools, max_retries, max_tokens):
189
  """OpenRouter call via OpenAI-compatible SDK.
190
 
191
  OpenRouter supports tool_use for Claude and other models via the
192
  standard OpenAI tools format. We convert Anthropic-style tool defs
193
  to OpenAI format and wrap the response to match Anthropic's structure.
 
 
194
  """
195
  from openai import APITimeoutError, APIConnectionError, APIStatusError
196
 
197
- model_id = get_model_id()
 
198
  last_error = None
199
 
200
  # Convert Anthropic tool format to OpenAI format
@@ -262,15 +265,17 @@ def _call_huggingface(client, system_prompt, messages, tools, max_retries, max_t
262
  return _wrap_openai_response(response)
263
 
264
  except APIStatusError as e:
265
- if e.status_code == 402:
266
  logger.warning(
267
- "HF Inference API 402 (credits/quota). Falling back to OpenRouter."
 
268
  )
269
  or_client = OpenAI(
270
  base_url="https://openrouter.ai/api/v1",
271
  api_key=os.getenv("OPENROUTER_API_KEY"),
272
  )
273
- return _call_openrouter(or_client, system_prompt, messages, tools, max_retries, max_tokens)
 
274
  elif e.status_code >= 500:
275
  last_error = e
276
  logger.warning("HF %d error on attempt %d/%d: %s", e.status_code, attempt + 1, max_retries, e)
 
1
  # ---- Changelog ----
2
  # [2026-04-16] Claude (Sonnet 4.6) — Add HuggingFace Inference API as primary provider
3
+ # What: "huggingface" provider added; auto-fallback to OpenRouter on any HF status < 500 (e.g. 400/402/404)
4
  # Why: Leverage HF more; OpenRouter stays as backup. Explicit user request.
5
+ # How: Same OpenAI-compat path as OpenRouter. _call_huggingface() catches any status < 500 and
6
+ # retries via _call_openrouter() with CODEMINE_MODEL_ID; HF_MODEL_ID env var (format differs from OpenRouter).
7
  # [2026-03-29] Switchblade (TQB / Block E) — Anthropic model client
8
  # What: Claude API client with retry logic, replacing HuggingFace InferenceClient
9
  # Why: PRD Block E — swap from Kimi K2.5 (HF) to Claude (Anthropic SDK)
 
185
  return converted
186
 
187
 
188
+ def _call_openrouter(client, system_prompt, messages, tools, max_retries, max_tokens, model_id=None):
189
  """OpenRouter call via OpenAI-compatible SDK.
190
 
191
  OpenRouter supports tool_use for Claude and other models via the
192
  standard OpenAI tools format. We convert Anthropic-style tool defs
193
  to OpenAI format and wrap the response to match Anthropic's structure.
194
+
195
+ model_id: override for fallback callers (e.g. HF fallback needs OR model format).
196
  """
197
  from openai import APITimeoutError, APIConnectionError, APIStatusError
198
 
199
+ if model_id is None:
200
+ model_id = get_model_id()
201
  last_error = None
202
 
203
  # Convert Anthropic tool format to OpenAI format
 
265
  return _wrap_openai_response(response)
266
 
267
  except APIStatusError as e:
268
+ if e.status_code < 500:
269
  logger.warning(
270
+ "HF Inference API %d (model unavailable/not on serverless/credits). Falling back to OpenRouter.",
271
+ e.status_code,
272
  )
273
  or_client = OpenAI(
274
  base_url="https://openrouter.ai/api/v1",
275
  api_key=os.getenv("OPENROUTER_API_KEY"),
276
  )
277
+ or_model = os.getenv("CODEMINE_MODEL_ID", "qwen/qwen3-coder")
278
+ return _call_openrouter(or_client, system_prompt, messages, tools, max_retries, max_tokens, model_id=or_model)
279
  elif e.status_code >= 500:
280
  last_error = e
281
  logger.warning("HF %d error on attempt %d/%d: %s", e.status_code, attempt + 1, max_retries, e)