RayMelius Claude Sonnet 4.6 committed on
Commit
0b7d7ac
·
1 Parent(s): ff275e2

Fix HF inference URL routing for org/model format models

Browse files

The condition '"/" in m.split("/")[0]' was always False (split removes
the slash), so only RayMelius/ models used the direct inference API —
all other org/model names (Qwen/, meta-llama/, etc.) hit the router
and got 400 'not supported by any provider'.

Fix: use the direct inference API for any model name containing a slash.
Also: replace Qwen/Qwen2.5-7B-Instruct-1M with Qwen/Qwen2.5-7B-Instruct
in the HF model list (-1M variant not available on inference API).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. dashboard/dashboard.py +3 -3
dashboard/dashboard.py CHANGED
@@ -46,7 +46,7 @@ GROQ_MODELS = [
46
  ]
47
  HF_MODELS = [
48
  "RayMelius/stockex-analyst",
49
- "Qwen/Qwen2.5-7B-Instruct-1M",
50
  "meta-llama/Llama-3.1-8B-Instruct",
51
  "mistralai/Mistral-7B-Instruct-v0.3",
52
  ]
@@ -144,8 +144,8 @@ def _call_llm(prompt, force_provider=None, force_model=None):
144
  if not HF_TOKEN:
145
  return None, "HuggingFace not configured (HF_TOKEN not set)"
146
  m = model or HF_MODEL
147
- # Use direct inference API for custom models, router for known public models
148
- if m.startswith("RayMelius/") or "/" in m.split("/")[0]:
149
  url = f"https://api-inference.huggingface.co/models/{m}/v1/chat/completions"
150
  else:
151
  url = HF_URL
 
46
  ]
47
  HF_MODELS = [
48
  "RayMelius/stockex-analyst",
49
+ "Qwen/Qwen2.5-7B-Instruct",
50
  "meta-llama/Llama-3.1-8B-Instruct",
51
  "mistralai/Mistral-7B-Instruct-v0.3",
52
  ]
 
144
  if not HF_TOKEN:
145
  return None, "HuggingFace not configured (HF_TOKEN not set)"
146
  m = model or HF_MODEL
147
+ # Use direct inference API for any org/model format; router for bare model names
148
+ if "/" in m:
149
  url = f"https://api-inference.huggingface.co/models/{m}/v1/chat/completions"
150
  else:
151
  url = HF_URL