Fix HF inference URL routing for org/model format models
Browse files
The condition `"/" in m.split("/")[0]` was always False (split removes
the slash), so only RayMelius/ models used the direct inference API —
all other org/model names (Qwen/, meta-llama/, etc.) hit the router
and got a 400 'not supported by any provider' error.
Fix: use the direct inference API for any model containing a slash.
Also: replace Qwen/Qwen2.5-7B-Instruct-1M with Qwen/Qwen2.5-7B-Instruct
in the HF model list (-1M variant not available on inference API).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- dashboard/dashboard.py +3 -3
dashboard/dashboard.py
CHANGED
|
@@ -46,7 +46,7 @@ GROQ_MODELS = [
|
|
| 46 |
]
|
| 47 |
HF_MODELS = [
|
| 48 |
"RayMelius/stockex-analyst",
|
| 49 |
-
"Qwen/Qwen2.5-7B-Instruct-1M",
|
| 50 |
"meta-llama/Llama-3.1-8B-Instruct",
|
| 51 |
"mistralai/Mistral-7B-Instruct-v0.3",
|
| 52 |
]
|
|
@@ -144,8 +144,8 @@ def _call_llm(prompt, force_provider=None, force_model=None):
|
|
| 144 |
if not HF_TOKEN:
|
| 145 |
return None, "HuggingFace not configured (HF_TOKEN not set)"
|
| 146 |
m = model or HF_MODEL
|
| 147 |
-
# Use direct inference API for
|
| 148 |
-
if
|
| 149 |
url = f"https://api-inference.huggingface.co/models/{m}/v1/chat/completions"
|
| 150 |
else:
|
| 151 |
url = HF_URL
|
|
|
|
| 46 |
]
|
| 47 |
HF_MODELS = [
|
| 48 |
"RayMelius/stockex-analyst",
|
| 49 |
+
"Qwen/Qwen2.5-7B-Instruct",
|
| 50 |
"meta-llama/Llama-3.1-8B-Instruct",
|
| 51 |
"mistralai/Mistral-7B-Instruct-v0.3",
|
| 52 |
]
|
|
|
|
| 144 |
if not HF_TOKEN:
|
| 145 |
return None, "HuggingFace not configured (HF_TOKEN not set)"
|
| 146 |
m = model or HF_MODEL
|
| 147 |
+
# Use direct inference API for any org/model format; router for bare model names
|
| 148 |
+
if "/" in m:
|
| 149 |
url = f"https://api-inference.huggingface.co/models/{m}/v1/chat/completions"
|
| 150 |
else:
|
| 151 |
url = HF_URL
|