Fix HF provider: require token, hide from menu if absent, fix stuck provider
- HFInferenceClient.__init__ now raises ValueError if no token found
(checks HF_TOKEN, HUGGINGFACE_TOKEN, HF_API_TOKEN); prevents silently
creating a broken client that gets stuck as active provider
- /llm/providers: HF models only shown when token is available
- /llm/provider: ValueError from missing credentials returns 400 not 500
- Fix Gemini label: "1.5 Flash" (matches actual default model)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/api/routes.py +13 -5
- src/soci/engine/llm.py +9 -3
src/soci/api/routes.py
CHANGED
|
@@ -279,11 +279,17 @@ async def get_llm_providers():
|
|
| 279 |
if os.environ.get("GROQ_API_KEY"):
|
| 280 |
providers.append({"id": "groq", "label": "Groq Llama 8B", "icon": "β‘", "model": ""})
|
| 281 |
if os.environ.get("GEMINI_API_KEY"):
|
| 282 |
-
providers.append({"id": "gemini", "label": "Gemini
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
providers.append({"id": "ollama", "label": "Ollama (local)", "icon": "π¦", "model": ""})
|
| 288 |
return {"current": current, "current_model": current_model, "providers": providers}
|
| 289 |
|
|
@@ -318,6 +324,8 @@ async def set_llm_provider(req: SwitchProviderRequest):
|
|
| 318 |
try:
|
| 319 |
await switch_llm_provider(req.provider, model=req.model or None)
|
| 320 |
return {"ok": True, "provider": req.provider, "model": req.model}
|
|
|
|
|
|
|
| 321 |
except Exception as e:
|
| 322 |
raise HTTPException(status_code=500, detail=str(e))
|
| 323 |
|
|
|
|
| 279 |
if os.environ.get("GROQ_API_KEY"):
|
| 280 |
providers.append({"id": "groq", "label": "Groq Llama 8B", "icon": "β‘", "model": ""})
|
| 281 |
if os.environ.get("GEMINI_API_KEY"):
|
| 282 |
+
providers.append({"id": "gemini", "label": "Gemini 1.5 Flash", "icon": "β¦", "model": ""})
|
| 283 |
+
has_hf = bool(
|
| 284 |
+
os.environ.get("HF_TOKEN")
|
| 285 |
+
or os.environ.get("HUGGINGFACE_TOKEN")
|
| 286 |
+
or os.environ.get("HF_API_TOKEN")
|
| 287 |
+
)
|
| 288 |
+
if has_hf:
|
| 289 |
+
providers.append({"id": "hf", "model": "HuggingFaceH4/zephyr-7b-beta", "label": "HF Zephyr 7B", "icon": "π€"})
|
| 290 |
+
providers.append({"id": "hf", "model": "Qwen/Qwen2.5-7B-Instruct", "label": "HF Qwen 2.5 7B", "icon": "π€"})
|
| 291 |
+
providers.append({"id": "hf", "model": "meta-llama/Llama-3.2-3B-Instruct", "label": "HF Llama 3.2 3B", "icon": "π€"})
|
| 292 |
+
providers.append({"id": "hf", "model": "mistralai/Mistral-7B-Instruct-v0.3", "label": "HF Mistral 7B", "icon": "π€"})
|
| 293 |
providers.append({"id": "ollama", "label": "Ollama (local)", "icon": "π¦", "model": ""})
|
| 294 |
return {"current": current, "current_model": current_model, "providers": providers}
|
| 295 |
|
|
|
|
| 324 |
try:
|
| 325 |
await switch_llm_provider(req.provider, model=req.model or None)
|
| 326 |
return {"ok": True, "provider": req.provider, "model": req.model}
|
| 327 |
+
except ValueError as e:
|
| 328 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 329 |
except Exception as e:
|
| 330 |
raise HTTPException(status_code=500, detail=str(e))
|
| 331 |
|
src/soci/engine/llm.py
CHANGED
|
@@ -951,10 +951,16 @@ class HFInferenceClient:
|
|
| 951 |
default_model: str = MODEL_HF_SMOL,
|
| 952 |
max_retries: int = 3,
|
| 953 |
) -> None:
|
| 954 |
-
self.api_key =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 955 |
if not self.api_key:
|
| 956 |
-
|
| 957 |
-
"
|
|
|
|
| 958 |
)
|
| 959 |
self.default_model = default_model
|
| 960 |
self.max_retries = max_retries
|
|
|
|
| 951 |
default_model: str = MODEL_HF_SMOL,
|
| 952 |
max_retries: int = 3,
|
| 953 |
) -> None:
|
| 954 |
+
self.api_key = (
|
| 955 |
+
api_key
|
| 956 |
+
or os.environ.get("HF_TOKEN", "")
|
| 957 |
+
or os.environ.get("HUGGINGFACE_TOKEN", "")
|
| 958 |
+
or os.environ.get("HF_API_TOKEN", "")
|
| 959 |
+
)
|
| 960 |
if not self.api_key:
|
| 961 |
+
raise ValueError(
|
| 962 |
+
"No HuggingFace token found. Set HF_TOKEN (or HUGGINGFACE_TOKEN) "
|
| 963 |
+
"to a token with 'Inference Providers (Write)' permission."
|
| 964 |
)
|
| 965 |
self.default_model = default_model
|
| 966 |
self.max_retries = max_retries
|