"""Build InferenceClient with a provider that accepts the user's HF token."""
from __future__ import annotations

import os

from huggingface_hub import InferenceClient
def inference_client_kwargs(token: str) -> dict:
    """
    Build keyword arguments for ``InferenceClient``, honoring ``HF_INFERENCE_PROVIDER``.

    Default: **no** ``provider`` — the library then uses ``auto``: the first
    provider listed for this model per your
    https://hf.co/settings/inference-providers order.

    Forcing ``hf-inference`` breaks many chat models (e.g. Qwen2.5-7B-Instruct is
    only on together / featherless-ai — the router then returns **404** for
    ``…/hf-inference/models/…``).

    Set ``HF_INFERENCE_PROVIDER`` to pin one provider (e.g. ``together``,
    ``sambanova``) or ``auto`` explicitly. Use ``hf-inference`` only for models
    that actually list it.

    Parameters
    ----------
    token : str
        The user's Hugging Face access token; always forwarded to the client.

    Returns
    -------
    dict
        ``{"token": token}``, plus a ``"provider"`` key only when the environment
        variable pins a specific (non-auto) provider.
    """
    raw = os.environ.get("HF_INFERENCE_PROVIDER")
    if raw is None:
        return {"token": token}
    provider = raw.strip().lower()
    # Empty string or explicit "auto" means: let the library pick (its default).
    if provider in ("", "auto"):
        return {"token": token}
    return {"token": token, "provider": provider}
def make_inference_client(token: str) -> InferenceClient:
    """Construct an ``InferenceClient`` from the kwargs chosen for *token*."""
    client_kwargs = inference_client_kwargs(token)
    return InferenceClient(**client_kwargs)