"""Build InferenceClient with a provider that accepts the user's HF token."""
from __future__ import annotations
import os
from huggingface_hub import InferenceClient
def inference_client_kwargs(token: str) -> dict:
    """
    Build the keyword arguments for ``InferenceClient``.

    By default no ``provider`` key is emitted, so the library falls back to
    ``auto`` — the first provider listed for the model per your
    https://hf.co/settings/inference-providers order.  Forcing ``hf-inference``
    breaks many chat models (e.g. Qwen2.5-7B-Instruct is only on
    together / featherless-ai — the router then returns **404** for
    …/hf-inference/models/…).  Set ``HF_INFERENCE_PROVIDER`` to pin one
    provider (e.g. ``together``, ``sambanova``) or ``auto`` explicitly; use
    ``hf-inference`` only for models that actually list it.
    """
    kwargs: dict = {"token": token}
    raw = os.environ.get("HF_INFERENCE_PROVIDER")
    if raw is not None:
        # Normalize: whitespace and case are ignored; "" / "auto" mean default.
        provider = raw.strip().lower()
        if provider not in ("", "auto"):
            kwargs["provider"] = provider
    return kwargs
def make_inference_client(token: str) -> InferenceClient:
    """Instantiate an ``InferenceClient`` using kwargs derived from the environment."""
    client_kwargs = inference_client_kwargs(token)
    return InferenceClient(**client_kwargs)