Final_Assignment_Template / inference_client_factory.py
W01fAI's picture
Upload 7 files
524e3cf verified
raw
history blame
1.12 kB
"""Build InferenceClient with a provider that accepts the user's HF token."""
from __future__ import annotations
import os
from huggingface_hub import InferenceClient
def inference_client_kwargs(token: str) -> dict:
    """
    Build the keyword arguments for ``InferenceClient``.

    By default no ``provider`` key is included, so the library falls back to
    ``auto``: the first provider serving the model according to the ordering at
    https://hf.co/settings/inference-providers. Pinning ``hf-inference`` breaks
    many chat models (e.g. Qwen2.5-7B-Instruct is served only via together /
    featherless-ai, and the router then answers **404** for
    …/hf-inference/models/…).

    The ``HF_INFERENCE_PROVIDER`` environment variable pins a specific provider
    (e.g. ``together``, ``sambanova``); an empty value or ``auto`` keeps the
    default. Use ``hf-inference`` only for models that actually list it.
    """
    kwargs = {"token": token}
    provider = os.environ.get("HF_INFERENCE_PROVIDER")
    if provider is not None:
        normalized = provider.strip().lower()
        # Empty string and "auto" both mean "let the library pick".
        if normalized not in ("", "auto"):
            kwargs["provider"] = normalized
    return kwargs
def make_inference_client(token: str) -> InferenceClient:
    """Create an ``InferenceClient`` whose provider is resolved from the environment."""
    kwargs = inference_client_kwargs(token)
    return InferenceClient(**kwargs)