File size: 1,115 Bytes
524e3cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""Build InferenceClient with a provider that accepts the user's HF token."""

from __future__ import annotations

import os

from huggingface_hub import InferenceClient


def inference_client_kwargs(token: str) -> dict:
    """
    Build the keyword arguments for ``InferenceClient``.

    By default no ``provider`` key is emitted, so the library falls back to
    ``auto``: the first provider serving the model according to your
    https://hf.co/settings/inference-providers ordering.

    Forcing ``hf-inference`` breaks many chat models (e.g. Qwen2.5-7B-Instruct is only on
    together / featherless-ai — the router then returns **404** for …/hf-inference/models/…).

    Set ``HF_INFERENCE_PROVIDER`` to pin one provider (e.g. ``together``, ``sambanova``)
    or ``auto`` explicitly. Use ``hf-inference`` only for models that actually list it.
    """
    kwargs = {"token": token}
    env_value = os.environ.get("HF_INFERENCE_PROVIDER")
    if env_value is not None:
        # Normalize before comparing: surrounding whitespace and case are ignored.
        normalized = env_value.strip().lower()
        # Empty string and "auto" both mean "let the library pick".
        if normalized not in ("", "auto"):
            kwargs["provider"] = normalized
    return kwargs


def make_inference_client(token: str) -> InferenceClient:
    """Return an ``InferenceClient`` configured from *token* and the environment."""
    client_kwargs = inference_client_kwargs(token)
    return InferenceClient(**client_kwargs)