"""Build InferenceClient with a provider that accepts the user's HF token."""
from __future__ import annotations

import os

from huggingface_hub import InferenceClient
def inference_client_kwargs(token: str) -> dict:
    """
    Build keyword arguments for ``InferenceClient``, honoring ``HF_INFERENCE_PROVIDER``.

    Default: **no** ``provider`` — the library then uses ``auto``: the first
    provider listed for this model per your
    https://hf.co/settings/inference-providers order.

    Forcing ``hf-inference`` breaks many chat models (e.g. Qwen2.5-7B-Instruct is
    only on together / featherless-ai — the router then returns **404** for
    ``…/hf-inference/models/…``).

    Set ``HF_INFERENCE_PROVIDER`` to pin one provider (e.g. ``together``,
    ``sambanova``) or ``auto`` explicitly. Use ``hf-inference`` only for models
    that actually list it.

    Parameters
    ----------
    token : str
        The user's Hugging Face access token; always forwarded to the client.

    Returns
    -------
    dict
        ``{"token": token}``, plus a ``"provider"`` key only when the environment
        variable pins a specific (non-auto) provider.
    """
    raw = os.environ.get("HF_INFERENCE_PROVIDER")
    if raw is None:
        return {"token": token}
    provider = raw.strip().lower()
    # Empty string or explicit "auto" means: let the library pick (its default).
    if provider in ("", "auto"):
        return {"token": token}
    return {"token": token, "provider": provider}
def make_inference_client(token: str) -> InferenceClient:
    """Construct an ``InferenceClient`` from the kwargs chosen for *token*."""
    client_kwargs = inference_client_kwargs(token)
    return InferenceClient(**client_kwargs)