Spaces:
Sleeping
Sleeping
Upload hf_inference.py
Browse files - core/hf_inference.py +11 -6
core/hf_inference.py
CHANGED
|
@@ -335,12 +335,17 @@ class HFInferenceClient:
|
|
| 335 |
if not self.token:
|
| 336 |
raise ValueError("HF_TOKEN is required for inference")
|
| 337 |
|
| 338 |
-
#
|
| 339 |
-
#
|
| 340 |
-
#
|
| 341 |
-
#
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
|
| 345 |
def get_model_for_agent(self, agent_name: str) -> str:
|
| 346 |
"""Get the appropriate model for an agent."""
|
|
|
|
| 335 |
if not self.token:
|
| 336 |
raise ValueError("HF_TOKEN is required for inference")
|
| 337 |
|
| 338 |
+
# Use explicit provider="hf-inference" to route through HF's own
|
| 339 |
+
# serverless inference. Without this, huggingface_hub >=0.28 picks
|
| 340 |
+
# providers via "auto" which can fail with Request ID errors when
|
| 341 |
+
# the model is not available on the auto-selected third-party provider.
|
| 342 |
+
try:
|
| 343 |
+
self.sync_client = InferenceClient(token=self.token, provider="hf-inference")
|
| 344 |
+
self.async_client = AsyncInferenceClient(token=self.token, provider="hf-inference")
|
| 345 |
+
except TypeError:
|
| 346 |
+
# Older huggingface_hub (<0.28) doesn't support provider param
|
| 347 |
+
self.sync_client = InferenceClient(token=self.token)
|
| 348 |
+
self.async_client = AsyncInferenceClient(token=self.token)
|
| 349 |
|
| 350 |
def get_model_for_agent(self, agent_name: str) -> str:
|
| 351 |
"""Get the appropriate model for an agent."""
|