riazmo committed on
Commit
4abbd8b
·
verified ·
1 Parent(s): bc137d7

Upload hf_inference.py

Browse files
Files changed (1) hide show
  1. core/hf_inference.py +11 -6
core/hf_inference.py CHANGED
@@ -335,12 +335,17 @@ class HFInferenceClient:
335
  if not self.token:
336
  raise ValueError("HF_TOKEN is required for inference")
337
 
338
- # Do NOT set base_url the huggingface_hub library routes requests
339
- # to the correct endpoint automatically based on the model parameter.
340
- # Setting base_url overrides per-model routing and causes API errors
341
- # when different models are used across agents.
342
- self.sync_client = InferenceClient(token=self.token)
343
- self.async_client = AsyncInferenceClient(token=self.token)
 
 
 
 
 
344
 
345
  def get_model_for_agent(self, agent_name: str) -> str:
346
  """Get the appropriate model for an agent."""
 
335
  if not self.token:
336
  raise ValueError("HF_TOKEN is required for inference")
337
 
338
+ # Use explicit provider="hf-inference" to route through HF's own
339
+ # serverless inference. Without this, huggingface_hub >=0.28 picks
340
+ # providers via "auto" which can fail with Request ID errors when
341
+ # the model is not available on the auto-selected third-party provider.
342
+ try:
343
+ self.sync_client = InferenceClient(token=self.token, provider="hf-inference")
344
+ self.async_client = AsyncInferenceClient(token=self.token, provider="hf-inference")
345
+ except TypeError:
346
+ # Older huggingface_hub (<0.28) doesn't support provider param
347
+ self.sync_client = InferenceClient(token=self.token)
348
+ self.async_client = AsyncInferenceClient(token=self.token)
349
 
350
  def get_model_for_agent(self, agent_name: str) -> str:
351
  """Get the appropriate model for an agent."""