riazmo committed on
Commit
4abbd8b
·
verified ·
1 Parent(s): bc137d7

Upload hf_inference.py

Browse files
Files changed (1) hide show
  1. core/hf_inference.py +11 -6
core/hf_inference.py CHANGED
@@ -335,12 +335,17 @@ class HFInferenceClient:
335
  if not self.token:
336
  raise ValueError("HF_TOKEN is required for inference")
337
 
338
- # Do NOT set base_url the huggingface_hub library routes requests
339
- # to the correct endpoint automatically based on the model parameter.
340
- # Setting base_url overrides per-model routing and causes API errors
341
- # when different models are used across agents.
342
- self.sync_client = InferenceClient(token=self.token)
343
- self.async_client = AsyncInferenceClient(token=self.token)
 
 
 
 
 
344
 
345
  def get_model_for_agent(self, agent_name: str) -> str:
346
  """Get the appropriate model for an agent."""
 
335
  if not self.token:
336
  raise ValueError("HF_TOKEN is required for inference")
337
 
338
+ # Use explicit provider="hf-inference" to route through HF's own
339
+ # serverless inference. Without this, huggingface_hub >=0.28 picks
340
+ # providers via "auto" which can fail with Request ID errors when
341
+ # the model is not available on the auto-selected third-party provider.
342
+ try:
343
+ self.sync_client = InferenceClient(token=self.token, provider="hf-inference")
344
+ self.async_client = AsyncInferenceClient(token=self.token, provider="hf-inference")
345
+ except TypeError:
346
+ # Older huggingface_hub (<0.28) doesn't support provider param
347
+ self.sync_client = InferenceClient(token=self.token)
348
+ self.async_client = AsyncInferenceClient(token=self.token)
349
 
350
  def get_model_for_agent(self, agent_name: str) -> str:
351
  """Get the appropriate model for an agent."""