Spaces:
Sleeping
Sleeping
Fix HF provider: try together/fireworks-ai per modelli grandi
Browse files
hf-inference supporta solo modelli piccoli. I modelli 72B/70B richiedono
together o fireworks-ai, che vengono instradati attraverso il gateway API di HF
(nessun blocco di rete da HF Spaces). Prova 3 provider per ogni modello.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
agents.py
CHANGED
|
@@ -183,36 +183,37 @@ class HFInferenceClient:
|
|
| 183 |
def generate_content(self, prompt: str, temperature: float = 0.7,
|
| 184 |
max_tokens: int = 8192) -> str:
|
| 185 |
"""
|
| 186 |
-
Genera contenuto via HF
|
| 187 |
-
|
|
|
|
| 188 |
"""
|
| 189 |
if not self.available:
|
| 190 |
raise RuntimeError("HF InferenceClient non disponibile")
|
| 191 |
|
| 192 |
chain = [self.model] + [m for m in HF_MODEL_CHAIN if m != self.model]
|
|
|
|
| 193 |
last_error = None
|
| 194 |
|
| 195 |
for model in chain:
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
f"HF
|
| 212 |
-
|
| 213 |
-
last_error = e
|
| 214 |
|
| 215 |
-
raise RuntimeError(f"All HF models exhausted. Last error: {last_error}")
|
| 216 |
|
| 217 |
|
| 218 |
def get_active_provider() -> str:
|
|
|
|
| 183 |
def generate_content(self, prompt: str, temperature: float = 0.7,
|
| 184 |
max_tokens: int = 8192) -> str:
|
| 185 |
"""
|
| 186 |
+
Genera contenuto via HF Inference.
|
| 187 |
+
Prova ogni modello con provider multipli (rutati attraverso HF → nessun blocco di rete).
|
| 188 |
+
Ordine provider: together → fireworks-ai → hf-inference (fallback piccoli modelli).
|
| 189 |
"""
|
| 190 |
if not self.available:
|
| 191 |
raise RuntimeError("HF InferenceClient non disponibile")
|
| 192 |
|
| 193 |
chain = [self.model] + [m for m in HF_MODEL_CHAIN if m != self.model]
|
| 194 |
+
providers = ["together", "fireworks-ai", "hf-inference"]
|
| 195 |
last_error = None
|
| 196 |
|
| 197 |
for model in chain:
|
| 198 |
+
for provider in providers:
|
| 199 |
+
try:
|
| 200 |
+
tmp_client = _HFInferenceClient(provider=provider, token=self.token)
|
| 201 |
+
response = tmp_client.chat_completion(
|
| 202 |
+
model=model,
|
| 203 |
+
messages=[{"role": "user", "content": prompt}],
|
| 204 |
+
max_tokens=max_tokens,
|
| 205 |
+
temperature=temperature,
|
| 206 |
+
)
|
| 207 |
+
content = response.choices[0].message.content or ""
|
| 208 |
+
if content.strip():
|
| 209 |
+
logger.info(f"HF [{provider}/{model}] succeeded")
|
| 210 |
+
return content
|
| 211 |
+
logger.warning(f"HF [{provider}/{model}] empty response, trying next")
|
| 212 |
+
except Exception as e:
|
| 213 |
+
logger.warning(f"HF [{provider}/{model}] failed: {str(e)[:100]}")
|
| 214 |
+
last_error = e
|
|
|
|
| 215 |
|
| 216 |
+
raise RuntimeError(f"All HF models/providers exhausted. Last error: {last_error}")
|
| 217 |
|
| 218 |
|
| 219 |
def get_active_provider() -> str:
|