mtornani Claude Sonnet 4.6 committed on
Commit
3e460dc
·
1 Parent(s): 2edaee7

Fix HF provider: try together/fireworks-ai per modelli grandi

Browse files

hf-inference supporta solo modelli piccoli. I modelli 72B/70B richiedono
together o fireworks-ai, che vengono instradati attraverso il gateway API di HF
(nessun blocco di rete da HF Spaces). Prova 3 provider per ogni modello.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. agents.py +22 -21
agents.py CHANGED
@@ -183,36 +183,37 @@ class HFInferenceClient:
183
  def generate_content(self, prompt: str, temperature: float = 0.7,
184
  max_tokens: int = 8192) -> str:
185
  """
186
- Genera contenuto via HF Serverless Inference.
187
- Tenta la catena HF_MODEL_CHAIN in ordine su errore o risposta vuota.
 
188
  """
189
  if not self.available:
190
  raise RuntimeError("HF InferenceClient non disponibile")
191
 
192
  chain = [self.model] + [m for m in HF_MODEL_CHAIN if m != self.model]
 
193
  last_error = None
194
 
195
  for model in chain:
196
- try:
197
- response = self.client.chat_completion(
198
- model=model,
199
- messages=[{"role": "user", "content": prompt}],
200
- max_tokens=max_tokens,
201
- temperature=temperature,
202
- )
203
- content = response.choices[0].message.content or ""
204
- if content.strip():
205
- if model != self.model:
206
- logger.info(f"HF: fallback model {model} succeeded")
207
- return content
208
- logger.warning(f"HF: model {model} returned empty, trying next")
209
- except Exception as e:
210
- logger.warning(
211
- f"HF: model {model} failed ({type(e).__name__}: {str(e)[:120]}), trying next"
212
- )
213
- last_error = e
214
 
215
- raise RuntimeError(f"All HF models exhausted. Last error: {last_error}")
216
 
217
 
218
  def get_active_provider() -> str:
 
183
  def generate_content(self, prompt: str, temperature: float = 0.7,
184
  max_tokens: int = 8192) -> str:
185
  """
186
+ Genera contenuto via HF Inference.
187
+ Prova ogni modello con provider multipli (rutati attraverso HF nessun blocco di rete).
188
+ Ordine provider: together → fireworks-ai → hf-inference (fallback piccoli modelli).
189
  """
190
  if not self.available:
191
  raise RuntimeError("HF InferenceClient non disponibile")
192
 
193
  chain = [self.model] + [m for m in HF_MODEL_CHAIN if m != self.model]
194
+ providers = ["together", "fireworks-ai", "hf-inference"]
195
  last_error = None
196
 
197
  for model in chain:
198
+ for provider in providers:
199
+ try:
200
+ tmp_client = _HFInferenceClient(provider=provider, token=self.token)
201
+ response = tmp_client.chat_completion(
202
+ model=model,
203
+ messages=[{"role": "user", "content": prompt}],
204
+ max_tokens=max_tokens,
205
+ temperature=temperature,
206
+ )
207
+ content = response.choices[0].message.content or ""
208
+ if content.strip():
209
+ logger.info(f"HF [{provider}/{model}] succeeded")
210
+ return content
211
+ logger.warning(f"HF [{provider}/{model}] empty response, trying next")
212
+ except Exception as e:
213
+ logger.warning(f"HF [{provider}/{model}] failed: {str(e)[:100]}")
214
+ last_error = e
 
215
 
216
+ raise RuntimeError(f"All HF models/providers exhausted. Last error: {last_error}")
217
 
218
 
219
  def get_active_provider() -> str: