# Provenance: uploaded by ayush2917 — "Update app/utils/hf_client.py" (commit 0631b95, verified)
# app/utils/hf_client.py
import httpx
import asyncio
from app.config import HF_TOKEN
# Bearer-token auth header for the HF Inference API; falls back to an empty
# dict (anonymous, rate-limited requests) when no token is configured.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
async def call_hf_with_fallback(models: list, prompt: str, max_new_tokens=256, temperature=0.2):
    """
    Query the Hugging Face Inference API, trying each model in ``models``
    in order until one succeeds.

    Each model gets up to 3 attempts with a short exponential backoff
    between retries; models answering 404/410 are skipped immediately as
    permanently unavailable.

    Args:
        models: Fallback-ordered model ids. Falsy entries are skipped and
            surrounding whitespace is stripped.
        prompt: Text prompt sent as the ``inputs`` field.
        max_new_tokens: Generation length cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The generated text on success (or the stringified payload when a
        200 response has an unrecognized shape), or ``None`` when every
        model failed.
    """
    candidates = [m.strip() for m in models if m]
    if not candidates:
        print("[HF] All fallback models failed")
        return None

    # Request body is identical for every model/attempt — build it once.
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
        },
    }

    # One client for the whole fallback sequence: reuses the connection pool
    # instead of opening a fresh client (new TCP/TLS handshake) per attempt.
    async with httpx.AsyncClient(timeout=60.0) as client:
        for model_id in candidates:
            url = f"https://api-inference.huggingface.co/models/{model_id}"
            for attempt in range(3):
                try:
                    print(f"[HF] Trying model: {model_id} (attempt {attempt+1})")
                    r = await client.post(url, headers=HEADERS, json=payload)

                    if r.status_code == 200:
                        data = r.json()
                        # Typical shapes: [{"generated_text": ...}] or
                        # {"generated_text": ...}.
                        if isinstance(data, list) and data and "generated_text" in data[0]:
                            return data[0]["generated_text"]
                        if isinstance(data, dict) and "generated_text" in data:
                            return data["generated_text"]
                        # Unrecognized-but-successful payload: return it as
                        # text rather than discarding a 200 response.
                        return str(data)

                    # 404/410 mean the endpoint is gone; retrying won't help.
                    if r.status_code in (404, 410):
                        print(f"[HF] Model {model_id} not usable: {r.status_code}")
                        break

                    print(f"[HF] Model {model_id} returned status {r.status_code}. Retrying...")
                except Exception as e:
                    # Network / JSON-decode failures: retry this model.
                    print(f"[HF] Error calling model {model_id}: {e}")

                # Brief exponential backoff before the next attempt (skipped
                # after this model's final attempt).
                if attempt < 2:
                    await asyncio.sleep(2 ** attempt)

    # no model worked
    print("[HF] All fallback models failed")
    return None