import asyncio

import httpx

from app.config import HF_TOKEN
|
|
# Shared auth header for every Hugging Face Inference API call; empty dict
# (anonymous, rate-limited access) when no token is configured.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
|
|
async def call_hf_with_fallback(models: list, prompt: str, max_new_tokens=256, temperature=0.2):
    """
    Call the Hugging Face Inference API, trying each model in *models* in
    order until one succeeds.

    Each model gets up to 3 attempts. A 404/410 response marks the model as
    unusable and skips straight to the next candidate; any other non-200
    status or transport error is retried after a short exponential backoff
    (HF commonly returns 503 while a model is still loading).

    Args:
        models: Candidate model IDs, tried in order; falsy entries are skipped.
        prompt: Text sent as the API ``inputs`` field.
        max_new_tokens: Generation length cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The generated text on success (or the stringified raw response when
        the payload has an unexpected shape), or None if every model failed.
    """
    # The request body is identical for every model/attempt — build it once.
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
        },
    }
    # One client for the whole fallback sequence: creating a new AsyncClient
    # per attempt (as before) throws away connection pooling and pays
    # setup/teardown on every retry.
    async with httpx.AsyncClient(timeout=60.0) as client:
        for model_id in models:
            if not model_id:
                continue
            model_id = model_id.strip()
            url = f"https://api-inference.huggingface.co/models/{model_id}"
            for attempt in range(3):
                try:
                    print(f"[HF] Trying model: {model_id} (attempt {attempt+1})")
                    r = await client.post(url, headers=HEADERS, json=payload)

                    if r.status_code == 200:
                        data = r.json()
                        # Text-generation endpoints usually return a
                        # one-element list of {"generated_text": ...};
                        # some return a bare dict.
                        if isinstance(data, list) and data and "generated_text" in data[0]:
                            return data[0]["generated_text"]
                        if isinstance(data, dict) and "generated_text" in data:
                            return data["generated_text"]
                        # Unexpected but successful payload: surface it as-is.
                        # (str() on parsed JSON cannot raise, so the old
                        # try/except around this line was dead code.)
                        return str(data)

                    if r.status_code in (404, 410):
                        # Model is unknown or gone — retrying cannot help.
                        print(f"[HF] Model {model_id} not usable: {r.status_code}")
                        break

                    print(f"[HF] Model {model_id} returned status {r.status_code}. Retrying...")
                except Exception as e:
                    # Network/transport failure for this attempt; fall through
                    # to the backoff and retry.
                    print(f"[HF] Error calling model {model_id}: {e}")
                if attempt < 2:
                    # Exponential backoff (1s, 2s) before re-hitting the API.
                    await asyncio.sleep(2 ** attempt)

    print("[HF] All fallback models failed")
    return None
|
|