import asyncio

import httpx

from app.config import HF_TOKEN
|
|
# Shared auth header for every Hugging Face Inference API call; empty dict
# (anonymous, rate-limited access) when no token is configured.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
|
|
async def call_hf_with_fallback(models: list, prompt: str, max_new_tokens=256, temperature=0.2):
    """
    Call the Hugging Face Inference API, trying each model in *models* in
    order until one succeeds.

    Each model gets up to 3 attempts. A 404/410 response marks the model as
    unusable and skips straight to the next candidate; any other non-200
    status or transport error is retried after a short exponential backoff
    (HF commonly returns 503 while a model is still loading).

    Args:
        models: Candidate model IDs, tried in order; falsy entries are skipped.
        prompt: Text sent as the API ``inputs`` field.
        max_new_tokens: Generation length cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The generated text on success (or the stringified raw response when
        the payload has an unexpected shape), or None if every model failed.
    """
    # The request body is identical for every model/attempt — build it once.
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
        },
    }
    # One client for the whole fallback sequence: creating a new AsyncClient
    # per attempt (as before) throws away connection pooling and pays
    # setup/teardown on every retry.
    async with httpx.AsyncClient(timeout=60.0) as client:
        for model_id in models:
            if not model_id:
                continue
            model_id = model_id.strip()
            url = f"https://api-inference.huggingface.co/models/{model_id}"
            for attempt in range(3):
                try:
                    print(f"[HF] Trying model: {model_id} (attempt {attempt+1})")
                    r = await client.post(url, headers=HEADERS, json=payload)

                    if r.status_code == 200:
                        data = r.json()
                        # Text-generation endpoints usually return a
                        # one-element list of {"generated_text": ...};
                        # some return a bare dict.
                        if isinstance(data, list) and data and "generated_text" in data[0]:
                            return data[0]["generated_text"]
                        if isinstance(data, dict) and "generated_text" in data:
                            return data["generated_text"]
                        # Unexpected but successful payload: surface it as-is.
                        # (str() on parsed JSON cannot raise, so the old
                        # try/except around this line was dead code.)
                        return str(data)

                    if r.status_code in (404, 410):
                        # Model is unknown or gone — retrying cannot help.
                        print(f"[HF] Model {model_id} not usable: {r.status_code}")
                        break

                    print(f"[HF] Model {model_id} returned status {r.status_code}. Retrying...")
                except Exception as e:
                    # Network/transport failure for this attempt; fall through
                    # to the backoff and retry.
                    print(f"[HF] Error calling model {model_id}: {e}")
                if attempt < 2:
                    # Exponential backoff (1s, 2s) before re-hitting the API.
                    await asyncio.sleep(2 ** attempt)

    print("[HF] All fallback models failed")
    return None
|
|