from __future__ import annotations

import requests


def _extraer_texto(body: object) -> str | None:
    """Return the generated text found in a decoded response body, or None.

    Recognised shapes:
      * ``[{"generated_text": ...}, ...]``        (first list item only)
      * ``{"generated_text": ...}``
      * ``{"choices": [{"text": ...}, ...]}``     (first choice only)
      * ``{"choices": [{"message": {"content": ...}}, ...]}``
    """
    if isinstance(body, list):
        primer_item = body[0] if body else None
        if isinstance(primer_item, dict) and "generated_text" in primer_item:
            return str(primer_item["generated_text"])
        return None

    if not isinstance(body, dict):
        return None

    if "generated_text" in body:
        return str(body["generated_text"])

    choices = body.get("choices")
    if isinstance(choices, list) and choices:
        choice_0 = choices[0]
        if isinstance(choice_0, dict):
            # "text" takes precedence over "message.content" when both exist.
            if "text" in choice_0:
                return str(choice_0["text"])
            message = choice_0.get("message")
            if isinstance(message, dict) and "content" in message:
                return str(message["content"])
    return None


def generar_respuesta_hf(
    prompt: str,
    model_id: str,
    api_key: str,
    timeout_segundos: int,
    numero_reintentos: int,
) -> str:
    """Request a short completion for *prompt* from the Hugging Face router.

    Each attempt first POSTs to the ``hf-inference`` endpoint for *model_id*;
    if that answers HTTP 404, the same attempt falls back to the
    chat-completions endpoint. The first response from which a generated
    text can be extracted is returned.

    Args:
        prompt: Text sent as ``inputs`` (hf-inference) or as the single user
            message (chat completions).
        model_id: Model identifier, used in the hf-inference URL and as the
            ``model`` field of the chat payload.
        api_key: Token sent as a ``Bearer`` authorization header.
        timeout_segundos: Timeout passed to every ``requests.post`` call.
        numero_reintentos: Total number of attempts. If it is zero or
            negative no request is sent and ``RuntimeError`` is raised.

    Returns:
        The generated text, converted with ``str``.

    Raises:
        RuntimeError: If no attempt yields a usable response. The message is
            the error recorded by the last failed attempt, or a generic
            message when none was recorded.
    """
    url_hf_inference = f"https://router.huggingface.co/hf-inference/models/{model_id}"
    url_chat = "https://router.huggingface.co/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload_hf_inference = {
        "inputs": prompt,
        "parameters": {
            "return_full_text": False,
            "max_new_tokens": 64,
            "temperature": 0.0,
        },
    }
    payload_chat = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 64,
        "temperature": 0.0,
    }

    ultimo_error = None
    for _ in range(numero_reintentos):
        try:
            response = requests.post(
                url_hf_inference,
                headers=headers,
                json=payload_hf_inference,
                timeout=timeout_segundos,
            )
            if response.status_code == 404:
                # Model not served by hf-inference: try the chat endpoint.
                response = requests.post(
                    url_chat,
                    headers=headers,
                    json=payload_chat,
                    timeout=timeout_segundos,
                )
        except requests.RequestException as exc:
            ultimo_error = str(exc)
            continue

        if response.status_code >= 400:
            if response.status_code == 404:
                ultimo_error = (
                    "HTTP 404: modelo no disponible en Hugging Face Inference Router "
                    "o sin permisos para tu token. Prueba otro model_id."
                )
            else:
                ultimo_error = f"HTTP {response.status_code}: {response.text[:250]}"
            continue

        try:
            body = response.json()
        except ValueError as exc:
            # A non-JSON body must count as a failed attempt, not crash the
            # caller; older requests versions raise a plain ValueError here.
            ultimo_error = str(exc)
            continue

        texto = _extraer_texto(body)
        if texto is not None:
            return texto

        if isinstance(body, dict) and "error" in body:
            ultimo_error = str(body["error"])
        else:
            ultimo_error = "Respuesta inesperada del proveedor de inferencia."

    raise RuntimeError(
        ultimo_error or "No se obtuvo respuesta válida del proveedor de inferencia."
    )