import os
from typing import Any, Dict, List

import requests

HF_TOKEN = os.getenv("HF_TOKEN")
HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "microsoft/Phi-3.5-mini-instruct")
HF_INFERENCE_URL = os.getenv(
    "HF_INFERENCE_URL",
    f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}",
)

if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN is not set.")


def _messages_to_prompt(messages: List[Dict[str, str]]) -> str:
    """Flatten a chat-style message list into a single tagged prompt string."""
    lines: List[str] = []
    for m in messages:
        role = m.get("role", "user").upper()
        content = m.get("content", "")
        lines.append(f"[{role}]\n{content}")
    # Leave an open assistant turn so the model continues from here.
    lines.append("[ASSISTANT]\n")
    return "\n\n".join(lines)


def call_hf(messages: List[Dict[str, str]], max_tokens: int = 500, temperature: float = 0.2) -> str:
    """Send the prompt to the Hugging Face Inference API and return the generated text."""
    prompt = _messages_to_prompt(messages)
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    payload: Dict[str, Any] = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "return_full_text": False,
        },
    }
    response = requests.post(HF_INFERENCE_URL, headers=headers, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()

    # Typical response formats from the HF Inference API:
    # a list of {"generated_text": ...} dicts, or a single dict.
    if isinstance(data, list) and data and isinstance(data[0], dict):
        if "generated_text" in data[0]:
            return str(data[0]["generated_text"]).strip()
    if isinstance(data, dict):
        if "generated_text" in data:
            return str(data["generated_text"]).strip()
        if "error" in data:
            raise RuntimeError(f"HF inference error: {data['error']}")
    raise RuntimeError(f"HF inference response format unexpected: {data}")
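

if __name__ == "__main__":
    # Minimal usage sketch, not part of the module's API. Assumptions: HF_TOKEN is
    # exported in the environment and the configured model is reachable on the
    # Inference API; the demo messages below are purely illustrative.
    demo_messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Reply with a one-sentence greeting."},
    ]
    print(call_hf(demo_messages, max_tokens=50))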