File size: 1,816 Bytes
1a0b19c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
from typing import Any, Dict, List

import requests

# Hugging Face access token; required by the Inference API calls below.
HF_TOKEN = os.getenv("HF_TOKEN")
# Model id interpolated into the default endpoint URL below.
HF_TEXT_MODEL = os.getenv("HF_TEXT_MODEL", "microsoft/Phi-3.5-mini-instruct")
# Inference endpoint; defaults to the public HF Inference API URL for
# HF_TEXT_MODEL but can be overridden wholesale via the environment.
HF_INFERENCE_URL = os.getenv(
    "HF_INFERENCE_URL",
    f"https://api-inference.huggingface.co/models/{HF_TEXT_MODEL}",
)

# Fail fast at import time: every request sends HF_TOKEN as a bearer token.
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN nao definido.")


def _messages_to_prompt(messages: List[Dict[str, str]]) -> str:
    lines: List[str] = []
    for m in messages:
        role = m.get("role", "user").upper()
        content = m.get("content", "")
        lines.append(f"[{role}]\n{content}")
    lines.append("[ASSISTANT]\n")
    return "\n\n".join(lines)


def call_hf(messages: List[Dict[str, str]], max_tokens: int = 500, temperature: float = 0.2) -> str:
    """POST *messages* to the HF Inference API and return the generated text.

    The messages are flattened via ``_messages_to_prompt`` and sent to
    ``HF_INFERENCE_URL`` with ``HF_TOKEN`` as a bearer token.

    Raises
    ------
    requests.HTTPError
        On a non-2xx HTTP status.
    RuntimeError
        When the API reports an error in a 2xx body, or when the response
        body matches none of the known shapes.
    """
    auth_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    body: Dict[str, Any] = {
        "inputs": _messages_to_prompt(messages),
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "return_full_text": False,
        },
    }

    resp = requests.post(HF_INFERENCE_URL, headers=auth_headers, json=body, timeout=120)
    resp.raise_for_status()
    data = resp.json()

    # List shape: [{"generated_text": ...}, ...]
    first = data[0] if isinstance(data, list) and data else None
    if isinstance(first, dict) and "generated_text" in first:
        return str(first["generated_text"]).strip()

    # Dict shape: {"generated_text": ...} or {"error": ...}
    if isinstance(data, dict):
        if "generated_text" in data:
            return str(data["generated_text"]).strip()
        if "error" in data:
            raise RuntimeError(f"HF inference error: {data['error']}")

    raise RuntimeError(f"HF inference response format unexpected: {data}")