File size: 1,441 Bytes
2068d15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import requests
from dotenv import load_dotenv
from pathlib import Path

# Resolve the repo-root .env (two directories above this file) and load it
# before any os.getenv call below — order matters here.
env_path = Path(__file__).resolve().parents[2] / '.env'
load_dotenv(dotenv_path=env_path)

# Hugging Face Inference API configuration.
# HF_TOKEN may be None if the variable is unset — requests are then sent
# unauthenticated (see HEADERS below).
HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
# Model repo to query; falls back to a public chat model when unset.
REPO_ID = os.getenv("HUGGINGFACE_MODEL", "HuggingFaceH4/zephyr-7b-beta")
API_URL = f"https://api-inference.huggingface.co/models/{REPO_ID}"

# Only attach an Authorization header when a token is actually configured.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}


def call_llm(prompt: str, max_length: int = 200) -> str:
    """Send *prompt* to the Hugging Face Inference API and return generated text.

    Args:
        prompt: The text prompt forwarded as the model ``inputs``.
        max_length: Maximum number of new tokens to generate (default 200).

    Returns:
        The model's ``generated_text`` (stripped), the raw payload as a string
        when the response shape is unrecognized, or an
        ``"error in generate response: ..."`` string on failure. This function
        never raises — callers rely on always getting a string back.
    """
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_length, "temperature": 0.2},
    }
    try:
        print(f"[llm_client] POST {API_URL}")
        # SECURITY: never print HEADERS directly — it contains the raw bearer
        # token. Log a redacted copy instead.
        redacted = {
            k: ("Bearer ***" if k == "Authorization" else v)
            for k, v in HEADERS.items()
        }
        print(f"[llm_client] HEADERS: {redacted}")
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        print(f"[llm_client] Status code: {response.status_code}")
        # Cap the debug dump — model responses can be arbitrarily large.
        print(f"[llm_client] Response text: {response.text[:2000]}")
        response.raise_for_status()
        data = response.json()
        # The inference API returns either a list of generation dicts or a
        # single dict, depending on the model/task.
        if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        if isinstance(data, dict) and "generated_text" in data:
            return data["generated_text"].strip()
        # Unknown shape: surface the raw payload so the caller can inspect it.
        return str(data)
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP errors;
        # ValueError covers a non-JSON body from response.json().
        # Keep the original contract: return an error string, never raise.
        print(f"[llm_client] error HTTP HF: {e}")
        return f"error in generate response: {e}"