# kgb-chatbot / llm.py
# (uploaded by thomascerniglia — commit 1fed44b, "Upload 6 files")
from typing import List
from huggingface_hub import InferenceClient
from config import HF_TOKEN, MODEL_ID, SYSTEM_PROMPT
def generate_reply(messages: List[dict]) -> str:
    """Generate one assistant reply for a chat transcript.

    Parameters
    ----------
    messages:
        Chat history as ``[{"role": "system"|"user"|"assistant", "content": "..."}]``.
        If no system message is present, ``SYSTEM_PROMPT`` from config is used.

    Returns
    -------
    str
        The model's reply, stripped of surrounding whitespace and truncated
        before any hallucinated follow-up conversation turns.
    """
    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN or None)
    prompt = _build_prompt(messages)
    out = client.text_generation(
        prompt,
        max_new_tokens=400,
        temperature=0.2,
        do_sample=True,
        return_full_text=False,
    )
    return _truncate_at_role_markers(out).strip()


def _build_prompt(messages: List[dict]) -> str:
    """Flatten the chat history into a plain 'Role: content' completion prompt."""
    # Fall back to the configured system prompt when the caller supplied none.
    sys_msg = next((m["content"] for m in messages if m["role"] == "system"), SYSTEM_PROMPT)
    parts = [f"System: {sys_msg}"]
    for m in messages:
        if m["role"] == "user":
            parts.append(f"User: {m['content']}")
        elif m["role"] == "assistant":
            parts.append(f"Assistant: {m['content']}")
    # Trailing cue so the model completes the assistant turn.
    parts.append("Assistant:")
    return "\n".join(parts)


def _truncate_at_role_markers(text: str) -> str:
    """Cut the completion before any hallucinated 'User:'/'System:'/'Assistant:' turn.

    Text-completion models prompted with a flat transcript often keep writing
    the conversation past the assistant's turn; keep only the first turn.
    """
    for marker in ("\nUser:", "\nSystem:", "\nAssistant:"):
        idx = text.find(marker)
        if idx != -1:
            text = text[:idx]
    return text