# NOTE(review): the lines "Spaces:" / "Runtime error" were UI residue from a
# Hugging Face Spaces error page that got pasted in with the code; kept only
# as this comment so the module parses.
from typing import List

from huggingface_hub import InferenceClient

from config import HF_TOKEN, MODEL_ID, SYSTEM_PROMPT
def generate_reply(messages: List[dict]) -> str:
    """Generate the assistant's next reply for a chat transcript.

    Parameters
    ----------
    messages:
        Conversation history as ``{"role": "system"|"user"|"assistant",
        "content": "..."}`` dicts, in order.

    Returns
    -------
    str
        The model's generated reply, stripped of surrounding whitespace.

    Raises
    ------
    huggingface_hub errors (e.g. HTTP/auth failures) propagate from
    ``InferenceClient.text_generation``.
    """
    # HF_TOKEN may be an empty string in config; pass None so the client
    # falls back to anonymous / cached credentials instead of a bad token.
    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN or None)
    prompt = _build_prompt(messages)
    out = client.text_generation(
        prompt,
        max_new_tokens=400,
        temperature=0.2,
        do_sample=True,
        return_full_text=False,  # only the newly generated continuation
    )
    return out.strip()


def _build_prompt(messages: List[dict]) -> str:
    """Flatten chat messages into a simple ``System/User/Assistant`` prompt.

    This plain-text convention works well with many chat-tuned LLMs when a
    structured chat endpoint is not used. Falls back to ``SYSTEM_PROMPT``
    when the transcript carries no system message; roles other than
    user/assistant/system are ignored.
    """
    system_text = next(
        (m["content"] for m in messages if m["role"] == "system"),
        SYSTEM_PROMPT,
    )
    parts = [f"System: {system_text}"]
    for m in messages:
        if m["role"] == "user":
            parts.append(f"User: {m['content']}")
        elif m["role"] == "assistant":
            parts.append(f"Assistant: {m['content']}")
    # Trailing cue so the model continues in the assistant's voice.
    parts.append("Assistant:")
    return "\n".join(parts)