import os
import gradio as gr
from huggingface_hub import InferenceClient

# Read the access token from the environment; this is None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Shared inference client for the chat model, authenticated with the token above.
client = InferenceClient(model="aixk/isai-v2", token=HF_TOKEN)

def respond(message, history):
    """Stream the assistant's reply to ``message`` given the prior ``history``.

    Args:
        message: Text of the latest user message.
        history: Earlier conversation turns. Two layouts are accepted: a list
            of ``{"role": ..., "content": ...}`` dicts, or a list of
            ``(user_msg, assistant_msg)`` pairs. ``None`` or an empty list
            means there is no earlier conversation.

    Yields:
        The reply text accumulated so far, once for every streamed chunk that
        carries new text.
    """
    messages = [{"role": "system", "content": "You are aixk, a helpful assistant."}]

    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content dict layout. Forward only the two keys the chat
            # API needs; any extra keys on the dict are dropped.
            role = turn.get("role")
            content = turn.get("content")
            # Only plain, non-empty text from user/assistant turns is
            # forwarded; other content types are skipped.
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
        else:
            # Pair layout: (user_msg, assistant_msg); either side may be empty.
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""
    for message_chunk in client.chat_completion(messages, max_tokens=512, stream=True):
        # Guard against chunks that carry no choices at all.
        if not message_chunk.choices:
            continue
        token = message_chunk.choices[0].delta.content
        # The delta's content may be None or empty; concatenating None would
        # raise TypeError, so only chunks with text are appended.
        if token:
            response += token
            yield response

# Build the chat UI around `respond` and start serving it.
demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    title="aixk/isai-v2 Chatbot",
)
demo.launch()