import gradio as gr
from huggingface_hub import InferenceClient
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

import os


# Load the GGUF model file from the given model repository.
# n_ctx=1024 is the context size handed to the loader.
llm = Llama.from_pretrained(
    repo_id="phucdoankaizen/gguf_mentora",
    filename="modelQ4.gguf",
    n_ctx=1024,
)


# System prompt that steers every reply. The text is Vietnamese for:
# "You are a psychology expert who always listens, understands, and responds
# gently in Vietnamese."
system_prompt = (
    "Bạn là một chuyên gia tâm lý, luôn lắng nghe, thấu hiểu "
    "và phản hồi nhẹ nhàng bằng tiếng Việt."
)

def chat(user_input, history):
    """Generate the assistant's reply to one user message.

    Args:
        user_input: Text submitted by the user.
        history: Conversation turns as a list of
            ``{"user": ..., "bot": ...}`` dicts, or ``None`` to start a new
            conversation. A new turn is appended to this list in place.

    Returns:
        A 3-tuple ``(textbox_value, chatbot_pairs, history)``:
        an empty string (the value sent back to the textbox output), the
        whole conversation as ``(user, bot)`` tuples for display, and the
        updated history list.
    """
    if history is None:
        history = []

    # A blank submission carries no message: skip the model call and do not
    # record an empty turn. The conversation is returned unchanged.
    if not user_input or not user_input.strip():
        return "", [(turn["user"], turn["bot"]) for turn in history], history

    # Only the five most recent turns are replayed to the model.
    # NOTE(review): the model is loaded with n_ctx=1024; five long turns plus
    # the new message may still exceed it — confirm how llama_cpp reacts.
    recent_history = history[-5:]
    dialogue = "\n".join(
        f"User: {turn['user']}\nAssistant: {turn['bot']}"
        for turn in recent_history
    )
    prompt = f"{system_prompt}\n{dialogue}\nUser: {user_input}\nAssistant:"

    output = llm(
        prompt,
        max_tokens=200,
        temperature=0.1,
        top_p=0.4,
        # Stop as soon as the model starts writing another speaker label.
        stop=["User:", "Assistant:"],
    )

    answer = output["choices"][0]["text"].strip()
    history.append({"user": user_input, "bot": answer})
    formatted_history = [(turn["user"], turn["bot"]) for turn in history]
    return "", formatted_history, history

# Gradio user interface
with gr.Blocks() as demo:
    gr.Markdown("## Mentora - AI hỗ trợ tâm lý")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(
        label="Nhập tin nhắn của bạn",
        placeholder="Bạn đang cảm thấy thế nào?",
    )
    send_btn = gr.Button("Gửi")
    # Conversation history lives in a gr.State that starts as an empty list
    # (original note: history is kept separately for each user).
    session_state = gr.State([])

    # The button click and the textbox submit event run the same handler with
    # the same inputs and outputs; register click first, then submit.
    for _bind in (send_btn.click, msg.submit):
        _bind(
            chat,
            inputs=[msg, session_state],
            outputs=[msg, chatbot, session_state],
        )

demo.launch()