from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Download the quantized GGUF weights from the Hugging Face Hub (cached locally
# after the first run), then load them with llama.cpp bindings.
model_path = hf_hub_download(
    repo_id="rinrikatoki/dorna-quantized",
    filename="dorna-q8_0.gguf",
)
llm = Llama(model_path=model_path, n_ctx=512, n_threads=4, n_batch=8)


def chat_fn(message, history):
    """Produce one assistant reply for Gradio's ``type="messages"`` chat format.

    Parameters
    ----------
    message : str
        The user's latest message from the textbox.
    history : list[dict] | None
        Prior turns as ``{"role": ..., "content": ...}`` dicts, or ``None``
        on the first call.

    Returns
    -------
    tuple[list[dict], str]
        The updated message history for the chatbot, and an empty string
        that clears the input textbox.
    """
    # NOTE(review): only the latest message is sent to the model — prior
    # turns are not included in the prompt. Possibly deliberate given the
    # small n_ctx=512; confirm whether multi-turn context is wanted.
    output = llm(message, max_tokens=128)
    response = output["choices"][0]["text"].strip()

    # Copy rather than mutate the list Gradio handed us, then append the
    # new user/assistant turns in the "messages" dict format.
    history = list(history or [])
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return history, ""


# Gradio UI: a chatbot view, a textbox that submits on Enter, and a clear
# button that resets the conversation.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 چت با مدل درنا")
    chatbot = gr.Chatbot(label="درنا", type="messages")
    msg = gr.Textbox(label="پیام شما...")
    clear = gr.Button("پاک‌سازی چت")

    # Second output clears the textbox after each submit (chat_fn returns "").
    msg.submit(chat_fn, [msg, chatbot], [chatbot, msg])
    clear.click(lambda: [], inputs=[], outputs=[chatbot], queue=False)

if __name__ == "__main__":
    demo.launch()