from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Download the quantized GGUF weights from the Hugging Face Hub (cached locally
# after the first run), then load them with llama.cpp bindings.
model_path = hf_hub_download(
    repo_id="rinrikatoki/dorna-quantized",
    filename="dorna-q8_0.gguf",
)
llm = Llama(model_path=model_path, n_ctx=512, n_threads=4, n_batch=8)


def chat_fn(message, history):
    """Produce one assistant reply for Gradio's ``type="messages"`` chat format.

    Parameters
    ----------
    message : str
        The user's latest message from the textbox.
    history : list[dict] | None
        Prior turns as ``{"role": ..., "content": ...}`` dicts, or ``None``
        on the first call.

    Returns
    -------
    tuple[list[dict], str]
        The updated message history for the chatbot, and an empty string
        that clears the input textbox.
    """
    # NOTE(review): only the latest message is sent to the model — prior
    # turns are not included in the prompt. Possibly deliberate given the
    # small n_ctx=512; confirm whether multi-turn context is wanted.
    output = llm(message, max_tokens=128)
    response = output["choices"][0]["text"].strip()

    # Copy rather than mutate the list Gradio handed us, then append the
    # new user/assistant turns in the "messages" dict format.
    history = list(history or [])
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return history, ""


# Gradio UI: a chatbot view, a textbox that submits on Enter, and a clear
# button that resets the conversation.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 چت با مدل درنا")
    chatbot = gr.Chatbot(label="درنا", type="messages")
    msg = gr.Textbox(label="پیام شما...")
    clear = gr.Button("پاک‌سازی چت")

    # Second output clears the textbox after each submit (chat_fn returns "").
    msg.submit(chat_fn, [msg, chatbot], [chatbot, msg])
    clear.click(lambda: [], inputs=[], outputs=[chatbot], queue=False)

if __name__ == "__main__":
    demo.launch()