Update app.py
app.py CHANGED
@@ -3,21 +3,21 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
-# ============
+# ============ Download the model ==============
+# Read the HF token from the environment (set under Spaces → Settings → Secrets)
 HF_TOKEN = os.environ.get("HF_Token")
 
-#
+# Model repo and file
 REPO_ID = "Datangtang/GGUF3B"
 FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
 
-# Download the model
 model_path = hf_hub_download(
     repo_id=REPO_ID,
     filename=FILE_NAME,
     token=HF_TOKEN
 )
 
-# Load the model
+# ============ Load the model ==============
 llm = Llama(
     model_path=model_path,
     n_ctx=4096,
@@ -26,44 +26,52 @@ llm = Llama(
 )
 
 
-# ============
-def chat_fn(
+# ============ Inference function ==============
+def chat_fn(history, user_input):
     """
-
-    user_input
+    history is the Gradio chat history
+    user_input is the current user input
     """
+    messages = []
 
-    #
+    # Assemble the conversation history into llama_cpp's chat format
+    for role, text in history:
+        if role == "user":
+            messages.append({"role": "user", "content": text})
+        else:
+            messages.append({"role": "assistant", "content": text})
+
+    # New input
     messages.append({"role": "user", "content": user_input})
 
-    #
+    # Call the LLM
     result = llm.create_chat_completion(
         messages=messages,
         max_tokens=512,
         temperature=0.7,
-        top_p=0.95
+        top_p=0.95
     )
 
-
-
-    # Append the model's reply
-    messages.append({"role": "assistant", "content": bot_reply})
+    output = result["choices"][0]["message"]["content"]
 
-
+    # Return the updated history
+    history.append(("user", user_input))
+    history.append(("assistant", output))
+    return history, ""
 
 
-# ============ Gradio UI
+# ============ Gradio UI ==============
 with gr.Blocks() as demo:
-    gr.Markdown("# 💬 Chat with Your
+    gr.Markdown("# 💬 Chat with Your Fine-tuned LLM")
 
-    chatbot = gr.Chatbot(height=500
-    user_input = gr.Textbox(show_label=False, placeholder="
+    chatbot = gr.Chatbot(height=500)
+    user_input = gr.Textbox(show_label=False, placeholder="Enter message...")
     submit = gr.Button("Send")
 
     submit.click(
         fn=chat_fn,
         inputs=[chatbot, user_input],
-        outputs=[chatbot, user_input]
+        outputs=[chatbot, user_input]
     )
 
 if __name__ == "__main__":
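
For reference only (not part of this commit): a minimal standalone sketch of what the updated chat_fn does with the chat history and with the create_chat_completion result. The sample history, user input, and placeholder response below are made up; llama-cpp-python's create_chat_completion returns an OpenAI-style dict of the shape shown.

# Illustrative walkthrough of the new chat_fn logic (placeholder values only).

# 1. Convert the stored (role, text) history into chat messages.
history = [("user", "Hi"), ("assistant", "Hello! How can I help?")]
user_input = "Which model are you running?"

messages = []
for role, text in history:
    if role == "user":
        messages.append({"role": "user", "content": text})
    else:
        messages.append({"role": "assistant", "content": text})
messages.append({"role": "user", "content": user_input})

# 2. create_chat_completion returns an OpenAI-style dict; a placeholder response:
result = {
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant", "content": "A Llama 3.2 3B Instruct GGUF."},
            "finish_reason": "stop",
        }
    ]
}
output = result["choices"][0]["message"]["content"]

# 3. Append the new turn and return the updated history plus "" to clear the textbox.
history.append(("user", user_input))
history.append(("assistant", output))
print(history[-2:])
# [('user', 'Which model are you running?'), ('assistant', 'A Llama 3.2 3B Instruct GGUF.')]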