Datangtang committed on
Commit
29889da
·
verified ·
1 Parent(s): 9aa1169

继续修改bug

Browse files
Files changed (1) hide show
  1. app.py +19 -28
app.py CHANGED
@@ -3,21 +3,21 @@ import gradio as gr
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
- # ============ 下载模型 ==============
7
- # 从环境变量读取 HF Token(在 Spaces → Settings → Secrets 设置)
8
  HF_TOKEN = os.environ.get("HF_Token")
9
 
10
- # 模型仓库与文件
11
  REPO_ID = "Datangtang/GGUF3B"
12
  FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
13
 
 
14
  model_path = hf_hub_download(
15
  repo_id=REPO_ID,
16
  filename=FILE_NAME,
17
  token=HF_TOKEN
18
  )
19
 
20
- # ============ 加载模型 ==============
21
  llm = Llama(
22
  model_path=model_path,
23
  n_ctx=4096,
@@ -25,47 +25,38 @@ llm = Llama(
25
  chat_format="llama-3",
26
  )
27
 
28
-
29
- # ============ 推理函数 ==============
30
  def chat_fn(history, user_input):
31
  """
32
- history Gradio 聊天历史
33
- user_input 为当前用户输入
34
  """
35
- messages = []
36
-
37
- # 组织对话历史,适配 llama_cpp 的聊天格式
38
- for role, text in history:
39
- if role == "user":
40
- messages.append({"role": "user", "content": text})
41
- else:
42
- messages.append({"role": "assistant", "content": text})
43
 
44
- # 新输入
45
- messages.append({"role": "user", "content": user_input})
46
 
47
  # 调用 LLM
48
- result = llm.create_chat_completion(
49
- messages=messages,
50
  max_tokens=512,
51
  temperature=0.7,
52
  top_p=0.95
53
  )
54
 
55
- output = result["choices"][0]["message"]["content"]
 
 
 
56
 
57
- # 返回:更新后的历史记录
58
- history.append(("user", user_input))
59
- history.append(("assistant", output))
60
  return history, ""
61
 
62
 
63
- # ============ Gradio UI ==============
64
  with gr.Blocks() as demo:
65
- gr.Markdown("# 💬 Chat with Your Fine-tuned LLM")
66
 
67
- chatbot = gr.Chatbot(height=500)
68
- user_input = gr.Textbox(show_label=False, placeholder="Enter message...")
69
  submit = gr.Button("Send")
70
 
71
  submit.click(
 
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
+ # ============ 环境变量中读取 token ==============
 
7
  HF_TOKEN = os.environ.get("HF_Token")
8
 
9
+ # ============ 模型配置 ==============
10
  REPO_ID = "Datangtang/GGUF3B"
11
  FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
12
 
13
+ # 下载模型
14
  model_path = hf_hub_download(
15
  repo_id=REPO_ID,
16
  filename=FILE_NAME,
17
  token=HF_TOKEN
18
  )
19
 
20
+ # 加载模型(llama_cpp)
21
  llm = Llama(
22
  model_path=model_path,
23
  n_ctx=4096,
 
25
  chat_format="llama-3",
26
  )
27
 
28
+ # ============ 核心对话函数 ==============
 
29
def chat_fn(history, user_input):
    """Run one chat turn: record the user message, query the model, record the reply.

    Args:
        history: conversation so far as a list of message dicts,
            each shaped ``{"role": ..., "content": ...}`` (the format used by
            ``gr.Chatbot(type="messages")`` and ``llm.create_chat_completion``).
        user_input: the text the user just submitted.

    Returns:
        A ``(history, "")`` tuple — the updated message list for the Chatbot,
        plus an empty string to clear the input textbox.
    """
    # The in-place append is deliberate: the same list object doubles as the
    # model prompt and the Chatbot display state.
    history.append({"role": "user", "content": user_input})

    # Ask the model to continue the conversation.
    completion = llm.create_chat_completion(
        messages=history,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )

    # Pull the assistant text out of the OpenAI-style response payload and
    # store it as the next turn.
    reply = completion["choices"][0]["message"]["content"]
    history.append({"role": "assistant", "content": reply})

    return history, ""
52
 
53
 
54
+ # ============ Gradio 界面 ==============
55
  with gr.Blocks() as demo:
56
+ gr.Markdown("# 💬 Chat with Your Finetuned LLM")
57
 
58
+ chatbot = gr.Chatbot(height=500, type="messages") # ⭐ 注意 type="messages"
59
+ user_input = gr.Textbox(show_label=False, placeholder="Ask your model...")
60
  submit = gr.Button("Send")
61
 
62
  submit.click(