Datangtang committed on
Commit
29889da
·
verified ·
1 Parent(s): 9aa1169

继续修改bug

Browse files
Files changed (1) hide show
  1. app.py +19 -28
app.py CHANGED
@@ -3,21 +3,21 @@ import gradio as gr
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
- # ============ 下载模型 ==============
7
- # 从环境变量读取 HF Token(在 Spaces → Settings → Secrets 设置)
8
  HF_TOKEN = os.environ.get("HF_Token")
9
 
10
- # 模型仓库与文件
11
  REPO_ID = "Datangtang/GGUF3B"
12
  FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
13
 
 
14
  model_path = hf_hub_download(
15
  repo_id=REPO_ID,
16
  filename=FILE_NAME,
17
  token=HF_TOKEN
18
  )
19
 
20
- # ============ 加载模型 ==============
21
  llm = Llama(
22
  model_path=model_path,
23
  n_ctx=4096,
@@ -25,47 +25,38 @@ llm = Llama(
25
  chat_format="llama-3",
26
  )
27
 
28
-
29
- # ============ 推理函数 ==============
30
  def chat_fn(history, user_input):
31
  """
32
- history Gradio 聊天历史
33
- user_input 为当前用户输入
34
  """
35
- messages = []
36
-
37
- # 组织对话历史,适配 llama_cpp 的聊天格式
38
- for role, text in history:
39
- if role == "user":
40
- messages.append({"role": "user", "content": text})
41
- else:
42
- messages.append({"role": "assistant", "content": text})
43
 
44
- # 新输入
45
- messages.append({"role": "user", "content": user_input})
46
 
47
  # 调用 LLM
48
- result = llm.create_chat_completion(
49
- messages=messages,
50
  max_tokens=512,
51
  temperature=0.7,
52
  top_p=0.95
53
  )
54
 
55
- output = result["choices"][0]["message"]["content"]
 
 
 
56
 
57
- # 返回:更新后的历史记录
58
- history.append(("user", user_input))
59
- history.append(("assistant", output))
60
  return history, ""
61
 
62
 
63
- # ============ Gradio UI ==============
64
  with gr.Blocks() as demo:
65
- gr.Markdown("# 💬 Chat with Your Fine-tuned LLM")
66
 
67
- chatbot = gr.Chatbot(height=500)
68
- user_input = gr.Textbox(show_label=False, placeholder="Enter message...")
69
  submit = gr.Button("Send")
70
 
71
  submit.click(
 
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
+ # ============ 环境变量中读取 token ==============
 
7
  HF_TOKEN = os.environ.get("HF_Token")
8
 
9
+ # ============ 模型配置 ==============
10
  REPO_ID = "Datangtang/GGUF3B"
11
  FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
12
 
13
+ # 下载模型
14
  model_path = hf_hub_download(
15
  repo_id=REPO_ID,
16
  filename=FILE_NAME,
17
  token=HF_TOKEN
18
  )
19
 
20
+ # 加载模型(llama_cpp)
21
  llm = Llama(
22
  model_path=model_path,
23
  n_ctx=4096,
 
25
  chat_format="llama-3",
26
  )
27
 
28
+ # ============ 核心对话函数 ==============
 
29
def chat_fn(history, user_input):
    """Run one chat turn: record the user message, query the model, record the reply.

    Args:
        history: conversation so far as a list of message dicts,
            each shaped ``{"role": ..., "content": ...}`` (the format used by
            ``gr.Chatbot(type="messages")`` and ``llm.create_chat_completion``).
        user_input: the text the user just submitted.

    Returns:
        A ``(history, "")`` tuple — the updated message list for the Chatbot,
        plus an empty string to clear the input textbox.
    """
    # The in-place append is deliberate: the same list object doubles as the
    # model prompt and the Chatbot display state.
    history.append({"role": "user", "content": user_input})

    # Ask the model to continue the conversation.
    completion = llm.create_chat_completion(
        messages=history,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )

    # Pull the assistant text out of the OpenAI-style response payload and
    # store it as the next turn.
    reply = completion["choices"][0]["message"]["content"]
    history.append({"role": "assistant", "content": reply})

    return history, ""
52
 
53
 
54
+ # ============ Gradio 界面 ==============
55
  with gr.Blocks() as demo:
56
+ gr.Markdown("# 💬 Chat with Your Finetuned LLM")
57
 
58
+ chatbot = gr.Chatbot(height=500, type="messages") # ⭐ 注意 type="messages"
59
+ user_input = gr.Textbox(show_label=False, placeholder="Ask your model...")
60
  submit = gr.Button("Send")
61
 
62
  submit.click(