Datangtang committed on
Commit
c972030
·
verified ·
1 Parent(s): 28a304b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -21
app.py CHANGED
@@ -3,21 +3,21 @@ import gradio as gr
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
- # ============ 读取 HF Token ==============
 
7
  HF_TOKEN = os.environ.get("HF_Token")
8
 
9
- # ============ 模型信息 ==============
10
  REPO_ID = "Datangtang/GGUF3B"
11
  FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
12
 
13
- # 下载模型
14
  model_path = hf_hub_download(
15
  repo_id=REPO_ID,
16
  filename=FILE_NAME,
17
  token=HF_TOKEN
18
  )
19
 
20
- # 加载模型
21
  llm = Llama(
22
  model_path=model_path,
23
  n_ctx=4096,
@@ -26,44 +26,52 @@ llm = Llama(
26
  )
27
 
28
 
29
- # ============ 核心对话函数(Gradio 6 消息格式) ==============
30
- def chat_fn(messages, user_input):
31
  """
32
- messages: List of {"role": "...", "content": "..."}
33
- user_input: 当前用户输入
34
  """
 
35
 
36
- # 添加用户输入
 
 
 
 
 
 
 
37
  messages.append({"role": "user", "content": user_input})
38
 
39
- # llama_cpp 调用
40
  result = llm.create_chat_completion(
41
  messages=messages,
42
  max_tokens=512,
43
  temperature=0.7,
44
- top_p=0.95,
45
  )
46
 
47
- bot_reply = result["choices"][0]["message"]["content"]
48
-
49
- # 添加模型回复
50
- messages.append({"role": "assistant", "content": bot_reply})
51
 
52
- return messages, ""
 
 
 
53
 
54
 
55
- # ============ Gradio UI (6.0.2) ==============
56
  with gr.Blocks() as demo:
57
- gr.Markdown("# 💬 Chat with Your Finetuned LLM (Gradio 6)")
58
 
59
- chatbot = gr.Chatbot(height=500, type="messages")
60
- user_input = gr.Textbox(show_label=False, placeholder="Type something...")
61
  submit = gr.Button("Send")
62
 
63
  submit.click(
64
  fn=chat_fn,
65
  inputs=[chatbot, user_input],
66
- outputs=[chatbot, user_input],
67
  )
68
 
69
  if __name__ == "__main__":
 
3
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# ============ Model download ============
# Read the HF token from the environment (set under Spaces → Settings → Secrets).
# NOTE(review): the secret name here is "HF_Token" (mixed case) — confirm it
# matches the name configured in the Space, or this resolves to None.
HF_TOKEN = os.environ.get("HF_Token")

# Model repository and the GGUF file to fetch from it.
REPO_ID = "Datangtang/GGUF3B"
FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"

# Download (or reuse from cache) the quantized model file; returns a local path.
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILE_NAME,
    token=HF_TOKEN
)
19
 
20
+ # ============ 加载模型 ==============
21
  llm = Llama(
22
  model_path=model_path,
23
  n_ctx=4096,
 
26
  )
27
 
28
 
29
# ============ Inference function ============
def chat_fn(history, user_input):
    """Run one chat turn against the local GGUF model.

    Args:
        history: Gradio Chatbot history in tuple format — a list of
            (user_message, assistant_message) pairs, one per exchange.
        user_input: the text the user just submitted.

    Returns:
        A (updated_history, "") pair; the empty string clears the textbox.
    """
    messages = []

    # Rebuild the llama_cpp chat transcript from the Gradio history.
    # In Gradio's tuple format each history item is one (user, assistant)
    # exchange — NOT a (role, text) pair. Unpacking it as (role, text)
    # would mislabel every message and render the chat incorrectly.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Current turn.
    messages.append({"role": "user", "content": user_input})

    # Call the LLM.
    result = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95
    )

    output = result["choices"][0]["message"]["content"]

    # Append the completed exchange as a single (user, assistant) pair so the
    # Chatbot component displays it correctly; "" clears the input textbox.
    history.append((user_input, output))
    return history, ""
61
 
62
 
63
# ============ Gradio UI ============
# Minimal chat layout: a Chatbot panel, a single text input, and a send
# button wired to chat_fn (which returns the updated history plus an empty
# string to clear the textbox).
with gr.Blocks() as demo:
    gr.Markdown("# 💬 Chat with Your Fine-tuned LLM")

    chatbot = gr.Chatbot(height=500)
    user_input = gr.Textbox(show_label=False, placeholder="Enter message...")
    send_button = gr.Button("Send")

    # Clicking "Send" feeds (history, text) into chat_fn and writes its two
    # return values back into the same components.
    send_button.click(
        fn=chat_fn,
        inputs=[chatbot, user_input],
        outputs=[chatbot, user_input],
    )
76
 
77
  if __name__ == "__main__":