Datangtang committed on
Commit
f19f181
·
verified ·
1 Parent(s): c55e754

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -25
app.py CHANGED
@@ -3,13 +3,10 @@ import gradio as gr
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
- print("Gradio version:", gr.__version__)
7
-
8
-
9
- # ============ 环境变量中读取 token ==============
10
  HF_TOKEN = os.environ.get("HF_Token")
11
 
12
- # ============ 模型配置 ==============
13
  REPO_ID = "Datangtang/GGUF3B"
14
  FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
15
 
@@ -20,7 +17,7 @@ model_path = hf_hub_download(
20
  token=HF_TOKEN
21
  )
22
 
23
- # 加载模型(llama_cpp)
24
  llm = Llama(
25
  model_path=model_path,
26
  n_ctx=4096,
@@ -28,44 +25,45 @@ llm = Llama(
28
  chat_format="llama-3",
29
  )
30
 
31
- # ============ 核心对话函数 ==============
32
- def chat_fn(history, user_input):
 
33
  """
34
- history: List[{"role": "...", "content": "..."}]
35
- user_input: 当前用户的输入
36
  """
37
 
38
- # 将用户输入添加到历史
39
- history.append({"role": "user", "content": user_input})
40
 
41
- # 调用 LLM
42
- response = llm.create_chat_completion(
43
- messages=history,
44
  max_tokens=512,
45
  temperature=0.7,
46
- top_p=0.95
47
  )
48
 
49
- assistant_msg = response["choices"][0]["message"]["content"]
50
 
51
- # 添加模型回复到历史
52
- history.append({"role": "assistant", "content": assistant_msg})
53
 
54
- return history, ""
55
 
56
 
57
- # ============ Gradio 界面 ==============
58
  with gr.Blocks() as demo:
59
- gr.Markdown("# 💬 Chat with Your Finetuned LLM")
60
 
61
- chatbot = gr.Chatbot(height=500, type="messages") # ⭐ 注意 type="messages"
62
- user_input = gr.Textbox(show_label=False, placeholder="Ask your model...")
63
  submit = gr.Button("Send")
64
 
65
  submit.click(
66
  fn=chat_fn,
67
  inputs=[chatbot, user_input],
68
- outputs=[chatbot, user_input]
69
  )
70
 
71
  if __name__ == "__main__":
 
3
  from huggingface_hub import hf_hub_download
4
  from llama_cpp import Llama
5
 
6
+ # ============ 读取 HF Token ==============
 
 
 
7
  HF_TOKEN = os.environ.get("HF_Token")
8
 
9
+ # ============ 模型信息 ==============
10
  REPO_ID = "Datangtang/GGUF3B"
11
  FILE_NAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
12
 
 
17
  token=HF_TOKEN
18
  )
19
 
20
+ # 加载模型
21
  llm = Llama(
22
  model_path=model_path,
23
  n_ctx=4096,
 
25
  chat_format="llama-3",
26
  )
27
 
28
+
29
+ # ============ 核心对话函数(Gradio 6 消息格式) ==============
30
+ def chat_fn(messages, user_input):
31
  """
32
+ messages: List of {"role": "...", "content": "..."}
33
+ user_input: 当前用户输入
34
  """
35
 
36
+ # 添加用户输入
37
+ messages.append({"role": "user", "content": user_input})
38
 
39
+ # llama_cpp 调用
40
+ result = llm.create_chat_completion(
41
+ messages=messages,
42
  max_tokens=512,
43
  temperature=0.7,
44
+ top_p=0.95,
45
  )
46
 
47
+ bot_reply = result["choices"][0]["message"]["content"]
48
 
49
+ # 添加模型回复
50
+ messages.append({"role": "assistant", "content": bot_reply})
51
 
52
+ return messages, ""
53
 
54
 
55
+ # ============ Gradio UI (6.0.2) ==============
56
  with gr.Blocks() as demo:
57
+ gr.Markdown("# 💬 Chat with Your Finetuned LLM (Gradio 6)")
58
 
59
+ chatbot = gr.Chatbot(height=500, type="messages")
60
+ user_input = gr.Textbox(show_label=False, placeholder="Type something...")
61
  submit = gr.Button("Send")
62
 
63
  submit.click(
64
  fn=chat_fn,
65
  inputs=[chatbot, user_input],
66
+ outputs=[chatbot, user_input],
67
  )
68
 
69
  if __name__ == "__main__":