Marcus719 committed on
Commit
5374b45
·
verified ·
1 Parent(s): 064d52c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# --- Configuration ---
# Replace with the repository ID of the GGUF checkpoint you uploaded.
REPO_ID = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
FILENAME = "unsloth.Q4_K_M.gguf"

# --- Download and load the model (CPU) ---
# hf_hub_download caches the file locally and returns its path.
print(f"正在下载模型 {FILENAME} ...")
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

print("正在加载模型到内存...")
# n_ctx controls the context-window length; 2048 is a common value.
llm = Llama(model_path=model_path, n_ctx=2048)
# --- Define the response function ---
def chat_response(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Parameters
    ----------
    message : str
        The current user input.
    history : list
        Previous turns as supplied by Gradio. Both history formats are
        accepted: legacy ``(user_msg, assistant_msg)`` tuples and the
        ``type="messages"`` format of ``{"role": ..., "content": ...}``
        dicts used by newer Gradio versions.

    Returns
    -------
    str
        The generated assistant message text.
    """
    # System prompt; can be tuned to match the fine-tuning dataset.
    system_prompt = "You are a helpful assistant trained on the FineTome dataset."

    messages = [{"role": "system", "content": system_prompt}]

    # Add the conversation history, accepting either Gradio history format.
    for turn in history:
        if isinstance(turn, dict):
            # Messages format: already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple format: (user_msg, assistant_msg). The assistant slot can
            # be None for an unanswered turn — skip it rather than sending
            # None as content to llama.cpp.
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user input.
    messages.append({"role": "user", "content": message})

    # Generate the reply.
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=256,  # cap on generated length
        temperature=0.7,
        top_p=0.9,
    )

    return response['choices'][0]['message']['content']
# --- Launch Gradio ---
# ChatInterface wires chat_response into a ready-made chat UI; examples are
# shown as clickable starter prompts.
demo = gr.ChatInterface(
    fn=chat_response,
    title="Llama 3.2 Lab2 Demo (GGUF)",
    description="Running on CPU via llama.cpp",
    examples=["Hello!", "Explain machine learning."],
)

# Only start the web server when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()