han145 committed on
Commit
e170451
·
verified ·
1 Parent(s): a5234be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -37
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
- import json
5
 
6
  # 全局变量,避免重复加载
7
  model = None
@@ -10,30 +9,57 @@ tokenizer = None
def load_model():
    """Load the tokenizer and the causal-LM model into the module globals.

    The results are stored in the module-level names ``tokenizer`` and
    ``model``. Any exception raised while loading is caught and reported
    with ``print``; nothing is re-raised and nothing is returned.
    """
    global model, tokenizer
    # Distilled checkpoint; the original author suggests it to save resources.
    model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        load_options = {
            "torch_dtype": torch.float16,  # half precision to reduce memory use
            "device_map": "auto",  # let the library pick the device placement
            "low_cpu_mem_usage": True,  # reduce CPU memory use
        }
        model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)
        print("模型加载成功!")
    except Exception as load_error:
        print(f"模型加载失败: {load_error}")
25
 
26
- def openai_compatible_api(message, history):
27
- """处理OpenAI格式的请求"""
 
 
28
  if model is None:
29
  load_model()
30
-
31
- # 构建符合DeepSeek模型要求的对话格式
32
- # 注意:请根据您使用的具体模型调整提示词模板
33
- prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
34
 
35
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  with torch.no_grad():
38
  outputs = model.generate(
39
  **inputs,
@@ -42,34 +68,49 @@ def openai_compatible_api(message, history):
42
  top_p=0.9,
43
  do_sample=True,
44
  pad_token_id=tokenizer.eos_token_id,
45
- eos_token_id=tokenizer.eos_token_id
 
46
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
- # 提取助理的回复部分
50
- generated_text = response.split("<|im_start|>assistant\n")[-1].strip()
51
-
52
- # 返回OpenAI兼容格式
53
- return {
54
- "choices": [{
55
- "message": {
56
- "role": "assistant",
57
- "content": generated_text
58
- }
59
- }]
60
- }
61
-
# Load the model before the Gradio interface starts (optional).
load_model()

# Gradio chat interface wired to the request handler.
demo = gr.ChatInterface(
    openai_compatible_api,
    title="DeepSeek API Service",
    description="OpenAI-compatible API for DeepSeek-R1",
    examples=["你好请介绍一下你自己", "写一个Python函数计算斐波那契数列"],
)

# Corrected launch call: the show_api argument has been removed.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
 
4
 
5
  # 全局变量,避免重复加载
6
  model = None
 
def load_model():
    """Load the tokenizer and the causal-LM model into the module globals.

    The results are stored in the module-level names ``tokenizer`` and
    ``model``. When the tokenizer has no pad token, its EOS token is reused
    as the pad token. Any exception raised while loading is caught and
    reported with ``print``; nothing is re-raised and nothing is returned.
    """
    global model, tokenizer
    model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        load_options = {
            "torch_dtype": torch.float16,
            "device_map": "auto",
            "low_cpu_mem_usage": True,
        }
        model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)
        # Make sure the tokenizer has a pad token; fall back to EOS if not.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        print("模型加载成功!")
    except Exception as load_error:
        print(f"模型加载失败: {load_error}")
27
 
28
+ def chat_with_deepseek(message, history):
29
+ """与DeepSeek模型聊天 - 修正版"""
30
+ global model, tokenizer
31
+
32
  if model is None:
33
  load_model()
 
 
 
 
34
 
35
+ # 构建对话历史
36
+ conversation = []
37
+ for user_msg, assistant_msg in history:
38
+ conversation.append({"role": "user", "content": user_msg})
39
+ conversation.append({"role": "assistant", "content": assistant_msg})
40
+ conversation.append({"role": "user", "content": message})
41
+
42
+ # 使用tokenizer的apply_chat_template方法(如果支持)
43
+ try:
44
+ prompt = tokenizer.apply_chat_template(
45
+ conversation,
46
+ tokenize=False,
47
+ add_generation_prompt=True
48
+ )
49
+ except:
50
+ # 如果不支持apply_chat_template,使用简单格式
51
+ prompt = ""
52
+ for msg in conversation:
53
+ if msg["role"] == "user":
54
+ prompt += f"<|im_start|>user\n{msg['content']}<|im_end|>\n"
55
+ else:
56
+ prompt += f"<|im_start|>assistant\n{msg['content']}<|im_end|>\n"
57
+ prompt += "<|im_start|>assistant\n"
58
+
59
+ # 编码输入
60
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
61
+
62
+ # 生成回复
63
  with torch.no_grad():
64
  outputs = model.generate(
65
  **inputs,
 
68
  top_p=0.9,
69
  do_sample=True,
70
  pad_token_id=tokenizer.eos_token_id,
71
+ eos_token_id=tokenizer.eos_token_id,
72
+ repetition_penalty=1.1
73
  )
74
+
75
+ # 解码回复
76
+ response = tokenizer.decode(outputs[0], skip_special_tokens=False)
77
+
78
+ # 关键修正:提取助理的回复部分
79
+ if "<|im_start|>assistant" in response:
80
+ # 找到最后一个assistant标记开始的位置
81
+ assistant_start = response.rfind("<|im_start|>assistant")
82
+ if assistant_start != -1:
83
+ assistant_content = response[assistant_start:]
84
+ # 提取assistant标记后的内容
85
+ if "\n" in assistant_content:
86
+ content_start = assistant_content.find("\n") + 1
87
+ generated_text = assistant_content[content_start:].split("<|im_end|>")[0].strip()
88
+ else:
89
+ generated_text = assistant_content.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
90
+ else:
91
+ generated_text = "抱歉,我无法生成合适的回复。"
92
+ else:
93
+ # 如果找不到标记,返回整个响应(去除提示部分)
94
+ generated_text = response.replace(prompt, "").strip()
95
+
96
+ # 关键修改:直接返回字符串,而不是OpenAI格式的字典
97
+ return generated_text
98
 
99
+ # 预先加载模型(可选,会延长启动时间但减少第一次请求的延迟)
100
+ # load_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
# Gradio chat interface wired to the chat handler. Example caching is
# disabled; the original author notes this avoids formatting problems.
demo = gr.ChatInterface(
    chat_with_deepseek,
    title="DeepSeek-R1 聊天助手",
    description="基于DeepSeek-R1-Distill-Qwen-1.5B的聊天机器人",
    examples=["你好!", "请介绍一下你自己", "写一个Python函数计算斐波那契数列"],
    cache_examples=False,
)

if __name__ == "__main__":
    # Serve on all interfaces, port 7860, with share explicitly off.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)