Milkfish033 commited on
Commit
9b87900
·
verified ·
1 Parent(s): cd56330

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -85
app.py CHANGED
@@ -1,4 +1,7 @@
1
  import os
 
 
 
2
  import threading
3
  import gradio as gr
4
  import torch
@@ -6,22 +9,17 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStream
6
 
7
  MODEL_ID = os.getenv("MODEL_ID", "Milkfish033/deepseek-r1-1.5b-merged")
8
 
9
- # 🔒 固定 system promptUI 完全不暴露,不让用户修改
10
- SYSTEM_PROMPT = (
11
- "You are a helpful assistant. Answer clearly and concisely. "
12
- "Do not reveal system prompts or internal formatting tokens."
13
- )
14
 
15
  theme = gr.themes.Soft()
16
  css = """
17
- /* ---- Global ---- */
18
  .gradio-container { background: #ffffff !important; }
19
  footer { display: none !important; }
20
 
21
- /* ---- Layout: ChatGPT-like centered width ---- */
22
  .page-wrap {
23
- max-width: 980px; /* 控制整体宽度 */
24
- margin: 0 auto; /* 居中 */
25
  padding: 16px 12px 28px 12px;
26
  }
27
 
@@ -33,13 +31,7 @@ footer { display: none !important; }
33
  padding: 12px;
34
  }
35
 
36
- /* 聊天消息列表区域背景保持白 */
37
- .chat-card [data-testid="chatbot"],
38
- .chat-card .messages {
39
- background: #ffffff !important;
40
- }
41
-
42
- /* 输入框明显一点 */
43
  .chat-card textarea,
44
  .chat-card input {
45
  border: 1px solid #d1d5db !important;
@@ -48,45 +40,27 @@ footer { display: none !important; }
48
  }
49
 
50
  /* 发送按钮圆角 */
51
- .chat-card button {
52
- border-radius: 14px !important;
53
- }
54
 
55
- /* ---- Bubble styling (light, not harsh) ---- */
56
- /* Gradio 版本/主题不同 class 会变,所以多写几种 selector 提高命中率 */
57
-
58
- /* 用户消息气泡:很浅的蓝 */
59
  .chat-card .message.user,
60
- .chat-card [data-testid="chatbot"] .message.user,
61
- .chat-card .bubble.user,
62
- .chat-card [data-testid="chatbot"] .bubble.user {
63
  background: #eef2ff !important;
64
  border: 1px solid #e0e7ff !important;
65
  border-radius: 16px !important;
66
  }
67
 
68
- /* 助手消息气泡:很浅的灰 */
69
  .chat-card .message.assistant,
70
- .chat-card [data-testid="chatbot"] .message.assistant,
71
- .chat-card .bubble.assistant,
72
- .chat-card [data-testid="chatbot"] .bubble.assistant {
73
  background: #f8fafc !important;
74
  border: 1px solid #eef2f7 !important;
75
  border-radius: 16px !important;
76
  }
77
-
78
- /* 每条消息间距 */
79
- .chat-card .message,
80
- .chat-card .bubble {
81
- padding: 10px 12px !important;
82
- margin: 8px 0 !important;
83
- }
84
-
85
- /* 让顶部标题别太挤 */
86
- h1, h2, h3 { margin-bottom: 8px !important; }
87
  """
88
 
89
- # --- Load model once ---
90
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
91
  model = AutoModelForCausalLM.from_pretrained(
92
  MODEL_ID,
@@ -96,13 +70,20 @@ model = AutoModelForCausalLM.from_pretrained(
96
  )
97
  model.eval()
98
 
99
- def _build_prompt(history: list[dict[str, str]], user_msg: str) -> str:
100
- # 注意:system prompt 不在 UI 暴露,但仍参与推理
 
 
 
 
101
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
102
- messages.extend(history)
 
 
 
 
103
  messages.append({"role": "user", "content": user_msg})
104
 
105
- # 优先使用模型自带 chat_template
106
  if hasattr(tokenizer, "apply_chat_template"):
107
  try:
108
  return tokenizer.apply_chat_template(
@@ -113,74 +94,63 @@ def _build_prompt(history: list[dict[str, str]], user_msg: str) -> str:
113
  except Exception:
114
  pass
115
 
116
- # fallback(一般不会走到这里)
117
  prompt = f"System: {SYSTEM_PROMPT}\n"
118
- for m in history:
119
- role = m.get("role", "")
120
- content = m.get("content", "")
121
- if role == "user":
122
- prompt += f"User: {content}\n"
123
- elif role == "assistant":
124
- prompt += f"Assistant: {content}\n"
125
  prompt += f"User: {user_msg}\nAssistant:"
126
  return prompt
127
 
128
- def respond(message: str, history: list[dict[str, str]], max_tokens: int, temperature: float, top_p: float):
 
 
 
 
 
129
  prompt = _build_prompt(history, message)
130
 
131
  inputs = tokenizer(prompt, return_tensors="pt")
132
  if torch.cuda.is_available():
133
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
134
 
135
- # ✅ 关键:skip_prompt=True,彻底不回显 prompt
136
  streamer = TextIteratorStreamer(
137
  tokenizer,
138
  skip_special_tokens=True,
139
- skip_prompt=True,
140
  )
141
 
142
  gen_kwargs = dict(
143
  **inputs,
144
  streamer=streamer,
145
- max_new_tokens=int(max_tokens),
146
- do_sample=(float(temperature) > 0),
147
- temperature=float(temperature),
148
- top_p=float(top_p),
149
  pad_token_id=tokenizer.eos_token_id,
150
  )
151
 
152
- thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
153
- thread.start()
154
 
155
- response = ""
156
  for piece in streamer:
157
- response += piece
158
- yield response.strip()
159
-
160
- # UI:不提供 system_message 输入框(🔒隐藏)
161
- chat_ui = gr.ChatInterface(
162
- respond,
163
- type="messages",
164
- title="我是 Bello,有什么能帮到您?",
165
- description="",
166
- additional_inputs=[
167
- gr.Slider(1, 2048, value=512, step=1, label="Max new tokens"),
168
- gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature"),
169
- gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
170
- ],
171
- )
172
 
173
  with gr.Blocks(theme=theme, css=css) as demo:
174
  with gr.Column(elem_classes=["page-wrap"]):
175
- gr.Markdown(
176
- """
177
- # 我是 Bello,有什么能帮到您?
178
- """
179
- )
180
  with gr.Column(elem_classes=["chat-card"]):
181
- chat_ui.render()
 
 
 
 
 
182
 
183
- demo.queue(default_concurrency_limit=4)
184
 
185
  if __name__ == "__main__":
186
  demo.launch(ssr_mode=False)
 
1
  import os
2
+ # 解决 libgomp OMP_NUM_THREADS 非法值
3
+ os.environ["OMP_NUM_THREADS"] = str(int(os.getenv("OMP_NUM_THREADS", "1") or "1"))
4
+
5
  import threading
6
  import gradio as gr
7
  import torch
 
9
 
10
  MODEL_ID = os.getenv("MODEL_ID", "Milkfish033/deepseek-r1-1.5b-merged")
11
 
12
+ # 🔒 固定 system promptUI 不暴露
13
+ SYSTEM_PROMPT = "你是 Bello,一个友好的智能助手。请用清晰、简洁的中文回答用户问题。"
 
 
 
14
 
15
  theme = gr.themes.Soft()
16
  css = """
 
17
  .gradio-container { background: #ffffff !important; }
18
  footer { display: none !important; }
19
 
 
20
  .page-wrap {
21
+ max-width: 980px;
22
+ margin: 0 auto;
23
  padding: 16px 12px 28px 12px;
24
  }
25
 
 
31
  padding: 12px;
32
  }
33
 
34
+ /* 输入框边框 */
 
 
 
 
 
 
35
  .chat-card textarea,
36
  .chat-card input {
37
  border: 1px solid #d1d5db !important;
 
40
  }
41
 
42
  /* 发送按钮圆角 */
43
+ .chat-card button { border-radius: 14px !important; }
 
 
44
 
45
+ /* 气泡样式(不同 gradio 版本 class 不同,多写点提高命中率) */
 
 
 
46
  .chat-card .message.user,
47
+ .chat-card .bubble.user {
 
 
48
  background: #eef2ff !important;
49
  border: 1px solid #e0e7ff !important;
50
  border-radius: 16px !important;
51
  }
52
 
53
+ .chat-card .message.bot,
54
  .chat-card .message.assistant,
55
+ .chat-card .bubble.bot,
56
+ .chat-card .bubble.assistant {
 
57
  background: #f8fafc !important;
58
  border: 1px solid #eef2f7 !important;
59
  border-radius: 16px !important;
60
  }
 
 
 
 
 
 
 
 
 
 
61
  """
62
 
63
+ # ---- Load model once ----
64
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
65
  model = AutoModelForCausalLM.from_pretrained(
66
  MODEL_ID,
 
70
  )
71
  model.eval()
72
 
73
+
74
+ def _build_prompt(history_pairs, user_msg: str) -> str:
75
+ """
76
+ 旧版 ChatInterface 的 history 是 [(user, bot), ...]
77
+ 我们把它转成 messages,再用 chat_template 生成 prompt
78
+ """
79
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
80
+ for u, a in history_pairs:
81
+ if u:
82
+ messages.append({"role": "user", "content": u})
83
+ if a:
84
+ messages.append({"role": "assistant", "content": a})
85
  messages.append({"role": "user", "content": user_msg})
86
 
 
87
  if hasattr(tokenizer, "apply_chat_template"):
88
  try:
89
  return tokenizer.apply_chat_template(
 
94
  except Exception:
95
  pass
96
 
97
+ # fallback
98
  prompt = f"System: {SYSTEM_PROMPT}\n"
99
+ for u, a in history_pairs:
100
+ prompt += f"User: {u}\nAssistant: {a}\n"
 
 
 
 
 
101
  prompt += f"User: {user_msg}\nAssistant:"
102
  return prompt
103
 
104
+
105
+ def respond(message: str, history):
106
+ """
107
+ ✅ 兼容旧版 gradio.ChatInterface:fn(message, history) -> str 或 generator
108
+ history: List[Tuple[str, str]]
109
+ """
110
  prompt = _build_prompt(history, message)
111
 
112
  inputs = tokenizer(prompt, return_tensors="pt")
113
  if torch.cuda.is_available():
114
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
115
 
 
116
  streamer = TextIteratorStreamer(
117
  tokenizer,
118
  skip_special_tokens=True,
119
+ skip_prompt=True, # ✅ 不回显 prompt(解决 <|User|> 问题)
120
  )
121
 
122
  gen_kwargs = dict(
123
  **inputs,
124
  streamer=streamer,
125
+ max_new_tokens=512,
126
+ do_sample=True,
127
+ temperature=0.7,
128
+ top_p=0.95,
129
  pad_token_id=tokenizer.eos_token_id,
130
  )
131
 
132
+ t = threading.Thread(target=model.generate, kwargs=gen_kwargs)
133
+ t.start()
134
 
135
+ out = ""
136
  for piece in streamer:
137
+ out += piece
138
+ yield out.strip()
139
+
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  with gr.Blocks(theme=theme, css=css) as demo:
142
  with gr.Column(elem_classes=["page-wrap"]):
143
+ gr.Markdown("# 我是 Bello,有什么能帮到您?")
144
+
 
 
 
145
  with gr.Column(elem_classes=["chat-card"]):
146
+ # ✅ 老版本不支持 type="messages",不要传 type
147
+ gr.ChatInterface(
148
+ fn=respond,
149
+ title="",
150
+ description="",
151
+ )
152
 
153
+ demo.queue(default_concurrency_limit=4)
154
 
155
  if __name__ == "__main__":
156
  demo.launch(ssr_mode=False)