mac committed on
Commit
d70f24f
·
1 Parent(s): c51af21

rewrite ui

Browse files
Files changed (4) hide show
  1. app.py +27 -264
  2. index.html +470 -0
  3. requirements.txt +3 -1
  4. utils_chatbot.py +12 -69
app.py CHANGED
@@ -1,18 +1,18 @@
1
  # MiniCPM5-1B Demo
2
 
3
- from pathlib import Path
4
  import os
5
- import time
6
  import logging
7
  import threading
 
8
 
9
- import gradio as gr
10
  import spaces
11
  import torch
 
 
12
  from huggingface_hub import login
13
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
14
 
15
- from utils_chatbot import organize_messages_from_messages, stream2display_text, clean_final_text
16
 
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
@@ -34,20 +34,28 @@ model = AutoModelForCausalLM.from_pretrained(
34
  trust_remote_code=True,
35
  ).to("cuda")
36
 
 
37
 
 
 
38
  @spaces.GPU(duration=60)
39
- def gpu_generate_stream(inputs, history, temperature, top_p, enable_think):
 
 
 
 
 
 
 
 
40
  prompt_text = tokenizer.apply_chat_template(
41
- inputs,
42
  tokenize=False,
43
  add_generation_prompt=True,
44
- enable_thinking=enable_think,
45
  )
46
  model_inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")
47
 
48
- history.append({"role": "assistant", "content": ""})
49
- yield "", history
50
-
51
  streamer = TextIteratorStreamer(
52
  tokenizer,
53
  skip_prompt=True,
@@ -67,267 +75,22 @@ def gpu_generate_stream(inputs, history, temperature, top_p, enable_think):
67
  thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
68
  thread.start()
69
 
70
- stream_text = ""
71
- gen_tk_count = 0
72
- start_time = time.time()
73
-
74
  for new_token_text in streamer:
75
  if not new_token_text:
76
  continue
77
- stream_text += new_token_text
78
- gen_tk_count += 1
79
- elapsed = time.time() - start_time
80
- token_per_sec = gen_tk_count / elapsed if elapsed > 0 else 0
81
- display_text = stream2display_text(stream_text, token_per_sec)
82
- history[-1]["content"] = display_text
83
- yield "", history
84
 
85
  thread.join()
86
- history[-1]["content"] = clean_final_text(stream_text)
87
- yield "", history
88
-
89
-
90
- def gen_response_stream(message, history, temperature, top_p, enable_think):
91
- chat_msg_ls = organize_messages_from_messages(message, history)
92
- history.append({"role": "user", "content": message})
93
- yield from gpu_generate_stream(
94
- chat_msg_ls, history,
95
- temperature=temperature,
96
- top_p=top_p,
97
- enable_think=enable_think,
98
- )
99
-
100
-
101
- def create_app():
102
- assets_path = Path.cwd().absolute() / "assets"
103
- gr.set_static_paths(paths=[assets_path])
104
-
105
- with gr.Blocks() as demo:
106
- # Header
107
- gr.HTML(
108
- '<div class="app-header">'
109
- ' <div class="header-left">'
110
- ' <img src="https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png" alt="Logo" class="header-logo">'
111
- ' <div class="header-title">'
112
- ' <span class="title-main">MiniCPM5-1B</span>'
113
- ' <span class="title-sub">By OpenBMB</span>'
114
- ' </div>'
115
- ' </div>'
116
- ' <div class="header-badge">'
117
- ' <span class="status-dot"></span>'
118
- ' MiniCPM5-1B'
119
- ' </div>'
120
- '</div>'
121
- )
122
-
123
- with gr.Row(elem_classes=["main-row"]):
124
- with gr.Column(scale=4, elem_classes=["chat-col"]):
125
- chatbot = gr.Chatbot(
126
- show_label=False,
127
- placeholder="Send a message to start chatting...",
128
- height="70vh",
129
- elem_classes=["dark-chatbot"],
130
- )
131
- prompt = gr.Textbox(
132
- show_label=False,
133
- placeholder="Ask MiniCPM5...",
134
- lines=1,
135
- elem_classes=["input-pill"],
136
- )
137
-
138
- with gr.Column(scale=1, min_width=220, elem_classes=["settings-col"]):
139
- gr.HTML('<div class="settings-title">Settings</div>')
140
- temperature = gr.Slider(
141
- minimum=0, maximum=1, value=0.9, step=0.05,
142
- label="Temperature",
143
- elem_classes=["dark-slider"],
144
- )
145
- top_p = gr.Slider(
146
- minimum=0, maximum=1, value=0.95, step=0.01,
147
- label="Top-p",
148
- elem_classes=["dark-slider"],
149
- )
150
- enable_think = gr.Checkbox(
151
- label="Thinking",
152
- value=True,
153
- elem_classes=["dark-checkbox"],
154
- )
155
- clear = gr.Button(
156
- "Clear History",
157
- variant="secondary",
158
- elem_classes=["clear-btn"],
159
- )
160
-
161
- prompt.submit(
162
- gen_response_stream,
163
- inputs=[prompt, chatbot, temperature, top_p, enable_think],
164
- outputs=[prompt, chatbot],
165
- )
166
- clear.click(lambda: None, None, chatbot, queue=False)
167
-
168
- return demo
169
-
170
-
171
- THEME = gr.themes.Soft(
172
- primary_hue="indigo",
173
- secondary_hue="slate",
174
- neutral_hue="slate",
175
- font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
176
- )
177
-
178
- CSS = """
179
- footer { display: none !important; }
180
-
181
- /* Header */
182
- .app-header {
183
- display: flex;
184
- align-items: center;
185
- justify-content: space-between;
186
- padding: 16px 24px;
187
- border-bottom: 1px solid #e2e8f0;
188
- }
189
- .header-left {
190
- display: flex;
191
- align-items: center;
192
- gap: 12px;
193
- }
194
- .header-logo {
195
- width: 40px;
196
- height: 40px;
197
- border-radius: 10px;
198
- filter: drop-shadow(0 0 8px rgba(99,102,241,0.25));
199
- }
200
- .header-title {
201
- display: flex;
202
- flex-direction: column;
203
- }
204
- .title-main {
205
- font-size: 18px;
206
- font-weight: 700;
207
- letter-spacing: -0.02em;
208
- color: #1e293b;
209
- }
210
- .title-sub {
211
- font-size: 10px;
212
- text-transform: uppercase;
213
- letter-spacing: 0.2em;
214
- color: #94a3b8;
215
- font-weight: 600;
216
- }
217
- .header-badge {
218
- display: flex;
219
- align-items: center;
220
- gap: 6px;
221
- font-size: 10px;
222
- font-weight: 700;
223
- letter-spacing: 0.12em;
224
- text-transform: uppercase;
225
- color: #64748b;
226
- background: #f1f5f9;
227
- padding: 6px 14px;
228
- border-radius: 100px;
229
- border: 1px solid #e2e8f0;
230
- }
231
- .status-dot {
232
- width: 6px;
233
- height: 6px;
234
- border-radius: 50%;
235
- background: #22c55e;
236
- animation: pulse-dot 2s infinite;
237
- }
238
- @keyframes pulse-dot {
239
- 0%, 100% { opacity: 1; }
240
- 50% { opacity: 0.4; }
241
- }
242
-
243
- /* Layout */
244
- .main-row { gap: 0 !important; }
245
- .chat-col { padding-right: 8px !important; }
246
- .settings-col {
247
- border-left: 1px solid #e2e8f0 !important;
248
- padding: 20px !important;
249
- }
250
- .settings-title {
251
- font-size: 14px;
252
- font-weight: 700;
253
- color: #475569;
254
- margin-bottom: 12px;
255
- letter-spacing: 0.05em;
256
- text-transform: uppercase;
257
- }
258
-
259
- /* Chatbot styling */
260
- .dark-chatbot {
261
- border: none !important;
262
- background: transparent !important;
263
- }
264
- .dark-chatbot .message-row {
265
- animation: msg-fade-in 0.35s cubic-bezier(0.16, 1, 0.3, 1) forwards;
266
- }
267
- @keyframes msg-fade-in {
268
- from { opacity: 0; transform: translateY(12px); }
269
- to { opacity: 1; transform: translateY(0); }
270
- }
271
-
272
- /* User message bubble */
273
- .dark-chatbot .role-user .message-bubble-border {
274
- background: linear-gradient(135deg, #4f46e5, #6366f1) !important;
275
- border: none !important;
276
- border-radius: 20px 20px 4px 20px !important;
277
- box-shadow: 0 4px 14px rgba(79,70,229,0.2);
278
- }
279
- .dark-chatbot .role-user .message-bubble-border .message-content {
280
- color: #ffffff !important;
281
- }
282
-
283
- /* Bot message bubble */
284
- .dark-chatbot .role-assistant .message-bubble-border {
285
- background: #f8fafc !important;
286
- border: 1px solid #e2e8f0 !important;
287
- border-radius: 20px 20px 20px 4px !important;
288
- }
289
-
290
- /* Thinking block (blockquote) */
291
- .dark-chatbot .role-assistant blockquote {
292
- background: #eef2ff !important;
293
- border-left: 3px solid #6366f1 !important;
294
- border-radius: 4px 10px 10px 4px;
295
- padding: 10px 14px !important;
296
- color: #64748b !important;
297
- font-style: italic;
298
- margin-bottom: 10px !important;
299
- }
300
-
301
- /* Input pill */
302
- .input-pill {
303
- border: 1px solid #cbd5e1 !important;
304
- border-radius: 24px !important;
305
- transition: all 0.3s ease;
306
- }
307
- .input-pill:focus-within {
308
- border-color: #6366f1 !important;
309
- box-shadow: 0 0 0 3px rgba(99,102,241,0.1);
310
- }
311
-
312
- /* Clear button */
313
- .clear-btn button {
314
- border-radius: 12px !important;
315
- transition: all 0.2s ease;
316
- }
317
 
318
- /* Code blocks */
319
- .dark-chatbot pre {
320
- background: #f1f5f9 !important;
321
- border: 1px solid #e2e8f0 !important;
322
- border-radius: 10px !important;
323
- }
324
- .dark-chatbot code {
325
- color: #4f46e5 !important;
326
- }
327
- """
328
 
 
 
 
 
 
329
 
330
- demo = create_app()
331
 
332
  if __name__ == "__main__":
333
- demo.launch(theme=THEME, css=CSS)
 
1
  # MiniCPM5-1B Demo
2
 
 
3
  import os
 
4
  import logging
5
  import threading
6
+ from typing import Generator
7
 
 
8
  import spaces
9
  import torch
10
+ from fastapi.responses import HTMLResponse
11
+ from gradio import Server
12
  from huggingface_hub import login
13
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
14
 
15
+ from utils_chatbot import organize_messages
16
 
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
 
34
  trust_remote_code=True,
35
  ).to("cuda")
36
 
37
+ demo = Server()
38
 
39
+
40
+ @demo.api()
41
  @spaces.GPU(duration=60)
42
+ def predict(
43
+ message: str,
44
+ history: list[list] | None = None,
45
+ thinking_mode: bool = True,
46
+ temperature: float = 0.9,
47
+ top_p: float = 0.95,
48
+ ) -> Generator[str, None, None]:
49
+ messages = organize_messages(message, history)
50
+
51
  prompt_text = tokenizer.apply_chat_template(
52
+ messages,
53
  tokenize=False,
54
  add_generation_prompt=True,
55
+ enable_thinking=thinking_mode,
56
  )
57
  model_inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")
58
 
 
 
 
59
  streamer = TextIteratorStreamer(
60
  tokenizer,
61
  skip_prompt=True,
 
75
  thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
76
  thread.start()
77
 
78
+ full_text = ""
 
 
 
79
  for new_token_text in streamer:
80
  if not new_token_text:
81
  continue
82
+ full_text += new_token_text
83
+ yield full_text
 
 
 
 
 
84
 
85
  thread.join()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ @demo.get("/", response_class=HTMLResponse)
89
+ async def homepage():
90
+ html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
91
+ with open(html_path, "r", encoding="utf-8") as f:
92
+ return f.read()
93
 
 
94
 
95
  if __name__ == "__main__":
96
+ demo.launch(show_error=True)
index.html ADDED
@@ -0,0 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
6
+ <title>MiniCPM5-1B | OpenBMB</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
9
+ <script src="https://unpkg.com/lucide@latest"></script>
10
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
11
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.min.css">
12
+ <script src="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.min.js"></script>
13
+ <script src="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/contrib/auto-render.min.js"></script>
14
+ <style>
15
+ :root {
16
+ --bg: #ffffff;
17
+ --blue: #4f46e5;
18
+ --cyan: #6366f1;
19
+ --text: #1e293b;
20
+ --text-muted: #64748b;
21
+ --glass: rgba(0, 0, 0, 0.02);
22
+ --glass-border: #e2e8f0;
23
+ --accent: #4f46e5;
24
+ }
25
+
26
+ body {
27
+ font-family: 'Inter', sans-serif;
28
+ background-color: var(--bg);
29
+ color: var(--text);
30
+ height: 100vh;
31
+ margin: 0;
32
+ display: flex;
33
+ flex-direction: column;
34
+ overflow: hidden;
35
+ }
36
+
37
+ .chat-scroll-area {
38
+ flex: 1;
39
+ overflow-y: auto;
40
+ padding-bottom: 140px;
41
+ -webkit-overflow-scrolling: touch;
42
+ scroll-behavior: smooth;
43
+ }
44
+
45
+ .chat-scroll-area::-webkit-scrollbar { width: 4px; }
46
+ .chat-scroll-area::-webkit-scrollbar-thumb { background: #cbd5e1; border-radius: 10px; }
47
+
48
+ .message-bubble {
49
+ max-width: 85%;
50
+ animation: fadeIn 0.35s cubic-bezier(0.16, 1, 0.3, 1) forwards;
51
+ }
52
+
53
+ @keyframes fadeIn {
54
+ from { opacity: 0; transform: translateY(12px); }
55
+ to { opacity: 1; transform: translateY(0); }
56
+ }
57
+
58
+ .user-message {
59
+ background: linear-gradient(135deg, var(--blue), var(--cyan));
60
+ color: #ffffff;
61
+ box-shadow: 0 4px 14px rgba(79, 70, 229, 0.2);
62
+ border-radius: 20px 20px 4px 20px;
63
+ }
64
+
65
+ .bot-message {
66
+ background: #f8fafc;
67
+ border: 1px solid var(--glass-border);
68
+ border-radius: 20px 20px 20px 4px;
69
+ }
70
+
71
+ .thinking-block {
72
+ background: #eef2ff;
73
+ border-left: 3px solid var(--accent);
74
+ padding: 12px 16px;
75
+ margin-bottom: 12px;
76
+ border-radius: 4px 12px 12px 4px;
77
+ font-size: 14px;
78
+ color: var(--text-muted);
79
+ font-style: italic;
80
+ }
81
+
82
+ .typing-dot {
83
+ width: 4px; height: 4px;
84
+ background: var(--accent);
85
+ border-radius: 50%;
86
+ animation: bounce 1.4s infinite ease-in-out;
87
+ }
88
+
89
+ @keyframes bounce {
90
+ 0%, 80%, 100% { transform: scale(0.3); opacity: 0.4; }
91
+ 40% { transform: scale(1); opacity: 1; }
92
+ }
93
+
94
+ .input-pill {
95
+ background: #ffffff;
96
+ border: 1px solid var(--glass-border);
97
+ transition: all 0.3s ease;
98
+ }
99
+
100
+ .input-pill:focus-within {
101
+ border-color: rgba(79, 70, 229, 0.4);
102
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
103
+ }
104
+
105
+ .logo-glow {
106
+ filter: drop-shadow(0 0 8px rgba(99, 102, 241, 0.25));
107
+ }
108
+
109
+ .send-btn {
110
+ background: linear-gradient(135deg, var(--blue), var(--cyan));
111
+ transition: all 0.3s ease;
112
+ }
113
+ .send-btn:hover:not(:disabled) { transform: scale(1.05); filter: brightness(1.05); }
114
+
115
+ .settings-panel {
116
+ background: rgba(255, 255, 255, 0.98);
117
+ backdrop-filter: blur(20px);
118
+ border-left: 1px solid var(--glass-border);
119
+ transition: transform 0.4s cubic-bezier(0.16, 1, 0.3, 1);
120
+ }
121
+
122
+ .control-slider {
123
+ -webkit-appearance: none;
124
+ width: 100%;
125
+ height: 4px;
126
+ background: #e2e8f0;
127
+ border-radius: 2px;
128
+ outline: none;
129
+ }
130
+ .control-slider::-webkit-slider-thumb {
131
+ -webkit-appearance: none;
132
+ width: 12px; height: 12px;
133
+ background: var(--accent);
134
+ border-radius: 50%;
135
+ cursor: pointer;
136
+ }
137
+
138
+ .toggle-switch {
139
+ width: 36px; height: 20px;
140
+ background: #e2e8f0;
141
+ border-radius: 10px;
142
+ position: relative;
143
+ cursor: pointer;
144
+ transition: background 0.3s;
145
+ }
146
+ .toggle-switch.active { background: var(--accent); }
147
+ .toggle-switch::after {
148
+ content: '';
149
+ position: absolute;
150
+ top: 2px; left: 2px;
151
+ width: 16px; height: 16px;
152
+ background: white;
153
+ border-radius: 50%;
154
+ transition: transform 0.3s;
155
+ box-shadow: 0 1px 3px rgba(0,0,0,0.15);
156
+ }
157
+ .toggle-switch.active::after { transform: translateX(16px); }
158
+
159
+ .bot-message pre {
160
+ background: #f1f5f9;
161
+ border: 1px solid #e2e8f0;
162
+ border-radius: 10px;
163
+ padding: 12px;
164
+ overflow-x: auto;
165
+ }
166
+ .bot-message code { color: #4f46e5; }
167
+ </style>
168
+ </head>
169
+ <body>
170
+
171
+ <header class="h-16 flex items-center justify-between px-6 md:px-10 shrink-0 z-50 border-b border-slate-200 bg-white">
172
+ <div class="flex items-center gap-3">
173
+ <img src="https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png"
174
+ alt="OpenBMB" class="w-10 h-10 logo-glow rounded-lg">
175
+ <div>
176
+ <h1 class="text-lg font-bold text-slate-800">MiniCPM5-1B</h1>
177
+ <p class="text-[10px] text-slate-400 uppercase tracking-[0.2em] font-semibold">By OpenBMB</p>
178
+ </div>
179
+ </div>
180
+ <div class="flex items-center gap-4">
181
+ <div class="hidden md:flex items-center gap-2 text-[10px] font-bold text-slate-500 tracking-widest bg-slate-50 px-3 py-1.5 rounded-full border border-slate-200">
182
+ <span class="w-1.5 h-1.5 rounded-full bg-green-500 animate-pulse"></span>
183
+ MiniCPM5-1B
184
+ </div>
185
+ <button id="toggle-settings" class="p-2 rounded-xl hover:bg-slate-100 text-slate-500 hover:text-slate-800 transition-all">
186
+ <i data-lucide="sliders-horizontal" class="w-5 h-5"></i>
187
+ </button>
188
+ </div>
189
+ </header>
190
+
191
+ <div id="settings-panel" class="fixed top-0 right-0 h-full w-72 z-[100] translate-x-full settings-panel p-6 flex flex-col gap-6 shadow-[-12px_0_40px_rgba(0,0,0,0.08)]">
192
+ <div class="flex items-center justify-between">
193
+ <h2 class="text-base font-bold text-slate-800">Settings</h2>
194
+ <button id="close-settings" class="text-slate-400 hover:text-slate-700">
195
+ <i data-lucide="x" class="w-5 h-5"></i>
196
+ </button>
197
+ </div>
198
+
199
+ <div class="space-y-5">
200
+ <div class="flex items-center justify-between">
201
+ <span class="text-sm font-medium text-slate-600">Thinking</span>
202
+ <div id="thinking-toggle" class="toggle-switch active"></div>
203
+ </div>
204
+
205
+ <div class="space-y-2">
206
+ <div class="flex justify-between text-xs font-bold text-slate-400 uppercase tracking-widest">
207
+ <span>Temperature</span>
208
+ <span id="temp-val">0.9</span>
209
+ </div>
210
+ <input type="range" id="temp-slider" min="0" max="1" step="0.05" value="0.9" class="control-slider">
211
+ </div>
212
+
213
+ <div class="space-y-2">
214
+ <div class="flex justify-between text-xs font-bold text-slate-400 uppercase tracking-widest">
215
+ <span>Top-p</span>
216
+ <span id="p-val">0.95</span>
217
+ </div>
218
+ <input type="range" id="p-slider" min="0" max="1" step="0.01" value="0.95" class="control-slider">
219
+ </div>
220
+
221
+ <button onclick="clearHistory()" class="w-full py-3 rounded-xl bg-red-50 border border-red-200 text-red-600 text-sm font-bold hover:bg-red-100 transition-all flex items-center justify-center gap-2 mt-4">
222
+ <i data-lucide="trash-2" class="w-4 h-4"></i>
223
+ Clear History
224
+ </button>
225
+ </div>
226
+ </div>
227
+
228
+ <main id="chat-messages" class="chat-scroll-area px-4 flex-1">
229
+ <div class="max-w-3xl mx-auto space-y-6 pt-6 pb-32" id="chat-container">
230
+ <div class="flex gap-3 items-start">
231
+ <div class="bot-message p-5 message-bubble shadow-sm">
232
+ <p class="text-slate-700 leading-relaxed text-[15px]">
233
+ Hello! I'm <strong>MiniCPM5-1B</strong>. Ask me anything.
234
+ </p>
235
+ </div>
236
+ </div>
237
+ </div>
238
+ </main>
239
+
240
+ <div class="fixed bottom-0 left-0 right-0 p-4 md:p-8 pointer-events-none z-50">
241
+ <div class="max-w-3xl mx-auto pointer-events-auto">
242
+ <div class="input-pill rounded-[2rem] p-2 flex items-end shadow-lg">
243
+ <textarea id="user-input" placeholder="Ask MiniCPM5..." rows="1"
244
+ class="flex-1 bg-transparent border-none focus:ring-0 text-slate-800 placeholder-slate-400 py-3 px-3 resize-none max-h-40 leading-relaxed font-medium outline-none"></textarea>
245
+ <button id="send-btn" class="send-btn w-11 h-11 text-white rounded-full flex items-center justify-center shrink-0 mb-0.5 mr-1">
246
+ <i data-lucide="arrow-up" class="w-5 h-5" id="send-icon"></i>
247
+ </button>
248
+ </div>
249
+ </div>
250
+ </div>
251
+
252
+ <script type="module">
253
+ import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
254
+
255
+ lucide.createIcons();
256
+
257
+ const chatContainer = document.getElementById('chat-container');
258
+ const chatScrollArea = document.getElementById('chat-messages');
259
+ const userInput = document.getElementById('user-input');
260
+ const sendBtn = document.getElementById('send-btn');
261
+ const settingsPanel = document.getElementById('settings-panel');
262
+ const toggleSettings = document.getElementById('toggle-settings');
263
+ const closeSettings = document.getElementById('close-settings');
264
+ const thinkingToggle = document.getElementById('thinking-toggle');
265
+ const tempSlider = document.getElementById('temp-slider');
266
+ const pSlider = document.getElementById('p-slider');
267
+
268
+ let client = null;
269
+ let chatHistory = [];
270
+ let currentJob = null;
271
+ let isSettingsOpen = false;
272
+
273
+ const THINK_CLOSE = '</think>';
274
+
275
+ async function init() {
276
+ try {
277
+ client = await Client.connect(window.location.origin, { events: ["data", "status"] });
278
+ } catch (err) {
279
+ console.error("Gradio connection error", err);
280
+ }
281
+ }
282
+ init();
283
+
284
+ function renderMath(el) {
285
+ if (window.renderMathInElement) {
286
+ renderMathInElement(el, {
287
+ delimiters: [
288
+ {left: '$$', right: '$$', display: true},
289
+ {left: '$', right: '$', display: false},
290
+ ],
291
+ throwOnError: false
292
+ });
293
+ }
294
+ }
295
+
296
+ function splitThinking(fullText) {
297
+ const text = fullText.replace('<think>', '');
298
+ const pos = text.indexOf(THINK_CLOSE);
299
+ if (pos === -1) {
300
+ return { thinking: text.trim(), answer: '' };
301
+ }
302
+ return {
303
+ thinking: text.slice(0, pos).trim(),
304
+ answer: text.slice(pos + THINK_CLOSE.length).replace('<|im_end|>', '').trim()
305
+ };
306
+ }
307
+
308
+ function appendMessage(role, text = '') {
309
+ const div = document.createElement('div');
310
+ div.className = `flex gap-3 items-start ${role === 'user' ? 'flex-row-reverse' : ''}`;
311
+ const bubbleClass = role === 'user' ? 'user-message' : 'bot-message';
312
+ div.innerHTML = `
313
+ <div class="${bubbleClass} p-5 message-bubble shadow-sm">
314
+ <div class="thinking-container hidden"></div>
315
+ <div class="content-container leading-relaxed text-[15px]">${role === 'user' ? escapeHtml(text) : marked.parse(text)}</div>
316
+ </div>
317
+ `;
318
+ chatContainer.appendChild(div);
319
+ if (role === 'bot') renderMath(div.querySelector('.content-container'));
320
+ chatScrollArea.scrollTo({ top: chatScrollArea.scrollHeight, behavior: 'smooth' });
321
+ return div;
322
+ }
323
+
324
+ function escapeHtml(s) {
325
+ return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
326
+ }
327
+
328
+ function updateBotMessage(div, fullText) {
329
+ const thinkingContainer = div.querySelector('.thinking-container');
330
+ const contentContainer = div.querySelector('.content-container');
331
+ const { thinking, answer } = splitThinking(fullText);
332
+
333
+ if (thinking) {
334
+ thinkingContainer.classList.remove('hidden');
335
+ thinkingContainer.innerHTML = `<div class="thinking-block">${marked.parse(thinking)}</div>`;
336
+ } else {
337
+ thinkingContainer.classList.add('hidden');
338
+ thinkingContainer.innerHTML = '';
339
+ }
340
+
341
+ if (answer) {
342
+ contentContainer.innerHTML = marked.parse(answer);
343
+ } else if (!thinking) {
344
+ contentContainer.innerHTML = marked.parse(fullText.replace('<|im_end|>', ''));
345
+ } else {
346
+ contentContainer.innerHTML = '<div class="flex gap-1.5 py-1"><div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div></div>';
347
+ }
348
+
349
+ renderMath(thinkingContainer);
350
+ renderMath(contentContainer);
351
+ chatScrollArea.scrollTo({ top: chatScrollArea.scrollHeight, behavior: 'smooth' });
352
+ return answer || fullText.replace(THINK_CLOSE, '').replace('<|im_end|>', '').trim();
353
+ }
354
+
355
+ async function sendMessage() {
356
+ const text = userInput.value.trim();
357
+ if (!text) return;
358
+
359
+ userInput.value = '';
360
+ userInput.style.height = 'auto';
361
+ appendMessage('user', text);
362
+
363
+ sendBtn.disabled = true;
364
+ const botDiv = appendMessage('bot', '');
365
+ const contentContainer = botDiv.querySelector('.content-container');
366
+ contentContainer.innerHTML = '<div class="flex gap-1.5 py-2"><div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div></div>';
367
+
368
+ let isStopped = false;
369
+ sendBtn.onclick = () => {
370
+ if (currentJob) {
371
+ currentJob.cancel();
372
+ isStopped = true;
373
+ resetSendBtn();
374
+ }
375
+ };
376
+
377
+ try {
378
+ currentJob = client.submit("/predict", {
379
+ message: text,
380
+ history: chatHistory,
381
+ thinking_mode: thinkingToggle.classList.contains('active'),
382
+ temperature: parseFloat(tempSlider.value),
383
+ top_p: parseFloat(pSlider.value),
384
+ });
385
+
386
+ let finalAnswer = "";
387
+ for await (const msg of currentJob) {
388
+ if (isStopped) break;
389
+ if (msg.type === "data" && msg.data) {
390
+ finalAnswer = updateBotMessage(botDiv, msg.data[0]);
391
+ } else if (msg.type === "status" && msg.stage === "complete") {
392
+ break;
393
+ } else if (msg.type === "status" && msg.stage === "error") {
394
+ throw new Error(msg.message || "Generation failed");
395
+ }
396
+ }
397
+
398
+ if (!isStopped && finalAnswer) {
399
+ chatHistory.push([text, finalAnswer]);
400
+ }
401
+ } catch (err) {
402
+ console.error(err);
403
+ if (!isStopped) {
404
+ contentContainer.innerHTML = '<p class="text-red-500">Error: please try again.</p>';
405
+ }
406
+ } finally {
407
+ resetSendBtn();
408
+ currentJob = null;
409
+ }
410
+ }
411
+
412
+ function resetSendBtn() {
413
+ sendBtn.disabled = false;
414
+ sendBtn.onclick = sendMessage;
415
+ }
416
+
417
+ window.clearHistory = function() {
418
+ chatHistory = [];
419
+ chatContainer.innerHTML = `
420
+ <div class="flex gap-3 items-start">
421
+ <div class="bot-message p-5 message-bubble shadow-sm">
422
+ <p class="text-slate-700 leading-relaxed text-[15px]">History cleared. How can I help you?</p>
423
+ </div>
424
+ </div>
425
+ `;
426
+ toggleSettingsSidebar(false);
427
+ };
428
+
429
+ function toggleSettingsSidebar(open) {
430
+ isSettingsOpen = open;
431
+ settingsPanel.classList.toggle('translate-x-full', !open);
432
+ settingsPanel.classList.toggle('translate-x-0', open);
433
+ }
434
+
435
+ toggleSettings.onclick = (e) => { e.stopPropagation(); toggleSettingsSidebar(true); };
436
+ closeSettings.onclick = () => toggleSettingsSidebar(false);
437
+ document.addEventListener('click', (e) => {
438
+ if (isSettingsOpen && !settingsPanel.contains(e.target) && !toggleSettings.contains(e.target)) {
439
+ toggleSettingsSidebar(false);
440
+ }
441
+ });
442
+
443
+ thinkingToggle.onclick = () => {
444
+ thinkingToggle.classList.toggle('active');
445
+ if (chatHistory.length > 0) {
446
+ if (confirm("Changing Thinking mode will clear conversation history. Continue?")) {
447
+ clearHistory();
448
+ } else {
449
+ thinkingToggle.classList.toggle('active');
450
+ }
451
+ }
452
+ };
453
+
454
+ tempSlider.oninput = () => document.getElementById('temp-val').textContent = tempSlider.value;
455
+ pSlider.oninput = () => document.getElementById('p-val').textContent = pSlider.value;
456
+
457
+ sendBtn.onclick = sendMessage;
458
+ userInput.onkeydown = (e) => {
459
+ if (e.key === 'Enter' && !e.shiftKey) {
460
+ e.preventDefault();
461
+ sendMessage();
462
+ }
463
+ };
464
+ userInput.oninput = () => {
465
+ userInput.style.height = 'auto';
466
+ userInput.style.height = userInput.scrollHeight + 'px';
467
+ };
468
+ </script>
469
+ </body>
470
+ </html>
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
- gradio
2
  transformers>=4.56
3
  torch
4
  spaces
5
  accelerate
6
  sentencepiece
 
 
 
1
+ gradio>=6.14.0
2
  transformers>=4.56
3
  torch
4
  spaces
5
  accelerate
6
  sentencepiece
7
+ fastapi
8
+ uvicorn>=0.14.0
utils_chatbot.py CHANGED
@@ -1,72 +1,15 @@
1
- def organize_messages_from_messages(message, history):
2
- """Build chat messages from Gradio 6.x messages-format history."""
3
  msg_ls = [{"role": "system", "content": "You are a helpful assistant."}]
4
- for msg in history:
5
- role = msg.get("role", "")
6
- content = msg.get("content", "")
7
- if role in ("user", "assistant") and content:
8
- msg_ls.append({"role": role, "content": content})
 
 
 
 
 
9
  msg_ls.append({"role": "user", "content": message})
10
  return msg_ls
11
-
12
-
13
- def _split_on_think_close(text):
14
- """Split on </think>. The <think> tag is in the prompt (skipped by streamer),
15
- so stream_text is: 'thinking content</think>answer content'.
16
- Returns (thinking_text_or_None, answer_text)."""
17
- text = text.replace("<think>", "")
18
-
19
- close_tag = "</think>"
20
- pos = text.find(close_tag)
21
- if pos == -1:
22
- return text, None
23
-
24
- think_text = text[:pos]
25
- answer_text = text[pos + len(close_tag):]
26
- return think_text, answer_text
27
-
28
-
29
- def _format_thinking(think_text):
30
- """Render thinking content as blockquote."""
31
- lines = think_text.strip().splitlines()
32
- return "\n".join(f"> {line}" if line.strip() else ">" for line in lines)
33
-
34
-
35
- def stream2display_text(stream_text, token_per_sec):
36
- think_text, answer_text = _split_on_think_close(stream_text)
37
-
38
- parts = []
39
-
40
- if think_text is not None:
41
- think_text = think_text.strip()
42
- if think_text:
43
- parts.append(_format_thinking(think_text))
44
-
45
- if answer_text is not None:
46
- answer_text = answer_text.replace("<|im_end|>", "").strip()
47
- if answer_text:
48
- parts.append(answer_text)
49
-
50
- parts.append(f"```{token_per_sec:.2f} token/s```")
51
- return "\n\n".join(parts)
52
-
53
-
54
- def clean_final_text(stream_text):
55
- think_text, answer_text = _split_on_think_close(stream_text)
56
-
57
- parts = []
58
-
59
- if think_text is not None:
60
- think_text = think_text.strip()
61
- if think_text:
62
- parts.append(_format_thinking(think_text))
63
-
64
- if answer_text is not None:
65
- answer_text = answer_text.replace("<|im_end|>", "").strip()
66
- if answer_text:
67
- parts.append(answer_text)
68
-
69
- if not parts:
70
- return stream_text.replace("</think>", "").replace("<|im_end|>", "")
71
-
72
- return "\n\n".join(parts)
 
1
+ def organize_messages(message, history=None):
2
+ """Build chat messages from history tuples [[user, assistant], ...]."""
3
  msg_ls = [{"role": "system", "content": "You are a helpful assistant."}]
4
+ if history:
5
+ for turn in history:
6
+ if not turn:
7
+ continue
8
+ user_text = turn[0] if len(turn) > 0 else None
9
+ assistant_text = turn[1] if len(turn) > 1 else None
10
+ if user_text:
11
+ msg_ls.append({"role": "user", "content": user_text})
12
+ if assistant_text:
13
+ msg_ls.append({"role": "assistant", "content": assistant_text})
14
  msg_ls.append({"role": "user", "content": message})
15
  return msg_ls