Harmony18090 commited on
Commit
2e14a9a
·
verified ·
1 Parent(s): a22cca9

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. dashboard/index.html +558 -0
  2. run.sh +20 -0
  3. server.log +0 -0
  4. server.py +435 -0
dashboard/index.html ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>OpenBioLLM - Medical AI Assistant</title>
7
+ <style>
8
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
9
+
10
+ :root {
11
+ --bg: #0f1117;
12
+ --surface: #1a1d27;
13
+ --surface2: #242736;
14
+ --border: #2e3144;
15
+ --text: #e4e4e7;
16
+ --text-dim: #9ca3af;
17
+ --accent: #10b981;
18
+ --accent-hover: #059669;
19
+ --user-bg: #1e3a5f;
20
+ --bot-bg: #1f2937;
21
+ --danger: #ef4444;
22
+ --success: #22c55e;
23
+ --radius: 12px;
24
+ }
25
+
26
+ html, body { height: 100%; }
27
+
28
+ body {
29
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
30
+ background: var(--bg);
31
+ color: var(--text);
32
+ display: flex;
33
+ flex-direction: column;
34
+ }
35
+
36
+ header {
37
+ display: flex;
38
+ align-items: center;
39
+ justify-content: space-between;
40
+ padding: 16px 24px;
41
+ background: var(--surface);
42
+ border-bottom: 1px solid var(--border);
43
+ flex-shrink: 0;
44
+ }
45
+
46
+ .logo {
47
+ display: flex;
48
+ align-items: center;
49
+ gap: 12px;
50
+ }
51
+
52
+ .logo-icon {
53
+ width: 40px;
54
+ height: 40px;
55
+ background: linear-gradient(135deg, #10b981, #06b6d4);
56
+ border-radius: 10px;
57
+ display: flex;
58
+ align-items: center;
59
+ justify-content: center;
60
+ font-size: 20px;
61
+ }
62
+
63
+ .logo h1 {
64
+ font-size: 20px;
65
+ font-weight: 700;
66
+ background: linear-gradient(135deg, #34d399, #22d3ee);
67
+ -webkit-background-clip: text;
68
+ -webkit-text-fill-color: transparent;
69
+ }
70
+
71
+ .logo span {
72
+ font-size: 12px;
73
+ color: var(--text-dim);
74
+ }
75
+
76
+ .status-badge {
77
+ display: flex;
78
+ align-items: center;
79
+ gap: 6px;
80
+ font-size: 13px;
81
+ color: var(--text-dim);
82
+ padding: 6px 14px;
83
+ background: var(--surface2);
84
+ border-radius: 20px;
85
+ }
86
+
87
+ .status-dot {
88
+ width: 8px;
89
+ height: 8px;
90
+ border-radius: 50%;
91
+ background: var(--danger);
92
+ }
93
+
94
+ .status-dot.online { background: var(--success); }
95
+
96
+ .chat-area {
97
+ flex: 1;
98
+ overflow-y: auto;
99
+ padding: 24px;
100
+ display: flex;
101
+ flex-direction: column;
102
+ gap: 16px;
103
+ }
104
+
105
+ .welcome {
106
+ text-align: center;
107
+ padding: 60px 20px;
108
+ max-width: 600px;
109
+ margin: auto;
110
+ }
111
+
112
+ .welcome-icon {
113
+ font-size: 64px;
114
+ margin-bottom: 16px;
115
+ }
116
+
117
+ .welcome h2 {
118
+ font-size: 24px;
119
+ margin-bottom: 8px;
120
+ color: var(--text);
121
+ }
122
+
123
+ .welcome p {
124
+ color: var(--text-dim);
125
+ line-height: 1.6;
126
+ margin-bottom: 24px;
127
+ }
128
+
129
+ .suggestions {
130
+ display: grid;
131
+ grid-template-columns: 1fr 1fr;
132
+ gap: 10px;
133
+ }
134
+
135
+ .suggestion {
136
+ padding: 14px 16px;
137
+ background: var(--surface2);
138
+ border: 1px solid var(--border);
139
+ border-radius: var(--radius);
140
+ cursor: pointer;
141
+ text-align: left;
142
+ color: var(--text-dim);
143
+ font-size: 13px;
144
+ transition: all 0.15s;
145
+ }
146
+
147
+ .suggestion:hover {
148
+ background: var(--surface);
149
+ border-color: var(--accent);
150
+ color: var(--text);
151
+ }
152
+
153
+ .message {
154
+ display: flex;
155
+ gap: 12px;
156
+ max-width: 800px;
157
+ width: 100%;
158
+ margin: 0 auto;
159
+ animation: fadeIn 0.3s ease;
160
+ }
161
+
162
+ @keyframes fadeIn {
163
+ from { opacity: 0; transform: translateY(8px); }
164
+ to { opacity: 1; transform: translateY(0); }
165
+ }
166
+
167
+ .message.user { flex-direction: row-reverse; }
168
+
169
+ .avatar {
170
+ width: 36px;
171
+ height: 36px;
172
+ border-radius: 10px;
173
+ display: flex;
174
+ align-items: center;
175
+ justify-content: center;
176
+ font-size: 16px;
177
+ flex-shrink: 0;
178
+ }
179
+
180
+ .message.bot .avatar { background: linear-gradient(135deg, #10b981, #06b6d4); }
181
+ .message.user .avatar { background: var(--user-bg); }
182
+
183
+ .bubble {
184
+ padding: 14px 18px;
185
+ border-radius: var(--radius);
186
+ line-height: 1.7;
187
+ font-size: 14px;
188
+ max-width: 70%;
189
+ word-break: break-word;
190
+ }
191
+
192
+ .bubble ol, .bubble ul {
193
+ margin: 8px 0;
194
+ padding-left: 24px;
195
+ }
196
+
197
+ .bubble ol li, .bubble ul li {
198
+ margin-bottom: 6px;
199
+ }
200
+
201
+ .bubble p {
202
+ margin: 6px 0;
203
+ }
204
+
205
+ .bubble p:first-child { margin-top: 0; }
206
+ .bubble p:last-child { margin-bottom: 0; }
207
+
208
+ .bubble strong { color: #6ee7b7; font-weight: 600; }
209
+
210
+ .bubble code {
211
+ background: rgba(255,255,255,0.08);
212
+ padding: 2px 6px;
213
+ border-radius: 4px;
214
+ font-size: 13px;
215
+ }
216
+
217
+ .bubble pre {
218
+ background: rgba(0,0,0,0.3);
219
+ padding: 12px;
220
+ border-radius: 8px;
221
+ overflow-x: auto;
222
+ margin: 8px 0;
223
+ }
224
+
225
+ .bubble pre code {
226
+ background: none;
227
+ padding: 0;
228
+ }
229
+
230
+ .bubble h3, .bubble h4 {
231
+ margin: 12px 0 6px;
232
+ color: #6ee7b7;
233
+ }
234
+
235
+ .message.user .bubble { white-space: pre-wrap; }
236
+
237
+ .message.bot .bubble { background: var(--bot-bg); border: 1px solid var(--border); }
238
+ .message.user .bubble { background: var(--user-bg); }
239
+
240
+ .bubble .typing-dots span {
241
+ display: inline-block;
242
+ width: 7px;
243
+ height: 7px;
244
+ margin: 0 2px;
245
+ border-radius: 50%;
246
+ background: var(--text-dim);
247
+ animation: bounce 1.4s infinite ease-in-out both;
248
+ }
249
+
250
+ .bubble .typing-dots span:nth-child(1) { animation-delay: -0.32s; }
251
+ .bubble .typing-dots span:nth-child(2) { animation-delay: -0.16s; }
252
+
253
+ @keyframes bounce {
254
+ 0%, 80%, 100% { transform: scale(0); }
255
+ 40% { transform: scale(1); }
256
+ }
257
+
258
+ .input-area {
259
+ padding: 16px 24px 24px;
260
+ background: var(--surface);
261
+ border-top: 1px solid var(--border);
262
+ flex-shrink: 0;
263
+ }
264
+
265
+ .input-wrap {
266
+ display: flex;
267
+ gap: 10px;
268
+ max-width: 800px;
269
+ margin: 0 auto;
270
+ }
271
+
272
+ .input-wrap textarea {
273
+ flex: 1;
274
+ resize: none;
275
+ border: 1px solid var(--border);
276
+ background: var(--surface2);
277
+ color: var(--text);
278
+ border-radius: var(--radius);
279
+ padding: 14px 18px;
280
+ font-size: 14px;
281
+ font-family: inherit;
282
+ line-height: 1.5;
283
+ outline: none;
284
+ transition: border-color 0.15s;
285
+ min-height: 52px;
286
+ max-height: 160px;
287
+ }
288
+
289
+ .input-wrap textarea:focus { border-color: var(--accent); }
290
+ .input-wrap textarea::placeholder { color: var(--text-dim); }
291
+
292
+ .input-wrap button {
293
+ padding: 14px 20px;
294
+ background: var(--accent);
295
+ color: #fff;
296
+ border: none;
297
+ border-radius: var(--radius);
298
+ font-size: 14px;
299
+ font-weight: 600;
300
+ cursor: pointer;
301
+ transition: background 0.15s;
302
+ display: flex;
303
+ align-items: center;
304
+ gap: 6px;
305
+ white-space: nowrap;
306
+ }
307
+
308
+ .input-wrap button:hover { background: var(--accent-hover); }
309
+ .input-wrap button:disabled { opacity: 0.5; cursor: not-allowed; }
310
+
311
+ .controls {
312
+ display: flex;
313
+ justify-content: space-between;
314
+ align-items: center;
315
+ max-width: 800px;
316
+ margin: 8px auto 0;
317
+ }
318
+
319
+ .disclaimer {
320
+ font-size: 11px;
321
+ color: var(--text-dim);
322
+ }
323
+
324
+ .clear-btn {
325
+ background: none;
326
+ border: 1px solid var(--border);
327
+ color: var(--text-dim);
328
+ padding: 5px 12px;
329
+ border-radius: 8px;
330
+ font-size: 12px;
331
+ cursor: pointer;
332
+ transition: all 0.15s;
333
+ }
334
+
335
+ .clear-btn:hover { border-color: var(--danger); color: var(--danger); }
336
+
337
+ @media (max-width: 640px) {
338
+ .suggestions { grid-template-columns: 1fr; }
339
+ .bubble { max-width: 85%; }
340
+ header { padding: 12px 16px; }
341
+ .chat-area { padding: 16px; }
342
+ .input-area { padding: 12px 16px 16px; }
343
+ }
344
+ </style>
345
+ </head>
346
+ <body>
347
+ <header>
348
+ <div class="logo">
349
+ <div class="logo-icon">&#x1F9EC;</div>
350
+ <div>
351
+ <h1>OpenBioLLM</h1>
352
+ <span>Medical Assistant &middot; OpenBioLLM-8B</span>
353
+ </div>
354
+ </div>
355
+ <div class="status-badge">
356
+ <div class="status-dot" id="statusDot"></div>
357
+ <span id="statusText">Connecting...</span>
358
+ </div>
359
+ </header>
360
+
361
+ <div class="chat-area" id="chatArea">
362
+ <div class="welcome" id="welcome">
363
+ <div class="welcome-icon">&#x1F9EC;</div>
364
+ <h2>OpenBioLLM Medical Assistant</h2>
365
+ <p>Ask me about symptoms, conditions, medications, or general health information.
366
+ Responses are for informational purposes only&mdash;always consult a healthcare professional.</p>
367
+ <div class="suggestions">
368
+ <div class="suggestion" onclick="useSuggestion(this)">What are common symptoms of type 2 diabetes?</div>
369
+ <div class="suggestion" onclick="useSuggestion(this)">Explain the difference between viral and bacterial infections</div>
370
+ <div class="suggestion" onclick="useSuggestion(this)">What are the risk factors for cardiovascular disease?</div>
371
+ <div class="suggestion" onclick="useSuggestion(this)">How does hypertension affect the body over time?</div>
372
+ </div>
373
+ </div>
374
+ </div>
375
+
376
+ <div class="input-area">
377
+ <div class="input-wrap">
378
+ <textarea id="msgInput" rows="1" placeholder="Describe your symptoms or ask a medical question..."
379
+ onkeydown="handleKey(event)" oninput="autoGrow(this)"></textarea>
380
+ <button id="sendBtn" onclick="sendMessage()">Send &#x27A4;</button>
381
+ </div>
382
+ <div class="controls">
383
+ <span class="disclaimer">&#x26A0; Not a substitute for professional medical advice.</span>
384
+ <button class="clear-btn" onclick="clearChat()">Clear chat</button>
385
+ </div>
386
+ </div>
387
+
388
+ <script>
389
// Cached DOM references used throughout the script.
const chatArea = document.getElementById('chatArea');
const msgInput = document.getElementById('msgInput');
const sendBtn = document.getElementById('sendBtn');
const statusDot = document.getElementById('statusDot');
const statusText= document.getElementById('statusText');
const welcome = document.getElementById('welcome');

// Conversation turns ({role, content}) accumulated client-side and sent in
// full with every /v1/chat request.
let history = [];
// True while a request is in flight; used to block double-sends.
let busy = false;
398
+
399
// Poll the backend /health endpoint and reflect the result in the header
// status badge (green dot + GPU memory when healthy, red otherwise).
async function checkHealth() {
  try {
    const r = await fetch('/health');
    const d = await r.json();
    statusDot.classList.toggle('online', d.status === 'ok');
    statusText.textContent = d.status === 'ok'
      ? `Online \u2022 GPU ${d.gpu_memory_used_mb} MB`
      : 'Error';
  } catch {
    // Network failure or server down: show the offline state.
    statusDot.classList.remove('online');
    statusText.textContent = 'Offline';
  }
}
// Check immediately on load, then re-check every 15 seconds.
checkHealth();
setInterval(checkHealth, 15000);
414
+
415
// Copy a clicked suggestion card's text into the input box and focus it.
function useSuggestion(el) {
  const suggestionText = el.textContent;
  msgInput.value = suggestionText;
  msgInput.focus();
  autoGrow(msgInput);
}
420
+
421
// Keyboard handling for the textarea: plain Enter sends the message,
// Shift+Enter falls through and inserts a newline.
function handleKey(e) {
  const isPlainEnter = e.key === 'Enter' && !e.shiftKey;
  if (!isPlainEnter) return;
  e.preventDefault();
  sendMessage();
}
427
+
428
// Grow the textarea to fit its content, capped at 160px to match the
// CSS max-height.
function autoGrow(el) {
  el.style.height = 'auto';
  const cappedHeight = Math.min(el.scrollHeight, 160);
  el.style.height = cappedHeight + 'px';
}
432
+
433
// Append a chat bubble for `role` ('user' | 'bot') containing `content`.
// Bot messages are run through the markdown renderer; user text is
// HTML-escaped and shown verbatim.  Returns the created element.
function appendMessage(role, content) {
  if (welcome) welcome.style.display = 'none'; // hide hero once chatting starts
  const div = document.createElement('div');
  div.className = `message ${role}`;
  const avatarChar = role === 'user' ? '\u{1F464}' : '\u{1F9EC}';
  const rendered = role === 'bot' ? renderMarkdown(content) : escapeHtml(content);
  div.innerHTML = `
    <div class="avatar">${avatarChar}</div>
    <div class="bubble">${rendered}</div>`;
  chatArea.appendChild(div);
  chatArea.scrollTop = chatArea.scrollHeight; // keep view pinned to the bottom
  return div;
}
446
+
447
// Show an animated three-dot "typing" bubble while waiting for the model.
function showTyping() {
  if (welcome) welcome.style.display = 'none';
  const div = document.createElement('div');
  div.className = 'message bot';
  div.id = 'typing'; // looked up later by removeTyping()
  div.innerHTML = `
    <div class="avatar">\u{1F9EC}</div>
    <div class="bubble"><div class="typing-dots"><span></span><span></span><span></span></div></div>`;
  chatArea.appendChild(div);
  chatArea.scrollTop = chatArea.scrollHeight;
}
458
+
459
// Remove the typing indicator bubble, if one is currently shown.
function removeTyping() {
  const indicator = document.getElementById('typing');
  if (indicator) indicator.remove();
}
463
+
464
// Escape a string for safe insertion into innerHTML by round-tripping it
// through the DOM's own text -> HTML serialization (&, <, > etc.).
function escapeHtml(s) {
  const scratch = document.createElement('div');
  scratch.textContent = s;
  return scratch.innerHTML;
}
469
+
470
// Minimal markdown -> HTML renderer for bot replies.  The input is
// HTML-escaped first, so only the markup generated below can reach
// innerHTML.  Replacement order matters: code fences before inline code,
// bold before italics, lists before paragraph/<br> rewriting.
function renderMarkdown(text) {
  let html = escapeHtml(text);

  // Fenced code blocks, inline code, bold, italics, headings.
  html = html.replace(/```(\w*)\n?([\s\S]*?)```/g, '<pre><code>$2</code></pre>');
  html = html.replace(/`([^`]+)`/g, '<code>$1</code>');
  html = html.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.+?)\*/g, '<em>$1</em>');
  html = html.replace(/^####\s+(.+)$/gm, '<h4>$1</h4>');
  html = html.replace(/^###\s+(.+)$/gm, '<h3>$1</h3>');

  // Runs of "1. item" / "2) item" lines become one ordered list.
  html = html.replace(/((?:^\d+[\.\)]\s+.+$\n?)+)/gm, function(block) {
    const items = block.trim().split('\n').map(line =>
      '<li>' + line.replace(/^\d+[\.\)]\s+/, '') + '</li>'
    ).join('');
    return '<ol>' + items + '</ol>';
  });

  // Runs of "- item" / "* item" lines become one unordered list.
  html = html.replace(/((?:^[\-\*]\s+.+$\n?)+)/gm, function(block) {
    const items = block.trim().split('\n').map(line =>
      '<li>' + line.replace(/^[\-\*]\s+/, '') + '</li>'
    ).join('');
    return '<ul>' + items + '</ul>';
  });

  // Blank lines separate paragraphs; remaining single newlines become <br>,
  // except directly next to block-level tags (which supply their own spacing).
  html = html.replace(/\n{2,}/g, '</p><p>');
  html = html.replace(/\n/g, '<br>');
  html = html.replace(/(<\/?(ol|ul|li|h[34]|pre|p)>)\s*<br>/g, '$1');
  html = html.replace(/<br>\s*(<(ol|ul|li|h[34]|pre|p)[ >])/g, '$1');

  // Wrap bare text in a paragraph so bubble styling applies uniformly.
  if (!/^\s*<(ol|ul|h[34]|pre|p)/.test(html)) {
    html = '<p>' + html + '</p>';
  }

  return html;
}
505
+
506
// Send the current input to the backend and append the reply.
// Disables the UI while a request is in flight; history is replayed in
// full so the server sees the whole conversation.
async function sendMessage() {
  const text = msgInput.value.trim();
  if (!text || busy) return;

  busy = true;
  sendBtn.disabled = true;
  msgInput.value = '';
  msgInput.style.height = 'auto';

  appendMessage('user', text);
  history.push({ role: 'user', content: text });

  showTyping();

  try {
    const resp = await fetch('/v1/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        messages: history,
        max_new_tokens: 512,
        temperature: 0.7,
        stream: false
      })
    });

    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    const data = await resp.json();

    removeTyping();
    appendMessage('bot', data.content);
    history.push({ role: 'assistant', content: data.content });
  } catch (err) {
    removeTyping();
    appendMessage('bot', `Error: ${err.message}. Please try again.`);
  } finally {
    // BUG FIX: reset UI state in `finally` -- previously this ran after the
    // try/catch, so any unexpected throw (e.g. inside appendMessage) left
    // `busy` stuck true and the send button permanently disabled.
    busy = false;
    sendBtn.disabled = false;
    msgInput.focus();
  }
}
547
+
548
// Reset the conversation: clear the history and the chat pane, then restore
// the welcome hero (the innerHTML wipe detached it, so re-append the node).
function clearChat() {
  history = [];
  chatArea.innerHTML = '';
  if (welcome) {
    welcome.style.display = '';
    chatArea.appendChild(welcome);
  }
}
556
+ </script>
557
+ </body>
558
+ </html>
run.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Launcher for the OpenBioLLM chatbot server.
# MODEL_PATH / HOST / PORT may be overridden in the environment; defaults
# below are applied otherwise.
set -e

# Resolve the directory this script lives in, so it works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Make the bundled ./source libraries importable by server.py.
export PYTHONPATH="${SCRIPT_DIR}/source:${PYTHONPATH:-}"
export MODEL_PATH="${MODEL_PATH:-/root/openbiollm-model}"
export HOST="${HOST:-0.0.0.0}"
export PORT="${PORT:-8001}"

echo "============================================"
echo " OpenBioLLM Medical Chatbot Server"
echo "============================================"
echo " Source libs : ${SCRIPT_DIR}/source"
echo " Model path : ${MODEL_PATH}"
echo " Listening on: http://${HOST}:${PORT}"
echo " Dashboard : http://${HOST}:${PORT}/"
echo "============================================"

# exec replaces the shell process so signals reach the Python server directly.
cd "${SCRIPT_DIR}"
exec python3 server.py
server.log ADDED
The diff for this file is too large to render. See raw diff
 
server.py ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ SERVER_DIR = os.path.dirname(os.path.abspath(__file__))
5
+ SOURCE_DIR = os.path.join(SERVER_DIR, "source")
6
+ if os.path.isdir(SOURCE_DIR):
7
+ sys.path.insert(0, SOURCE_DIR)
8
+
9
+ import re
10
+ import uuid
11
+ from contextlib import asynccontextmanager
12
+
13
+ import torch
14
+ import uvicorn
15
+ from fastapi import FastAPI, HTTPException
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+ from starlette.responses import HTMLResponse, StreamingResponse
18
+ from pydantic import BaseModel, Field
19
+ from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams
20
+
21
# Runtime configuration, all overridable via environment variables.
MODEL_PATH = os.environ.get("MODEL_PATH", "/root/openbiollm-model")
HOST = os.environ.get("HOST", "0.0.0.0")
PORT = int(os.environ.get("PORT", "8001"))
DASHBOARD_DIR = os.path.join(SERVER_DIR, "dashboard")

# Default system prompt injected when the client does not send one.
# It enforces the sectioned, bulleted answer format that format_response()
# later relies on, and tries to suppress off-topic continuations.
SYSTEM_PROMPT = (
    "You are OpenBioLLM, a medical AI assistant. You provide helpful, accurate, "
    "and evidence-based medical information.\n\n"
    "Response format rules:\n\n"
    "1. Start with a clear, direct one-sentence answer to the question.\n\n"
    "2. Then organize the rest of your response into labeled sections. "
    "Use section headers like 'Definition:', 'Common uses:', 'Drug class:', "
    "'Symptoms:', 'Causes:', 'Treatment:', 'Safety:', 'Key points:' etc. "
    "Put each section header on its own line followed by a newline.\n\n"
    "3. Under each section, list items one per line using '- ' bullet points.\n\n"
    "4. Leave a blank line between each section.\n\n"
    "5. Keep each bullet point short and clear (one idea per bullet).\n\n"
    "6. At the end, add a 'Safety note:' or 'Important:' section for warnings.\n\n"
    "7. Stay on topic. Only answer what was asked. "
    "Do NOT generate unrelated content, fictional patient cases, or diagnosis codes.\n\n"
    "8. Stop when you have fully answered. Do not keep writing.\n\n"
    "9. End with a one-line disclaimer that this is for informational purposes only."
)

# Strings that mark the start of hallucinated continuations (fake cases,
# reference sections, leaked chat-template tokens).  Used both as vLLM stop
# sequences and by _truncate_hallucination() for post-hoc trimming.
STOP_PATTERNS = [
    "The following is a case",
    "The patient is a",
    "Diagnosis code:",
    "Treatment code:",
    "The following sections provide",
    "## Further reading",
    "## References",
    "Further reading",
    "End of interaction",
    "System Response",
    "The following is an example",
    "This sample response",
    "<|eot_id|>",
    "<|start_header_id|>",
]


# Recognizes section headers ("Symptoms:", "Safety note:", ...) at the start
# of the text, after a sentence end, or on a new line.  The header (with its
# colon) is captured so re.split() keeps it in the result.
_SECTION_RE = re.compile(
    r'(?:^|\.\s+|\n\s*)'
    r'((?:Definition|Common uses?|Drug class|Symptoms?|Causes?|Diagnosis'
    r'|Treatments?|Safety(?: note)?|Important|Mechanism|Side effects?'
    r'|Precautions?|Dosage|Key points?|Overview|Risk factors?'
    r'|Complications?|Prevention|When to see a doctor|Warning'
    r'|How it works|What it(?:\'s| is) used for|Disclaimer)\s*:\s*)',
    re.IGNORECASE,
)

# Matches an inline comma-separated enumeration introduced by "such as",
# "including", etc.; group 1 captures the item list itself.
_INLINE_LIST_RE = re.compile(
    r'(?:(?:such as|including|like|e\.g\.|for example|include)\s+)'
    r'([^.!?]{10,}(?:,\s*(?:and\s+)?[^.!?]+)+)',
    re.IGNORECASE,
)

# Matches a leading meta/preamble paragraph ("This sample response...",
# "System Response...") so it can be stripped from the model output.
_PREAMBLE_RE = re.compile(
    r'^(?:[^\n]*\n)*?[^\n]*'
    r'(?:System Response|sample response|example (?:system )?response|'
    r'The exact response will vary|Responses? (?:may|should|will) differ)'
    r'[^\n]*\n+',
    re.IGNORECASE,
)
86
+
87
+
88
def _truncate_hallucination(text: str) -> str:
    """Cut model output off at known hallucination markers.

    Truncates at the first occurrence of each STOP_PATTERNS entry, but only
    when the marker sits past the first 15% of the (current) text -- an early
    hit is assumed to be legitimate content.  Finally strips any leading
    "sample response" style preamble matched by _PREAMBLE_RE.
    """
    for marker in STOP_PATTERNS:
        position = text.find(marker)
        # find() returns -1 when absent, which can never pass this check.
        if position > 0.15 * len(text):
            text = text[:position]

    return _PREAMBLE_RE.sub('', text).strip()
96
+
97
+
98
+ def _trim_incomplete(text: str) -> str:
99
+ if not text:
100
+ return text
101
+ if not text.endswith((".", "!", "?")):
102
+ last_end = max(text.rfind("."), text.rfind("!"), text.rfind("?"))
103
+ if last_end > 0:
104
+ text = text[:last_end + 1]
105
+ return text
106
+
107
+
108
def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Split formatted text into (header, body) pairs using _SECTION_RE.

    The first pair may carry an empty header ("") for any preamble text that
    precedes the first recognized section header.
    """
    parts = _SECTION_RE.split(text)
    sections: list[tuple[str, str]] = []

    # Text before the first header becomes a header-less section.
    # NOTE(review): appending ":" re-tests whether the leading fragment is
    # itself a bare section header -- confirm this edge case is intended.
    if parts and not _SECTION_RE.match(parts[0].strip() + ":"):
        first = parts.pop(0).strip()
        if first:
            sections.append(("", first))

    # After re.split() with one capturing group, parts alternate
    # [header, body, header, body, ...]; walk them pairwise.
    i = 0
    while i < len(parts):
        header = parts[i].strip().rstrip(":")
        body = parts[i + 1].strip() if i + 1 < len(parts) else ""
        if header and body:
            sections.append((header, body))
        elif header and not body:
            # Header with no following text (e.g. a trailing "Safety note:").
            sections.append((header, ""))
        i += 2

    return sections
128
+
129
+
130
def _expand_inline_lists(text: str) -> str:
    """Rewrite inline enumerations ("such as a, b, and c") as bullet lists."""
    def _replacer(m: re.Match) -> str:
        items_str = m.group(1)
        # Split the comma-separated run, tolerating an "and" before the last item.
        items = re.split(r',\s*(?:and\s+)?', items_str)
        items = [it.strip().rstrip(".").strip() for it in items if it.strip()]
        if len(items) < 2:
            return m.group(0)  # not actually a list; leave the match untouched
        # Text between the match start and the captured list (e.g. "such as ").
        prefix = m.group(0)[:m.start(1) - m.start(0)]
        bullet_block = "\n".join(f"- {it.capitalize()}" for it in items)
        return f"{prefix.rstrip()}\n\n{bullet_block}"

    return _INLINE_LIST_RE.sub(_replacer, text)
142
+
143
+
144
+ def _sentences_to_bullets(text: str) -> str:
145
+ sentences = re.split(r'(?<=[.!?])\s+', text)
146
+ if len(sentences) < 3:
147
+ return text
148
+
149
+ bullets: list[str] = []
150
+ for s in sentences:
151
+ s = s.strip().rstrip(".")
152
+ if not s:
153
+ continue
154
+ parts = s.split(":", 1)
155
+ if len(parts) == 2 and len(parts[0]) < 40:
156
+ bullets.append(f"- **{parts[0].strip()}**: {parts[1].strip()}")
157
+ else:
158
+ bullets.append(f"- {s}")
159
+
160
+ return "\n".join(bullets)
161
+
162
+
163
+ def _fix_numbered_list(text: str) -> str:
164
+ if not re.search(r'\d+\.\s', text):
165
+ return text
166
+
167
+ items = re.split(r'(?<=[.!?])\s*(?=\d+\.\s)', text)
168
+ if len(items) < 2:
169
+ items = re.split(r'\s+(?=\d+\.\s)', text)
170
+ if len(items) < 2:
171
+ return text
172
+
173
+ result: list[str] = []
174
+ counter = 0
175
+ for item in items:
176
+ item = item.strip()
177
+ if not item:
178
+ continue
179
+ cleaned = re.sub(r'^\d+\.\s*', '', item)
180
+ if cleaned != item:
181
+ counter += 1
182
+ result.append(f"{counter}. {cleaned}")
183
+ else:
184
+ result.append(item)
185
+
186
+ return "\n".join(result)
187
+
188
+
189
def format_response(text: str) -> str:
    """Post-process raw model output into consistent, bulleted markdown.

    Pipeline: strip hallucinated continuations, trim an unfinished trailing
    sentence, normalize whitespace, then -- unless the text already looks
    structured -- split it into sections and rewrite their bodies as bullet
    lists.  A medical disclaimer is appended when none is detected.
    """
    text = _truncate_hallucination(text)
    if not text:
        return text

    text = _trim_incomplete(text)
    # Collapse 3+ consecutive newlines and force run-on " - " bullets
    # onto their own lines.
    text = re.sub(r'\n{3,}', '\n\n', text)
    text = re.sub(r'\s{2,}-\s+', '\n- ', text)

    # Already has paragraphs and bullet lines: assume it is well formatted.
    if '\n\n' in text and re.search(r'\n- ', text):
        return text.strip()

    sections = _split_into_sections(text)

    # Single (possibly header-less) section: apply list heuristics directly.
    if len(sections) <= 1 and sections:
        header, body = sections[0]
        body = _fix_numbered_list(body)

        # Numbered list successfully split onto lines: done.
        if re.search(r'\d+\.\s', body) and "\n" in body:
            return body.strip()

        body = _expand_inline_lists(body)
        if "\n- " in body:
            return body.strip()

        # Long prose: keep the first sentence as an intro, bullet the rest.
        sentences = re.split(r'(?<=[.!?])\s+', body)
        if len(sentences) >= 4:
            intro = sentences[0]
            rest_text = " ".join(sentences[1:])
            bullets = _sentences_to_bullets(rest_text)
            return f"{intro}\n\n{bullets}".strip()

        return body.strip()

    # Multiple sections: bold each header and bullet its body where helpful.
    output_parts: list[str] = []
    for header, body in sections:
        if not header and body:
            # Preamble text before the first header: pass through unchanged.
            output_parts.append(body)
            continue

        body = _fix_numbered_list(body)
        body = _expand_inline_lists(body)

        # Single-line prose body: convert to bullets if long enough.
        if "\n" not in body and not body.startswith("-"):
            sentences = re.split(r'(?<=[.!?])\s+', body)
            if len(sentences) >= 3:
                body = _sentences_to_bullets(body)

        section_text = f"**{header}**\n\n{body}" if body else f"**{header}**"
        output_parts.append(section_text)

    result = "\n\n".join(output_parts)

    # Append the standard disclaimer unless the model already included one.
    if not re.search(
        r'(?i)disclaimer|informational purposes|not.{0,20}replace.{0,30}medical advice',
        result,
    ):
        result += (
            "\n\n⚠️ *This information is for educational purposes only "
            "and should not replace professional medical advice.*"
        )

    return result.strip()
252
+
253
+
254
# Global vLLM engine handle; populated by lifespan() at startup.
engine = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: build the vLLM engine on startup, free it on shutdown."""
    global engine
    print(f"Loading model from {MODEL_PATH} via vLLM ...")

    engine_args = AsyncEngineArgs(
        model=MODEL_PATH,
        dtype="bfloat16",
        # Small context window and modest GPU fraction so the model can
        # share the card with other workloads.
        max_model_len=2048,
        gpu_memory_utilization=0.40,
        # Eager mode skips CUDA graph capture (faster startup, slower decode).
        enforce_eager=True,
    )
    engine = AsyncLLMEngine.from_engine_args(engine_args)

    print(f"vLLM engine ready")
    print(f"Dashboard available at http://{HOST}:{PORT}/")
    yield
    # Shutdown: drop the engine reference and release cached GPU memory.
    engine = None
    torch.cuda.empty_cache()
276
+
277
+
278
# FastAPI application serving the JSON API and the static dashboard.
app = FastAPI(
    title="OpenBioLLM Medical Chatbot",
    description="Medical AI chatbot powered by OpenBioLLM-8B (vLLM)",
    version="1.0.0",
    lifespan=lifespan,
)

# Wide-open CORS so the dashboard (or any origin) can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# overly permissive for anything beyond a local demo -- confirm before
# exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
292
+
293
+
294
class Message(BaseModel):
    """One conversation turn in OpenAI-style chat format."""

    # Speaker role; the regex restricts it to the three Llama 3 roles.
    role: str = Field(..., pattern="^(user|assistant|system)$")
    # Raw message text (no chat-template tokens).
    content: str
297
+
298
+
299
class ChatRequest(BaseModel):
    """Request body for /v1/chat and /v1/diagnose."""

    # Full conversation so far; the last entry must be the user's turn.
    messages: list[Message]
    # Generation cap; bounds mirror the server's max_model_len budget.
    max_new_tokens: int = Field(default=512, ge=1, le=2048)
    # Sampling temperature; minimum 0.01 (greedy decoding not exposed).
    temperature: float = Field(default=0.7, ge=0.01, le=2.0)
    top_p: float = Field(default=0.9, ge=0.0, le=1.0)
    # When true, the response is served as SSE instead of a single JSON body.
    stream: bool = False
305
+
306
+
307
class ChatResponse(BaseModel):
    """Non-streaming response body for /v1/chat."""

    # Server-generated request id ("bio-<hex>").
    id: str
    # Post-processed assistant reply (see format_response).
    content: str
    # vLLM finish reason, e.g. "stop" or "length".
    finish_reason: str
    # Token accounting: prompt_tokens / completion_tokens / total_tokens.
    usage: dict
312
+
313
+
314
def build_prompt(messages: list[Message]) -> str:
    """Build Llama 3 chat-format prompt.

    Prepends the default SYSTEM_PROMPT unless the caller already supplied a
    system turn, then serializes every message with Llama 3 header/eot
    tokens and opens an assistant turn for generation to continue from.
    """
    prompt_chunks = ["<|begin_of_text|>"]

    # Inject the default medical system prompt only when none was provided.
    if all(m.role != "system" for m in messages):
        prompt_chunks.append(
            "<|start_header_id|>system<|end_header_id|>\n\n"
            f"{SYSTEM_PROMPT}<|eot_id|>"
        )

    prompt_chunks.extend(
        f"<|start_header_id|>{m.role}<|end_header_id|>\n\n"
        f"{m.content}<|eot_id|>"
        for m in messages
    )

    # Open the assistant turn so the model generates the reply from here.
    prompt_chunks.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
    return "".join(prompt_chunks)
333
+
334
+
335
@app.get("/health")
async def health():
    """Liveness probe: reports model path, engine type and GPU memory in MB."""
    if torch.cuda.is_available():
        gpu_mb = round(torch.cuda.memory_allocated() / 1024 / 1024, 1)
    else:
        gpu_mb = None  # CPU-only host: no GPU figure to report
    return {
        "status": "ok",
        "model": MODEL_PATH,
        "engine": "vLLM",
        "gpu_memory_used_mb": gpu_mb,
    }
345
+
346
+
347
@app.post("/v1/chat", response_model=ChatResponse)
async def chat(req: ChatRequest):
    """Run one chat completion against the vLLM engine.

    Validates that the conversation ends with a user turn, builds a Llama 3
    prompt, and either streams the reply as SSE (req.stream) or drains the
    engine and returns a single formatted ChatResponse.

    Raises:
        HTTPException(400): last message is not from the user.
        HTTPException(500): engine produced no output.
    """
    if not req.messages or req.messages[-1].role != "user":
        raise HTTPException(400, "Last message must be from the user.")

    prompt = build_prompt(req.messages)
    request_id = f"bio-{uuid.uuid4().hex[:12]}"

    sampling_params = SamplingParams(
        max_tokens=req.max_new_tokens,
        temperature=req.temperature,
        top_p=req.top_p,
        stop=STOP_PATTERNS,  # also truncated post-hoc by format_response
        repetition_penalty=1.15,
    )

    if req.stream:
        return _stream_response(request_id, prompt, sampling_params)

    # vLLM yields incremental RequestOutputs even for non-streaming use;
    # keep only the last (final) one.
    final = None
    async for result in engine.generate(prompt, sampling_params, request_id):
        final = result

    # BUG FIX: `final`/`output` were previously referenced unconditionally,
    # raising UnboundLocalError if the generator yielded nothing.
    if final is None or not final.outputs:
        raise HTTPException(500, "Model produced no output.")

    output = final.outputs[0]
    full_text = format_response(output.text)
    prompt_tokens = len(final.prompt_token_ids)
    completion_tokens = len(output.token_ids)

    return ChatResponse(
        id=request_id,
        content=full_text,
        finish_reason=output.finish_reason or "stop",
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    )
388
+
389
+
390
def _stream_response(request_id: str, prompt: str, sampling_params: SamplingParams):
    """Return an SSE StreamingResponse emitting incremental formatted text.

    NOTE(review): format_response() is re-run on the full accumulated text at
    every step and only the suffix past `sent_len` is emitted.  Because the
    formatter can rewrite earlier text (bulleting, truncation), its output is
    not guaranteed to grow monotonically, so the concatenated deltas may not
    equal format_response(final_text) -- confirm this is acceptable.
    """
    async def token_generator():
        accumulated = ""
        sent_len = 0  # length of formatted text already delivered to the client
        async for result in engine.generate(prompt, sampling_params, request_id):
            output = result.outputs[0]
            accumulated = output.text

            formatted = format_response(accumulated)
            new_text = formatted[sent_len:]
            if new_text:
                sent_len = len(formatted)
                yield f"data: {new_text}\n\n"
        # SSE end-of-stream sentinel expected by the client.
        yield "data: [DONE]\n\n"

    return StreamingResponse(token_generator(), media_type="text/event-stream")
406
+
407
+
408
@app.post("/v1/diagnose")
async def diagnose(req: ChatRequest):
    """Diagnostic-analysis endpoint.

    Appends a diagnosis instruction to the final user message, then
    delegates the (mutated) request to the /v1/chat handler.
    """
    if not req.messages or req.messages[-1].role != "user":
        raise HTTPException(400, "Last message must be from the user.")

    final_turn = req.messages[-1]
    instruction = (
        "Please provide a detailed and comprehensive diagnostic analysis of this medical record."
    )
    final_turn.content = f"{final_turn.content}\n\n{instruction}"
    return await chat(req)
419
+
420
+
421
@app.get("/", include_in_schema=False)
async def serve_dashboard():
    """Serve the single-page dashboard UI from dashboard/index.html."""
    dashboard_path = os.path.join(DASHBOARD_DIR, "index.html")
    # BUG FIX: read with an explicit encoding -- open()'s default is
    # locale-dependent, which can corrupt non-ASCII characters in the page.
    with open(dashboard_path, "r", encoding="utf-8") as f:
        html = f.read()
    return HTMLResponse(content=html)
427
+
428
+
429
if __name__ == "__main__":
    # Serve the API and dashboard under uvicorn; HOST/PORT come from the
    # environment (see module constants above).
    uvicorn.run(
        app,
        host=HOST,
        port=PORT,
        log_level="info",
    )