Harmony18090 commited on
Commit
2e14a9a
·
verified ·
1 Parent(s): a22cca9

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. dashboard/index.html +558 -0
  2. run.sh +20 -0
  3. server.log +0 -0
  4. server.py +435 -0
dashboard/index.html ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>OpenBioLLM - Medical AI Assistant</title>
7
+ <style>
8
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
9
+
10
+ :root {
11
+ --bg: #0f1117;
12
+ --surface: #1a1d27;
13
+ --surface2: #242736;
14
+ --border: #2e3144;
15
+ --text: #e4e4e7;
16
+ --text-dim: #9ca3af;
17
+ --accent: #10b981;
18
+ --accent-hover: #059669;
19
+ --user-bg: #1e3a5f;
20
+ --bot-bg: #1f2937;
21
+ --danger: #ef4444;
22
+ --success: #22c55e;
23
+ --radius: 12px;
24
+ }
25
+
26
+ html, body { height: 100%; }
27
+
28
+ body {
29
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
30
+ background: var(--bg);
31
+ color: var(--text);
32
+ display: flex;
33
+ flex-direction: column;
34
+ }
35
+
36
+ header {
37
+ display: flex;
38
+ align-items: center;
39
+ justify-content: space-between;
40
+ padding: 16px 24px;
41
+ background: var(--surface);
42
+ border-bottom: 1px solid var(--border);
43
+ flex-shrink: 0;
44
+ }
45
+
46
+ .logo {
47
+ display: flex;
48
+ align-items: center;
49
+ gap: 12px;
50
+ }
51
+
52
+ .logo-icon {
53
+ width: 40px;
54
+ height: 40px;
55
+ background: linear-gradient(135deg, #10b981, #06b6d4);
56
+ border-radius: 10px;
57
+ display: flex;
58
+ align-items: center;
59
+ justify-content: center;
60
+ font-size: 20px;
61
+ }
62
+
63
+ .logo h1 {
64
+ font-size: 20px;
65
+ font-weight: 700;
66
+ background: linear-gradient(135deg, #34d399, #22d3ee);
67
+ -webkit-background-clip: text;
68
+ -webkit-text-fill-color: transparent;
69
+ }
70
+
71
+ .logo span {
72
+ font-size: 12px;
73
+ color: var(--text-dim);
74
+ }
75
+
76
+ .status-badge {
77
+ display: flex;
78
+ align-items: center;
79
+ gap: 6px;
80
+ font-size: 13px;
81
+ color: var(--text-dim);
82
+ padding: 6px 14px;
83
+ background: var(--surface2);
84
+ border-radius: 20px;
85
+ }
86
+
87
+ .status-dot {
88
+ width: 8px;
89
+ height: 8px;
90
+ border-radius: 50%;
91
+ background: var(--danger);
92
+ }
93
+
94
+ .status-dot.online { background: var(--success); }
95
+
96
+ .chat-area {
97
+ flex: 1;
98
+ overflow-y: auto;
99
+ padding: 24px;
100
+ display: flex;
101
+ flex-direction: column;
102
+ gap: 16px;
103
+ }
104
+
105
+ .welcome {
106
+ text-align: center;
107
+ padding: 60px 20px;
108
+ max-width: 600px;
109
+ margin: auto;
110
+ }
111
+
112
+ .welcome-icon {
113
+ font-size: 64px;
114
+ margin-bottom: 16px;
115
+ }
116
+
117
+ .welcome h2 {
118
+ font-size: 24px;
119
+ margin-bottom: 8px;
120
+ color: var(--text);
121
+ }
122
+
123
+ .welcome p {
124
+ color: var(--text-dim);
125
+ line-height: 1.6;
126
+ margin-bottom: 24px;
127
+ }
128
+
129
+ .suggestions {
130
+ display: grid;
131
+ grid-template-columns: 1fr 1fr;
132
+ gap: 10px;
133
+ }
134
+
135
+ .suggestion {
136
+ padding: 14px 16px;
137
+ background: var(--surface2);
138
+ border: 1px solid var(--border);
139
+ border-radius: var(--radius);
140
+ cursor: pointer;
141
+ text-align: left;
142
+ color: var(--text-dim);
143
+ font-size: 13px;
144
+ transition: all 0.15s;
145
+ }
146
+
147
+ .suggestion:hover {
148
+ background: var(--surface);
149
+ border-color: var(--accent);
150
+ color: var(--text);
151
+ }
152
+
153
+ .message {
154
+ display: flex;
155
+ gap: 12px;
156
+ max-width: 800px;
157
+ width: 100%;
158
+ margin: 0 auto;
159
+ animation: fadeIn 0.3s ease;
160
+ }
161
+
162
+ @keyframes fadeIn {
163
+ from { opacity: 0; transform: translateY(8px); }
164
+ to { opacity: 1; transform: translateY(0); }
165
+ }
166
+
167
+ .message.user { flex-direction: row-reverse; }
168
+
169
+ .avatar {
170
+ width: 36px;
171
+ height: 36px;
172
+ border-radius: 10px;
173
+ display: flex;
174
+ align-items: center;
175
+ justify-content: center;
176
+ font-size: 16px;
177
+ flex-shrink: 0;
178
+ }
179
+
180
+ .message.bot .avatar { background: linear-gradient(135deg, #10b981, #06b6d4); }
181
+ .message.user .avatar { background: var(--user-bg); }
182
+
183
+ .bubble {
184
+ padding: 14px 18px;
185
+ border-radius: var(--radius);
186
+ line-height: 1.7;
187
+ font-size: 14px;
188
+ max-width: 70%;
189
+ word-break: break-word;
190
+ }
191
+
192
+ .bubble ol, .bubble ul {
193
+ margin: 8px 0;
194
+ padding-left: 24px;
195
+ }
196
+
197
+ .bubble ol li, .bubble ul li {
198
+ margin-bottom: 6px;
199
+ }
200
+
201
+ .bubble p {
202
+ margin: 6px 0;
203
+ }
204
+
205
+ .bubble p:first-child { margin-top: 0; }
206
+ .bubble p:last-child { margin-bottom: 0; }
207
+
208
+ .bubble strong { color: #6ee7b7; font-weight: 600; }
209
+
210
+ .bubble code {
211
+ background: rgba(255,255,255,0.08);
212
+ padding: 2px 6px;
213
+ border-radius: 4px;
214
+ font-size: 13px;
215
+ }
216
+
217
+ .bubble pre {
218
+ background: rgba(0,0,0,0.3);
219
+ padding: 12px;
220
+ border-radius: 8px;
221
+ overflow-x: auto;
222
+ margin: 8px 0;
223
+ }
224
+
225
+ .bubble pre code {
226
+ background: none;
227
+ padding: 0;
228
+ }
229
+
230
+ .bubble h3, .bubble h4 {
231
+ margin: 12px 0 6px;
232
+ color: #6ee7b7;
233
+ }
234
+
235
+ .message.user .bubble { white-space: pre-wrap; }
236
+
237
+ .message.bot .bubble { background: var(--bot-bg); border: 1px solid var(--border); }
238
+ .message.user .bubble { background: var(--user-bg); }
239
+
240
+ .bubble .typing-dots span {
241
+ display: inline-block;
242
+ width: 7px;
243
+ height: 7px;
244
+ margin: 0 2px;
245
+ border-radius: 50%;
246
+ background: var(--text-dim);
247
+ animation: bounce 1.4s infinite ease-in-out both;
248
+ }
249
+
250
+ .bubble .typing-dots span:nth-child(1) { animation-delay: -0.32s; }
251
+ .bubble .typing-dots span:nth-child(2) { animation-delay: -0.16s; }
252
+
253
+ @keyframes bounce {
254
+ 0%, 80%, 100% { transform: scale(0); }
255
+ 40% { transform: scale(1); }
256
+ }
257
+
258
+ .input-area {
259
+ padding: 16px 24px 24px;
260
+ background: var(--surface);
261
+ border-top: 1px solid var(--border);
262
+ flex-shrink: 0;
263
+ }
264
+
265
+ .input-wrap {
266
+ display: flex;
267
+ gap: 10px;
268
+ max-width: 800px;
269
+ margin: 0 auto;
270
+ }
271
+
272
+ .input-wrap textarea {
273
+ flex: 1;
274
+ resize: none;
275
+ border: 1px solid var(--border);
276
+ background: var(--surface2);
277
+ color: var(--text);
278
+ border-radius: var(--radius);
279
+ padding: 14px 18px;
280
+ font-size: 14px;
281
+ font-family: inherit;
282
+ line-height: 1.5;
283
+ outline: none;
284
+ transition: border-color 0.15s;
285
+ min-height: 52px;
286
+ max-height: 160px;
287
+ }
288
+
289
+ .input-wrap textarea:focus { border-color: var(--accent); }
290
+ .input-wrap textarea::placeholder { color: var(--text-dim); }
291
+
292
+ .input-wrap button {
293
+ padding: 14px 20px;
294
+ background: var(--accent);
295
+ color: #fff;
296
+ border: none;
297
+ border-radius: var(--radius);
298
+ font-size: 14px;
299
+ font-weight: 600;
300
+ cursor: pointer;
301
+ transition: background 0.15s;
302
+ display: flex;
303
+ align-items: center;
304
+ gap: 6px;
305
+ white-space: nowrap;
306
+ }
307
+
308
+ .input-wrap button:hover { background: var(--accent-hover); }
309
+ .input-wrap button:disabled { opacity: 0.5; cursor: not-allowed; }
310
+
311
+ .controls {
312
+ display: flex;
313
+ justify-content: space-between;
314
+ align-items: center;
315
+ max-width: 800px;
316
+ margin: 8px auto 0;
317
+ }
318
+
319
+ .disclaimer {
320
+ font-size: 11px;
321
+ color: var(--text-dim);
322
+ }
323
+
324
+ .clear-btn {
325
+ background: none;
326
+ border: 1px solid var(--border);
327
+ color: var(--text-dim);
328
+ padding: 5px 12px;
329
+ border-radius: 8px;
330
+ font-size: 12px;
331
+ cursor: pointer;
332
+ transition: all 0.15s;
333
+ }
334
+
335
+ .clear-btn:hover { border-color: var(--danger); color: var(--danger); }
336
+
337
+ @media (max-width: 640px) {
338
+ .suggestions { grid-template-columns: 1fr; }
339
+ .bubble { max-width: 85%; }
340
+ header { padding: 12px 16px; }
341
+ .chat-area { padding: 16px; }
342
+ .input-area { padding: 12px 16px 16px; }
343
+ }
344
+ </style>
345
+ </head>
346
+ <body>
347
+ <header>
348
+ <div class="logo">
349
+ <div class="logo-icon">&#x1F9EC;</div>
350
+ <div>
351
+ <h1>OpenBioLLM</h1>
352
+ <span>Medical Assistant &middot; OpenBioLLM-8B</span>
353
+ </div>
354
+ </div>
355
+ <div class="status-badge">
356
+ <div class="status-dot" id="statusDot"></div>
357
+ <span id="statusText">Connecting...</span>
358
+ </div>
359
+ </header>
360
+
361
+ <div class="chat-area" id="chatArea">
362
+ <div class="welcome" id="welcome">
363
+ <div class="welcome-icon">&#x1F9EC;</div>
364
+ <h2>OpenBioLLM Medical Assistant</h2>
365
+ <p>Ask me about symptoms, conditions, medications, or general health information.
366
+ Responses are for informational purposes only&mdash;always consult a healthcare professional.</p>
367
+ <div class="suggestions">
368
+ <div class="suggestion" onclick="useSuggestion(this)">What are common symptoms of type 2 diabetes?</div>
369
+ <div class="suggestion" onclick="useSuggestion(this)">Explain the difference between viral and bacterial infections</div>
370
+ <div class="suggestion" onclick="useSuggestion(this)">What are the risk factors for cardiovascular disease?</div>
371
+ <div class="suggestion" onclick="useSuggestion(this)">How does hypertension affect the body over time?</div>
372
+ </div>
373
+ </div>
374
+ </div>
375
+
376
+ <div class="input-area">
377
+ <div class="input-wrap">
378
+ <textarea id="msgInput" rows="1" placeholder="Describe your symptoms or ask a medical question..."
379
+ onkeydown="handleKey(event)" oninput="autoGrow(this)"></textarea>
380
+ <button id="sendBtn" onclick="sendMessage()">Send &#x27A4;</button>
381
+ </div>
382
+ <div class="controls">
383
+ <span class="disclaimer">&#x26A0; Not a substitute for professional medical advice.</span>
384
+ <button class="clear-btn" onclick="clearChat()">Clear chat</button>
385
+ </div>
386
+ </div>
387
+
388
+ <script>
389
// Cached DOM references used throughout the script.
const chatArea = document.getElementById('chatArea');
const msgInput = document.getElementById('msgInput');
const sendBtn = document.getElementById('sendBtn');
const statusDot = document.getElementById('statusDot');
const statusText= document.getElementById('statusText');
const welcome = document.getElementById('welcome');

// Conversation turns ({role, content}) accumulated client-side and sent in
// full with every /v1/chat request.
let history = [];
// True while a request is in flight; used to block double-sends.
let busy = false;
398
+
399
// Poll the backend /health endpoint and reflect the result in the header
// status badge (green dot + GPU memory when healthy, red otherwise).
async function checkHealth() {
  try {
    const r = await fetch('/health');
    const d = await r.json();
    statusDot.classList.toggle('online', d.status === 'ok');
    statusText.textContent = d.status === 'ok'
      ? `Online \u2022 GPU ${d.gpu_memory_used_mb} MB`
      : 'Error';
  } catch {
    // Network failure or server down: show the offline state.
    statusDot.classList.remove('online');
    statusText.textContent = 'Offline';
  }
}
// Check immediately on load, then re-check every 15 seconds.
checkHealth();
setInterval(checkHealth, 15000);
414
+
415
// Copy a clicked suggestion card's text into the input box and focus it.
function useSuggestion(el) {
  const suggestionText = el.textContent;
  msgInput.value = suggestionText;
  msgInput.focus();
  autoGrow(msgInput);
}
420
+
421
// Keyboard handling for the textarea: plain Enter sends the message,
// Shift+Enter falls through and inserts a newline.
function handleKey(e) {
  const isPlainEnter = e.key === 'Enter' && !e.shiftKey;
  if (!isPlainEnter) return;
  e.preventDefault();
  sendMessage();
}
427
+
428
// Grow the textarea to fit its content, capped at 160px to match the
// CSS max-height.
function autoGrow(el) {
  el.style.height = 'auto';
  const cappedHeight = Math.min(el.scrollHeight, 160);
  el.style.height = cappedHeight + 'px';
}
432
+
433
// Append a chat bubble for `role` ('user' | 'bot') containing `content`.
// Bot messages are run through the markdown renderer; user text is
// HTML-escaped and shown verbatim.  Returns the created element.
function appendMessage(role, content) {
  if (welcome) welcome.style.display = 'none'; // hide hero once chatting starts
  const div = document.createElement('div');
  div.className = `message ${role}`;
  const avatarChar = role === 'user' ? '\u{1F464}' : '\u{1F9EC}';
  const rendered = role === 'bot' ? renderMarkdown(content) : escapeHtml(content);
  div.innerHTML = `
    <div class="avatar">${avatarChar}</div>
    <div class="bubble">${rendered}</div>`;
  chatArea.appendChild(div);
  chatArea.scrollTop = chatArea.scrollHeight; // keep view pinned to the bottom
  return div;
}
446
+
447
// Show an animated three-dot "typing" bubble while waiting for the model.
function showTyping() {
  if (welcome) welcome.style.display = 'none';
  const div = document.createElement('div');
  div.className = 'message bot';
  div.id = 'typing'; // looked up later by removeTyping()
  div.innerHTML = `
    <div class="avatar">\u{1F9EC}</div>
    <div class="bubble"><div class="typing-dots"><span></span><span></span><span></span></div></div>`;
  chatArea.appendChild(div);
  chatArea.scrollTop = chatArea.scrollHeight;
}
458
+
459
// Remove the typing indicator bubble, if one is currently shown.
function removeTyping() {
  const indicator = document.getElementById('typing');
  if (indicator) indicator.remove();
}
463
+
464
// Escape a string for safe insertion into innerHTML by round-tripping it
// through the DOM's own text -> HTML serialization (&, <, > etc.).
function escapeHtml(s) {
  const scratch = document.createElement('div');
  scratch.textContent = s;
  return scratch.innerHTML;
}
469
+
470
// Minimal markdown -> HTML renderer for bot replies.  The input is
// HTML-escaped first, so only the markup generated below can reach
// innerHTML.  Replacement order matters: code fences before inline code,
// bold before italics, lists before paragraph/<br> rewriting.
function renderMarkdown(text) {
  let html = escapeHtml(text);

  // Fenced code blocks, inline code, bold, italics, headings.
  html = html.replace(/```(\w*)\n?([\s\S]*?)```/g, '<pre><code>$2</code></pre>');
  html = html.replace(/`([^`]+)`/g, '<code>$1</code>');
  html = html.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>');
  html = html.replace(/\*(.+?)\*/g, '<em>$1</em>');
  html = html.replace(/^####\s+(.+)$/gm, '<h4>$1</h4>');
  html = html.replace(/^###\s+(.+)$/gm, '<h3>$1</h3>');

  // Runs of "1. item" / "2) item" lines become one ordered list.
  html = html.replace(/((?:^\d+[\.\)]\s+.+$\n?)+)/gm, function(block) {
    const items = block.trim().split('\n').map(line =>
      '<li>' + line.replace(/^\d+[\.\)]\s+/, '') + '</li>'
    ).join('');
    return '<ol>' + items + '</ol>';
  });

  // Runs of "- item" / "* item" lines become one unordered list.
  html = html.replace(/((?:^[\-\*]\s+.+$\n?)+)/gm, function(block) {
    const items = block.trim().split('\n').map(line =>
      '<li>' + line.replace(/^[\-\*]\s+/, '') + '</li>'
    ).join('');
    return '<ul>' + items + '</ul>';
  });

  // Blank lines separate paragraphs; remaining single newlines become <br>,
  // except directly next to block-level tags (which supply their own spacing).
  html = html.replace(/\n{2,}/g, '</p><p>');
  html = html.replace(/\n/g, '<br>');
  html = html.replace(/(<\/?(ol|ul|li|h[34]|pre|p)>)\s*<br>/g, '$1');
  html = html.replace(/<br>\s*(<(ol|ul|li|h[34]|pre|p)[ >])/g, '$1');

  // Wrap bare text in a paragraph so bubble styling applies uniformly.
  if (!/^\s*<(ol|ul|h[34]|pre|p)/.test(html)) {
    html = '<p>' + html + '</p>';
  }

  return html;
}
505
+
506
// Send the current input to the backend and append the reply.
// Disables the UI while a request is in flight; history is replayed in
// full so the server sees the whole conversation.
async function sendMessage() {
  const text = msgInput.value.trim();
  if (!text || busy) return;

  busy = true;
  sendBtn.disabled = true;
  msgInput.value = '';
  msgInput.style.height = 'auto';

  appendMessage('user', text);
  history.push({ role: 'user', content: text });

  showTyping();

  try {
    const resp = await fetch('/v1/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        messages: history,
        max_new_tokens: 512,
        temperature: 0.7,
        stream: false
      })
    });

    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    const data = await resp.json();

    removeTyping();
    appendMessage('bot', data.content);
    history.push({ role: 'assistant', content: data.content });
  } catch (err) {
    removeTyping();
    appendMessage('bot', `Error: ${err.message}. Please try again.`);
  } finally {
    // BUG FIX: reset UI state in `finally` -- previously this ran after the
    // try/catch, so any unexpected throw (e.g. inside appendMessage) left
    // `busy` stuck true and the send button permanently disabled.
    busy = false;
    sendBtn.disabled = false;
    msgInput.focus();
  }
}
547
+
548
// Reset the conversation: clear the history and the chat pane, then restore
// the welcome hero (the innerHTML wipe detached it, so re-append the node).
function clearChat() {
  history = [];
  chatArea.innerHTML = '';
  if (welcome) {
    welcome.style.display = '';
    chatArea.appendChild(welcome);
  }
}
556
+ </script>
557
+ </body>
558
+ </html>
run.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Launcher for the OpenBioLLM chatbot server.
# MODEL_PATH / HOST / PORT may be overridden in the environment; defaults
# below are applied otherwise.
set -e

# Resolve the directory this script lives in, so it works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Make the bundled ./source libraries importable by server.py.
export PYTHONPATH="${SCRIPT_DIR}/source:${PYTHONPATH:-}"
export MODEL_PATH="${MODEL_PATH:-/root/openbiollm-model}"
export HOST="${HOST:-0.0.0.0}"
export PORT="${PORT:-8001}"

echo "============================================"
echo " OpenBioLLM Medical Chatbot Server"
echo "============================================"
echo " Source libs : ${SCRIPT_DIR}/source"
echo " Model path : ${MODEL_PATH}"
echo " Listening on: http://${HOST}:${PORT}"
echo " Dashboard : http://${HOST}:${PORT}/"
echo "============================================"

# exec replaces the shell process so signals reach the Python server directly.
cd "${SCRIPT_DIR}"
exec python3 server.py
server.log ADDED
The diff for this file is too large to render. See raw diff
 
server.py ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ SERVER_DIR = os.path.dirname(os.path.abspath(__file__))
5
+ SOURCE_DIR = os.path.join(SERVER_DIR, "source")
6
+ if os.path.isdir(SOURCE_DIR):
7
+ sys.path.insert(0, SOURCE_DIR)
8
+
9
+ import re
10
+ import uuid
11
+ from contextlib import asynccontextmanager
12
+
13
+ import torch
14
+ import uvicorn
15
+ from fastapi import FastAPI, HTTPException
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+ from starlette.responses import HTMLResponse, StreamingResponse
18
+ from pydantic import BaseModel, Field
19
+ from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams
20
+
21
# Runtime configuration, all overridable via environment variables.
MODEL_PATH = os.environ.get("MODEL_PATH", "/root/openbiollm-model")
HOST = os.environ.get("HOST", "0.0.0.0")
PORT = int(os.environ.get("PORT", "8001"))
DASHBOARD_DIR = os.path.join(SERVER_DIR, "dashboard")

# Default system prompt injected when the client does not send one.
# It enforces the sectioned, bulleted answer format that format_response()
# later relies on, and tries to suppress off-topic continuations.
SYSTEM_PROMPT = (
    "You are OpenBioLLM, a medical AI assistant. You provide helpful, accurate, "
    "and evidence-based medical information.\n\n"
    "Response format rules:\n\n"
    "1. Start with a clear, direct one-sentence answer to the question.\n\n"
    "2. Then organize the rest of your response into labeled sections. "
    "Use section headers like 'Definition:', 'Common uses:', 'Drug class:', "
    "'Symptoms:', 'Causes:', 'Treatment:', 'Safety:', 'Key points:' etc. "
    "Put each section header on its own line followed by a newline.\n\n"
    "3. Under each section, list items one per line using '- ' bullet points.\n\n"
    "4. Leave a blank line between each section.\n\n"
    "5. Keep each bullet point short and clear (one idea per bullet).\n\n"
    "6. At the end, add a 'Safety note:' or 'Important:' section for warnings.\n\n"
    "7. Stay on topic. Only answer what was asked. "
    "Do NOT generate unrelated content, fictional patient cases, or diagnosis codes.\n\n"
    "8. Stop when you have fully answered. Do not keep writing.\n\n"
    "9. End with a one-line disclaimer that this is for informational purposes only."
)

# Strings that mark the start of hallucinated continuations (fake cases,
# reference sections, leaked chat-template tokens).  Used both as vLLM stop
# sequences and by _truncate_hallucination() for post-hoc trimming.
STOP_PATTERNS = [
    "The following is a case",
    "The patient is a",
    "Diagnosis code:",
    "Treatment code:",
    "The following sections provide",
    "## Further reading",
    "## References",
    "Further reading",
    "End of interaction",
    "System Response",
    "The following is an example",
    "This sample response",
    "<|eot_id|>",
    "<|start_header_id|>",
]


# Recognizes section headers ("Symptoms:", "Safety note:", ...) at the start
# of the text, after a sentence end, or on a new line.  The header (with its
# colon) is captured so re.split() keeps it in the result.
_SECTION_RE = re.compile(
    r'(?:^|\.\s+|\n\s*)'
    r'((?:Definition|Common uses?|Drug class|Symptoms?|Causes?|Diagnosis'
    r'|Treatments?|Safety(?: note)?|Important|Mechanism|Side effects?'
    r'|Precautions?|Dosage|Key points?|Overview|Risk factors?'
    r'|Complications?|Prevention|When to see a doctor|Warning'
    r'|How it works|What it(?:\'s| is) used for|Disclaimer)\s*:\s*)',
    re.IGNORECASE,
)

# Matches an inline comma-separated enumeration introduced by "such as",
# "including", etc.; group 1 captures the item list itself.
_INLINE_LIST_RE = re.compile(
    r'(?:(?:such as|including|like|e\.g\.|for example|include)\s+)'
    r'([^.!?]{10,}(?:,\s*(?:and\s+)?[^.!?]+)+)',
    re.IGNORECASE,
)

# Matches a leading meta/preamble paragraph ("This sample response...",
# "System Response...") so it can be stripped from the model output.
_PREAMBLE_RE = re.compile(
    r'^(?:[^\n]*\n)*?[^\n]*'
    r'(?:System Response|sample response|example (?:system )?response|'
    r'The exact response will vary|Responses? (?:may|should|will) differ)'
    r'[^\n]*\n+',
    re.IGNORECASE,
)
86
+
87
+
88
def _truncate_hallucination(text: str) -> str:
    """Cut model output off at known hallucination markers.

    Truncates at the first occurrence of each STOP_PATTERNS entry, but only
    when the marker sits past the first 15% of the (current) text -- an early
    hit is assumed to be legitimate content.  Finally strips any leading
    "sample response" style preamble matched by _PREAMBLE_RE.
    """
    for marker in STOP_PATTERNS:
        position = text.find(marker)
        # find() returns -1 when absent, which can never pass this check.
        if position > 0.15 * len(text):
            text = text[:position]

    return _PREAMBLE_RE.sub('', text).strip()
96
+
97
+
98
+ def _trim_incomplete(text: str) -> str:
99
+ if not text:
100
+ return text
101
+ if not text.endswith((".", "!", "?")):
102
+ last_end = max(text.rfind("."), text.rfind("!"), text.rfind("?"))
103
+ if last_end > 0:
104
+ text = text[:last_end + 1]
105
+ return text
106
+
107
+
108
def _split_into_sections(text: str) -> list[tuple[str, str]]:
    """Split formatted text into (header, body) pairs using _SECTION_RE.

    The first pair may carry an empty header ("") for any preamble text that
    precedes the first recognized section header.
    """
    parts = _SECTION_RE.split(text)
    sections: list[tuple[str, str]] = []

    # Text before the first header becomes a header-less section.
    # NOTE(review): appending ":" re-tests whether the leading fragment is
    # itself a bare section header -- confirm this edge case is intended.
    if parts and not _SECTION_RE.match(parts[0].strip() + ":"):
        first = parts.pop(0).strip()
        if first:
            sections.append(("", first))

    # After re.split() with one capturing group, parts alternate
    # [header, body, header, body, ...]; walk them pairwise.
    i = 0
    while i < len(parts):
        header = parts[i].strip().rstrip(":")
        body = parts[i + 1].strip() if i + 1 < len(parts) else ""
        if header and body:
            sections.append((header, body))
        elif header and not body:
            # Header with no following text (e.g. a trailing "Safety note:").
            sections.append((header, ""))
        i += 2

    return sections
128
+
129
+
130
def _expand_inline_lists(text: str) -> str:
    """Rewrite inline enumerations ("such as a, b, and c") as bullet lists."""
    def _replacer(m: re.Match) -> str:
        items_str = m.group(1)
        # Split the comma-separated run, tolerating an "and" before the last item.
        items = re.split(r',\s*(?:and\s+)?', items_str)
        items = [it.strip().rstrip(".").strip() for it in items if it.strip()]
        if len(items) < 2:
            return m.group(0)  # not actually a list; leave the match untouched
        # Text between the match start and the captured list (e.g. "such as ").
        prefix = m.group(0)[:m.start(1) - m.start(0)]
        bullet_block = "\n".join(f"- {it.capitalize()}" for it in items)
        return f"{prefix.rstrip()}\n\n{bullet_block}"

    return _INLINE_LIST_RE.sub(_replacer, text)
142
+
143
+
144
+ def _sentences_to_bullets(text: str) -> str:
145
+ sentences = re.split(r'(?<=[.!?])\s+', text)
146
+ if len(sentences) < 3:
147
+ return text
148
+
149
+ bullets: list[str] = []
150
+ for s in sentences:
151
+ s = s.strip().rstrip(".")
152
+ if not s:
153
+ continue
154
+ parts = s.split(":", 1)
155
+ if len(parts) == 2 and len(parts[0]) < 40:
156
+ bullets.append(f"- **{parts[0].strip()}**: {parts[1].strip()}")
157
+ else:
158
+ bullets.append(f"- {s}")
159
+
160
+ return "\n".join(bullets)
161
+
162
+
163
+ def _fix_numbered_list(text: str) -> str:
164
+ if not re.search(r'\d+\.\s', text):
165
+ return text
166
+
167
+ items = re.split(r'(?<=[.!?])\s*(?=\d+\.\s)', text)
168
+ if len(items) < 2:
169
+ items = re.split(r'\s+(?=\d+\.\s)', text)
170
+ if len(items) < 2:
171
+ return text
172
+
173
+ result: list[str] = []
174
+ counter = 0
175
+ for item in items:
176
+ item = item.strip()
177
+ if not item:
178
+ continue
179
+ cleaned = re.sub(r'^\d+\.\s*', '', item)
180
+ if cleaned != item:
181
+ counter += 1
182
+ result.append(f"{counter}. {cleaned}")
183
+ else:
184
+ result.append(item)
185
+
186
+ return "\n".join(result)
187
+
188
+
189
def format_response(text: str) -> str:
    """Post-process raw model output into consistent, bulleted markdown.

    Pipeline: strip hallucinated continuations, trim an unfinished trailing
    sentence, normalize whitespace, then -- unless the text already looks
    structured -- split it into sections and rewrite their bodies as bullet
    lists.  A medical disclaimer is appended when none is detected.
    """
    text = _truncate_hallucination(text)
    if not text:
        return text

    text = _trim_incomplete(text)
    # Collapse 3+ consecutive newlines and force run-on " - " bullets
    # onto their own lines.
    text = re.sub(r'\n{3,}', '\n\n', text)
    text = re.sub(r'\s{2,}-\s+', '\n- ', text)

    # Already has paragraphs and bullet lines: assume it is well formatted.
    if '\n\n' in text and re.search(r'\n- ', text):
        return text.strip()

    sections = _split_into_sections(text)

    # Single (possibly header-less) section: apply list heuristics directly.
    if len(sections) <= 1 and sections:
        header, body = sections[0]
        body = _fix_numbered_list(body)

        # Numbered list successfully split onto lines: done.
        if re.search(r'\d+\.\s', body) and "\n" in body:
            return body.strip()

        body = _expand_inline_lists(body)
        if "\n- " in body:
            return body.strip()

        # Long prose: keep the first sentence as an intro, bullet the rest.
        sentences = re.split(r'(?<=[.!?])\s+', body)
        if len(sentences) >= 4:
            intro = sentences[0]
            rest_text = " ".join(sentences[1:])
            bullets = _sentences_to_bullets(rest_text)
            return f"{intro}\n\n{bullets}".strip()

        return body.strip()

    # Multiple sections: bold each header and bullet its body where helpful.
    output_parts: list[str] = []
    for header, body in sections:
        if not header and body:
            # Preamble text before the first header: pass through unchanged.
            output_parts.append(body)
            continue

        body = _fix_numbered_list(body)
        body = _expand_inline_lists(body)

        # Single-line prose body: convert to bullets if long enough.
        if "\n" not in body and not body.startswith("-"):
            sentences = re.split(r'(?<=[.!?])\s+', body)
            if len(sentences) >= 3:
                body = _sentences_to_bullets(body)

        section_text = f"**{header}**\n\n{body}" if body else f"**{header}**"
        output_parts.append(section_text)

    result = "\n\n".join(output_parts)

    # Append the standard disclaimer unless the model already included one.
    if not re.search(
        r'(?i)disclaimer|informational purposes|not.{0,20}replace.{0,30}medical advice',
        result,
    ):
        result += (
            "\n\n⚠️ *This information is for educational purposes only "
            "and should not replace professional medical advice.*"
        )

    return result.strip()
252
+
253
+
254
# Global vLLM engine handle; populated by lifespan() at startup.
engine = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: build the vLLM engine on startup, free it on shutdown."""
    global engine
    print(f"Loading model from {MODEL_PATH} via vLLM ...")

    engine_args = AsyncEngineArgs(
        model=MODEL_PATH,
        dtype="bfloat16",
        # Small context window and modest GPU fraction so the model can
        # share the card with other workloads.
        max_model_len=2048,
        gpu_memory_utilization=0.40,
        # Eager mode skips CUDA graph capture (faster startup, slower decode).
        enforce_eager=True,
    )
    engine = AsyncLLMEngine.from_engine_args(engine_args)

    print(f"vLLM engine ready")
    print(f"Dashboard available at http://{HOST}:{PORT}/")
    yield
    # Shutdown: drop the engine reference and release cached GPU memory.
    engine = None
    torch.cuda.empty_cache()
276
+
277
+
278
# FastAPI application serving the JSON API and the static dashboard.
app = FastAPI(
    title="OpenBioLLM Medical Chatbot",
    description="Medical AI chatbot powered by OpenBioLLM-8B (vLLM)",
    version="1.0.0",
    lifespan=lifespan,
)

# Wide-open CORS so the dashboard (or any origin) can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# overly permissive for anything beyond a local demo -- confirm before
# exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
292
+
293
+
294
class Message(BaseModel):
    """One conversation turn in OpenAI-style chat format."""

    # Speaker role; the regex restricts it to the three Llama 3 roles.
    role: str = Field(..., pattern="^(user|assistant|system)$")
    # Raw message text (no chat-template tokens).
    content: str
297
+
298
+
299
class ChatRequest(BaseModel):
    """Request body for /v1/chat and /v1/diagnose."""

    # Full conversation so far; the last entry must be the user's turn.
    messages: list[Message]
    # Generation cap; bounds mirror the server's max_model_len budget.
    max_new_tokens: int = Field(default=512, ge=1, le=2048)
    # Sampling temperature; minimum 0.01 (greedy decoding not exposed).
    temperature: float = Field(default=0.7, ge=0.01, le=2.0)
    top_p: float = Field(default=0.9, ge=0.0, le=1.0)
    # When true, the response is served as SSE instead of a single JSON body.
    stream: bool = False
305
+
306
+
307
class ChatResponse(BaseModel):
    """Non-streaming response body for /v1/chat."""

    # Server-generated request id ("bio-<hex>").
    id: str
    # Post-processed assistant reply (see format_response).
    content: str
    # vLLM finish reason, e.g. "stop" or "length".
    finish_reason: str
    # Token accounting: prompt_tokens / completion_tokens / total_tokens.
    usage: dict
312
+
313
+
314
def build_prompt(messages: list[Message]) -> str:
    """Build Llama 3 chat-format prompt.

    Prepends the default SYSTEM_PROMPT unless the caller already supplied a
    system turn, then serializes every message with Llama 3 header/eot
    tokens and opens an assistant turn for generation to continue from.
    """
    prompt_chunks = ["<|begin_of_text|>"]

    # Inject the default medical system prompt only when none was provided.
    if all(m.role != "system" for m in messages):
        prompt_chunks.append(
            "<|start_header_id|>system<|end_header_id|>\n\n"
            f"{SYSTEM_PROMPT}<|eot_id|>"
        )

    prompt_chunks.extend(
        f"<|start_header_id|>{m.role}<|end_header_id|>\n\n"
        f"{m.content}<|eot_id|>"
        for m in messages
    )

    # Open the assistant turn so the model generates the reply from here.
    prompt_chunks.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
    return "".join(prompt_chunks)
333
+
334
+
335
@app.get("/health")
async def health():
    """Liveness probe: reports model path, engine type and GPU memory in MB."""
    if torch.cuda.is_available():
        gpu_mb = round(torch.cuda.memory_allocated() / 1024 / 1024, 1)
    else:
        gpu_mb = None  # CPU-only host: no GPU figure to report
    return {
        "status": "ok",
        "model": MODEL_PATH,
        "engine": "vLLM",
        "gpu_memory_used_mb": gpu_mb,
    }
345
+
346
+
347
@app.post("/v1/chat", response_model=ChatResponse)
async def chat(req: ChatRequest):
    """Run one chat completion against the vLLM engine.

    Validates that the conversation ends with a user turn, builds a Llama 3
    prompt, and either streams the reply as SSE (req.stream) or drains the
    engine and returns a single formatted ChatResponse.

    Raises:
        HTTPException(400): last message is not from the user.
        HTTPException(500): engine produced no output.
    """
    if not req.messages or req.messages[-1].role != "user":
        raise HTTPException(400, "Last message must be from the user.")

    prompt = build_prompt(req.messages)
    request_id = f"bio-{uuid.uuid4().hex[:12]}"

    sampling_params = SamplingParams(
        max_tokens=req.max_new_tokens,
        temperature=req.temperature,
        top_p=req.top_p,
        stop=STOP_PATTERNS,  # also truncated post-hoc by format_response
        repetition_penalty=1.15,
    )

    if req.stream:
        return _stream_response(request_id, prompt, sampling_params)

    # vLLM yields incremental RequestOutputs even for non-streaming use;
    # keep only the last (final) one.
    final = None
    async for result in engine.generate(prompt, sampling_params, request_id):
        final = result

    # BUG FIX: `final`/`output` were previously referenced unconditionally,
    # raising UnboundLocalError if the generator yielded nothing.
    if final is None or not final.outputs:
        raise HTTPException(500, "Model produced no output.")

    output = final.outputs[0]
    full_text = format_response(output.text)
    prompt_tokens = len(final.prompt_token_ids)
    completion_tokens = len(output.token_ids)

    return ChatResponse(
        id=request_id,
        content=full_text,
        finish_reason=output.finish_reason or "stop",
        usage={
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    )
388
+
389
+
390
def _stream_response(request_id: str, prompt: str, sampling_params: SamplingParams):
    """Return an SSE StreamingResponse emitting incremental formatted text.

    NOTE(review): format_response() is re-run on the full accumulated text at
    every step and only the suffix past `sent_len` is emitted.  Because the
    formatter can rewrite earlier text (bulleting, truncation), its output is
    not guaranteed to grow monotonically, so the concatenated deltas may not
    equal format_response(final_text) -- confirm this is acceptable.
    """
    async def token_generator():
        accumulated = ""
        sent_len = 0  # length of formatted text already delivered to the client
        async for result in engine.generate(prompt, sampling_params, request_id):
            output = result.outputs[0]
            accumulated = output.text

            formatted = format_response(accumulated)
            new_text = formatted[sent_len:]
            if new_text:
                sent_len = len(formatted)
                yield f"data: {new_text}\n\n"
        # SSE end-of-stream sentinel expected by the client.
        yield "data: [DONE]\n\n"

    return StreamingResponse(token_generator(), media_type="text/event-stream")
406
+
407
+
408
@app.post("/v1/diagnose")
async def diagnose(req: ChatRequest):
    """Diagnostic-analysis endpoint.

    Appends a diagnosis instruction to the final user message, then
    delegates the (mutated) request to the /v1/chat handler.
    """
    if not req.messages or req.messages[-1].role != "user":
        raise HTTPException(400, "Last message must be from the user.")

    final_turn = req.messages[-1]
    instruction = (
        "Please provide a detailed and comprehensive diagnostic analysis of this medical record."
    )
    final_turn.content = f"{final_turn.content}\n\n{instruction}"
    return await chat(req)
419
+
420
+
421
@app.get("/", include_in_schema=False)
async def serve_dashboard():
    """Serve the single-page dashboard UI from dashboard/index.html."""
    dashboard_path = os.path.join(DASHBOARD_DIR, "index.html")
    # BUG FIX: read with an explicit encoding -- open()'s default is
    # locale-dependent, which can corrupt non-ASCII characters in the page.
    with open(dashboard_path, "r", encoding="utf-8") as f:
        html = f.read()
    return HTMLResponse(content=html)
427
+
428
+
429
if __name__ == "__main__":
    # Serve the API and dashboard under uvicorn; HOST/PORT come from the
    # environment (see module constants above).
    uvicorn.run(
        app,
        host=HOST,
        port=PORT,
        log_level="info",
    )