MiniCPM5-1B-Demo

Runtime error

App Files Files Community

mac committed 8 days ago

Commit

d70f24f

1 Parent(s): c51af21

rewrite ui

Browse files

Files changed (4) hide show

app.py +27 -264
index.html +470 -0
requirements.txt +3 -1
utils_chatbot.py +12 -69

app.py CHANGED Viewed

@@ -1,18 +1,18 @@
 # MiniCPM5-1B Demo
-from pathlib import Path
 import os
-import time
 import logging
 import threading
-import gradio as gr
 import spaces
 import torch
 from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from utils_chatbot import organize_messages_from_messages, stream2display_text, clean_final_text
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -34,20 +34,28 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
 ).to("cuda")
 @spaces.GPU(duration=60)
-def gpu_generate_stream(inputs, history, temperature, top_p, enable_think):
     prompt_text = tokenizer.apply_chat_template(
-        inputs,
         tokenize=False,
         add_generation_prompt=True,
-        enable_thinking=enable_think,
     )
     model_inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")
-    history.append({"role": "assistant", "content": ""})
-    yield "", history
     streamer = TextIteratorStreamer(
         tokenizer,
         skip_prompt=True,
@@ -67,267 +75,22 @@ def gpu_generate_stream(inputs, history, temperature, top_p, enable_think):
     thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
-    stream_text = ""
-    gen_tk_count = 0
-    start_time = time.time()
     for new_token_text in streamer:
         if not new_token_text:
             continue
-        stream_text += new_token_text
-        gen_tk_count += 1
-        elapsed = time.time() - start_time
-        token_per_sec = gen_tk_count / elapsed if elapsed > 0 else 0
-        display_text = stream2display_text(stream_text, token_per_sec)
-        history[-1]["content"] = display_text
-        yield "", history
     thread.join()
-    history[-1]["content"] = clean_final_text(stream_text)
-    yield "", history
-def gen_response_stream(message, history, temperature, top_p, enable_think):
-    chat_msg_ls = organize_messages_from_messages(message, history)
-    history.append({"role": "user", "content": message})
-    yield from gpu_generate_stream(
-        chat_msg_ls, history,
-        temperature=temperature,
-        top_p=top_p,
-        enable_think=enable_think,
-    )
-def create_app():
-    assets_path = Path.cwd().absolute() / "assets"
-    gr.set_static_paths(paths=[assets_path])
-    with gr.Blocks() as demo:
-        # Header
-        gr.HTML(
-            '<div class="app-header">'
-            '  <div class="header-left">'
-            '    <img src="https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png" alt="Logo" class="header-logo">'
-            '    <div class="header-title">'
-            '      <span class="title-main">MiniCPM5-1B</span>'
-            '      <span class="title-sub">By OpenBMB</span>'
-            '    </div>'
-            '  </div>'
-            '  <div class="header-badge">'
-            '    <span class="status-dot"></span>'
-            '    MiniCPM5-1B'
-            '  </div>'
-            '</div>'
-        )
-        with gr.Row(elem_classes=["main-row"]):
-            with gr.Column(scale=4, elem_classes=["chat-col"]):
-                chatbot = gr.Chatbot(
-                    show_label=False,
-                    placeholder="Send a message to start chatting...",
-                    height="70vh",
-                    elem_classes=["dark-chatbot"],
-                )
-                prompt = gr.Textbox(
-                    show_label=False,
-                    placeholder="Ask MiniCPM5...",
-                    lines=1,
-                    elem_classes=["input-pill"],
-                )
-            with gr.Column(scale=1, min_width=220, elem_classes=["settings-col"]):
-                gr.HTML('<div class="settings-title">Settings</div>')
-                temperature = gr.Slider(
-                    minimum=0, maximum=1, value=0.9, step=0.05,
-                    label="Temperature",
-                    elem_classes=["dark-slider"],
-                )
-                top_p = gr.Slider(
-                    minimum=0, maximum=1, value=0.95, step=0.01,
-                    label="Top-p",
-                    elem_classes=["dark-slider"],
-                )
-                enable_think = gr.Checkbox(
-                    label="Thinking",
-                    value=True,
-                    elem_classes=["dark-checkbox"],
-                )
-                clear = gr.Button(
-                    "Clear History",
-                    variant="secondary",
-                    elem_classes=["clear-btn"],
-                )
-        prompt.submit(
-            gen_response_stream,
-            inputs=[prompt, chatbot, temperature, top_p, enable_think],
-            outputs=[prompt, chatbot],
-        )
-        clear.click(lambda: None, None, chatbot, queue=False)
-    return demo
-THEME = gr.themes.Soft(
-    primary_hue="indigo",
-    secondary_hue="slate",
-    neutral_hue="slate",
-    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
-)
-CSS = """
-footer { display: none !important; }
-/* Header */
-.app-header {
-    display: flex;
-    align-items: center;
-    justify-content: space-between;
-    padding: 16px 24px;
-    border-bottom: 1px solid #e2e8f0;
-}
-.header-left {
-    display: flex;
-    align-items: center;
-    gap: 12px;
-}
-.header-logo {
-    width: 40px;
-    height: 40px;
-    border-radius: 10px;
-    filter: drop-shadow(0 0 8px rgba(99,102,241,0.25));
-}
-.header-title {
-    display: flex;
-    flex-direction: column;
-}
-.title-main {
-    font-size: 18px;
-    font-weight: 700;
-    letter-spacing: -0.02em;
-    color: #1e293b;
-}
-.title-sub {
-    font-size: 10px;
-    text-transform: uppercase;
-    letter-spacing: 0.2em;
-    color: #94a3b8;
-    font-weight: 600;
-}
-.header-badge {
-    display: flex;
-    align-items: center;
-    gap: 6px;
-    font-size: 10px;
-    font-weight: 700;
-    letter-spacing: 0.12em;
-    text-transform: uppercase;
-    color: #64748b;
-    background: #f1f5f9;
-    padding: 6px 14px;
-    border-radius: 100px;
-    border: 1px solid #e2e8f0;
-}
-.status-dot {
-    width: 6px;
-    height: 6px;
-    border-radius: 50%;
-    background: #22c55e;
-    animation: pulse-dot 2s infinite;
-}
-@keyframes pulse-dot {
-    0%, 100% { opacity: 1; }
-    50% { opacity: 0.4; }
-}
-/* Layout */
-.main-row { gap: 0 !important; }
-.chat-col { padding-right: 8px !important; }
-.settings-col {
-    border-left: 1px solid #e2e8f0 !important;
-    padding: 20px !important;
-}
-.settings-title {
-    font-size: 14px;
-    font-weight: 700;
-    color: #475569;
-    margin-bottom: 12px;
-    letter-spacing: 0.05em;
-    text-transform: uppercase;
-}
-/* Chatbot styling */
-.dark-chatbot {
-    border: none !important;
-    background: transparent !important;
-}
-.dark-chatbot .message-row {
-    animation: msg-fade-in 0.35s cubic-bezier(0.16, 1, 0.3, 1) forwards;
-}
-@keyframes msg-fade-in {
-    from { opacity: 0; transform: translateY(12px); }
-    to { opacity: 1; transform: translateY(0); }
-}
-/* User message bubble */
-.dark-chatbot .role-user .message-bubble-border {
-    background: linear-gradient(135deg, #4f46e5, #6366f1) !important;
-    border: none !important;
-    border-radius: 20px 20px 4px 20px !important;
-    box-shadow: 0 4px 14px rgba(79,70,229,0.2);
-}
-.dark-chatbot .role-user .message-bubble-border .message-content {
-    color: #ffffff !important;
-}
-/* Bot message bubble */
-.dark-chatbot .role-assistant .message-bubble-border {
-    background: #f8fafc !important;
-    border: 1px solid #e2e8f0 !important;
-    border-radius: 20px 20px 20px 4px !important;
-}
-/* Thinking block (blockquote) */
-.dark-chatbot .role-assistant blockquote {
-    background: #eef2ff !important;
-    border-left: 3px solid #6366f1 !important;
-    border-radius: 4px 10px 10px 4px;
-    padding: 10px 14px !important;
-    color: #64748b !important;
-    font-style: italic;
-    margin-bottom: 10px !important;
-}
-/* Input pill */
-.input-pill {
-    border: 1px solid #cbd5e1 !important;
-    border-radius: 24px !important;
-    transition: all 0.3s ease;
-}
-.input-pill:focus-within {
-    border-color: #6366f1 !important;
-    box-shadow: 0 0 0 3px rgba(99,102,241,0.1);
-}
-/* Clear button */
-.clear-btn button {
-    border-radius: 12px !important;
-    transition: all 0.2s ease;
-}
-/* Code blocks */
-.dark-chatbot pre {
-    background: #f1f5f9 !important;
-    border: 1px solid #e2e8f0 !important;
-    border-radius: 10px !important;
-}
-.dark-chatbot code {
-    color: #4f46e5 !important;
-}
-"""
-demo = create_app()
 if __name__ == "__main__":
-    demo.launch(theme=THEME, css=CSS)

 # MiniCPM5-1B Demo
 import os
 import logging
 import threading
+from typing import Generator
 import spaces
 import torch
+from fastapi.responses import HTMLResponse
+from gradio import Server
 from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from utils_chatbot import organize_messages
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
     trust_remote_code=True,
 ).to("cuda")
+demo = Server()
+@demo.api()
 @spaces.GPU(duration=60)
+def predict(
+    message: str,
+    history: list[list] | None = None,
+    thinking_mode: bool = True,
+    temperature: float = 0.9,
+    top_p: float = 0.95,
+) -> Generator[str, None, None]:
+    messages = organize_messages(message, history)
     prompt_text = tokenizer.apply_chat_template(
+        messages,
         tokenize=False,
         add_generation_prompt=True,
+        enable_thinking=thinking_mode,
     )
     model_inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")
     streamer = TextIteratorStreamer(
         tokenizer,
         skip_prompt=True,
     thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
     thread.start()
+    full_text = ""
     for new_token_text in streamer:
         if not new_token_text:
             continue
+        full_text += new_token_text
+        yield full_text
     thread.join()
+@demo.get("/", response_class=HTMLResponse)
+async def homepage():
+    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
+    with open(html_path, "r", encoding="utf-8") as f:
+        return f.read()
 if __name__ == "__main__":
+    demo.launch(show_error=True)

index.html ADDED Viewed

	@@ -0,0 +1,470 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
+    <title>MiniCPM5-1B | OpenBMB</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
+    <script src="https://unpkg.com/lucide@latest"></script>
+    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.min.css">
+    <script src="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/contrib/auto-render.min.js"></script>
+    <style>
+        :root {
+            --bg: #ffffff;
+            --blue: #4f46e5;
+            --cyan: #6366f1;
+            --text: #1e293b;
+            --text-muted: #64748b;
+            --glass: rgba(0, 0, 0, 0.02);
+            --glass-border: #e2e8f0;
+            --accent: #4f46e5;
+        }
+        body {
+            font-family: 'Inter', sans-serif;
+            background-color: var(--bg);
+            color: var(--text);
+            height: 100vh;
+            margin: 0;
+            display: flex;
+            flex-direction: column;
+            overflow: hidden;
+        }
+        .chat-scroll-area {
+            flex: 1;
+            overflow-y: auto;
+            padding-bottom: 140px;
+            -webkit-overflow-scrolling: touch;
+            scroll-behavior: smooth;
+        }
+        .chat-scroll-area::-webkit-scrollbar { width: 4px; }
+        .chat-scroll-area::-webkit-scrollbar-thumb { background: #cbd5e1; border-radius: 10px; }
+        .message-bubble {
+            max-width: 85%;
+            animation: fadeIn 0.35s cubic-bezier(0.16, 1, 0.3, 1) forwards;
+        }
+        @keyframes fadeIn {
+            from { opacity: 0; transform: translateY(12px); }
+            to { opacity: 1; transform: translateY(0); }
+        }
+        .user-message {
+            background: linear-gradient(135deg, var(--blue), var(--cyan));
+            color: #ffffff;
+            box-shadow: 0 4px 14px rgba(79, 70, 229, 0.2);
+            border-radius: 20px 20px 4px 20px;
+        }
+        .bot-message {
+            background: #f8fafc;
+            border: 1px solid var(--glass-border);
+            border-radius: 20px 20px 20px 4px;
+        }
+        .thinking-block {
+            background: #eef2ff;
+            border-left: 3px solid var(--accent);
+            padding: 12px 16px;
+            margin-bottom: 12px;
+            border-radius: 4px 12px 12px 4px;
+            font-size: 14px;
+            color: var(--text-muted);
+            font-style: italic;
+        }
+        .typing-dot {
+            width: 4px; height: 4px;
+            background: var(--accent);
+            border-radius: 50%;
+            animation: bounce 1.4s infinite ease-in-out;
+        }
+        @keyframes bounce {
+            0%, 80%, 100% { transform: scale(0.3); opacity: 0.4; }
+            40% { transform: scale(1); opacity: 1; }
+        }
+        .input-pill {
+            background: #ffffff;
+            border: 1px solid var(--glass-border);
+            transition: all 0.3s ease;
+        }
+        .input-pill:focus-within {
+            border-color: rgba(79, 70, 229, 0.4);
+            box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
+        }
+        .logo-glow {
+            filter: drop-shadow(0 0 8px rgba(99, 102, 241, 0.25));
+        }
+        .send-btn {
+            background: linear-gradient(135deg, var(--blue), var(--cyan));
+            transition: all 0.3s ease;
+        }
+        .send-btn:hover:not(:disabled) { transform: scale(1.05); filter: brightness(1.05); }
+        .settings-panel {
+            background: rgba(255, 255, 255, 0.98);
+            backdrop-filter: blur(20px);
+            border-left: 1px solid var(--glass-border);
+            transition: transform 0.4s cubic-bezier(0.16, 1, 0.3, 1);
+        }
+        .control-slider {
+            -webkit-appearance: none;
+            width: 100%;
+            height: 4px;
+            background: #e2e8f0;
+            border-radius: 2px;
+            outline: none;
+        }
+        .control-slider::-webkit-slider-thumb {
+            -webkit-appearance: none;
+            width: 12px; height: 12px;
+            background: var(--accent);
+            border-radius: 50%;
+            cursor: pointer;
+        }
+        .toggle-switch {
+            width: 36px; height: 20px;
+            background: #e2e8f0;
+            border-radius: 10px;
+            position: relative;
+            cursor: pointer;
+            transition: background 0.3s;
+        }
+        .toggle-switch.active { background: var(--accent); }
+        .toggle-switch::after {
+            content: '';
+            position: absolute;
+            top: 2px; left: 2px;
+            width: 16px; height: 16px;
+            background: white;
+            border-radius: 50%;
+            transition: transform 0.3s;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.15);
+        }
+        .toggle-switch.active::after { transform: translateX(16px); }
+        .bot-message pre {
+            background: #f1f5f9;
+            border: 1px solid #e2e8f0;
+            border-radius: 10px;
+            padding: 12px;
+            overflow-x: auto;
+        }
+        .bot-message code { color: #4f46e5; }
+    </style>
+</head>
+<body>
+    <header class="h-16 flex items-center justify-between px-6 md:px-10 shrink-0 z-50 border-b border-slate-200 bg-white">
+        <div class="flex items-center gap-3">
+            <img src="https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png"
+                 alt="OpenBMB" class="w-10 h-10 logo-glow rounded-lg">
+            <div>
+                <h1 class="text-lg font-bold text-slate-800">MiniCPM5-1B</h1>
+                <p class="text-[10px] text-slate-400 uppercase tracking-[0.2em] font-semibold">By OpenBMB</p>
+            </div>
+        </div>
+        <div class="flex items-center gap-4">
+            <div class="hidden md:flex items-center gap-2 text-[10px] font-bold text-slate-500 tracking-widest bg-slate-50 px-3 py-1.5 rounded-full border border-slate-200">
+                <span class="w-1.5 h-1.5 rounded-full bg-green-500 animate-pulse"></span>
+                MiniCPM5-1B
+            </div>
+            <button id="toggle-settings" class="p-2 rounded-xl hover:bg-slate-100 text-slate-500 hover:text-slate-800 transition-all">
+                <i data-lucide="sliders-horizontal" class="w-5 h-5"></i>
+            </button>
+        </div>
+    </header>
+    <div id="settings-panel" class="fixed top-0 right-0 h-full w-72 z-[100] translate-x-full settings-panel p-6 flex flex-col gap-6 shadow-[-12px_0_40px_rgba(0,0,0,0.08)]">
+        <div class="flex items-center justify-between">
+            <h2 class="text-base font-bold text-slate-800">Settings</h2>
+            <button id="close-settings" class="text-slate-400 hover:text-slate-700">
+                <i data-lucide="x" class="w-5 h-5"></i>
+            </button>
+        </div>
+        <div class="space-y-5">
+            <div class="flex items-center justify-between">
+                <span class="text-sm font-medium text-slate-600">Thinking</span>
+                <div id="thinking-toggle" class="toggle-switch active"></div>
+            </div>
+            <div class="space-y-2">
+                <div class="flex justify-between text-xs font-bold text-slate-400 uppercase tracking-widest">
+                    <span>Temperature</span>
+                    <span id="temp-val">0.9</span>
+                </div>
+                <input type="range" id="temp-slider" min="0" max="1" step="0.05" value="0.9" class="control-slider">
+            </div>
+            <div class="space-y-2">
+                <div class="flex justify-between text-xs font-bold text-slate-400 uppercase tracking-widest">
+                    <span>Top-p</span>
+                    <span id="p-val">0.95</span>
+                </div>
+                <input type="range" id="p-slider" min="0" max="1" step="0.01" value="0.95" class="control-slider">
+            </div>
+            <button onclick="clearHistory()" class="w-full py-3 rounded-xl bg-red-50 border border-red-200 text-red-600 text-sm font-bold hover:bg-red-100 transition-all flex items-center justify-center gap-2 mt-4">
+                <i data-lucide="trash-2" class="w-4 h-4"></i>
+                Clear History
+            </button>
+        </div>
+    </div>
+    <main id="chat-messages" class="chat-scroll-area px-4 flex-1">
+        <div class="max-w-3xl mx-auto space-y-6 pt-6 pb-32" id="chat-container">
+            <div class="flex gap-3 items-start">
+                <div class="bot-message p-5 message-bubble shadow-sm">
+                    <p class="text-slate-700 leading-relaxed text-[15px]">
+                        Hello! I'm <strong>MiniCPM5-1B</strong>. Ask me anything.
+                    </p>
+                </div>
+            </div>
+        </div>
+    </main>
+    <div class="fixed bottom-0 left-0 right-0 p-4 md:p-8 pointer-events-none z-50">
+        <div class="max-w-3xl mx-auto pointer-events-auto">
+            <div class="input-pill rounded-[2rem] p-2 flex items-end shadow-lg">
+                <textarea id="user-input" placeholder="Ask MiniCPM5..." rows="1"
+                    class="flex-1 bg-transparent border-none focus:ring-0 text-slate-800 placeholder-slate-400 py-3 px-3 resize-none max-h-40 leading-relaxed font-medium outline-none"></textarea>
+                <button id="send-btn" class="send-btn w-11 h-11 text-white rounded-full flex items-center justify-center shrink-0 mb-0.5 mr-1">
+                    <i data-lucide="arrow-up" class="w-5 h-5" id="send-icon"></i>
+                </button>
+            </div>
+        </div>
+    </div>
+    <script type="module">
+        import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
+        // Let Lucide render the icon placeholders declared in the markup.
+        lucide.createIcons();
+        // Handles to the DOM nodes this script reads or updates.
+        const chatContainer = document.getElementById('chat-container');
+        const chatScrollArea = document.getElementById('chat-messages');
+        const userInput = document.getElementById('user-input');
+        const sendBtn = document.getElementById('send-btn');
+        const settingsPanel = document.getElementById('settings-panel');
+        const toggleSettings = document.getElementById('toggle-settings');
+        const closeSettings = document.getElementById('close-settings');
+        const thinkingToggle = document.getElementById('thinking-toggle');
+        const tempSlider = document.getElementById('temp-slider');
+        const pSlider = document.getElementById('p-slider');
+        // Mutable page state: the Gradio client (assigned in init), the finished
+        // [user, answer] turns sent back as history, the in-flight job, and
+        // whether the settings panel is currently shown.
+        let client = null;
+        let chatHistory = [];
+        let currentJob = null;
+        let isSettingsOpen = false;
+        // Tag that separates streamed reasoning from the answer (used by splitThinking).
+        const THINK_CLOSE = '</think>';
+        async function init() {
+            try {
+                client = await Client.connect(window.location.origin, { events: ["data", "status"] });
+            } catch (err) {
+                console.error("Gradio connection error", err);
+            }
+        }
+        init();
+        function renderMath(el) {
+            if (window.renderMathInElement) {
+                renderMathInElement(el, {
+                    delimiters: [
+                        {left: '$$', right: '$$', display: true},
+                        {left: '$', right: '$', display: false},
+                    ],
+                    throwOnError: false
+                });
+            }
+        }
+        function splitThinking(fullText) {
+            const text = fullText.replace('<think>', '');
+            const pos = text.indexOf(THINK_CLOSE);
+            if (pos === -1) {
+                return { thinking: text.trim(), answer: '' };
+            }
+            return {
+                thinking: text.slice(0, pos).trim(),
+                answer: text.slice(pos + THINK_CLOSE.length).replace('<|im_end|>', '').trim()
+            };
+        }
+        function appendMessage(role, text = '') {
+            const div = document.createElement('div');
+            div.className = `flex gap-3 items-start ${role === 'user' ? 'flex-row-reverse' : ''}`;
+            const bubbleClass = role === 'user' ? 'user-message' : 'bot-message';
+            div.innerHTML = `
+                <div class="${bubbleClass} p-5 message-bubble shadow-sm">
+                    <div class="thinking-container hidden"></div>
+                    <div class="content-container leading-relaxed text-[15px]">${role === 'user' ? escapeHtml(text) : marked.parse(text)}</div>
+                </div>
+            `;
+            chatContainer.appendChild(div);
+            if (role === 'bot') renderMath(div.querySelector('.content-container'));
+            chatScrollArea.scrollTo({ top: chatScrollArea.scrollHeight, behavior: 'smooth' });
+            return div;
+        }
+        function escapeHtml(s) {
+            return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
+        }
+        function updateBotMessage(div, fullText) {
+            const thinkingContainer = div.querySelector('.thinking-container');
+            const contentContainer = div.querySelector('.content-container');
+            const { thinking, answer } = splitThinking(fullText);
+            if (thinking) {
+                thinkingContainer.classList.remove('hidden');
+                thinkingContainer.innerHTML = `<div class="thinking-block">${marked.parse(thinking)}</div>`;
+            } else {
+                thinkingContainer.classList.add('hidden');
+                thinkingContainer.innerHTML = '';
+            }
+            if (answer) {
+                contentContainer.innerHTML = marked.parse(answer);
+            } else if (!thinking) {
+                contentContainer.innerHTML = marked.parse(fullText.replace('<|im_end|>', ''));
+            } else {
+                contentContainer.innerHTML = '<div class="flex gap-1.5 py-1"><div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div></div>';
+            }
+            renderMath(thinkingContainer);
+            renderMath(contentContainer);
+            chatScrollArea.scrollTo({ top: chatScrollArea.scrollHeight, behavior: 'smooth' });
+            return answer || fullText.replace(THINK_CLOSE, '').replace('<|im_end|>', '').trim();
+        }
+        async function sendMessage() {
+            const text = userInput.value.trim();
+            if (!text) return;
+            userInput.value = '';
+            userInput.style.height = 'auto';
+            appendMessage('user', text);
+            sendBtn.disabled = true;
+            const botDiv = appendMessage('bot', '');
+            const contentContainer = botDiv.querySelector('.content-container');
+            contentContainer.innerHTML = '<div class="flex gap-1.5 py-2"><div class="typing-dot"></div><div class="typing-dot"></div><div class="typing-dot"></div></div>';
+            let isStopped = false;
+            sendBtn.onclick = () => {
+                if (currentJob) {
+                    currentJob.cancel();
+                    isStopped = true;
+                    resetSendBtn();
+                }
+            };
+            try {
+                currentJob = client.submit("/predict", {
+                    message: text,
+                    history: chatHistory,
+                    thinking_mode: thinkingToggle.classList.contains('active'),
+                    temperature: parseFloat(tempSlider.value),
+                    top_p: parseFloat(pSlider.value),
+                });
+                let finalAnswer = "";
+                for await (const msg of currentJob) {
+                    if (isStopped) break;
+                    if (msg.type === "data" && msg.data) {
+                        finalAnswer = updateBotMessage(botDiv, msg.data[0]);
+                    } else if (msg.type === "status" && msg.stage === "complete") {
+                        break;
+                    } else if (msg.type === "status" && msg.stage === "error") {
+                        throw new Error(msg.message || "Generation failed");
+                    }
+                }
+                if (!isStopped && finalAnswer) {
+                    chatHistory.push([text, finalAnswer]);
+                }
+            } catch (err) {
+                console.error(err);
+                if (!isStopped) {
+                    contentContainer.innerHTML = '<p class="text-red-500">Error: please try again.</p>';
+                }
+            } finally {
+                resetSendBtn();
+                currentJob = null;
+            }
+        }
+        function resetSendBtn() {
+            sendBtn.disabled = false;
+            sendBtn.onclick = sendMessage;
+        }
+        window.clearHistory = function() {
+            chatHistory = [];
+            chatContainer.innerHTML = `
+                <div class="flex gap-3 items-start">
+                    <div class="bot-message p-5 message-bubble shadow-sm">
+                        <p class="text-slate-700 leading-relaxed text-[15px]">History cleared. How can I help you?</p>
+                    </div>
+                </div>
+            `;
+            toggleSettingsSidebar(false);
+        };
+        function toggleSettingsSidebar(open) {
+            isSettingsOpen = open;
+            settingsPanel.classList.toggle('translate-x-full', !open);
+            settingsPanel.classList.toggle('translate-x-0', open);
+        }
+        // Settings panel: open from the header button, close from the X button.
+        // stopPropagation keeps the opening click away from the document-level
+        // click listener below.
+        toggleSettings.onclick = (e) => { e.stopPropagation(); toggleSettingsSidebar(true); };
+        closeSettings.onclick = () => toggleSettingsSidebar(false);
+        // Close the panel on any click outside both the panel and its toggle button.
+        document.addEventListener('click', (e) => {
+            if (isSettingsOpen && !settingsPanel.contains(e.target) && !toggleSettings.contains(e.target)) {
+                toggleSettingsSidebar(false);
+            }
+        });
+        // Flip the Thinking switch. When a conversation already exists the user must
+        // confirm, because the history is cleared; declining flips the switch back.
+        thinkingToggle.onclick = () => {
+            thinkingToggle.classList.toggle('active');
+            if (chatHistory.length > 0) {
+                if (confirm("Changing Thinking mode will clear conversation history. Continue?")) {
+                    clearHistory();
+                } else {
+                    thinkingToggle.classList.toggle('active');
+                }
+            }
+        };
+        // Mirror the slider positions into their numeric labels.
+        tempSlider.oninput = () => document.getElementById('temp-val').textContent = tempSlider.value;
+        pSlider.oninput = () => document.getElementById('p-val').textContent = pSlider.value;
+        sendBtn.onclick = sendMessage;
+        // Enter sends; Shift+Enter falls through so the textarea inserts a newline.
+        userInput.onkeydown = (e) => {
+            if (e.key === 'Enter' && !e.shiftKey) {
+                e.preventDefault();
+                sendMessage();
+            }
+        };
+        // Auto-grow the textarea: reset the height, then size it to its content.
+        userInput.oninput = () => {
+            userInput.style.height = 'auto';
+            userInput.style.height = userInput.scrollHeight + 'px';
+        };
+    </script>
+</body>
+</html>

requirements.txt CHANGED Viewed

@@ -1,6 +1,8 @@
-gradio
 transformers>=4.56
 torch
 spaces
 accelerate
 sentencepiece

+gradio>=6.14.0
 transformers>=4.56
 torch
 spaces
 accelerate
 sentencepiece
+fastapi
+uvicorn>=0.14.0

utils_chatbot.py CHANGED Viewed

@@ -1,72 +1,15 @@
-def organize_messages_from_messages(message, history):
-    """Build chat messages from Gradio 6.x messages-format history."""
     msg_ls = [{"role": "system", "content": "You are a helpful assistant."}]
-    for msg in history:
-        role = msg.get("role", "")
-        content = msg.get("content", "")
-        if role in ("user", "assistant") and content:
-            msg_ls.append({"role": role, "content": content})
     msg_ls.append({"role": "user", "content": message})
     return msg_ls
-def _split_on_think_close(text):
-    """Split on </think>. The <think> tag is in the prompt (skipped by streamer),
-    so stream_text is: 'thinking content</think>answer content'.
-    Returns (thinking_text_or_None, answer_text)."""
-    text = text.replace("<think>", "")
-    close_tag = "</think>"
-    pos = text.find(close_tag)
-    if pos == -1:
-        return text, None
-    think_text = text[:pos]
-    answer_text = text[pos + len(close_tag):]
-    return think_text, answer_text
-def _format_thinking(think_text):
-    """Render thinking content as blockquote."""
-    lines = think_text.strip().splitlines()
-    return "\n".join(f"> {line}" if line.strip() else ">" for line in lines)
-def stream2display_text(stream_text, token_per_sec):
-    think_text, answer_text = _split_on_think_close(stream_text)
-    parts = []
-    if think_text is not None:
-        think_text = think_text.strip()
-        if think_text:
-            parts.append(_format_thinking(think_text))
-    if answer_text is not None:
-        answer_text = answer_text.replace("<|im_end|>", "").strip()
-        if answer_text:
-            parts.append(answer_text)
-    parts.append(f"```{token_per_sec:.2f} token/s```")
-    return "\n\n".join(parts)
-def clean_final_text(stream_text):
-    think_text, answer_text = _split_on_think_close(stream_text)
-    parts = []
-    if think_text is not None:
-        think_text = think_text.strip()
-        if think_text:
-            parts.append(_format_thinking(think_text))
-    if answer_text is not None:
-        answer_text = answer_text.replace("<|im_end|>", "").strip()
-        if answer_text:
-            parts.append(answer_text)
-    if not parts:
-        return stream_text.replace("</think>", "").replace("<|im_end|>", "")
-    return "\n\n".join(parts)

+def organize_messages(message, history=None):
+    """Build chat messages from history tuples [[user, assistant], ...]."""
     msg_ls = [{"role": "system", "content": "You are a helpful assistant."}]
+    if history:
+        for turn in history:
+            if not turn:
+                continue
+            user_text = turn[0] if len(turn) > 0 else None
+            assistant_text = turn[1] if len(turn) > 1 else None
+            if user_text:
+                msg_ls.append({"role": "user", "content": user_text})
+            if assistant_text:
+                msg_ls.append({"role": "assistant", "content": assistant_text})
     msg_ls.append({"role": "user", "content": message})
     return msg_ls