Create app.py
app.py
ADDED
@@ -0,0 +1,340 @@
import gradio as gr
from gradio import ChatMessage
from openai import OpenAI
import time

# Configure the Lemonade Server connection
base_url = "http://localhost:8000/api/v1"
client = OpenAI(
    base_url=base_url,
    api_key="lemonade",  # required, but unused in Lemonade
)
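
# Lemonade Server speaks the OpenAI chat-completions API, so the stock client
# works unmodified; only `base_url` changes. Reasoning-capable models are
# assumed to stream their chain of thought in a `reasoning_content` field on
# each delta, alongside the usual `content` field; non-reasoning models simply
# never populate `reasoning_content`.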

def stream_chat_response(message: str, history: list, model_name: str, system_prompt: str):
    """
    Stream responses from Lemonade Server, displaying the model's thinking
    process separately from its final answer.
    """
    # Add the user message to the history and show it immediately
    history.append(ChatMessage(role="user", content=message))
    yield history

    # Convert history to the OpenAI format - only include actual conversation messages
    messages = []

    # Add the system prompt if provided
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Convert history, skipping metadata-only messages
    for msg in history:
        if isinstance(msg, ChatMessage):
            # Skip thinking/metadata messages when sending to the API
            if msg.metadata and msg.metadata.get("title"):
                continue
            messages.append({
                "role": msg.role,
                "content": msg.content,
            })
        elif isinstance(msg, dict):
            # Skip metadata messages
            if msg.get("metadata"):
                continue
            messages.append({
                "role": msg.get("role", "user"),
                "content": msg.get("content", ""),
            })
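
    # At this point `messages` holds plain role/content dicts, for example:
    #   [{"role": "system", "content": "You are a helpful assistant."},
    #    {"role": "user", "content": "What is 15 + 24?"}]
    # (illustrative values; actual contents depend on the conversation)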

    try:
        # Initialize response tracking
        thinking_content = ""
        response_content = ""
        thinking_added = False
        response_added = False
        thinking_start_time = None

        # Stream the response from Lemonade Server
        stream = client.chat.completions.create(
            model=model_name,
            messages=messages,
            stream=True,
            max_tokens=2048,
            temperature=0.7,
        )
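
        # Each chunk follows the OpenAI streaming schema. For reasoning models
        # served by Lemonade, each delta is assumed to carry either
        # `reasoning_content` (thinking tokens) or `content` (answer tokens),
        # roughly: {"choices": [{"delta": {"reasoning_content": "First, ..."}}]}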

        for chunk in stream:
            # Safety check for chunk structure
            if not chunk.choices or len(chunk.choices) == 0:
                continue

            if not hasattr(chunk.choices[0], 'delta'):
                continue

            delta = chunk.choices[0].delta

            # Check for reasoning_content (thinking process)
            reasoning_content = getattr(delta, 'reasoning_content', None)
            # Check for regular content (final answer)
            content = getattr(delta, 'content', None)

            # Handle reasoning/thinking content
            if reasoning_content:
                if not thinking_added:
                    # Add the thinking section
                    thinking_start_time = time.time()
                    history.append(ChatMessage(
                        role="assistant",
                        content="",
                        metadata={
                            "title": "🧠 Thought Process",
                            "status": "pending"
                        }
                    ))
                    thinking_added = True

                # Accumulate thinking content
                thinking_content += reasoning_content
                history[-1] = ChatMessage(
                    role="assistant",
                    content=thinking_content,
                    metadata={
                        "title": "🧠 Thought Process",
                        "status": "pending"
                    }
                )
                yield history

            # Handle regular content (final answer)
            elif content:
                # Finalize the thinking section if it exists
                if thinking_added and thinking_start_time:
                    elapsed = time.time() - thinking_start_time
                    # Update the thinking message to "done" status
                    for i in range(len(history) - 1, -1, -1):
                        if isinstance(history[i], ChatMessage) and history[i].metadata and history[i].metadata.get("title") == "🧠 Thought Process":
                            history[i] = ChatMessage(
                                role="assistant",
                                content=thinking_content,
                                metadata={
                                    "title": "🧠 Thought Process",
                                    "status": "done",
                                    "duration": elapsed
                                }
                            )
                            break
                    thinking_start_time = None

                # Add or update the response content
                if not response_added:
                    history.append(ChatMessage(
                        role="assistant",
                        content=""
                    ))
                    response_added = True

                response_content += content
                history[-1] = ChatMessage(
                    role="assistant",
                    content=response_content
                )
                yield history

        # Final check: if the stream ended while still emitting thinking
        # tokens (e.g. max_tokens cut the response off mid-thought), no
        # content chunk ever arrived to finalize the thinking section, so
        # mark it done here
        if thinking_added and thinking_start_time:
            elapsed = time.time() - thinking_start_time
            for i in range(len(history) - 1, -1, -1):
                if isinstance(history[i], ChatMessage) and history[i].metadata and history[i].metadata.get("title") == "🧠 Thought Process":
                    history[i] = ChatMessage(
                        role="assistant",
                        content=thinking_content,
                        metadata={
                            "title": "🧠 Thought Process",
                            "status": "done",
                            "duration": elapsed
                        }
                    )
                    break
        yield history

    except Exception as e:
        import traceback
        error_msg = str(e)
        error_trace = traceback.format_exc()

        # Try to extract more details from the error
        if "422" in error_msg:
            error_details = f"""
⚠️ **Request Validation Error**

The server rejected the request. Possible issues:
- Model name might be incorrect (currently: `{model_name}`)
- Check that the model is loaded on the server
- Try simplifying the system prompt

**Error:** {error_msg}
"""
        elif "list index out of range" in error_msg or "IndexError" in error_trace:
            error_details = f"""
⚠️ **Streaming Response Error**

There was an issue processing the streaming response.

**Debug Info:**
- Model: `{model_name}`
- Base URL: `{base_url}`
- Error: {error_msg}

Try refreshing and sending another message.
"""
        else:
            error_details = f"""
⚠️ **Connection Error**

Error: {error_msg}

Make sure:
1. Lemonade Server is running at `{base_url}`
2. Model `{model_name}` is loaded
3. The server is accessible

**Debug trace:**
```
{error_trace[-500:]}
```
"""

        history.append(ChatMessage(
            role="assistant",
            content=error_details,
            metadata={
                "title": "⚠️ Error Details"
            }
        ))
        yield history


def clear_chat():
    """Clear the chat history."""
    return []
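
# `clear_chat` is defined but not wired to any control in the layout below; a
# minimal way to hook it up (an illustrative sketch, not part of the original
# UI) would be, inside the Blocks context:
#   clear_btn = gr.Button("🗑️ Clear Chat")
#   clear_btn.click(clear_chat, outputs=chatbot)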


# Build the Gradio interface
with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    # Define the input textbox first so it can be referenced in Examples
    msg = gr.Textbox(
        placeholder="Type your message here and press Enter...",
        show_label=False,
        container=False,
        render=False  # Don't render yet; it is rendered in the main area below
    )

    # Sidebar for settings and information
    with gr.Sidebar(position="left", open=True):
        gr.Markdown("""
        # 🍋 Lemonade Reasoning Chatbot
        Chat with local LLMs running on AMD Lemonade Server. This interface beautifully displays the model's thinking process!
        """)

        gr.Markdown("### ⚙️ Settings")

        model_dropdown = gr.Dropdown(
            choices=[
                "Qwen3-0.6B-GGUF",
                "Llama-3.1-8B-Instruct-Hybrid",
                "Qwen2.5-7B-Instruct",
                "Phi-3.5-mini-instruct",
                "Meta-Llama-3-8B-Instruct"
            ],
            value="Qwen3-0.6B-GGUF",
            label="Model",
            info="Select the LLM model to use",
            allow_custom_value=True
        )
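
        # `allow_custom_value=True` makes the dropdown a convenience rather
        # than a constraint: any model id can be typed in and is passed
        # verbatim as the `model` field of the request. The choices above are
        # examples and must match models actually installed on the local
        # Lemonade Server.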

        system_prompt = gr.Textbox(
            label="System Prompt (Optional)",
            value="You are a helpful assistant.",
            lines=3,
            info="Customize the model's behavior",
            placeholder="Leave empty to use model defaults"
        )

        # How Thinking Works accordion
        with gr.Accordion("💡 How Thinking Works", open=False):
            gr.Markdown("""
            - Reasoning models output `reasoning_content` (thinking) and `content` (final answer) separately
            - Thinking appears in a collapsible "🧠 Thought Process" section
            - The duration of thinking is displayed automatically
            - Works with reasoning models such as DeepSeek-R1 and QwQ
            """)

        # Current Model accordion
        with gr.Accordion("📋 Current Model", open=False):
            gr.Markdown("""
            Make sure your model supports reasoning output for thinking to be displayed.
            """)

        # Example Prompts accordion
        with gr.Accordion("📝 Example Prompts", open=False):
            gr.Markdown("""
            - "Solve: If a train travels 120 km in 2 hours, what's its speed?"
            - "Compare pros and cons of electric vs gas cars"
            - "Explain step-by-step how to make coffee"
            - "What's the difference between AI and ML?"
            """)

        # Example interactions in the sidebar
        gr.Examples(
            examples=[
                "What is 15 + 24?",
                "Write a short poem about AI",
                "What is the capital of Japan?",
                "Explain what machine learning is in simple terms"
            ],
            inputs=msg,
            label="Quick Examples"
        )

    # Main chat area - full screen
    chatbot = gr.Chatbot(
        type="messages",
        label="Chat",
        height="calc(100vh - 200px)",
        avatar_images=(
            "https://em-content.zobj.net/source/twitter/376/bust-in-silhouette_1f464.png",
            "https://em-content.zobj.net/source/twitter/376/robot_1f916.png"
        ),
        show_label=False,
        # Placeholder shown in the empty chat area before the first message
        placeholder="""<div>
            <img src="/gradio_api/file=placeholder.png">
        </div>"""
    )
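
    # The placeholder image is served through Gradio's `/gradio_api/file=`
    # route; this works only because `demo.launch(allowed_paths=["."])` below
    # whitelists the working directory, and it assumes a `placeholder.png`
    # exists next to app.py.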

    # Render the input textbox in the main area
    msg.render()

    # Event handlers - only the submit event
    def submit_message(message, history, model, sys_prompt):
        """Wrapper to validate input before streaming the response."""
        if not message or not message.strip():
            # Ignore empty submissions. This function is a generator (it
            # contains `yield from`), so a plain `return history, ""` would
            # never reach the UI; yield the unchanged history instead.
            yield history
            return
        yield from stream_chat_response(message, history, model, sys_prompt)
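
    # Because `submit_message` is a generator, Gradio re-renders `chatbot` on
    # every yielded history; the `.then(...)` step below runs only after the
    # generator is exhausted, which is why the textbox is cleared at the end
    # of streaming rather than immediately on submit.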
    msg.submit(
        submit_message,
        inputs=[msg, chatbot, model_dropdown, system_prompt],
        outputs=chatbot
    ).then(
        lambda: "",
        None,
        msg
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(allowed_paths=["."], ssr_mode=True)
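
# To run locally (a minimal sketch; assumes Lemonade Server is already serving
# its OpenAI-compatible API at http://localhost:8000/api/v1 with a model
# loaded, and that `gradio` and `openai` are installed):
#   pip install gradio openai
#   python app.py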