Update app.py
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 import spaces
 from transformers import pipeline
 import torch
+import re
+import json
 from typing import List, Dict, Optional
 
 # Global variable to store pipelines
@@ -9,11 +11,107 @@ model_cache = {}
 
 # Available models
 AVAILABLE_MODELS = {
-    "
-    "Apollo-1-8B": "NoemaResearch/Apollo-1-8B",
-    "Apollo-1-2B": "NoemaResearch/Apollo-1-2B",
+    "Daedalus-1-8B": "NoemaResearch/Daedalus-1-8B",
 }
 
+def parse_thinking_tags(text):
+    """Parse text and extract thinking sections"""
+    think_pattern = r'<think>(.*?)</think>'
+    parts = []
+    last_end = 0
+
+    for match in re.finditer(think_pattern, text, re.DOTALL):
+        # Add text before thinking block
+        if match.start() > last_end:
+            before_text = text[last_end:match.start()].strip()
+            if before_text:
+                parts.append({"type": "text", "content": before_text})
+
+        # Add thinking block
+        thinking_content = match.group(1).strip()
+        if thinking_content:
+            parts.append({"type": "thinking", "content": thinking_content})
+
+        last_end = match.end()
+
+    # Add remaining text
+    if last_end < len(text):
+        remaining_text = text[last_end:].strip()
+        if remaining_text:
+            parts.append({"type": "text", "content": remaining_text})
+
+    return parts
+
+def create_thinking_html(thinking_content, token_count=None):
+    """Create HTML for thinking block"""
+    if token_count is None:
+        token_count = f"{len(thinking_content.split()) * 4:,}"  # Rough token estimate
+
+    thinking_id = f"thinking_{hash(thinking_content) % 10000}"
+
+    html = f"""
+    <div class="thinking-container" style="margin: 16px 0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;">
+        <div class="thinking-header" onclick="toggleThinking('{thinking_id}')"
+             style="background: #3a3a3a; border-radius: 8px; padding: 12px 16px; cursor: pointer;
+                    display: flex; align-items: center; gap: 10px; transition: all 0.2s ease;
+                    border: 1px solid #4a4a4a;">
+            <div style="width: 16px; height: 16px; background: #6366f1; border-radius: 50%; position: relative;">
+                <div style="position: absolute; top: 50%; left: 50%; width: 6px; height: 6px;
+                            background: white; border-radius: 50%; transform: translate(-50%, -50%);"></div>
+            </div>
+            <span style="font-weight: 500; color: #f0f0f0;">Thinking completed</span>
+            <span style="color: #888; font-size: 14px; margin-left: auto;">{token_count} tokens budget</span>
+            <div class="chevron" id="chevron_{thinking_id}"
+                 style="width: 16px; height: 16px; border: 2px solid #888; border-left: none;
+                        border-bottom: none; transform: rotate(45deg); transition: transform 0.2s ease;"></div>
+        </div>
+        <div class="thinking-content" id="{thinking_id}"
+             style="background: #333; border-radius: 0 0 8px 8px; padding: 20px;
+                    border: 1px solid #4a4a4a; border-top: none; display: none;
+                    max-height: 400px; overflow-y: auto;">
+            <div style="white-space: pre-wrap; font-family: 'SF Mono', 'Monaco', 'Inconsolata', monospace;
+                        font-size: 14px; line-height: 1.5; color: #d0d0d0;">{thinking_content}</div>
+        </div>
+    </div>
+
+    <script>
+    function toggleThinking(id) {{
+        const content = document.getElementById(id);
+        const chevron = document.getElementById('chevron_' + id);
+        const header = content.previousElementSibling;
+
+        if (content.style.display === 'none' || !content.style.display) {{
+            content.style.display = 'block';
+            chevron.style.transform = 'rotate(135deg)';
+            header.style.background = '#404040';
+        }} else {{
+            content.style.display = 'none';
+            chevron.style.transform = 'rotate(45deg)';
+            header.style.background = '#3a3a3a';
+        }}
+    }}
+    </script>
+    """
+    return html
+
+def format_response_with_thinking(response_text):
+    """Format response to include thinking blocks"""
+    parts = parse_thinking_tags(response_text)
+
+    if not parts:
+        return response_text
+
+    formatted_html = ""
+
+    for part in parts:
+        if part["type"] == "thinking":
+            formatted_html += create_thinking_html(part["content"])
+        else:
+            # Regular text content
+            formatted_html += f'<div style="margin: 16px 0; line-height: 1.6; color: #e0e0e0;">{part["content"]}</div>'
+
+    return formatted_html
+
 @spaces.GPU
 def initialize_model(model_name):
     global model_cache
@@ -62,7 +160,9 @@ def generate_response(message, history, model_name, max_length=512, temperature=
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+            # Parse HTML back to get original content if needed
+            clean_assistant_msg = re.sub(r'<[^>]+>', '', assistant_msg) if assistant_msg else ""
+            messages.append({"role": "assistant", "content": clean_assistant_msg})
 
     # Add current message
     messages.append({"role": "user", "content": message})
@@ -116,7 +216,10 @@ def generate_response(message, history, model_name, max_length=512, temperature=
         if "Assistant:" in assistant_response:
             assistant_response = assistant_response.split("Assistant:")[-1].strip()
 
-        return assistant_response
+        # Format the response with thinking blocks
+        formatted_response = format_response_with_thinking(assistant_response)
+
+        return formatted_response
 
     except Exception as e:
         return f"Error generating response: {str(e)}"
@@ -134,7 +237,7 @@ def generate(
     API endpoint for LLM generation
 
     Args:
-        model: Model name to use (
+        model: Model name to use (Daedalus-1-8B)
         user_input: Current user message/input
         history: JSON string of conversation history in format [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
         temperature: Temperature for generation (0.1-2.0)
@@ -186,30 +289,72 @@ def generate(
 
 # Create the Gradio interface
 def create_interface():
-    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
+    # Custom CSS for dark theme and thinking blocks
+    custom_css = """
+    .dark {
+        background: #1a1a1a !important;
+    }
+
+    .chatbot .message-wrap .message {
+        background: #2a2a2a !important;
+        border: 1px solid #3a3a3a !important;
+    }
+
+    .chatbot .message-wrap .message.user {
+        background: #2d3748 !important;
+    }
+
+    .chatbot .message-wrap .message.bot {
+        background: #2a2a2a !important;
+    }
+
+    .thinking-container {
+        margin: 16px 0;
+        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
+    }
+
+    .thinking-header {
+        background: #3a3a3a;
+        border-radius: 8px;
+        padding: 12px 16px;
+        cursor: pointer;
+        display: flex;
+        align-items: center;
+        gap: 10px;
+        transition: all 0.2s ease;
+        border: 1px solid #4a4a4a;
+    }
+
+    .thinking-header:hover {
+        background: #404040 !important;
+        border-color: #5a5a5a !important;
+    }
+    """
+
+    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft(), css=custom_css) as demo:
         gr.Markdown("""
-        # 🚀
+        # 🚀 Daedalus-1 Model Chat Interface
 
-        Chat with the
+        Chat with the Daedalus-1 models by Noema Research. This interface will render thinking processes when the model outputs `<think></think>` tags.
 
         **Available Models:**
-        -
-        - Apollo-1-8B (8 billion parameters)
-        - Apollo-1-2B (2 billion parameters)
+        - Daedalus-1-8B (8 billion parameters)
         """)
 
         with gr.Row():
             model_selector = gr.Dropdown(
                 choices=list(AVAILABLE_MODELS.keys()),
-                value="
+                value="Daedalus-1-8B",
                 label="Select Model",
                 info="Choose which model to use for generation"
            )
 
         chatbot = gr.Chatbot(
-            height=
+            height=500,
             placeholder="Select a model and start chatting...",
-            label="Chat"
+            label="Chat",
+            render_markdown=False,  # Disable markdown to allow custom HTML
+            elem_classes=["chatbot"]
         )
 
         msg = gr.Textbox(
@@ -248,6 +393,29 @@ def create_interface():
                 info="Controls diversity via nucleus sampling"
             )
 
+        # Add JavaScript for thinking block interactions
+        gr.HTML("""
+        <script>
+        function toggleThinking(id) {
+            const content = document.getElementById(id);
+            if (!content) return;
+
+            const chevron = document.getElementById('chevron_' + id);
+            const header = content.previousElementSibling;
+
+            if (content.style.display === 'none' || !content.style.display) {
+                content.style.display = 'block';
+                if (chevron) chevron.style.transform = 'rotate(135deg)';
+                if (header) header.style.background = '#404040';
+            } else {
+                content.style.display = 'none';
+                if (chevron) chevron.style.transform = 'rotate(45deg)';
+                if (header) header.style.background = '#3a3a3a';
+            }
+        }
+        </script>
+        """)
+
         # Event handlers
         def user_message(message, history):
             return "", history + [[message, None]]
@@ -282,19 +450,22 @@ def create_interface():
 
         model_selector.change(model_changed, model_selector, chatbot)
 
+
+
         gr.Markdown("""
         ---
 
-        ### About the
-        **Apollo-1-2B**: 2 billion parameter model by Noema Research, designed for fast and quick infrencing
+        ### About the Daedalus-1 Models
 
-        **
-
-
+        **Daedalus-1-8B**: Daedalus-1-8B is an 8 billion parameter language model for code generation and reasoning, developed by **Noema Research**.
+        It is a finetuned derivative of [Seed-Coder-8B-Reasoning](https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning),
+        with enhancements for instruction following, structured code generation, and improved safety alignment.
+
 
-
+        The model is designed for conversational AI and supports various text generation tasks. When the model uses thinking tags (`<think></think>`),
+        this interface will render them as expandable sections similar to Claude's thinking process visualization.
 
-        This Space uses ZeroGPU for efficient GPU allocation
+        This Space uses ZeroGPU for efficient GPU allocation.
         """)
 
         return demo
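
For readers skimming the diff: the core of this commit is the `parse_thinking_tags` helper added above. Below is a self-contained sketch of the same splitting logic with an illustrative input and output; the sample string and printed result are ours, not taken from the Space.

import re

def parse_thinking_tags(text):
    # Same splitting logic as the helper added in this commit:
    # segments inside <think>...</think> become "thinking" parts,
    # everything else becomes "text" parts, in document order.
    parts, last_end = [], 0
    for match in re.finditer(r'<think>(.*?)</think>', text, re.DOTALL):
        before = text[last_end:match.start()].strip()
        if before:
            parts.append({"type": "text", "content": before})
        thinking = match.group(1).strip()
        if thinking:
            parts.append({"type": "thinking", "content": thinking})
        last_end = match.end()
    tail = text[last_end:].strip()
    if tail:
        parts.append({"type": "text", "content": tail})
    return parts

sample = "<think>Check the base case first.</think>Use recursion with memoization."
print(parse_thinking_tags(sample))
# [{'type': 'thinking', 'content': 'Check the base case first.'},
#  {'type': 'text', 'content': 'Use recursion with memoization.'}]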
|