Spaces: Runtime error

fix: Simplified stable app

app.py CHANGED
@@ -1,164 +1,56 @@
- """
- MiniMind Max2 API - Enhanced with Thinking, Vision, and Agentic Capabilities
- HuggingFace Spaces Gradio Application
- """
-
  import gradio as gr
- import
- import time
- from typing import Dict, Any, List, Optional, Tuple
- from dataclasses import dataclass
  from enum import Enum

-
- # ============================================================================
- # Configuration
- # ============================================================================
-
- @dataclass
- class ModelConfig:
-     """Model configuration."""
-     hidden_size: int = 1024
-     num_layers: int = 12
-     num_attention_heads: int = 16
-     num_key_value_heads: int = 4
-     intermediate_size: int = 2816
-     vocab_size: int = 102400
-     num_experts: int = 8
-     num_experts_per_token: int = 2
-     max_seq_length: int = 32768
-
-
  class ThinkingMode(Enum):
-     """Thinking modes."""
      INTERLEAVED = "interleaved"
      SEQUENTIAL = "sequential"
      HIDDEN = "hidden"

-
- # ============================================================================
- # Thinking Engine
- # ============================================================================
-
  class ThinkingEngine:
- [… old lines 44-75, the body of ThinkingEngine, are not preserved in the page extraction …]
-         return "\n".join(lines)
-
-     def _generate_response(self, query: str) -> str:
-         responses = {
-             "hello": "Hello! I'm MiniMind Max2, an efficient edge-deployed language model. How can I help?",
-             "help": "I can help with text generation, code assistance, reasoning, function calling, and more!",
-         }
-         query_lower = query.lower()
-         for key, response in responses.items():
-             if key in query_lower:
-                 return response
-         return f"Processing your query with MoE architecture (8 experts, top-2 routing):\n\n{query}\n\nResponse generated with 25% active parameters for maximum efficiency."
-
-
- # ============================================================================
- # MDX & Templates
- # ============================================================================
-
- class MDXRenderer:
-     @staticmethod
-     def linear_process_flow(steps: List[Dict]) -> str:
-         html = '<div style="display:flex;gap:10px;flex-wrap:wrap;">'
-         for i, step in enumerate(steps):
-             html += f'<div style="background:#e3f2fd;padding:10px;border-radius:8px;"><b>{i+1}.</b> {step.get("title", "Step")}<br><small>{step.get("description", "")}</small></div>'
-             if i < len(steps)-1:
-                 html += '<div style="font-size:20px;color:#1976d2;">→</div>'
-         html += '</div>'
-         return html
-
-
- class ToolRegistry:
-     TOOLS = {
-         "search": {"description": "Search the web"},
-         "calculate": {"description": "Math calculations"},
-         "code_execute": {"description": "Execute Python code"},
-     }
-
-     @classmethod
-     def execute(cls, tool: str, **kwargs) -> str:
-         if tool == "calculate":
-             try:
-                 return f"Result: {eval(kwargs.get('expression', '0'), {'__builtins__': {}}, {})}"
-             except:
-                 return "Error"
-         return f"Executed {tool}"
-

- # Initialize
- thinking_engine = ThinkingEngine()
-
-
- def respond(message, history, mode, show, temp, max_tok):
-     result = thinking_engine.think(message, ThinkingMode(mode.lower()), show)
-     history.append([message, result["response"]])
-     return history, "", result.get("thinking", "Hidden")
-
-
- def get_model_info():
-     return """
- # MiniMind Max2
-
- ## Architecture
- - **MoE**: 8 experts, top-2 routing (25% activation)
- - **GQA**: 16 Q-heads, 4 KV-heads (4x memory reduction)
- - **Hidden Size**: 1024 | **Layers**: 12 | **Vocab**: 102,400
-
- ## Capabilities
- - Chain-of-Thought Reasoning
- - Vision Adapter (SigLIP)
- - Function Calling
- - Fill-in-the-Middle Coding
- - Speculative Decoding
- - NPU Export (TFLite/QNN)
- """
-
-
- # Gradio UI
  with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")
-
      with gr.Tabs():
          with gr.Tab("💬 Chat"):
              with gr.Row():
                  with gr.Column(scale=2):
                      chatbot = gr.Chatbot(height=400)
-                     msg = gr.Textbox(placeholder="Ask anything...")
                      with gr.Row():
                          submit = gr.Button("Send", variant="primary")
                          clear = gr.Button("Clear")
@@ -167,24 +59,42 @@
                      show = gr.Checkbox(label="Show Thinking", value=True)
                      temp = gr.Slider(0, 1, 0.7, label="Temperature")
                      tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
-                     thinking = gr.Textbox(label="Thinking Trace", lines=
-
              submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
              msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
              clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])
-
          with gr.Tab("🔧 Tools"):
              gr.Markdown("### Function Calling")
-             tool = gr.Dropdown(["calculate", "search"
              inp = gr.Textbox(value="2 + 2 * 3", label="Input")
              btn = gr.Button("Execute", variant="primary")
              out = gr.Textbox(label="Result")
-
- [… old line 183 not preserved in the page extraction …]
          with gr.Tab("ℹ️ Info"):
-             gr.Markdown(
- [… old line 186 not preserved in the page extraction …]
      gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")

-
- demo.launch()
+ """MiniMind Max2 API with Thinking"""
  import gradio as gr
+ from typing import Dict, List, Tuple
  from enum import Enum

  class ThinkingMode(Enum):
      INTERLEAVED = "interleaved"
      SEQUENTIAL = "sequential"
      HIDDEN = "hidden"

  class ThinkingEngine:
+     def think(self, query: str, mode: str, show: bool) -> Tuple[str, str]:
+         thinking = f"""<Thinking>
+ <step> Step 1 (analyze): Understanding query...
+ Confidence: 95%
+ <step> Step 2 (plan): Planning MoE routing...
+ Confidence: 90%
+ <step> Step 3 (generate): Using 25% active params...
+ Confidence: 92%
+ <reflect> Verifying quality...
+ Confidence: 88%
+ <conclude> Formulating response...
+ </Thinking>""" if show else "Thinking hidden"
+
+         response = f"""**MiniMind Max2 Response**
+
+ Query: {query}
+
+ I processed your request using:
+ - MoE Architecture (8 experts, top-2 routing)
+ - GQA (16 Q-heads, 4 KV-heads)
+ - Only 25% active parameters
+
+ This enables efficient edge deployment while maintaining quality."""
+
+         return response, thinking
+
+ engine = ThinkingEngine()
+
+ def respond(msg, history, mode, show, temp, tokens):
+     response, thinking = engine.think(msg, mode, show)
+     history.append([msg, response])
+     return history, "", thinking

  with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")
+
      with gr.Tabs():
          with gr.Tab("💬 Chat"):
              with gr.Row():
                  with gr.Column(scale=2):
                      chatbot = gr.Chatbot(height=400)
+                     msg = gr.Textbox(placeholder="Ask anything...", label="Message")
                      with gr.Row():
                          submit = gr.Button("Send", variant="primary")
                          clear = gr.Button("Clear")
  [… new lines 57-58 fall outside the diff hunks and are not shown …]
                      show = gr.Checkbox(label="Show Thinking", value=True)
                      temp = gr.Slider(0, 1, 0.7, label="Temperature")
                      tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
+                     thinking = gr.Textbox(label="Thinking Trace", lines=10)
+
              submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
              msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
              clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])
+
          with gr.Tab("🔧 Tools"):
              gr.Markdown("### Function Calling")
+             tool = gr.Dropdown(["calculate", "search"], value="calculate", label="Tool")
              inp = gr.Textbox(value="2 + 2 * 3", label="Input")
              btn = gr.Button("Execute", variant="primary")
              out = gr.Textbox(label="Result")
+
+             def exec_tool(t, i):
+                 if t == "calculate":
+                     try: return f"Result: {eval(i, {'__builtins__': {}}, {})}"
+                     except: return "Error"
+                 return f"Search: {i}"
+
+             btn.click(exec_tool, [tool, inp], out)
+
          with gr.Tab("ℹ️ Info"):
+             gr.Markdown("""# MiniMind Max2
+ ## Architecture
+ - **MoE**: 8 experts, top-2 (25% active)
+ - **GQA**: 4x KV cache reduction
+ - **Capabilities**: Reasoning, Vision, Coding, Tools
+
+ ## New Features
+ - Interleaved Thinking
+ - Sequential Planning
+ - Jinja Templates
+ - MDX Components
+ - Speculative Decoding
+ - NPU Export""")
+
      gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")

+ demo.launch()
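
Both the removed ToolRegistry.execute and the new exec_tool guard eval only by passing an empty __builtins__ dict, which blocks casual misuse but is not a real sandbox. A safer pattern for a calculator tool is to evaluate the parsed AST and accept only arithmetic nodes; a minimal sketch (not part of the commit, and safe_calc is a hypothetical name):

import ast, operator

# Map arithmetic AST node types to operator functions; anything outside
# this table (names, attributes, calls) is rejected outright.
OPS = {ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
       ast.Div: operator.truediv, ast.Pow: operator.pow, ast.USub: operator.neg}

def safe_calc(expr: str):
    def ev(node):
        if isinstance(node, ast.Expression):
            return ev(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in OPS:
            return OPS[type(node.op)](ev(node.left), ev(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in OPS:
            return OPS[type(node.op)](ev(node.operand))
        raise ValueError("unsupported expression")
    return ev(ast.parse(expr, mode="eval"))

print(safe_calc("2 + 2 * 3"))  # prints 8, same result as the Tools tab default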
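
The new respond appends [msg, response] pairs, which matches gr.Chatbot's classic tuple history format. Recent Gradio releases deprecate that format in favor of type="messages"; whether this Space's pinned Gradio version is affected is an assumption, but a messages-format variant would look like this (respond_messages is hypothetical, not in the commit):

def respond_messages(msg, history, mode, show, temp, tokens):
    # Same engine call as the committed respond(), but OpenAI-style role
    # dicts instead of [user, bot] pairs.
    response, thinking = engine.think(msg, mode, show)
    history = history + [
        {"role": "user", "content": msg},
        {"role": "assistant", "content": response},
    ]
    return history, "", thinking

# Pairs with: chatbot = gr.Chatbot(height=400, type="messages")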
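
The "25% active" figure in the response template and the Info tab follows directly from the config: top-2 routing over 8 experts touches 2/8 = 25% of expert parameters per token. A minimal top-2 gating sketch (illustrative shapes only, not MiniMind's actual layer code):

import torch

def top2_route(hidden, gate_weight, experts):
    # hidden: [tokens, d_model]; gate_weight: [d_model, 8]; experts: 8 modules
    probs = torch.softmax(hidden @ gate_weight, dim=-1)
    top_p, top_idx = probs.topk(2, dim=-1)            # choose 2 of the 8 experts
    top_p = top_p / top_p.sum(dim=-1, keepdim=True)   # renormalize gate weights
    out = torch.zeros_like(hidden)
    for slot in range(2):                             # only 25% of expert params run
        for e, expert in enumerate(experts):
            mask = top_idx[:, slot] == e
            if mask.any():
                out[mask] += top_p[mask, slot:slot+1] * expert(hidden[mask])
    return out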
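
Likewise, the "4x KV cache reduction" claim is just the head ratio: caching 4 KV heads instead of 16 query heads shrinks the cache by 16/4 = 4x. A back-of-envelope check using the values from the removed ModelConfig (fp16 storage assumed):

hidden_size, num_layers = 1024, 12
num_q_heads, num_kv_heads = 16, 4
head_dim = hidden_size // num_q_heads                  # 64
seq_len, fp16_bytes = 32768, 2

def kv_cache_bytes(kv_heads):
    # K and V tensors per layer: kv_heads * head_dim values per token
    return 2 * num_layers * kv_heads * head_dim * seq_len * fp16_bytes

mha = kv_cache_bytes(num_q_heads)   # 1536 MiB at the full 32k context
gqa = kv_cache_bytes(num_kv_heads)  #  384 MiB with 4 KV heads
print(f"{mha / 2**20:.0f} MiB -> {gqa / 2**20:.0f} MiB ({mha // gqa}x smaller)")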