fariasultana committed on
Commit
a8511d9
·
verified ·
1 Parent(s): 7dd6607

fix: Simplified stable app

Browse files
Files changed (1) hide show
  1. app.py +29 -91
app.py CHANGED
@@ -1,100 +1,38 @@
1
- """MiniMind Max2 API with Thinking"""
2
  import gradio as gr
3
- from typing import Dict, List, Tuple
4
- from enum import Enum
5
 
6
class ThinkingMode(Enum):
    """Closed set of presentation modes for the reasoning trace."""

    INTERLEAVED = "interleaved"
    SEQUENTIAL = "sequential"
    HIDDEN = "hidden"


class ThinkingEngine:
    """Produces a canned reasoning trace plus a canned answer for a query."""

    def think(self, query: str, mode: str, show: bool) -> Tuple[str, str]:
        """Return ``(response, thinking)`` for *query*.

        ``mode`` is accepted for interface compatibility but is not read by
        this stub. When ``show`` is false the trace is replaced with the
        literal placeholder string "Thinking hidden".
        """
        if show:
            trace = """<Thinking>
<step> Step 1 (analyze): Understanding query...
Confidence: 95%
<step> Step 2 (plan): Planning MoE routing...
Confidence: 90%
<step> Step 3 (generate): Using 25% active params...
Confidence: 92%
<reflect> Verifying quality...
Confidence: 88%
<conclude> Formulating response...
</Thinking>"""
        else:
            trace = "Thinking hidden"

        answer = f"""**MiniMind Max2 Response**

Query: {query}

I processed your request using:
- MoE Architecture (8 experts, top-2 routing)
- GQA (16 Q-heads, 4 KV-heads)
- Only 25% active parameters

This enables efficient edge deployment while maintaining quality."""

        return answer, trace


# Module-level singleton shared by the Gradio callbacks below.
engine = ThinkingEngine()


def respond(msg, history, mode, show, temp, tokens):
    """Gradio callback: append the canned answer for *msg* to *history*.

    ``temp`` and ``tokens`` mirror the UI sliders but are unused by the stub.
    Returns ``(updated history, "" to clear the textbox, thinking trace)``.
    """
    answer, trace = engine.think(msg, mode, show)
    history.append([msg, answer])
    return history, "", trace
44
 
45
# Demo UI: chat tab wired to respond(), a toy tool-calling tab, and an info tab.
with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")

    with gr.Tabs():
        with gr.Tab("💬 Chat"):
            with gr.Row():
                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(height=400)
                    msg = gr.Textbox(placeholder="Ask anything...", label="Message")
                    with gr.Row():
                        submit = gr.Button("Send", variant="primary")
                        clear = gr.Button("Clear")
                with gr.Column(scale=1):
                    mode = gr.Radio(["Interleaved", "Sequential", "Hidden"], value="Interleaved", label="Thinking Mode")
                    show = gr.Checkbox(label="Show Thinking", value=True)
                    temp = gr.Slider(0, 1, 0.7, label="Temperature")
                    tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
                    thinking = gr.Textbox(label="Thinking Trace", lines=10)

            # Button click and Enter-in-textbox share identical wiring.
            submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
            msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
            clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])

        with gr.Tab("🔧 Tools"):
            gr.Markdown("### Function Calling")
            tool = gr.Dropdown(["calculate", "search"], value="calculate", label="Tool")
            inp = gr.Textbox(value="2 + 2 * 3", label="Input")
            btn = gr.Button("Execute", variant="primary")
            out = gr.Textbox(label="Result")

            def exec_tool(t, i):
                """Dispatch the selected demo tool on the input string *i*."""
                if t == "calculate":
                    # SECURITY: eval() on user-supplied text. Builtins are
                    # stripped, but this is still not a safe expression
                    # evaluator — prefer ast.literal_eval or a real parser
                    # for anything beyond a local demo.
                    try:
                        return f"Result: {eval(i, {'__builtins__': {}}, {})}"
                    except Exception:
                        # Was a bare `except:`; narrowed so SystemExit and
                        # KeyboardInterrupt are no longer swallowed.
                        return "Error"
                return f"Search: {i}"

            btn.click(exec_tool, [tool, inp], out)

        with gr.Tab("ℹ️ Info"):
            gr.Markdown("""# MiniMind Max2
## Architecture
- **MoE**: 8 experts, top-2 (25% active)
- **GQA**: 4x KV cache reduction
- **Capabilities**: Reasoning, Vision, Coding, Tools

## New Features
- Interleaved Thinking
- Sequential Planning
- Jinja Templates
- MDX Components
- Speculative Decoding
- NPU Export""")

    gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")

demo.launch()
 
 
1
  import gradio as gr
 
 
2
 
3
def chat(msg, history, mode, show):
    """Gradio callback producing a canned reply and an optional trace.

    ``mode`` is accepted for interface compatibility but not read.
    Returns ``(updated history, "" to clear the textbox, thinking trace)``;
    the trace is the empty string when ``show`` is false.
    """
    if show:
        trace = (
            "<Thinking>\n"
            f"<step> Analyzing: {msg[:30]}...\n"
            "<step> MoE routing (top-2 of 8 experts)\n"
            "<step> 25% active parameters\n"
            "<conclude> Ready\n"
            "</Thinking>"
        )
    else:
        trace = ""

    reply = f"MiniMind Max2 response to: {msg}"
    history.append([msg, reply])
    return history, "", trace
14
 
15
# Simplified demo UI: single chat column plus an info/Docker sidebar.
with gr.Blocks(title="MiniMind Max2") as demo:
    gr.Markdown("# MiniMind Max2 API")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=350)
            msg = gr.Textbox(placeholder="Ask anything...")
            # Hidden sink for the thinking trace that chat() returns; named
            # instead of being created anonymously inside the click() call so
            # the event wiring below is readable and reusable.
            thinking_out = gr.Textbox(visible=False)
            chat_inputs = [msg, chatbot, gr.State("interleaved"), gr.State(True)]
            chat_outputs = [chatbot, msg, thinking_out]
            gr.Button("Send", variant="primary").click(chat, chat_inputs, chat_outputs)
            # Restore Enter-to-send (present in the previous version of the
            # app) with wiring identical to the Send button.
            msg.submit(chat, chat_inputs, chat_outputs)
        with gr.Column(scale=1):
            gr.Markdown("""## Info
- MoE: 8 experts, 25% active
- GQA: 4x memory reduction
- Formats: safetensors, gguf

## Docker
```
docker pull sultanafariabd/minimind-max2
docker run -p 8000:8000 sultanafariabd/minimind-max2
```""")

demo.launch()