fariasultana committed on
Commit
7dd6607
·
verified ·
1 Parent(s): 89f4245

fix: Simplified stable app

Browse files
Files changed (1) hide show
  1. app.py +64 -154
app.py CHANGED
@@ -1,164 +1,56 @@
1
- """
2
- MiniMind Max2 API - Enhanced with Thinking, Vision, and Agentic Capabilities
3
- HuggingFace Spaces Gradio Application
4
- """
5
-
6
  import gradio as gr
7
- import json
8
- import time
9
- from typing import Dict, Any, List, Optional, Tuple
10
- from dataclasses import dataclass
11
  from enum import Enum
12
 
13
-
14
- # ============================================================================
15
- # Configuration
16
- # ============================================================================
17
-
18
- @dataclass
19
- class ModelConfig:
20
- """Model configuration."""
21
- hidden_size: int = 1024
22
- num_layers: int = 12
23
- num_attention_heads: int = 16
24
- num_key_value_heads: int = 4
25
- intermediate_size: int = 2816
26
- vocab_size: int = 102400
27
- num_experts: int = 8
28
- num_experts_per_token: int = 2
29
- max_seq_length: int = 32768
30
-
31
-
32
  class ThinkingMode(Enum):
33
- """Thinking modes."""
34
  INTERLEAVED = "interleaved"
35
  SEQUENTIAL = "sequential"
36
  HIDDEN = "hidden"
37
 
38
-
39
- # ============================================================================
40
- # Thinking Engine
41
- # ============================================================================
42
-
43
  class ThinkingEngine:
44
- """Simulated thinking engine for demonstration."""
45
-
46
- def __init__(self):
47
- self.config = {
48
- "think_start": "<Thinking>",
49
- "think_end": "</Thinking>",
50
- "step_marker": "<step>",
51
- "reflect_marker": "<reflect>",
52
- "conclude_marker": "<conclude>",
53
- }
54
-
55
- def think(self, query: str, mode: ThinkingMode = ThinkingMode.INTERLEAVED, show_thinking: bool = True) -> Dict[str, Any]:
56
- """Generate response with thinking trace."""
57
- steps = [
58
- {"type": "reasoning", "content": f"Analyzing: '{query[:50]}...'", "confidence": 0.95},
59
- {"type": "planning", "content": "Planning approach with MoE routing...", "confidence": 0.90},
60
- {"type": "generation", "content": "Generating with 25% active parameters.", "confidence": 0.92},
61
- {"type": "reflection", "content": "Verifying response quality.", "confidence": 0.88},
62
- ]
63
- thinking_trace = self._format_thinking(steps) if show_thinking else None
64
- response = self._generate_response(query)
65
- return {"response": response, "thinking": thinking_trace, "steps": steps, "mode": mode.value}
66
-
67
- def _format_thinking(self, steps: List[Dict]) -> str:
68
- cfg = self.config
69
- lines = [cfg["think_start"]]
70
- for i, step in enumerate(steps):
71
- marker = cfg["step_marker"] if step["type"] != "reflection" else cfg["reflect_marker"]
72
- lines.append(f"{marker} Step {i+1} ({step['type']}): {step['content']}")
73
- lines.append(f" Confidence: {step['confidence']:.0%}")
74
- lines.append(cfg["conclude_marker"] + " Formulating final response...")
75
- lines.append(cfg["think_end"])
76
- return "\n".join(lines)
77
-
78
- def _generate_response(self, query: str) -> str:
79
- responses = {
80
- "hello": "Hello! I'm MiniMind Max2, an efficient edge-deployed language model. How can I help?",
81
- "help": "I can help with text generation, code assistance, reasoning, function calling, and more!",
82
- }
83
- query_lower = query.lower()
84
- for key, response in responses.items():
85
- if key in query_lower:
86
- return response
87
- return f"Processing your query with MoE architecture (8 experts, top-2 routing):\n\n{query}\n\nResponse generated with 25% active parameters for maximum efficiency."
88
-
89
-
90
- # ============================================================================
91
- # MDX & Templates
92
- # ============================================================================
93
-
94
- class MDXRenderer:
95
- @staticmethod
96
- def linear_process_flow(steps: List[Dict]) -> str:
97
- html = '<div style="display:flex;gap:10px;flex-wrap:wrap;">'
98
- for i, step in enumerate(steps):
99
- html += f'<div style="background:#e3f2fd;padding:10px;border-radius:8px;"><b>{i+1}.</b> {step.get("title", "Step")}<br><small>{step.get("description", "")}</small></div>'
100
- if i < len(steps)-1:
101
- html += '<div style="font-size:20px;color:#1976d2;">→</div>'
102
- html += '</div>'
103
- return html
104
-
105
-
106
- class ToolRegistry:
107
- TOOLS = {
108
- "search": {"description": "Search the web"},
109
- "calculate": {"description": "Math calculations"},
110
- "code_execute": {"description": "Execute Python code"},
111
- }
112
-
113
- @classmethod
114
- def execute(cls, tool: str, **kwargs) -> str:
115
- if tool == "calculate":
116
- try:
117
- return f"Result: {eval(kwargs.get('expression', '0'), {'__builtins__': {}}, {})}"
118
- except:
119
- return "Error"
120
- return f"Executed {tool}"
121
-
122
 
123
- # Initialize
124
- thinking_engine = ThinkingEngine()
125
-
126
-
127
- def respond(message, history, mode, show, temp, max_tok):
128
- result = thinking_engine.think(message, ThinkingMode(mode.lower()), show)
129
- history.append([message, result["response"]])
130
- return history, "", result.get("thinking", "Hidden")
131
-
132
-
133
- def get_model_info():
134
- return """
135
- # MiniMind Max2
136
-
137
- ## Architecture
138
- - **MoE**: 8 experts, top-2 routing (25% activation)
139
- - **GQA**: 16 Q-heads, 4 KV-heads (4x memory reduction)
140
- - **Hidden Size**: 1024 | **Layers**: 12 | **Vocab**: 102,400
141
-
142
- ## Capabilities
143
- - Chain-of-Thought Reasoning
144
- - Vision Adapter (SigLIP)
145
- - Function Calling
146
- - Fill-in-the-Middle Coding
147
- - Speculative Decoding
148
- - NPU Export (TFLite/QNN)
149
- """
150
-
151
-
152
- # Gradio UI
153
  with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
154
  gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")
155
-
156
  with gr.Tabs():
157
  with gr.Tab("💬 Chat"):
158
  with gr.Row():
159
  with gr.Column(scale=2):
160
  chatbot = gr.Chatbot(height=400)
161
- msg = gr.Textbox(placeholder="Ask anything...")
162
  with gr.Row():
163
  submit = gr.Button("Send", variant="primary")
164
  clear = gr.Button("Clear")
@@ -167,24 +59,42 @@ with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
167
  show = gr.Checkbox(label="Show Thinking", value=True)
168
  temp = gr.Slider(0, 1, 0.7, label="Temperature")
169
  tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
170
- thinking = gr.Textbox(label="Thinking Trace", lines=8)
171
-
172
  submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
173
  msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
174
  clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])
175
-
176
  with gr.Tab("🔧 Tools"):
177
  gr.Markdown("### Function Calling")
178
- tool = gr.Dropdown(["calculate", "search", "code_execute"], value="calculate", label="Tool")
179
  inp = gr.Textbox(value="2 + 2 * 3", label="Input")
180
  btn = gr.Button("Execute", variant="primary")
181
  out = gr.Textbox(label="Result")
182
- btn.click(lambda t, i: ToolRegistry.execute(t, expression=i, query=i, code=i), [tool, inp], out)
183
-
 
 
 
 
 
 
 
184
  with gr.Tab("ℹ️ Info"):
185
- gr.Markdown(get_model_info())
186
-
 
 
 
 
 
 
 
 
 
 
 
 
187
  gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")
188
 
189
- if __name__ == "__main__":
190
- demo.launch()
 
1
+ """MiniMind Max2 API with Thinking"""
 
 
 
 
2
  import gradio as gr
3
+ from typing import Dict, List, Tuple
 
 
 
4
  from enum import Enum
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
class ThinkingMode(Enum):
    """Names of the thinking modes offered by the demo.

    Each member's value is the lowercase string form of its name.
    """

    # Member values are the lowercase identifiers of each mode.
    INTERLEAVED = "interleaved"
    SEQUENTIAL = "sequential"
    HIDDEN = "hidden"
10
 
 
 
 
 
 
11
class ThinkingEngine:
    """Canned stand-in for a reasoning model, used by the demo UI.

    No inference happens here: the thinking trace is a fixed text and the
    reply is a template that echoes the query back.
    """

    # Fixed trace returned whenever the caller asks to see the thinking.
    # It contains no placeholders, so it is built once as a plain string
    # rather than re-evaluated as an f-string on every call.
    # NOTE(review): line-leading whitespace inside the original literal could
    # not be recovered from the diff view - confirm against app.py.
    _TRACE = "\n".join((
        "<Thinking>",
        "<step> Step 1 (analyze): Understanding query...",
        "Confidence: 95%",
        "<step> Step 2 (plan): Planning MoE routing...",
        "Confidence: 90%",
        "<step> Step 3 (generate): Using 25% active params...",
        "Confidence: 92%",
        "<reflect> Verifying quality...",
        "Confidence: 88%",
        "<conclude> Formulating response...",
        "</Thinking>",
    ))

    # Placeholder returned instead of the trace when `show` is false.
    _HIDDEN_NOTICE = "Thinking hidden"

    def think(self, query: str, mode: str, show: bool) -> Tuple[str, str]:
        """Build the demo reply and its thinking trace.

        Args:
            query: User message; echoed verbatim inside the reply.
            mode: Selected thinking mode. Accepted for interface
                compatibility; it does not influence the output.
            show: When true the fixed trace is returned, otherwise the
                "Thinking hidden" placeholder.

        Returns:
            A ``(response, thinking)`` pair of strings.
        """
        thinking = self._TRACE if show else self._HIDDEN_NOTICE

        response = (
            "**MiniMind Max2 Response**\n"
            "\n"
            f"Query: {query}\n"
            "\n"
            "I processed your request using:\n"
            "- MoE Architecture (8 experts, top-2 routing)\n"
            "- GQA (16 Q-heads, 4 KV-heads)\n"
            "- Only 25% active parameters\n"
            "\n"
            "This enables efficient edge deployment while maintaining quality."
        )

        return response, thinking
37
+
38
# Single shared engine instance used by the chat callback below.
engine = ThinkingEngine()


def respond(msg, history, mode, show, temp, tokens):
    """Chat callback: append one exchange and return the updated UI values.

    Args:
        msg: Text of the user's message.
        history: Existing chat history as a list of ``[user, bot]`` pairs.
            ``None`` is treated as an empty history.
        mode: Selected thinking mode, forwarded to the engine.
        show: Whether the thinking trace should be shown, forwarded to the
            engine.
        temp: Temperature slider value; accepted but not used.
        tokens: Max-tokens slider value; accepted but not used.

    Returns:
        ``(history, "", thinking)`` - the extended history, an empty string
        for the message box, and the thinking trace text.
    """
    response, thinking = engine.think(msg, mode, show)
    # Work on a copy so the caller's list is left untouched, and tolerate a
    # missing history (presumably possible before the first message -
    # TODO confirm for the Gradio version in use).
    updated = list(history or [])
    updated.append([msg, response])
    return updated, "", thinking
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
46
  gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")
47
+
48
  with gr.Tabs():
49
  with gr.Tab("💬 Chat"):
50
  with gr.Row():
51
  with gr.Column(scale=2):
52
  chatbot = gr.Chatbot(height=400)
53
+ msg = gr.Textbox(placeholder="Ask anything...", label="Message")
54
  with gr.Row():
55
  submit = gr.Button("Send", variant="primary")
56
  clear = gr.Button("Clear")
 
59
  show = gr.Checkbox(label="Show Thinking", value=True)
60
  temp = gr.Slider(0, 1, 0.7, label="Temperature")
61
  tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
62
+ thinking = gr.Textbox(label="Thinking Trace", lines=10)
63
+
64
  submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
65
  msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
66
  clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])
67
+
68
  with gr.Tab("🔧 Tools"):
69
  gr.Markdown("### Function Calling")
70
+ tool = gr.Dropdown(["calculate", "search"], value="calculate", label="Tool")
71
  inp = gr.Textbox(value="2 + 2 * 3", label="Input")
72
  btn = gr.Button("Execute", variant="primary")
73
  out = gr.Textbox(label="Result")
74
+
75
def exec_tool(t, i):
    """Run the selected demo tool on the raw textbox input.

    Args:
        t: Tool name. ``"calculate"`` evaluates the input as an expression;
            any other value is handled as a search.
        i: Raw input text from the UI.

    Returns:
        ``"Result: <value>"`` for a successful calculation, ``"Error"`` when
        the evaluation raises, or ``"Search: <input>"`` for other tools.
    """
    if t == "calculate":
        try:
            # SECURITY NOTE(review): this evaluates user-supplied text with
            # eval(). Emptying __builtins__ is not a real sandbox (object
            # introspection can still reach arbitrary classes) and costly
            # expressions can stall the worker. Consider replacing this with
            # a restricted arithmetic parser (e.g. built on the ast module).
            return f"Result: {eval(i, {'__builtins__': {}}, {})}"
        except Exception:
            # Narrower than a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return "Error"
    return f"Search: {i}"
80
+
81
+ btn.click(exec_tool, [tool, inp], out)
82
+
83
  with gr.Tab("ℹ️ Info"):
84
+ gr.Markdown("""# MiniMind Max2
85
+ ## Architecture
86
+ - **MoE**: 8 experts, top-2 (25% active)
87
+ - **GQA**: 4x KV cache reduction
88
+ - **Capabilities**: Reasoning, Vision, Coding, Tools
89
+
90
+ ## New Features
91
+ - Interleaved Thinking
92
+ - Sequential Planning
93
+ - Jinja Templates
94
+ - MDX Components
95
+ - Speculative Decoding
96
+ - NPU Export""")
97
+
98
  gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")
99
 
100
+ demo.launch()