fariasultana committed on
Commit
a8511d9
·
verified ·
1 Parent(s): 7dd6607

fix: Simplified stable app

Browse files
Files changed (1) hide show
  1. app.py +29 -91
app.py CHANGED
@@ -1,100 +1,38 @@
1
- """MiniMind Max2 API with Thinking"""
2
  import gradio as gr
3
- from typing import Dict, List, Tuple
4
- from enum import Enum
5
 
6
class ThinkingMode(Enum):
    """Closed set of presentation modes for the reasoning trace."""

    INTERLEAVED = "interleaved"
    SEQUENTIAL = "sequential"
    HIDDEN = "hidden"


class ThinkingEngine:
    """Produces a canned reasoning trace plus a canned answer for a query."""

    def think(self, query: str, mode: str, show: bool) -> Tuple[str, str]:
        """Return ``(response, thinking)`` for *query*.

        ``mode`` is accepted for interface compatibility but is not read by
        this stub. When ``show`` is false the trace is replaced with the
        literal placeholder string "Thinking hidden".
        """
        if show:
            trace = """<Thinking>
<step> Step 1 (analyze): Understanding query...
Confidence: 95%
<step> Step 2 (plan): Planning MoE routing...
Confidence: 90%
<step> Step 3 (generate): Using 25% active params...
Confidence: 92%
<reflect> Verifying quality...
Confidence: 88%
<conclude> Formulating response...
</Thinking>"""
        else:
            trace = "Thinking hidden"

        answer = f"""**MiniMind Max2 Response**

Query: {query}

I processed your request using:
- MoE Architecture (8 experts, top-2 routing)
- GQA (16 Q-heads, 4 KV-heads)
- Only 25% active parameters

This enables efficient edge deployment while maintaining quality."""

        return answer, trace


# Module-level singleton shared by the Gradio callbacks below.
engine = ThinkingEngine()


def respond(msg, history, mode, show, temp, tokens):
    """Gradio callback: append the canned answer for *msg* to *history*.

    ``temp`` and ``tokens`` mirror the UI sliders but are unused by the stub.
    Returns ``(updated history, "" to clear the textbox, thinking trace)``.
    """
    answer, trace = engine.think(msg, mode, show)
    history.append([msg, answer])
    return history, "", trace
44
 
45
# Demo UI: chat tab wired to respond(), a toy tool-calling tab, and an info tab.
with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")

    with gr.Tabs():
        with gr.Tab("💬 Chat"):
            with gr.Row():
                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(height=400)
                    msg = gr.Textbox(placeholder="Ask anything...", label="Message")
                    with gr.Row():
                        submit = gr.Button("Send", variant="primary")
                        clear = gr.Button("Clear")
                with gr.Column(scale=1):
                    mode = gr.Radio(["Interleaved", "Sequential", "Hidden"], value="Interleaved", label="Thinking Mode")
                    show = gr.Checkbox(label="Show Thinking", value=True)
                    temp = gr.Slider(0, 1, 0.7, label="Temperature")
                    tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
                    thinking = gr.Textbox(label="Thinking Trace", lines=10)

            # Button click and Enter-in-textbox share identical wiring.
            submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
            msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
            clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])

        with gr.Tab("🔧 Tools"):
            gr.Markdown("### Function Calling")
            tool = gr.Dropdown(["calculate", "search"], value="calculate", label="Tool")
            inp = gr.Textbox(value="2 + 2 * 3", label="Input")
            btn = gr.Button("Execute", variant="primary")
            out = gr.Textbox(label="Result")

            def exec_tool(t, i):
                """Dispatch the selected demo tool on the input string *i*."""
                if t == "calculate":
                    # SECURITY: eval() on user-supplied text. Builtins are
                    # stripped, but this is still not a safe expression
                    # evaluator — prefer ast.literal_eval or a real parser
                    # for anything beyond a local demo.
                    try:
                        return f"Result: {eval(i, {'__builtins__': {}}, {})}"
                    except Exception:
                        # Was a bare `except:`; narrowed so SystemExit and
                        # KeyboardInterrupt are no longer swallowed.
                        return "Error"
                return f"Search: {i}"

            btn.click(exec_tool, [tool, inp], out)

        with gr.Tab("ℹ️ Info"):
            gr.Markdown("""# MiniMind Max2
## Architecture
- **MoE**: 8 experts, top-2 (25% active)
- **GQA**: 4x KV cache reduction
- **Capabilities**: Reasoning, Vision, Coding, Tools

## New Features
- Interleaved Thinking
- Sequential Planning
- Jinja Templates
- MDX Components
- Speculative Decoding
- NPU Export""")

    gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")

demo.launch()
 
 
1
  import gradio as gr
 
 
2
 
3
def chat(msg, history, mode, show):
    """Gradio callback producing a canned reply and an optional trace.

    ``mode`` is accepted for interface compatibility but not read.
    Returns ``(updated history, "" to clear the textbox, thinking trace)``;
    the trace is the empty string when ``show`` is false.
    """
    if show:
        trace = (
            "<Thinking>\n"
            f"<step> Analyzing: {msg[:30]}...\n"
            "<step> MoE routing (top-2 of 8 experts)\n"
            "<step> 25% active parameters\n"
            "<conclude> Ready\n"
            "</Thinking>"
        )
    else:
        trace = ""

    reply = f"MiniMind Max2 response to: {msg}"
    history.append([msg, reply])
    return history, "", trace
14
 
15
# Simplified demo UI: single chat column plus an info/Docker sidebar.
with gr.Blocks(title="MiniMind Max2") as demo:
    gr.Markdown("# MiniMind Max2 API")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=350)
            msg = gr.Textbox(placeholder="Ask anything...")
            # Hidden sink for the thinking trace that chat() returns; named
            # instead of being created anonymously inside the click() call so
            # the event wiring below is readable and reusable.
            thinking_out = gr.Textbox(visible=False)
            chat_inputs = [msg, chatbot, gr.State("interleaved"), gr.State(True)]
            chat_outputs = [chatbot, msg, thinking_out]
            gr.Button("Send", variant="primary").click(chat, chat_inputs, chat_outputs)
            # Restore Enter-to-send (present in the previous version of the
            # app) with wiring identical to the Send button.
            msg.submit(chat, chat_inputs, chat_outputs)
        with gr.Column(scale=1):
            gr.Markdown("""## Info
- MoE: 8 experts, 25% active
- GQA: 4x memory reduction
- Formats: safetensors, gguf

## Docker
```
docker pull sultanafariabd/minimind-max2
docker run -p 8000:8000 sultanafariabd/minimind-max2
```""")

demo.launch()