Quazim0t0 committed on
Commit
afd3155
·
verified ·
1 Parent(s): 73dd4cf

Upload panel.py

Browse files
Files changed (1) hide show
  1. agents/panel.py +553 -312
agents/panel.py CHANGED
@@ -1,312 +1,553 @@
1
- """
2
- panel.py -- the Gradio section for the bottom of the boss app: a live demo of the
3
- Modular-Mind mixture-of-experts.
4
-
5
- For the SpikeWhale backend it leads with the *latent bridge* (the real result) and
6
- organizes the three demos into tabs. Every handler is a generator that yields an
7
- instant "loading/generating" message first, so the first run never looks frozen while
8
- the ~80M models lazy-load. Hot-reloads checkpoints.
9
- """
10
- from __future__ import annotations
11
-
12
- import os
13
- import sys
14
-
15
- import gradio as gr
16
-
17
- # ZeroGPU: @spaces.GPU allocates a GPU only for the decorated call (CUDA is never touched at
18
- # import/startup). Falls back to a no-op decorator when `spaces` isn't installed (local / plain CPU).
19
- try:
20
- import spaces
21
- _gpu = spaces.GPU
22
- except Exception:
23
- def _gpu(fn=None, **kw):
24
- return fn if callable(fn) else (lambda f: f)
25
-
26
-
27
- def _to_gpu(moe):
28
- if hasattr(moe, "to_gpu_if_available"):
29
- moe.to_gpu_if_available()
30
- return moe
31
-
32
-
33
- EMOJI = {"language": "📖 Language", "math": "➗ Math", "tool": "🛠️ Tool-use"}
34
- DEVICE = os.environ.get("MM_AGENTS_DEVICE", "cpu")
35
- # Self-contained SpikeWhale bundle that ships next to this file (agents/modmind/: the 80M
36
- # specialists + bridge + inference code). If it's present we default to the SpikeWhale backend
37
- # so the HuggingFace Space "just works" with no env config. Env vars still override.
38
- _BUNDLED_MODMIND = os.path.join(os.path.dirname(os.path.abspath(__file__)), "modmind")
39
- _DEFAULT_BACKEND = "spikewhale" if os.path.isdir(_BUNDLED_MODMIND) else "bytegpt"
40
- _SPIKEWHALE = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower() in ("spikewhale", "modmind")
41
- _WARMED = {"done": False} # so the "loading the models" notice only shows on the first run
42
-
43
- _FOOTER = (
44
- "Two ~80M dense specialists — 📖 Language (FineWeb-Edu) and ➗ Math (FineMath) — sharing a "
45
- "16k length-max tokenizer. A coordinator routes by bits-per-byte, and a trained RecursiveLink "
46
- "lets them communicate in latent space (proven in the Bridge tab). Hot-reloads checkpoints."
47
- if _SPIKEWHALE else
48
- "Three byte-level ~10M specialists, streamed-trained on FineWeb-Edu / FineMath / "
49
- "glaive-function-calling. Tiny + early-trained, so generations are rough — the routing "
50
- "(which expert is most confident) is the point. It hot-reloads as training continues."
51
- )
52
-
53
-
54
- def _get_moe():
55
- """Pick the MoE backend. Defaults to the bundled SpikeWhale 80M specialists
56
- (agents/modmind/) when present, else the byte-level ByteGPT experts. MM_MOE_BACKEND
57
- and MODMIND_DIR override."""
58
- backend = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower()
59
- if backend in ("spikewhale", "modmind"):
60
- mm_dir = os.environ.get("MODMIND_DIR", _BUNDLED_MODMIND)
61
- if mm_dir and mm_dir not in sys.path:
62
- sys.path.insert(0, mm_dir) # front: ModMind's model.py wins over agents/model.py
63
- from moe_gradio import get_moe
64
- return get_moe
65
- from orchestrator import get_moe
66
- return get_moe
67
-
68
-
69
- def _loading_notice(action="Generating"):
70
- """First-run popup + in-place message so nothing ever looks frozen."""
71
- if not _WARMED["done"]:
72
- gr.Info("First run — loading the models (~20–40s on CPU). After this, it's quick.")
73
- return (f"### Loading the models + {action.lower()}…\n"
74
- "*First run loads the ~80M specialists into memory — this can take ~20–40s on CPU. "
75
- "Every run after this is fast.*")
76
- return f"### {action}…"
77
-
78
-
79
- def _bar(frac, n=18):
80
- f = max(0.0, min(1.0, frac))
81
- return "█" * round(f * n) + "·" * (n - round(f * n))
82
-
83
-
84
- @_gpu(duration=120)
85
- def moe_run(query, max_new):
86
- yield _loading_notice("Routing & generating")
87
- moe = _to_gpu(_get_moe()(DEVICE))
88
- if not moe.available():
89
- if _SPIKEWHALE:
90
- yield ("### ⏳ No SpikeWhale experts found\nSet `MODMIND_DIR` to your ModMind folder "
91
- "and make sure `<domain>/checkpoints/step_*.pt` exist (the panel hot-reloads them).")
92
- else:
93
- yield ("### ⏳ No experts trained yet\nRun `python agents/train.py --expert language` "
94
- "(and `math`, `tool`).")
95
- return
96
- q = (query or "").strip() or "The"
97
- r = moe.run(q, max_new=int(max_new))
98
- _WARMED["done"] = True
99
- w = r["weights"]; bits = r["bits_per_byte"]; steps = r["steps"]
100
- rows = "\n".join(
101
- f"| {EMOJI.get(n, n)} | {steps.get(n,0):,} | {bits[n]:.2f} | `{_bar(w[n])}` {w[n]*100:4.1f}% |"
102
- + (" ⬅ **routed**" if n == r["winner"] else "")
103
- for n in w
104
- )
105
- lat = r["shared_latent"][:16]
106
- spark = "".join("▁▂▃▄▅▆▇█"[min(7, int((abs(v)) / (max(1e-3, max(abs(x) for x in lat))) * 7))] for v in lat)
107
- yield f"""### Routed to {EMOJI.get(r['winner'], r['winner'])}
108
- The expert with the **lowest bits/byte** (most fluent on your text) wins the route.
109
-
110
- | expert | train steps | bits/byte | routing weight |
111
- |---|---|---|---|
112
- {rows}
113
-
114
- **{EMOJI.get(r['winner'], r['winner'])} continues your prompt:**
115
- > {q}**{r['generation']}**
116
-
117
- **Fused latent** (both experts' output latents combined — a glimpse of the shared bus): `{spark}`
118
-
119
- <sub>{_FOOTER}</sub>"""
120
-
121
-
122
- @_gpu(duration=120)
123
- def moe_key_recall(n):
124
- """THE PROOF: a random key shown only to the consultant; the asker reproduces it from the
125
- latent alone (with) vs ablated (without)."""
126
- yield _loading_notice("Running the proof")
127
- moe = _to_gpu(_get_moe()(DEVICE))
128
- if not getattr(moe, "key_recall_available", lambda: False)():
129
- yield ("### 🔑 Bridge unavailable\nNeeds the **SpikeWhale** backend and a trained "
130
- "`links/<asker>__from__<consultant>.pt` saved with the full asker.")
131
- return
132
- meta = moe.consult_meta()
133
- a = EMOJI.get(meta["asker"], meta["asker"]); c = EMOJI.get(meta["consultant"], meta["consultant"])
134
- wr = moe.key_recall(n=int(n), ablate=False)
135
- ar = moe.key_recall(n=int(n), ablate=True)
136
- _WARMED["done"] = True
137
- rows = "\n".join(f"| `{k}` | `{rec}` | {'✅' if ok else '❌'} |" for k, rec, ok in wr["examples"])
138
- return_ok = wr['acc'] * 100
139
- return_no = ar['acc'] * 100
140
- yield f"""### 🔑 {a} read {c}'s mind — {return_ok:.0f}% with the latent, {return_no:.0f}% without
141
-
142
- A random key is shown **only to {c}**. {a} never sees it — yet by reading {c}'s latent through the
143
- trained RecursiveLink, it reproduces the key:
144
-
145
- | secret key (only {c} saw it) | {a} recovered it | |
146
- |---|---|---|
147
- {rows}
148
-
149
- ## With the latent: {return_ok:.0f}% → ❌ Cut the latent: {return_no:.0f}%
150
-
151
- <sub>That collapse to chance when the latent is removed is the whole point: the information is genuinely
152
- crossing the latent bridge between two models that were trained **separately, on different data**.
153
- This is the result — routing and generation are the supporting act.</sub>"""
154
-
155
-
156
- @_gpu(duration=120)
157
- def moe_consult(query, max_new):
158
- """Latent-influence demo: inject the consultant's latent into the asker (WITH vs WITHOUT).
159
- The bridge was trained on key-recall, so the latent *steers* the output rather than answering."""
160
- yield _loading_notice("Injecting latent")
161
- moe = _to_gpu(_get_moe()(DEVICE))
162
- if not getattr(moe, "consult_available", lambda: False)():
163
- yield ("### 🔗 Latent bridge unavailable\nNeeds the **SpikeWhale** backend and a trained bridge.")
164
- return
165
- q = (query or "").strip() or "natural selection"
166
- meta = moe.consult_meta()
167
- a = EMOJI.get(meta["asker"], meta["asker"]); c = EMOJI.get(meta["consultant"], meta["consultant"])
168
- with_gen = moe.consult(q, max_new=int(max_new), ablate=False)
169
- abl_gen = moe.consult(q, max_new=int(max_new), ablate=True)
170
- _WARMED["done"] = True
171
- wl, nl = meta["with_latent"], meta["without_latent"]
172
- yield f"""### 🔗 Latent influence — {c}'s latent injected into {a}
173
-
174
- **WITH {c}'s latent (through the trained RecursiveLink):**
175
- > {q}**{with_gen}**
176
-
177
- **WITHOUT it (latent ablated to zero):**
178
- > {q}**{abl_gen}**
179
-
180
- <sub>This shows the latent's raw *effect* on generation, not a Q&A answer. The bridge was trained on
181
- key-recall (ablation with={wl:.3f} vs without={nl:.4f}), so the injected latent pushes the asker toward
182
- that learned behavior — which is why WITH and WITHOUT differ so sharply. The clean proof the latent
183
- carries real information is the **Bridge** tab.</sub>"""
184
-
185
-
186
- @_gpu(duration=120)
187
- def moe_combine(query, max_new, blend, consult):
188
- """Two blends compared at the same mix ratio: a real WEIGHT-MERGE (one merged model) vs an
189
- OUTPUT-BLEND (two models run separately, distributions averaged)."""
190
- yield _loading_notice("Building merge + blending")
191
- moe = _to_gpu(_get_moe()(DEVICE))
192
- if not getattr(moe, "merge_available", lambda: False)():
193
- yield "### 🧬 Needs both specialists loaded."
194
- return
195
- q = (query or "").strip() or "The water cycle works by"
196
- a = float(blend)
197
- merged_gen = moe.merge_generate(q, alpha=a, max_new=int(max_new), consult=bool(consult))
198
- blend_gen = moe.combine(q, max_new=int(max_new), blend=a, consult=bool(consult))
199
- _WARMED["done"] = True
200
- mix = f"{int(round((1-a)*100))}% 📖 Language / {int(round(a*100))}% ➗ Math"
201
- extra = " · +Reasoning's latent (consult)" if consult else ""
202
- yield f"""### 🧬 MoE Modular Minds — two ways to blend · {mix}{extra}
203
-
204
- **① Weight merge** — a *single* model whose weights are `(1-α)·Language + α·Math` — one network, one forward pass:
205
- > {q}**{merged_gen}**
206
-
207
- **② Output blend** — both models run separately, their next-token distributions averaged each step:
208
- > {q}**{blend_gen}**
209
-
210
- <sub>Same mix ratio, two different mechanisms. **Weight merge** fuses the actual parameters into one model
211
- (only possible because they're the identical dense architecture); **output blend** is an inference-time
212
- ensemble of two separate models (only possible because they share the 16k tokenizer). Tick *consult* to also
213
- route Reasoning's latent into each through the trained bridge. Exploratory — generations are rough at this scale.</sub>"""
214
-
215
-
216
- HERO = """# 🧩 Modular Mind — two specialists that talk in latent space
217
- **Two ~80M models trained completely separately** — 📖 **Language** on FineWeb-Edu, ➗ **Math** on
218
- FineMath — that never saw each other's data. A coordinator **routes** your query to the right one,
219
- and a trained **RecursiveLink** lets them **communicate through latent space**: Language can read
220
- information straight out of Math's "mind." The **🔑 Bridge** tab proves it.
221
-
222
- > ℹ️ *These specialists were trained only to demonstrate a **verifiable result** — clean routing and a
223
- > provable latent-bridge ablation — **not** for production-quality output. The generated text is
224
- > intentionally rough at this scale; the mechanism is the point.*"""
225
-
226
- BRIDGE_INTRO = """### The proof: two independent models, one latent channel
227
- A random secret key is shown **only to ➗ Math**. 📖 Language never sees it — but by reading Math's
228
- latent through the trained RecursiveLink, it **reproduces the key**. Zero out the latent and it
229
- collapses to chance. That gap *is* the result: real information crossing between two models that
230
- were trained on different data and never met. **Hit the button.**"""
231
-
232
- INTRO_BYTE = """## 🧩 Experiment — Modular Mind as a Mixture of Experts
233
- Three tiny ~10M byte-level specialists (language, math, tool-use), each streamed-trained on its own
234
- dataset. A coordinator **routes** your query to whichever expert is most fluent (perplexity-based MoE)
235
- and fuses their latents through a **RecursiveLink**. Try a math problem vs. a sentence."""
236
-
237
-
238
- def _routing_block():
239
- with gr.Row():
240
- q = gr.Textbox(label="Your prompt", value="Solve for x: 2x + 3 = 11",
241
- scale=4, placeholder="a sentence or a math problem…")
242
- n = gr.Slider(40, 300, value=80, step=20, label="generate tokens", scale=1)
243
- btn = gr.Button("🧭 Route & generate", variant="primary")
244
- out = gr.Markdown()
245
- btn.click(moe_run, [q, n], out)
246
- gr.Examples(examples=[["The theory of evolution explains", 80],
247
- ["Compute the derivative of x^2 + 3x", 80],
248
- ["The history of the Roman Empire began", 80]],
249
- inputs=[q, n])
250
-
251
-
252
- def build_moe_panel():
253
- """Create the MoE demo components inside the current gr.Blocks context."""
254
- if not _SPIKEWHALE:
255
- with gr.Accordion("🧩 Experiment: Modular Mind = Mixture of Experts (3 specialists)", open=False):
256
- gr.Markdown(INTRO_BYTE)
257
- _routing_block()
258
- return
259
-
260
- with gr.Accordion("🧩 Modular Mind — independent specialists communicating in latent space", open=True):
261
- gr.Markdown(HERO)
262
- with gr.Tabs():
263
- # The headline result, FIRST.
264
- with gr.Tab("🔑 The latent bridge — the proof"):
265
- gr.Markdown(BRIDGE_INTRO)
266
- with gr.Row():
267
- kn = gr.Slider(4, 16, value=8, step=1, label="keys to test", scale=3)
268
- kbtn = gr.Button("🔑 Run the proof", variant="primary", scale=1)
269
- kout = gr.Markdown()
270
- kbtn.click(moe_key_recall, [kn], kout)
271
-
272
- # Routing — the supporting act.
273
- with gr.Tab("🧭 Routing & generation"):
274
- gr.Markdown("Type a math problem vs. a sentence and watch the **route flip** — each "
275
- "expert is most fluent (lowest bits/byte) on its own domain.")
276
- _routing_block()
277
-
278
- # Latent influence — honest "with vs without".
279
- with gr.Tab("🔗 Latent influence"):
280
- gr.Markdown(
281
- "Inject Math's latent into 📖 Language through the **trained RecursiveLink** and "
282
- "see generation **with vs without** it. The bridge was trained on key-recall, so the "
283
- "latent *steers* the output (it doesn't answer the question) — the point is how "
284
- "strongly it changes generation. The clean proof is the **Bridge** tab.")
285
- with gr.Row():
286
- cq = gr.Textbox(label="Text to encode (Math's latent → injected into Language)",
287
- value="natural selection", scale=4)
288
- cn = gr.Slider(40, 200, value=80, step=20, label="generate tokens", scale=1)
289
- cbtn = gr.Button("🔗 Show latent influence (with vs without)", variant="secondary")
290
- cout = gr.Markdown()
291
- cbtn.click(moe_consult, [cq, cn], cout)
292
-
293
- # MoE Modular Minds — TWO ways to blend the specialists, compared side by side.
294
- with gr.Tab("🧬 MoE Modular Minds"):
295
- gr.Markdown(
296
- "**Two ways to blend the two specialists**, shown side by side at the same mix ratio:\n"
297
- "- **① Weight merge** — fuse the *parameters* into one model `(1-α)·Language + α·Math` "
298
- "(works because they're the identical dense architecture).\n"
299
- "- **② Output blend** — run both models separately and average their next-token "
300
- "distributions (works because they share the 16k tokenizer).\n\n"
301
- "Slide the mix, and tick *consult* to also route Reasoning's latent into each through the "
302
- "trained bridge.")
303
- with gr.Row():
304
- mq = gr.Textbox(label="Prompt", value="The water cycle works by", scale=4)
305
- mn = gr.Slider(40, 160, value=70, step=10, label="generate tokens", scale=1)
306
- with gr.Row():
307
- mblend = gr.Slider(0.0, 1.0, value=0.5, step=0.1,
308
- label="mix α: 0 = 📖 Language ⟷ 1 = ➗ Math", scale=3)
309
- mconsult = gr.Checkbox(value=False, label="consult (inject Reasoning's latent)", scale=1)
310
- mbtn = gr.Button("🧬 Blend both ways (weight-merge vs output-blend)", variant="primary")
311
- mout = gr.Markdown()
312
- mbtn.click(moe_combine, [mq, mn, mblend, mconsult], mout)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ panel.py -- the Gradio section for the bottom of the boss app: a live demo of the
3
+ Modular-Mind mixture-of-experts.
4
+
5
+ For the SpikeWhale backend it leads with the *latent bridge* (the real result) and
6
+ organizes the three demos into tabs. Output is rendered as rich HTML (animated routing
7
+ cards, a latent-bus strip, character-diff key recovery, live token streaming) instead
8
+ of markdown tables. Every handler is a generator that yields an instant "loading"
9
+ notice first, so the first run never looks frozen while the ~80M models lazy-load.
10
+ Hot-reloads checkpoints.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import html as _h
15
+ import os
16
+ import sys
17
+
18
+ import gradio as gr
19
+
20
+ # ZeroGPU: @spaces.GPU allocates a GPU only for the decorated call (CUDA is never touched at
21
+ # import/startup). Falls back to a no-op decorator when `spaces` isn't installed (local / plain CPU).
22
+ try:
23
+ import spaces
24
+ _gpu = spaces.GPU
25
+ except Exception:
26
+ def _gpu(fn=None, **kw):
27
+ return fn if callable(fn) else (lambda f: f)
28
+
29
+
30
+ def _to_gpu(moe):
31
+ if hasattr(moe, "to_gpu_if_available"):
32
+ moe.to_gpu_if_available()
33
+ return moe
34
+
35
+
36
+ EMOJI = {"language": "📖 Language", "math": "➗ Math", "tool": "🛠️ Tool-use"}
37
+ COLOR = {"language": "#6aa9ff", "math": "#58d68d", "tool": "#f5b041"}
38
+ DEVICE = os.environ.get("MM_AGENTS_DEVICE", "cpu")
39
+ # Self-contained SpikeWhale bundle that ships next to this file (agents/modmind/: the 80M
40
+ # specialists + bridge + inference code). If it's present we default to the SpikeWhale backend
41
+ # so the HuggingFace Space "just works" with no env config. Env vars still override.
42
+ _BUNDLED_MODMIND = os.path.join(os.path.dirname(os.path.abspath(__file__)), "modmind")
43
+ _DEFAULT_BACKEND = "spikewhale" if os.path.isdir(_BUNDLED_MODMIND) else "bytegpt"
44
+ _SPIKEWHALE = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower() in ("spikewhale", "modmind")
45
+ _WARMED = {"done": False} # so the "loading the models" notice only shows on the first run
46
+
47
+ _FOOTER = (
48
+ "Two ~80M dense specialists — 📖 Language (FineWeb-Edu) and ➗ Math (FineMath) — sharing a "
49
+ "16k length-max tokenizer. A coordinator routes by bits-per-byte, and a trained RecursiveLink "
50
+ "lets them communicate in latent space (proven in the Bridge tab). Hot-reloads checkpoints."
51
+ if _SPIKEWHALE else
52
+ "Three byte-level ~10M specialists, streamed-trained on FineWeb-Edu / FineMath / "
53
+ "glaive-function-calling. Tiny + early-trained, so generations are rough — the routing "
54
+ "(which expert is most confident) is the point. It hot-reloads as training continues."
55
+ )
56
+
57
+
58
+ def _get_moe():
59
+ """Pick the MoE backend. Defaults to the bundled SpikeWhale 80M specialists
60
+ (agents/modmind/) when present, else the byte-level ByteGPT experts. MM_MOE_BACKEND
61
+ and MODMIND_DIR override."""
62
+ backend = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower()
63
+ if backend in ("spikewhale", "modmind"):
64
+ mm_dir = os.environ.get("MODMIND_DIR", _BUNDLED_MODMIND)
65
+ if mm_dir and mm_dir not in sys.path:
66
+ sys.path.insert(0, mm_dir) # front: ModMind's model.py wins over agents/model.py
67
+ from moe_gradio import get_moe
68
+ return get_moe
69
+ from orchestrator import get_moe
70
+ return get_moe
71
+
72
+
73
+ # ---- HTML rendering -------------------------------------------------------------
74
+ _CSS = """<style>
75
+ .mmx{font-family:system-ui,sans-serif;color:#dde;margin:4px 0}
76
+ .mmx .note{background:#14141c;border:1px solid #2a2a35;border-radius:10px;padding:12px 14px;color:#9bd;font-size:14px}
77
+ .mmx .h{font-size:17px;font-weight:800;margin:4px 0 8px}
78
+ .mmx .p{color:#8892a8}
79
+ .mmx .g{color:#eef2ff;font-weight:600}
80
+ .mmx .cards{display:flex;gap:10px;flex-wrap:wrap;margin:6px 0}
81
+ .mmx .card{flex:1;min-width:210px;background:#14141c;border:1px solid #2a2a35;border-radius:12px;padding:11px 13px;position:relative;overflow:hidden}
82
+ .mmx .card .nm{font-weight:800;font-size:15px}
83
+ .mmx .card .meta{color:#99a;font-size:11px;margin-top:2px}
84
+ .mmx .card .bar{height:10px;background:#23232e;border-radius:6px;margin-top:8px;overflow:hidden}
85
+ .mmx .card .fill{height:100%;border-radius:6px;animation:mmxw .7s ease}
86
+ .mmx .card .pct{font-size:12px;color:#bcd;margin-top:4px}
87
+ .mmx .badge{position:absolute;top:9px;right:10px;font-size:10px;font-weight:800;letter-spacing:.08em;padding:3px 8px;border-radius:99px;color:#0a1410}
88
+ @keyframes mmxw{from{width:0}}
89
+ .mmx .lat{display:flex;gap:2px;align-items:center;height:30px;background:#101018;border:1px solid #23232e;border-radius:8px;padding:3px 6px;margin:8px 0 2px}
90
+ .mmx .lat i{flex:1;border-radius:2px}
91
+ .mmx .cap{color:#778;font-size:11px;margin:2px 0 8px}
92
+ .mmx .gen{background:#101018;border:1px solid #2a2a35;border-radius:12px;padding:13px 15px;margin:10px 0;font-size:15px;line-height:1.6}
93
+ .mmx .caret{display:inline-block;width:9px;height:17px;border-radius:2px;background:#7ad1ff;margin-left:2px;vertical-align:text-bottom;animation:mmxb .8s steps(1) infinite}
94
+ @keyframes mmxb{50%{opacity:0}}
95
+ .mmx .stats{display:flex;gap:10px;flex-wrap:wrap;margin:10px 0}
96
+ .mmx .stat{flex:1;min-width:130px;text-align:center;background:#14141c;border:1px solid #2a2a35;border-radius:12px;padding:13px 8px}
97
+ .mmx .stat .v{font-size:30px;font-weight:800;line-height:1}
98
+ .mmx .stat .l{font-size:11px;color:#99a;margin-top:6px}
99
+ .mmx .krow{display:flex;gap:3px;align-items:center;margin:4px 0;flex-wrap:wrap}
100
+ .mmx .kc{width:27px;height:27px;border-radius:6px;display:inline-flex;align-items:center;justify-content:center;font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace;font-weight:700;font-size:14px}
101
+ .mmx .kc.k{background:#23232e;color:#aab}
102
+ .mmx .kc.g{background:#1f8a55;color:#fff}
103
+ .mmx .kc.r{background:#8a2f3d;color:#fff;opacity:.92}
104
+ .mmx .arr{color:#667;margin:0 8px;font-size:15px}
105
+ .mmx .klbl{min-width:240px;color:#99a;font-size:12px;text-align:right;margin-right:10px}
106
+ .mmx .duo{display:flex;gap:10px;flex-wrap:wrap;margin:8px 0}
107
+ .mmx .duo>div{flex:1;min-width:280px;background:#101018;border:1px solid #2a2a35;border-radius:12px;padding:12px 14px;font-size:14.5px;line-height:1.6}
108
+ .mmx .duo .hd{font-weight:800;font-size:13px;margin-bottom:7px}
109
+ .mmx .duo .with{border-color:#2e7d5b;box-shadow:0 0 12px rgba(46,204,113,.12)}
110
+ .mmx .mix{height:12px;border-radius:7px;background:linear-gradient(90deg,#6aa9ff,#58d68d);position:relative;margin:12px 2px 4px}
111
+ .mmx .mix b{position:absolute;top:-4px;width:4px;height:20px;border-radius:2px;background:#fff;box-shadow:0 0 8px #fff}
112
+ .mmx .sub{color:#889;font-size:12px;line-height:1.5;margin-top:8px}
113
+ </style>"""
114
+
115
+
116
+ def _wrap(body):
117
+ return _CSS + "<div class='mmx'>" + body + "</div>"
118
+
119
+
120
+ def _esc(s):
121
+ return _h.escape(s or "").replace("\n", "<br>")
122
+
123
+
124
+ def _notice(action="Generating"):
125
+ """First-run popup + in-place message so nothing ever looks frozen."""
126
+ if not _WARMED["done"]:
127
+ try:
128
+ gr.Info("First run — loading the models (~20–40s on CPU). After this, it's quick.")
129
+ except Exception:
130
+ pass
131
+ return _wrap(f"<div class='note'>⏳ Loading the ~80M specialists + {action.lower()}… "
132
+ "first run can take ~20–40s on CPU; every run after is fast.</div>")
133
+ return _wrap(f"<div class='note'>⏳ {action}…</div>")
134
+
135
+
136
+ def _msg(title, body):
137
+ return _wrap(f"<div class='note'><b>{title}</b><br>{body}</div>")
138
+
139
+
140
+ def _cards(winner, weights, bits, steps):
141
+ """One animated card per expert: fluency, routing weight bar, winner badge + glow."""
142
+ out = []
143
+ for n, wv in weights.items():
144
+ c = COLOR.get(n, "#9b59b6")
145
+ win = (n == winner)
146
+ style = f"border-color:{c};box-shadow:0 0 16px {c}40" if win else ""
147
+ badge = f"<span class='badge' style='background:{c}'>ROUTED ✓</span>" if win else ""
148
+ out.append(
149
+ f"<div class='card' style='{style}'>{badge}"
150
+ f"<div class='nm' style='color:{c}'>{EMOJI.get(n, n)}</div>"
151
+ f"<div class='meta'>{steps.get(n, 0):,} train steps · {bits[n]:.2f} bits/byte (lower = more fluent)</div>"
152
+ f"<div class='bar'><div class='fill' style='width:{wv*100:.1f}%;background:{c}'></div></div>"
153
+ f"<div class='pct'>routing weight {wv*100:.1f}%</div></div>")
154
+ return "<div class='cards'>" + "".join(out) + "</div>"
155
+
156
+
157
+ def _latent(shared, n=48):
158
+ """The shared latent bus as a strip of signed bars (like the piano's latent strip)."""
159
+ vals = list(shared or [])[:n]
160
+ if not vals:
161
+ return ""
162
+ mx = max(1e-6, max(abs(v) for v in vals))
163
+ cells = "".join(
164
+ f"<i style='height:{max(8.0, abs(v) / mx * 100):.0f}%;"
165
+ f"background:{'#5bbcdf' if v >= 0 else '#df7a5b'}'></i>" for v in vals)
166
+ return (f"<div class='lat'>{cells}</div>"
167
+ f"<div class='cap'>the shared latent bus — every expert's output latent, fused by the "
168
+ f"RecursiveLink (first {len(vals)} of 256 dims; blue = +, orange = −)</div>")
169
+
170
+
171
+ def _gen_box(prompt, gen, live=False):
172
+ caret = "<span class='caret'></span>" if live else ""
173
+ return (f"<div class='gen'><span class='p'>{_esc(prompt)}</span>"
174
+ f"<span class='g'>{_esc(gen)}</span>{caret}</div>")
175
+
176
+
177
+ def _key_rows(examples):
178
+ """Wordle-style per-character diff: secret key -> what the asker recovered."""
179
+ rows = []
180
+ for k, rec, ok in examples:
181
+ sec = "".join(f"<span class='kc k'>{_h.escape(ch)}</span>" for ch in k)
182
+ got = "".join(
183
+ f"<span class='kc {'g' if i < len(rec) and rec[i] == ch else 'r'}'>"
184
+ f"{_h.escape(rec[i]) if i < len(rec) else '·'}</span>"
185
+ for i, ch in enumerate(k))
186
+ rows.append(f"<div class='krow'>{sec}<span class='arr'>→</span>{got}"
187
+ f"{'&nbsp;✅' if ok else ''}</div>")
188
+ return "".join(rows)
189
+
190
+
191
+ def _char_acc(examples):
192
+ tot = hit = 0
193
+ for k, rec, _ in examples:
194
+ for i, ch in enumerate(k):
195
+ tot += 1
196
+ hit += int(i < len(rec) and rec[i] == ch)
197
+ return hit / max(1, tot)
198
+
199
+
200
+ # ---- handlers ---------------------------------------------------------------------
201
+ @_gpu(duration=120)
202
+ def moe_run(query, max_new):
203
+ yield _notice("Routing & generating")
204
+ moe = _to_gpu(_get_moe()(DEVICE))
205
+ if not moe.available():
206
+ if _SPIKEWHALE:
207
+ yield _msg("⏳ No SpikeWhale experts found",
208
+ "Set <code>MODMIND_DIR</code> to your ModMind folder and make sure "
209
+ "<code>&lt;domain&gt;/checkpoints/step_*.pt</code> exist (the panel hot-reloads them).")
210
+ else:
211
+ yield _msg("⏳ No experts trained yet",
212
+ "Run <code>python agents/train.py --expert language</code> (and <code>math</code>, <code>tool</code>).")
213
+ return
214
+ q = (query or "").strip() or "The"
215
+ winner, weights, bits = moe.route(q)
216
+ _, shared = moe.shared_latent(q)
217
+ steps = dict(getattr(moe, "steps", {}) or {})
218
+ c = COLOR.get(winner, "#9b59b6")
219
+ head = (f"<div class='h'>🧭 Routed to <span style='color:{c}'>{EMOJI.get(winner, winner)}</span>"
220
+ f" — the expert most fluent on your text (lowest bits/byte) wins</div>"
221
+ + _cards(winner, weights, bits, steps) + _latent(shared))
222
+ gen = ""
223
+ if hasattr(moe, "generate_stream"): # live token streaming
224
+ for _, gen in moe.generate_stream(q, winner, max_new=int(max_new)):
225
+ yield _wrap(head + _gen_box(q, gen, live=True))
226
+ else:
227
+ r = moe.run(q, max_new=int(max_new))
228
+ gen = r.get("generation", "")
229
+ _WARMED["done"] = True
230
+ yield _wrap(head + _gen_box(q, gen, live=False) + f"<div class='sub'>{_FOOTER}</div>")
231
+
232
+
233
+ @_gpu(duration=120)
234
+ def moe_key_recall(n):
235
+ """THE PROOF: a random key shown only to the consultant; the asker reproduces it from the
236
+ latent alone (with) vs ablated (without)."""
237
+ yield _notice("Running the proof")
238
+ moe = _to_gpu(_get_moe()(DEVICE))
239
+ if not getattr(moe, "key_recall_available", lambda: False)():
240
+ yield _msg("🔑 Bridge unavailable",
241
+ "Needs the <b>SpikeWhale</b> backend and a trained "
242
+ "<code>links/&lt;asker&gt;__from__&lt;consultant&gt;.pt</code> saved with the full asker.")
243
+ return
244
+ meta = moe.consult_meta()
245
+ a = EMOJI.get(meta["asker"], meta["asker"]); c = EMOJI.get(meta["consultant"], meta["consultant"])
246
+ wr = moe.key_recall(n=int(n), ablate=False)
247
+ ar = moe.key_recall(n=int(n), ablate=True)
248
+ _WARMED["done"] = True
249
+ cw, ca = _char_acc(wr["examples"]) * 100, _char_acc(ar["examples"]) * 100
250
+ stats = (
251
+ "<div class='stats'>"
252
+ f"<div class='stat' style='border-color:#2e7d5b;box-shadow:0 0 12px rgba(46,204,113,.12)'>"
253
+ f"<div class='v' style='color:#58d68d'>{cw:.0f}%</div>"
254
+ f"<div class='l'>secret characters recovered<br><b>WITH</b> the latent</div></div>"
255
+ f"<div class='stat'><div class='v' style='color:#e07b8a'>{ca:.0f}%</div>"
256
+ f"<div class='l'>recovered with the latent<br><b>CUT</b> (ablated to zero)</div></div>"
257
+ f"<div class='stat'><div class='v' style='color:#99a'>1.6%</div>"
258
+ f"<div class='l'>chance level<br>(1 in 62 per character)</div></div>"
259
+ "</div>")
260
+ yield _wrap(
261
+ f"<div class='h'>🔑 {a} read {c}'s mind through the latent bridge</div>"
262
+ f"<div class='sub'>A random secret key is shown <b>only to {c}</b>. {a} never sees it — "
263
+ f"it must reproduce the key purely by reading {c}'s latent through the trained RecursiveLink.</div>"
264
+ + stats
265
+ + f"<div class='cap'>secret key (only {c} saw it) → what {a} recovered, character by character"
266
+ f" · {wr['acc']*100:.0f}% of keys perfectly exact</div>"
267
+ + _key_rows(wr["examples"])
268
+ + "<div class='sub'>Cut the latent and recovery collapses to chance — that gap <i>is</i> the result: "
269
+ "real information crossing between two models that were trained <b>separately, on different data</b>, "
270
+ "and never met. Routing and generation are the supporting act.</div>")
271
+
272
+
273
+ def _tile_row(label, chars, classes):
274
+ cells = "".join(f"<span class='kc {cls}'>{_h.escape(ch) if ch else '·'}</span>"
275
+ for ch, cls in zip(chars, classes))
276
+ return f"<div class='krow'><span class='klbl'>{label}</span>{cells}</div>"
277
+
278
+
279
+ @_gpu(duration=120)
280
+ def moe_secret(secret):
281
+ """Interactive bridge demo: the user's secret is shown ONLY to Math; Language answers
282
+ 'what did Math just see?' from the latent alone — legible content, not steered babble."""
283
+ yield _notice("Transmitting through the latent bridge")
284
+ moe = _to_gpu(_get_moe()(DEVICE))
285
+ if not getattr(moe, "relay_secret", None) or not getattr(moe, "key_recall_available", lambda: False)():
286
+ yield _msg("📨 Bridge unavailable",
287
+ "Needs the <b>SpikeWhale</b> backend and a trained bridge saved with the full asker.")
288
+ return
289
+ meta = moe.consult_meta()
290
+ a = EMOJI.get(meta["asker"], meta["asker"]); c = EMOJI.get(meta["consultant"], meta["consultant"])
291
+ wr = moe.relay_secret(secret, ablate=False)
292
+ if wr.get("error"):
293
+ yield _msg("📨 " + _h.escape(wr["error"]),
294
+ "Type exactly 6 characters, letters and digits only — e.g. <code>Xy9Qz2</code>.")
295
+ return
296
+ ar = moe.relay_secret(secret, ablate=True)
297
+ _WARMED["done"] = True
298
+ s, got, abl = wr["secret"], wr["recovered"], ar["recovered"]
299
+ nok = sum(wr["ok"])
300
+ rows = (
301
+ _tile_row(f"you told {c} (only {c} saw this):", list(s), ["k"] * len(s))
302
+ + _tile_row(f"{a} read from {c}'s latent:",
303
+ [got[i] if i < len(got) else "" for i in range(len(s))],
304
+ ["g" if ok else "r" for ok in wr["ok"]])
305
+ + _tile_row("same question, latent cut:",
306
+ [abl[i] if i < len(abl) else "" for i in range(len(s))],
307
+ ["g" if ok else "r" for ok in ar["ok"]])
308
+ )
309
+ align_note = "" if wr["aligned"] else (
310
+ "<div class='sub'>⚠️ The tokenizer fused some of those characters into multi-character tokens "
311
+ "the bridge never saw in training (it was trained on random-looking keys), so transmission "
312
+ "degrades. Random-looking mixes of letters and digits — like <code>Xy9Qz2</code> — transmit best.</div>")
313
+ yield _wrap(
314
+ f"<div class='h'>📨 {a} read your secret out of {c}'s mind — "
315
+ f"{nok}/{len(s)} characters arrived intact</div>"
316
+ f"<div class='sub'>{a} never saw your text. It answered one question — <i>“what did {c} just "
317
+ f"see?”</i> — using only {c}'s latent, passed through the trained RecursiveLink.</div>"
318
+ + rows + align_note
319
+ + f"<div class='sub'>The bridge is a noisy channel (~4–5 of 6 characters usually survive), but cut "
320
+ f"the latent and the answer collapses to gibberish — the content is genuinely crossing in latent "
321
+ f"space, never as text. Two models, trained separately on different data, sharing a thought.</div>")
322
+
323
+
324
@_gpu(duration=120)
def moe_ask(a, op, b):
    """Q->A bridge demo: ask an arithmetic question and render the relayed answer.

    The problem is sent to ``moe.ask_math`` twice -- normally and with
    ``ablate=True`` -- and both answers are drawn as tile rows under the
    expected digits.

    Args:
        a: Left operand; anything ``int()`` accepts.
        op: Operator. ``"×"``/``"x"`` are normalised to ``"*"`` and ``"−"`` to
            ``"-"``; after that only ``"+"``, ``"-"`` and ``"*"`` are accepted.
        b: Right operand; anything ``int()`` accepts.

    Yields:
        HTML strings: first a progress notice, then either an explanatory
        message (bridge not trained, bad operands/operator, out of range,
        backend error) or the result panel.
    """
    yield _notice("Asking Math through the bridge")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "qa_available", lambda: False)():
        yield _msg("🧮 The question→answer bridge isn't trained yet",
                   "Run <code>python agents/modmind/train_qa_link.py</code> — the panel "
                   "hot-reloads the result as soon as a checkpoint is saved.")
        return
    op = {"×": "*", "−": "-", "x": "*"}.get(str(op), str(op))
    try:
        a, b = int(a), int(b)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); ValueError covers NaN and bad text.
        yield _msg("🧮 Need two whole numbers", "Pick a and b first.")
        return
    if op not in ("+", "-", "*"):
        # Without this guard an unknown operator skips both range checks below and
        # can later fail with a KeyError when the display symbol is looked up.
        yield _msg("🧮 Unsupported operator", "Choose +, − or ×.")
        return
    if op == "*" and not (2 <= a <= 12 and 2 <= b <= 12):
        yield _msg("🧮 Outside the trained range", "Multiplication was trained on 2–12 × 2–12.")
        return
    if op in ("+", "-") and not (10 <= a <= 99 and 10 <= b <= 99):
        yield _msg("🧮 Outside the trained range", "Addition and subtraction were trained on 10–99.")
        return
    if op == "-" and a < b:
        a, b = b, a                                   # trained on non-negative answers
    wr = moe.ask_math(a, op, b)
    if wr.get("error"):
        yield _msg("🧮 " + _h.escape(wr["error"]), "Try a different problem.")
        return
    ar = moe.ask_math(a, op, b, ablate=True)
    _WARMED["done"] = True
    info = moe.qa_info() or {}
    A = EMOJI.get(info.get("asker", "language"), "📖 Language")
    C = EMOJI.get(info.get("consultant", "math"), "➗ Math")
    try:
        acc = float(info.get("holdout_exact")) * 100
    except (TypeError, ValueError):
        acc = float("nan")
    # NaN is the only value unequal to itself; show "?" rather than "nan%".
    acc_txt = "?" if acc != acc else f"{acc:.0f}"
    memorize = info.get("mode", "memorize") == "memorize"
    opd = {"+": "+", "-": "−", "*": "×"}[op]
    verdict = ("✅ correct" if wr["exact"] else f"❌ not quite (it's {wr['truth']})")
    if memorize:
        scorecard = (
            "Honest scorecard: this bridge was trained on the <b>whole</b> "
            "table of two-digit problems (10–99 for + and −, 2–12 for ×) and answers "
            f"<b>~{acc_txt}%</b> of them correctly. It's a <i>lookup table transmitted through the "
            f"latent</i>, not learned arithmetic — {C} stays frozen and never computes; the bridge + "
            f"{A}'s fine-tune memorized every answer and the question only ever travels in latent "
            f"space. Cut the latent and {A} has no question at all.")
    else:
        scorecard = (
            f"Honest scorecard: this bridge solves <b>{acc_txt}%</b> of problems it has <i>never seen "
            f"in training</i> exactly (held-out validation — generalization). {C} stays frozen; the "
            f"arithmetic skill lives in the bridge + {A}'s fine-tune, and the question only ever "
            f"travels in latent space. Cut the latent and {A} has no question at all.")
    width = len(wr["want"])

    def _padded(digits):
        # One entry per expected digit; "" marks a position with nothing produced,
        # which _tile_row draws as a placeholder tile.
        return [digits[i] if i < len(digits) else "" for i in range(width)]

    rows = (
        _tile_row("the right answer (never shown to anyone):", list(wr["want"]), ["k"] * width)
        + _tile_row(f"{A} answered, reading {C}'s latent:",
                    _padded(wr["digits"]),
                    ["g" if ok else "r" for ok in wr["ok"]])
        + _tile_row("same prompt, latent cut:",
                    _padded(ar["digits"]),
                    ["g" if ok else "r" for ok in ar["ok"]])
    )
    yield _wrap(
        f"<div class='h'>🧮 Only {C} saw <code>{a} {opd} {b}</code> — "
        f"{A} answered <b>{_h.escape(wr['answer'])}</b> · {verdict}</div>"
        f"<div class='sub'>{A}'s entire input was the prompt <code>ANS&gt;</code>. The question "
        f"existed only in {C}'s mind — it crossed to {A} as a 256-dim latent through a RecursiveLink "
        f"trained for question→answer (zero-padded to {width} digits).</div>"
        + rows
        + f"<div class='sub'>{scorecard}</div>")
393
+
394
+
395
@_gpu(duration=120)
def moe_combine(query, max_new, blend, consult):
    """Generate from the same prompt with a weight-merge and with an output-blend.

    Both generations use the same mix ratio so they can be compared side by
    side: ``moe.merge_generate`` (one merged model) versus ``moe.combine``
    (two models, blended at the output).

    Args:
        query: Prompt text; blank/``None`` falls back to a built-in default prompt.
        max_new: Number of tokens to generate (coerced with ``int``).
        blend: Mix ratio (coerced with ``float``); the UI slider supplies 0..1.
        consult: Truthy to also pass ``consult=True`` to both generation calls.

    Yields:
        HTML strings: first a progress notice, then either an "unavailable"
        message or the two-column comparison panel.
    """
    yield _notice("Building merge + blending")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "merge_available", lambda: False)():
        yield _msg("🧬 Unavailable", "Needs both specialists loaded.")
        return
    q = (query or "").strip() or "The water cycle works by"
    alpha = float(blend)
    merged_gen = moe.merge_generate(q, alpha=alpha, max_new=int(max_new), consult=bool(consult))
    blend_gen = moe.combine(q, max_new=int(max_new), blend=alpha, consult=bool(consult))
    _WARMED["done"] = True
    extra = " · +Reasoning's latent (consult)" if consult else ""
    yield _wrap(
        "<div class='h'>🧬 MoE Modular Minds — two ways to blend</div>"
        f"<div class='mix'><b style='left:calc({alpha*100:.0f}% - 2px)'></b></div>"
        f"<div class='cap'>{int(round((1-alpha)*100))}% 📖 Language &nbsp;⟷&nbsp; "
        f"{int(round(alpha*100))}% ➗ Math{extra}</div>"
        "<div class='duo'>"
        "<div><div class='hd' style='color:#bfa8ff'>① Weight merge — ONE model whose weights are "
        "(1−α)·Language + α·Math</div>"
        f"<span class='p'>{_esc(q)}</span> <span class='g'>{_esc(merged_gen)}</span></div>"
        "<div><div class='hd' style='color:#8fd3c7'>② Output blend — both models run, next-token "
        "distributions averaged each step</div>"
        f"<span class='p'>{_esc(q)}</span> <span class='g'>{_esc(blend_gen)}</span></div>"
        "</div>"
        "<div class='sub'>Same mix ratio, two different mechanisms. <b>Weight merge</b> fuses the actual "
        "parameters into one network (only possible because they're the identical dense architecture); "
        "<b>output blend</b> is an inference-time ensemble of two separate models (only possible because "
        "they share the 16k tokenizer). Tick <i>consult</i> to also route Reasoning's latent into each "
        "through the trained bridge. Exploratory — generations are rough at this scale.</div>")
428
+
429
+
430
# Markdown shown at the top of the SpikeWhale accordion.
HERO = """# 🧩 Modular Mind — two specialists that talk in latent space
**Two ~80M models trained completely separately** — 📖 **Language** on FineWeb-Edu, ➗ **Math** on
FineMath — that never saw each other's data. A coordinator **routes** your query to the right one,
and a trained **RecursiveLink** lets them **communicate through latent space**: Language can read
information straight out of Math's "mind." The **🔑 Bridge** tab proves it.

> ℹ️ *These specialists were trained only to demonstrate a **verifiable result** — clean routing and a
> provable latent-bridge ablation — **not** for production-quality output. The generated text is
> intentionally rough at this scale; the mechanism is the point.*"""

# Intro for the "Ask Math a question" tab.
QA_INTRO = """### Ask ➗ Math a question — 📖 Language answers it without ever seeing it
Pick an arithmetic problem. It is shown **only to ➗ Math** (which stays frozen). 📖 Language
receives nothing but Math's 256-dim latent, passed through a RecursiveLink trained for
**question→answer** — and types out the answer digits. Language's only text input is the prompt
`ANS>`; the question itself crosses purely as a latent. The bridge has **memorized the whole table**
of two-digit problems (a lookup table transmitted through latent space, not learned arithmetic) —
cut the latent and Language has no question at all."""

# Intro for the "Tell Math a secret" tab.
SECRET_INTRO = """### Tell ➗ Math a secret — then watch 📖 Language read it out of Math's mind
Type a 6-character code. It is shown **only to ➗ Math** — 📖 Language never sees your text.
Language must answer one question: *“what did Math just see?”* — reading **only Math's latent**
through the trained RecursiveLink. No text crosses between the models; the content arrives in
latent space, legibly, character by character. (The channel is noisy — random-looking mixes of
letters and digits transmit best.)"""

# Intro for the headline "latent bridge" tab.
BRIDGE_INTRO = """### The proof: two independent models, one latent channel
A random secret key is shown **only to ➗ Math**. 📖 Language never sees it — but by reading Math's
latent through the trained RecursiveLink, it **reproduces the key, character by character**. Zero out
the latent and it collapses to chance. That gap *is* the result: real information crossing between two
models that were trained on different data and never met. **Hit the button.**"""

# Intro used when the SpikeWhale backend is not selected.
INTRO_BYTE = """## 🧩 Experiment — Modular Mind as a Mixture of Experts
Three tiny ~10M byte-level specialists (language, math, tool-use), each streamed-trained on its own
dataset. A coordinator **routes** your query to whichever expert is most fluent (perplexity-based MoE)
and fuses their latents through a **RecursiveLink**. Try a math problem vs. a sentence."""
465
+
466
+
467
def _routing_block():
    """Add the routing demo widgets to the current Gradio layout context.

    Creates a prompt textbox and a token-count slider, a button wired to
    ``moe_run`` with an HTML output, and a set of example prompts.
    """
    # NOTE(review): nesting is reconstructed -- the two widgets carrying scale=
    # are placed in the Row and the button below it; confirm against the
    # upstream file.
    with gr.Row():
        q = gr.Textbox(label="Your prompt", value="Solve for x: 2x + 3 = 11",
                       scale=4, placeholder="a sentence or a math problem…")
        n = gr.Slider(40, 300, value=80, step=20, label="generate tokens", scale=1)
    btn = gr.Button("🧭 Route & generate", variant="primary")
    out = gr.HTML()
    btn.click(moe_run, [q, n], out)
    gr.Examples(examples=[["The theory of evolution explains", 80],
                          ["Compute the derivative of x^2 + 3x", 80],
                          ["The history of the Roman Empire began", 80]],
                inputs=[q, n])
479
+
480
+
481
def build_moe_panel():
    """Create the MoE demo components inside the current gr.Blocks context.

    When the SpikeWhale backend is not selected, only a collapsed accordion
    with the routing demo is built. Otherwise an open accordion is built with
    five tabs: the latent-bridge proof, the user-secret relay, the
    question->answer bridge, routing & generation, and the two-way blend.
    """
    # NOTE(review): the with-block nesting below is reconstructed from the
    # scale= hints and section comments; confirm against the upstream file.
    if not _SPIKEWHALE:
        with gr.Accordion("🧩 Experiment: Modular Mind = Mixture of Experts (3 specialists)", open=False):
            gr.Markdown(INTRO_BYTE)
            _routing_block()
        return

    with gr.Accordion("🧩 Modular Mind — independent specialists communicating in latent space", open=True):
        gr.Markdown(HERO)
        with gr.Tabs():
            # The headline result, FIRST.
            with gr.Tab("🔑 The latent bridge — the proof"):
                gr.Markdown(BRIDGE_INTRO)
                with gr.Row():
                    kn = gr.Slider(4, 16, value=8, step=1, label="keys to test", scale=3)
                    kbtn = gr.Button("🔑 Run the proof", variant="primary", scale=1)
                kout = gr.HTML()
                kbtn.click(moe_key_recall, [kn], kout)

            # Interactive: the user's own secret crosses the bridge.
            with gr.Tab("📨 Tell Math a secret"):
                gr.Markdown(SECRET_INTRO)
                with gr.Row():
                    sq = gr.Textbox(label="Your 6-character secret (letters & digits)",
                                    value="Xy9Qz2", max_length=12, scale=3)
                    sbtn = gr.Button("📨 Show it ONLY to Math → let Language read it",
                                     variant="primary", scale=2)
                sout = gr.HTML()
                sbtn.click(moe_secret, [sq], sout)
                gr.Examples(examples=[["Xy9Qz2"], ["Tk7Bn2"], ["q0t0Mz"], ["gG5hH6"]], inputs=[sq])

            # Q->A: Language answers a question only Math ever saw.
            with gr.Tab("🧮 Ask Math a question"):
                gr.Markdown(QA_INTRO)
                with gr.Row():
                    qa_a = gr.Number(value=23, precision=0, label="a", scale=1)
                    qa_op = gr.Dropdown(["+", "−", "×"], value="+", label="op", scale=1)
                    qa_b = gr.Number(value=54, precision=0, label="b", scale=1)
                    qa_btn = gr.Button("🧮 Show ONLY Math the question → Language answers",
                                       variant="primary", scale=2)
                qa_out = gr.HTML()
                qa_btn.click(moe_ask, [qa_a, qa_op, qa_b], qa_out)
                gr.Examples(examples=[[23, "+", 54], [81, "−", 27], [7, "×", 8], [62, "+", 39]],
                            inputs=[qa_a, qa_op, qa_b])

            # Routing — the supporting act.
            with gr.Tab("🧭 Routing & generation"):
                gr.Markdown("Type a math problem vs. a sentence and watch the **route flip** — each "
                            "expert is most fluent (lowest bits/byte) on its own domain. Generation "
                            "streams in live.")
                _routing_block()

            # MoE Modular Minds — TWO ways to blend the specialists, compared side by side.
            with gr.Tab("🧬 MoE Modular Minds"):
                gr.Markdown(
                    "**Two ways to blend the two specialists**, shown side by side at the same mix ratio:\n"
                    "- **① Weight merge** — fuse the *parameters* into one model `(1-α)·Language + α·Math` "
                    "(works because they're the identical dense architecture).\n"
                    "- **② Output blend** — run both models separately and average their next-token "
                    "distributions (works because they share the 16k tokenizer).\n\n"
                    "Slide the mix, and tick *consult* to also route Reasoning's latent into each through the "
                    "trained bridge.")
                with gr.Row():
                    mq = gr.Textbox(label="Prompt", value="The water cycle works by", scale=4)
                    mn = gr.Slider(40, 160, value=70, step=10, label="generate tokens", scale=1)
                with gr.Row():
                    mblend = gr.Slider(0.0, 1.0, value=0.5, step=0.1,
                                       label="mix α: 0 = 📖 Language ⟷ 1 = ➗ Math", scale=3)
                    mconsult = gr.Checkbox(value=False, label="consult (inject Reasoning's latent)", scale=1)
                mbtn = gr.Button("🧬 Blend both ways (weight-merge vs output-blend)", variant="primary")
                mout = gr.HTML()
                mbtn.click(moe_combine, [mq, mn, mblend, mconsult], mout)