Spaces:
Running on Zero
Running on Zero
Upload panel.py
Browse files- agents/panel.py +553 -312
agents/panel.py
CHANGED
|
@@ -1,312 +1,553 @@
|
|
| 1 |
-
"""
|
| 2 |
-
panel.py -- the Gradio section for the bottom of the boss app: a live demo of the
|
| 3 |
-
Modular-Mind mixture-of-experts.
|
| 4 |
-
|
| 5 |
-
For the SpikeWhale backend it leads with the *latent bridge* (the real result) and
|
| 6 |
-
organizes the three demos into tabs.
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
import
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
import
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
"
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
{
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
return
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
{
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
<
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
def
|
| 158 |
-
"""
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
>
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
""
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
>
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
(
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
panel.py -- the Gradio section for the bottom of the boss app: a live demo of the
|
| 3 |
+
Modular-Mind mixture-of-experts.
|
| 4 |
+
|
| 5 |
+
For the SpikeWhale backend it leads with the *latent bridge* (the real result) and
|
| 6 |
+
organizes the three demos into tabs. Output is rendered as rich HTML (animated routing
|
| 7 |
+
cards, a latent-bus strip, character-diff key recovery, live token streaming) instead
|
| 8 |
+
of markdown tables. Every handler is a generator that yields an instant "loading"
|
| 9 |
+
notice first, so the first run never looks frozen while the ~80M models lazy-load.
|
| 10 |
+
Hot-reloads checkpoints.
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import html as _h
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
|
| 18 |
+
import gradio as gr
|
| 19 |
+
|
| 20 |
+
# ZeroGPU: @spaces.GPU allocates a GPU only for the decorated call (CUDA is never touched at
|
| 21 |
+
# import/startup). Falls back to a no-op decorator when `spaces` isn't installed (local / plain CPU).
|
| 22 |
+
try:
|
| 23 |
+
import spaces
|
| 24 |
+
_gpu = spaces.GPU
|
| 25 |
+
except Exception:
|
| 26 |
+
def _gpu(fn=None, **kw):
|
| 27 |
+
return fn if callable(fn) else (lambda f: f)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _to_gpu(moe):
|
| 31 |
+
if hasattr(moe, "to_gpu_if_available"):
|
| 32 |
+
moe.to_gpu_if_available()
|
| 33 |
+
return moe
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Display label (emoji + name) per expert id; lookups fall back to the raw id.
EMOJI = {"language": "📖 Language", "math": "➗ Math", "tool": "🛠️ Tool-use"}
# Accent colour per expert id, used for cards, bars and badges.
COLOR = {"language": "#6aa9ff", "math": "#58d68d", "tool": "#f5b041"}
# Device string handed to the backend factory; MM_AGENTS_DEVICE overrides the "cpu" default.
DEVICE = os.environ.get("MM_AGENTS_DEVICE", "cpu")
# Self-contained SpikeWhale bundle that ships next to this file (agents/modmind/: the 80M
# specialists + bridge + inference code). If it's present we default to the SpikeWhale backend
# so the HuggingFace Space "just works" with no env config. Env vars still override.
_BUNDLED_MODMIND = os.path.join(os.path.dirname(os.path.abspath(__file__)), "modmind")
_DEFAULT_BACKEND = "spikewhale" if os.path.isdir(_BUNDLED_MODMIND) else "bytegpt"
# True when the selected backend is SpikeWhale/ModMind; evaluated once at import time.
_SPIKEWHALE = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower() in ("spikewhale", "modmind")
_WARMED = {"done": False}  # so the "loading the models" notice only shows on the first run

# Footer blurb shown under a finished generation; the wording depends on the active backend.
_FOOTER = (
    "Two ~80M dense specialists — 📖 Language (FineWeb-Edu) and ➗ Math (FineMath) — sharing a "
    "16k length-max tokenizer. A coordinator routes by bits-per-byte, and a trained RecursiveLink "
    "lets them communicate in latent space (proven in the Bridge tab). Hot-reloads checkpoints."
    if _SPIKEWHALE else
    "Three byte-level ~10M specialists, streamed-trained on FineWeb-Edu / FineMath / "
    "glaive-function-calling. Tiny + early-trained, so generations are rough — the routing "
    "(which expert is most confident) is the point. It hot-reloads as training continues."
)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _get_moe():
    """Return the ``get_moe`` factory of the selected backend.

    ``MM_MOE_BACKEND`` (default: SpikeWhale when the bundled ``modmind`` folder exists,
    otherwise ByteGPT) chooses the backend. For SpikeWhale/ModMind the directory from
    ``MODMIND_DIR`` (default: the bundled folder) is pushed to the front of ``sys.path``
    before ``moe_gradio`` is imported; any other value imports from ``orchestrator``.
    """
    chosen = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower()
    if chosen not in ("spikewhale", "modmind"):
        from orchestrator import get_moe
        return get_moe

    bundle_dir = os.environ.get("MODMIND_DIR", _BUNDLED_MODMIND)
    already_listed = bundle_dir in sys.path
    if bundle_dir and not already_listed:
        # front: ModMind's model.py wins over agents/model.py
        sys.path.insert(0, bundle_dir)
    from moe_gradio import get_moe
    return get_moe
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# ---- HTML rendering -------------------------------------------------------------
|
| 74 |
+
_CSS = """<style>
|
| 75 |
+
.mmx{font-family:system-ui,sans-serif;color:#dde;margin:4px 0}
|
| 76 |
+
.mmx .note{background:#14141c;border:1px solid #2a2a35;border-radius:10px;padding:12px 14px;color:#9bd;font-size:14px}
|
| 77 |
+
.mmx .h{font-size:17px;font-weight:800;margin:4px 0 8px}
|
| 78 |
+
.mmx .p{color:#8892a8}
|
| 79 |
+
.mmx .g{color:#eef2ff;font-weight:600}
|
| 80 |
+
.mmx .cards{display:flex;gap:10px;flex-wrap:wrap;margin:6px 0}
|
| 81 |
+
.mmx .card{flex:1;min-width:210px;background:#14141c;border:1px solid #2a2a35;border-radius:12px;padding:11px 13px;position:relative;overflow:hidden}
|
| 82 |
+
.mmx .card .nm{font-weight:800;font-size:15px}
|
| 83 |
+
.mmx .card .meta{color:#99a;font-size:11px;margin-top:2px}
|
| 84 |
+
.mmx .card .bar{height:10px;background:#23232e;border-radius:6px;margin-top:8px;overflow:hidden}
|
| 85 |
+
.mmx .card .fill{height:100%;border-radius:6px;animation:mmxw .7s ease}
|
| 86 |
+
.mmx .card .pct{font-size:12px;color:#bcd;margin-top:4px}
|
| 87 |
+
.mmx .badge{position:absolute;top:9px;right:10px;font-size:10px;font-weight:800;letter-spacing:.08em;padding:3px 8px;border-radius:99px;color:#0a1410}
|
| 88 |
+
@keyframes mmxw{from{width:0}}
|
| 89 |
+
.mmx .lat{display:flex;gap:2px;align-items:center;height:30px;background:#101018;border:1px solid #23232e;border-radius:8px;padding:3px 6px;margin:8px 0 2px}
|
| 90 |
+
.mmx .lat i{flex:1;border-radius:2px}
|
| 91 |
+
.mmx .cap{color:#778;font-size:11px;margin:2px 0 8px}
|
| 92 |
+
.mmx .gen{background:#101018;border:1px solid #2a2a35;border-radius:12px;padding:13px 15px;margin:10px 0;font-size:15px;line-height:1.6}
|
| 93 |
+
.mmx .caret{display:inline-block;width:9px;height:17px;border-radius:2px;background:#7ad1ff;margin-left:2px;vertical-align:text-bottom;animation:mmxb .8s steps(1) infinite}
|
| 94 |
+
@keyframes mmxb{50%{opacity:0}}
|
| 95 |
+
.mmx .stats{display:flex;gap:10px;flex-wrap:wrap;margin:10px 0}
|
| 96 |
+
.mmx .stat{flex:1;min-width:130px;text-align:center;background:#14141c;border:1px solid #2a2a35;border-radius:12px;padding:13px 8px}
|
| 97 |
+
.mmx .stat .v{font-size:30px;font-weight:800;line-height:1}
|
| 98 |
+
.mmx .stat .l{font-size:11px;color:#99a;margin-top:6px}
|
| 99 |
+
.mmx .krow{display:flex;gap:3px;align-items:center;margin:4px 0;flex-wrap:wrap}
|
| 100 |
+
.mmx .kc{width:27px;height:27px;border-radius:6px;display:inline-flex;align-items:center;justify-content:center;font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace;font-weight:700;font-size:14px}
|
| 101 |
+
.mmx .kc.k{background:#23232e;color:#aab}
|
| 102 |
+
.mmx .kc.g{background:#1f8a55;color:#fff}
|
| 103 |
+
.mmx .kc.r{background:#8a2f3d;color:#fff;opacity:.92}
|
| 104 |
+
.mmx .arr{color:#667;margin:0 8px;font-size:15px}
|
| 105 |
+
.mmx .klbl{min-width:240px;color:#99a;font-size:12px;text-align:right;margin-right:10px}
|
| 106 |
+
.mmx .duo{display:flex;gap:10px;flex-wrap:wrap;margin:8px 0}
|
| 107 |
+
.mmx .duo>div{flex:1;min-width:280px;background:#101018;border:1px solid #2a2a35;border-radius:12px;padding:12px 14px;font-size:14.5px;line-height:1.6}
|
| 108 |
+
.mmx .duo .hd{font-weight:800;font-size:13px;margin-bottom:7px}
|
| 109 |
+
.mmx .duo .with{border-color:#2e7d5b;box-shadow:0 0 12px rgba(46,204,113,.12)}
|
| 110 |
+
.mmx .mix{height:12px;border-radius:7px;background:linear-gradient(90deg,#6aa9ff,#58d68d);position:relative;margin:12px 2px 4px}
|
| 111 |
+
.mmx .mix b{position:absolute;top:-4px;width:4px;height:20px;border-radius:2px;background:#fff;box-shadow:0 0 8px #fff}
|
| 112 |
+
.mmx .sub{color:#889;font-size:12px;line-height:1.5;margin-top:8px}
|
| 113 |
+
</style>"""
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _wrap(body):
    """Prefix the shared stylesheet and enclose ``body`` in the ``.mmx`` container div."""
    pieces = (_CSS, "<div class='mmx'>", body, "</div>")
    return "".join(pieces)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def _esc(s):
|
| 121 |
+
return _h.escape(s or "").replace("\n", "<br>")
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _notice(action="Generating"):
    """Build the instant "working on it" note that every handler yields first.

    Once a handler has completed a run (``_WARMED["done"]``) only a short
    ``⏳ {action}…`` note is returned. Before that, a toast is attempted as well
    and the note explains the one-off model loading delay.
    """
    if _WARMED["done"]:
        return _wrap(f"<div class='note'>⏳ {action}…</div>")

    try:
        gr.Info("First run — loading the models (~20–40s on CPU). After this, it's quick.")
    except Exception:
        # The toast is best effort; the inline note below is shown regardless.
        pass
    first_run_note = (
        f"<div class='note'>⏳ Loading the ~80M specialists + {action.lower()}… "
        "first run can take ~20–40s on CPU; every run after is fast.</div>"
    )
    return _wrap(first_run_note)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _msg(title, body):
    """Render a bold ``title`` over ``body`` inside a wrapped note box (both inserted as HTML)."""
    note = "<div class='note'><b>{}</b><br>{}</div>".format(title, body)
    return _wrap(note)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def _cards(winner, weights, bits, steps):
    """Render one card per entry of ``weights`` inside a ``.cards`` flex row.

    Each card shows the expert label, its train steps (``steps``, default 0), its
    bits/byte (``bits``), and a bar sized by the routing weight. The card whose
    name equals ``winner`` gets a coloured border glow and a "ROUTED" badge.
    """
    def _one_card(name, weight):
        accent = COLOR.get(name, "#9b59b6")
        if name == winner:
            frame = f"border-color:{accent};box-shadow:0 0 16px {accent}40"
            ribbon = f"<span class='badge' style='background:{accent}'>ROUTED ✓</span>"
        else:
            frame = ribbon = ""
        percent = weight * 100
        return "".join((
            f"<div class='card' style='{frame}'>{ribbon}",
            f"<div class='nm' style='color:{accent}'>{EMOJI.get(name, name)}</div>",
            f"<div class='meta'>{steps.get(name, 0):,} train steps · {bits[name]:.2f} bits/byte (lower = more fluent)</div>",
            f"<div class='bar'><div class='fill' style='width:{percent:.1f}%;background:{accent}'></div></div>",
            f"<div class='pct'>routing weight {percent:.1f}%</div></div>",
        ))

    tiles = [_one_card(name, weight) for name, weight in weights.items()]
    return "<div class='cards'>" + "".join(tiles) + "</div>"
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def _latent(shared, n=48):
|
| 158 |
+
"""The shared latent bus as a strip of signed bars (like the piano's latent strip)."""
|
| 159 |
+
vals = list(shared or [])[:n]
|
| 160 |
+
if not vals:
|
| 161 |
+
return ""
|
| 162 |
+
mx = max(1e-6, max(abs(v) for v in vals))
|
| 163 |
+
cells = "".join(
|
| 164 |
+
f"<i style='height:{max(8.0, abs(v) / mx * 100):.0f}%;"
|
| 165 |
+
f"background:{'#5bbcdf' if v >= 0 else '#df7a5b'}'></i>" for v in vals)
|
| 166 |
+
return (f"<div class='lat'>{cells}</div>"
|
| 167 |
+
f"<div class='cap'>the shared latent bus — every expert's output latent, fused by the "
|
| 168 |
+
f"RecursiveLink (first {len(vals)} of 256 dims; blue = +, orange = −)</div>")
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def _gen_box(prompt, gen, live=False):
    """Render the escaped prompt and generation in a ``.gen`` box; ``live`` appends a caret."""
    parts = [
        "<div class='gen'>",
        f"<span class='p'>{_esc(prompt)}</span>",
        f"<span class='g'>{_esc(gen)}</span>",
    ]
    if live:
        parts.append("<span class='caret'></span>")
    parts.append("</div>")
    return "".join(parts)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _key_rows(examples):
|
| 178 |
+
"""Wordle-style per-character diff: secret key -> what the asker recovered."""
|
| 179 |
+
rows = []
|
| 180 |
+
for k, rec, ok in examples:
|
| 181 |
+
sec = "".join(f"<span class='kc k'>{_h.escape(ch)}</span>" for ch in k)
|
| 182 |
+
got = "".join(
|
| 183 |
+
f"<span class='kc {'g' if i < len(rec) and rec[i] == ch else 'r'}'>"
|
| 184 |
+
f"{_h.escape(rec[i]) if i < len(rec) else '·'}</span>"
|
| 185 |
+
for i, ch in enumerate(k))
|
| 186 |
+
rows.append(f"<div class='krow'>{sec}<span class='arr'>→</span>{got}"
|
| 187 |
+
f"{' ✅' if ok else ''}</div>")
|
| 188 |
+
return "".join(rows)
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def _char_acc(examples):
|
| 192 |
+
tot = hit = 0
|
| 193 |
+
for k, rec, _ in examples:
|
| 194 |
+
for i, ch in enumerate(k):
|
| 195 |
+
tot += 1
|
| 196 |
+
hit += int(i < len(rec) and rec[i] == ch)
|
| 197 |
+
return hit / max(1, tot)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
# ---- handlers ---------------------------------------------------------------------
|
| 201 |
+
@_gpu(duration=120)
def moe_run(query, max_new):
    """Route a prompt to the winning expert and stream its continuation as HTML.

    Args:
        query: Prompt text; blank or ``None`` falls back to ``"The"``.
        max_new: Generation budget; coerced with ``int()`` and passed on as ``max_new``.

    Yields:
        HTML fragments for the output component: a loading notice first, then either a
        "no experts" message, or the routing header plus the generation box. The box is
        re-rendered for each streamed update and once more at the end with the footer.
    """
    yield _notice("Routing & generating")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not moe.available():
        if _SPIKEWHALE:
            # "<domain>" is entity-escaped: written raw it is parsed as an unknown HTML
            # tag and the placeholder silently disappears from the rendered hint.
            yield _msg("⏳ No SpikeWhale experts found",
                       "Set <code>MODMIND_DIR</code> to your ModMind folder and make sure "
                       "<code>&lt;domain&gt;/checkpoints/step_*.pt</code> exist (the panel hot-reloads them).")
        else:
            yield _msg("⏳ No experts trained yet",
                       "Run <code>python agents/train.py --expert language</code> (and <code>math</code>, <code>tool</code>).")
        return
    q = (query or "").strip() or "The"
    winner, weights, bits = moe.route(q)
    _, shared = moe.shared_latent(q)
    # Backends without a ``steps`` attribute (or with a falsy one) get an empty mapping.
    steps = dict(getattr(moe, "steps", {}) or {})
    c = COLOR.get(winner, "#9b59b6")
    head = (f"<div class='h'>🧭 Routed to <span style='color:{c}'>{EMOJI.get(winner, winner)}</span>"
            f" — the expert most fluent on your text (lowest bits/byte) wins</div>"
            + _cards(winner, weights, bits, steps) + _latent(shared))
    gen = ""
    if hasattr(moe, "generate_stream"):  # live token streaming
        for _, gen in moe.generate_stream(q, winner, max_new=int(max_new)):
            yield _wrap(head + _gen_box(q, gen, live=True))
    else:
        r = moe.run(q, max_new=int(max_new))
        gen = r.get("generation", "")
    # From now on _notice() skips the first-run loading message.
    _WARMED["done"] = True
    yield _wrap(head + _gen_box(q, gen, live=False) + f"<div class='sub'>{_FOOTER}</div>")
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
@_gpu(duration=120)
def moe_key_recall(n):
    """THE PROOF: a random key shown only to the consultant; the asker reproduces it from the
    latent alone (with) vs ablated (without).

    Args:
        n: Number of keys to test; coerced with ``int()``.

    Yields:
        HTML fragments: a loading notice, then either a "Bridge unavailable" message or
        the result page (per-character recovery with and without the latent, plus the
        character-by-character rows of the non-ablated run).
    """
    yield _notice("Running the proof")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "key_recall_available", lambda: False)():
        # The <asker>/<consultant> placeholders are entity-escaped: written raw they are
        # parsed as unknown HTML tags and vanish, leaving a meaningless file name.
        yield _msg("🔑 Bridge unavailable",
                   "Needs the <b>SpikeWhale</b> backend and a trained "
                   "<code>links/&lt;asker&gt;__from__&lt;consultant&gt;.pt</code> saved with the full asker.")
        return
    meta = moe.consult_meta()
    a = EMOJI.get(meta["asker"], meta["asker"])
    c = EMOJI.get(meta["consultant"], meta["consultant"])
    wr = moe.key_recall(n=int(n), ablate=False)  # with the latent
    ar = moe.key_recall(n=int(n), ablate=True)   # latent ablated
    _WARMED["done"] = True
    cw = _char_acc(wr["examples"]) * 100
    ca = _char_acc(ar["examples"]) * 100
    stats = (
        "<div class='stats'>"
        f"<div class='stat' style='border-color:#2e7d5b;box-shadow:0 0 12px rgba(46,204,113,.12)'>"
        f"<div class='v' style='color:#58d68d'>{cw:.0f}%</div>"
        f"<div class='l'>secret characters recovered<br><b>WITH</b> the latent</div></div>"
        f"<div class='stat'><div class='v' style='color:#e07b8a'>{ca:.0f}%</div>"
        f"<div class='l'>recovered with the latent<br><b>CUT</b> (ablated to zero)</div></div>"
        f"<div class='stat'><div class='v' style='color:#99a'>1.6%</div>"
        f"<div class='l'>chance level<br>(1 in 62 per character)</div></div>"
        "</div>")
    yield _wrap(
        f"<div class='h'>🔑 {a} read {c}'s mind through the latent bridge</div>"
        f"<div class='sub'>A random secret key is shown <b>only to {c}</b>. {a} never sees it — "
        f"it must reproduce the key purely by reading {c}'s latent through the trained RecursiveLink.</div>"
        + stats
        + f"<div class='cap'>secret key (only {c} saw it) → what {a} recovered, character by character"
        f" · {wr['acc']*100:.0f}% of keys perfectly exact</div>"
        + _key_rows(wr["examples"])
        + "<div class='sub'>Cut the latent and recovery collapses to chance — that gap <i>is</i> the result: "
        "real information crossing between two models that were trained <b>separately, on different data</b>, "
        "and never met. Routing and generation are the supporting act.</div>")
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def _tile_row(label, chars, classes):
|
| 274 |
+
cells = "".join(f"<span class='kc {cls}'>{_h.escape(ch) if ch else '·'}</span>"
|
| 275 |
+
for ch, cls in zip(chars, classes))
|
| 276 |
+
return f"<div class='krow'><span class='klbl'>{label}</span>{cells}</div>"
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
@_gpu(duration=120)
def moe_secret(secret):
    """Interactive bridge demo: relay the user's secret through the latent, with and without it.

    Yields a loading notice, then one of: a "Bridge unavailable" message, the backend's
    validation error, or the result page comparing the secret with what was recovered
    from the latent and from the ablated latent.
    """
    yield _notice("Transmitting through the latent bridge")
    moe = _to_gpu(_get_moe()(DEVICE))
    bridge_ready = (getattr(moe, "relay_secret", None)
                    and getattr(moe, "key_recall_available", lambda: False)())
    if not bridge_ready:
        yield _msg("📨 Bridge unavailable",
                   "Needs the <b>SpikeWhale</b> backend and a trained bridge saved with the full asker.")
        return

    roles = moe.consult_meta()
    a = EMOJI.get(roles["asker"], roles["asker"])
    c = EMOJI.get(roles["consultant"], roles["consultant"])

    with_latent = moe.relay_secret(secret, ablate=False)
    if with_latent.get("error"):
        yield _msg("📨 " + _h.escape(with_latent["error"]),
                   "Type exactly 6 characters, letters and digits only — e.g. <code>Xy9Qz2</code>.")
        return
    cut_latent = moe.relay_secret(secret, ablate=True)
    _WARMED["done"] = True

    s = with_latent["secret"]
    got = with_latent["recovered"]
    abl = cut_latent["recovered"]
    nok = sum(with_latent["ok"])

    def _padded(text):
        # One entry per secret position; positions past the end of ``text`` become "".
        return [text[i] if i < len(text) else "" for i in range(len(s))]

    def _marks(flags):
        return ["g" if flag else "r" for flag in flags]

    rows = "".join((
        _tile_row(f"you told {c} (only {c} saw this):", list(s), ["k"] * len(s)),
        _tile_row(f"{a} read from {c}'s latent:", _padded(got), _marks(with_latent["ok"])),
        _tile_row("same question, latent cut:", _padded(abl), _marks(cut_latent["ok"])),
    ))

    if with_latent["aligned"]:
        align_note = ""
    else:
        align_note = (
            "<div class='sub'>⚠️ The tokenizer fused some of those characters into multi-character tokens "
            "the bridge never saw in training (it was trained on random-looking keys), so transmission "
            "degrades. Random-looking mixes of letters and digits — like <code>Xy9Qz2</code> — transmit best.</div>")

    headline = (
        f"<div class='h'>📨 {a} read your secret out of {c}'s mind — "
        f"{nok}/{len(s)} characters arrived intact</div>"
        f"<div class='sub'>{a} never saw your text. It answered one question — <i>“what did {c} just "
        f"see?”</i> — using only {c}'s latent, passed through the trained RecursiveLink.</div>")
    closing = (
        f"<div class='sub'>The bridge is a noisy channel (~4–5 of 6 characters usually survive), but cut "
        f"the latent and the answer collapses to gibberish — the content is genuinely crossing in latent "
        f"space, never as text. Two models, trained separately on different data, sharing a thought.</div>")
    yield _wrap(headline + rows + align_note + closing)
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
@_gpu(duration=120)
def moe_ask(a, op, b):
    """The Q->A bridge: ask an arithmetic question through the latent, with and without it.

    Args:
        a: First operand; must be convertible with ``int()``.
        op: Operator. ``×``/``x`` are normalised to ``*`` and ``−`` to ``-``; anything
            other than ``+``, ``-`` or ``*`` is rejected with a message.
        b: Second operand; must be convertible with ``int()``.

    Yields:
        HTML fragments: a loading notice, then either a validation/availability/backend
        error message or the result page (answer tiles with the latent, answer tiles with
        the latent cut, and a scorecard built from ``moe.qa_info()``).
    """
    yield _notice("Asking Math through the bridge")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "qa_available", lambda: False)():
        yield _msg("🧮 The question→answer bridge isn't trained yet",
                   "Run <code>python agents/modmind/train_qa_link.py</code> — the panel "
                   "hot-reloads the result as soon as a checkpoint is saved.")
        return
    op = {"×": "*", "−": "-", "x": "*"}.get(str(op), str(op))
    if op not in ("+", "-", "*"):
        # Without this guard an unknown operator skips both range checks, reaches the
        # backend, and can crash on the display lookup further down (KeyError).
        yield _msg("🧮 Unsupported operation", "Choose +, − or ×.")
        return
    try:
        a, b = int(a), int(b)
    except (TypeError, ValueError):
        yield _msg("🧮 Need two whole numbers", "Pick a and b first.")
        return
    if op == "*" and not (2 <= a <= 12 and 2 <= b <= 12):
        yield _msg("🧮 Outside the trained range", "Multiplication was trained on 2–12 × 2–12.")
        return
    if op in ("+", "-") and not (10 <= a <= 99 and 10 <= b <= 99):
        yield _msg("🧮 Outside the trained range", "Addition and subtraction were trained on 10–99.")
        return
    if op == "-" and a < b:
        a, b = b, a  # trained on non-negative answers
    wr = moe.ask_math(a, op, b)
    if wr.get("error"):
        yield _msg("🧮 " + _h.escape(wr["error"]), "Try a different problem.")
        return
    ar = moe.ask_math(a, op, b, ablate=True)
    _WARMED["done"] = True
    info = moe.qa_info() or {}
    A = EMOJI.get(info.get("asker", "language"), "📖 Language")
    C = EMOJI.get(info.get("consultant", "math"), "➗ Math")
    acc = info.get("holdout_exact", float("nan")) * 100
    memorize = info.get("mode", "memorize") == "memorize"
    opd = {"+": "+", "-": "−", "*": "×"}[op]  # display glyph for the operator
    verdict = ("✅ correct" if wr["exact"] else f"❌ not quite (it's {wr['truth']})")
    if memorize:
        scorecard = (
            f"Honest scorecard: this bridge was trained on the <b>whole</b> "
            f"table of two-digit problems (10–99 for + and −, 2–12 for ×) and answers "
            f"<b>~{acc:.0f}%</b> of them correctly. It's a <i>lookup table transmitted through the "
            f"latent</i>, not learned arithmetic — {C} stays frozen and never computes; the bridge + "
            f"{A}'s fine-tune memorized every answer and the question only ever travels in latent "
            f"space. Cut the latent and {A} has no question at all.")
    else:
        scorecard = (
            f"Honest scorecard: this bridge solves <b>{acc:.0f}%</b> of problems it has <i>never seen "
            f"in training</i> exactly (held-out validation — generalization). {C} stays frozen; the "
            f"arithmetic skill lives in the bridge + {A}'s fine-tune, and the question only ever "
            f"travels in latent space. Cut the latent and {A} has no question at all.")
    # Every row has one tile per digit of the expected answer; missing digits render as "·".
    rows = (
        _tile_row(f"the right answer (never shown to anyone):", list(wr["want"]), ["k"] * len(wr["want"]))
        + _tile_row(f"{A} answered, reading {C}'s latent:",
                    [wr["digits"][i] if i < len(wr["digits"]) else "" for i in range(len(wr["want"]))],
                    ["g" if ok else "r" for ok in wr["ok"]])
        + _tile_row("same prompt, latent cut:",
                    [ar["digits"][i] if i < len(ar["digits"]) else "" for i in range(len(wr["want"]))],
                    ["g" if ok else "r" for ok in ar["ok"]])
    )
    yield _wrap(
        f"<div class='h'>🧮 Only {C} saw <code>{a} {opd} {b}</code> — "
        f"{A} answered <b>{_h.escape(wr['answer'])}</b> · {verdict}</div>"
        f"<div class='sub'>{A}'s entire input was the prompt <code>ANS></code>. The question "
        f"existed only in {C}'s mind — it crossed to {A} as a 256-dim latent through a RecursiveLink "
        f"trained for question→answer (zero-padded to {len(wr['want'])} digits).</div>"
        + rows
        + f"<div class='sub'>{scorecard}</div>")
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
@_gpu(duration=120)
def moe_combine(query, max_new, blend, consult):
    """Compare a weight-merge and an output-blend of the two specialists at one mix ratio.

    Yields a loading notice, then either an "Unavailable" message (when the backend
    reports no merge support) or a side-by-side page with both generations.
    """
    yield _notice("Building merge + blending")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "merge_available", lambda: False)():
        yield _msg("🧬 Unavailable", "Needs both specialists loaded.")
        return

    q = (query or "").strip() or "The water cycle works by"
    a = float(blend)
    budget = int(max_new)
    use_consult = bool(consult)
    merged_gen = moe.merge_generate(q, alpha=a, max_new=budget, consult=use_consult)
    blend_gen = moe.combine(q, max_new=budget, blend=a, consult=use_consult)
    _WARMED["done"] = True

    extra = " · +Reasoning's latent (consult)" if consult else ""
    language_pct = int(round((1 - a) * 100))
    math_pct = int(round(a * 100))
    prompt_html = _esc(q)
    pieces = [
        "<div class='h'>🧬 MoE Modular Minds — two ways to blend</div>",
        f"<div class='mix'><b style='left:calc({a*100:.0f}% - 2px)'></b></div>",
        f"<div class='cap'>{language_pct}% 📖 Language ⟷ ",
        f"{math_pct}% ➗ Math{extra}</div>",
        "<div class='duo'>",
        "<div><div class='hd' style='color:#bfa8ff'>① Weight merge — ONE model whose weights are ",
        "(1−α)·Language + α·Math</div>",
        f"<span class='p'>{prompt_html}</span> <span class='g'>{_esc(merged_gen)}</span></div>",
        "<div><div class='hd' style='color:#8fd3c7'>② Output blend — both models run, next-token ",
        "distributions averaged each step</div>",
        f"<span class='p'>{prompt_html}</span> <span class='g'>{_esc(blend_gen)}</span></div>",
        "</div>",
        "<div class='sub'>Same mix ratio, two different mechanisms. <b>Weight merge</b> fuses the actual ",
        "parameters into one network (only possible because they're the identical dense architecture); ",
        "<b>output blend</b> is an inference-time ensemble of two separate models (only possible because ",
        "they share the 16k tokenizer). Tick <i>consult</i> to also route Reasoning's latent into each ",
        "through the trained bridge. Exploratory — generations are rough at this scale.</div>",
    ]
    yield _wrap("".join(pieces))
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
# Markdown shown at the top of the SpikeWhale accordion (rendered via gr.Markdown).
HERO = """# 🧩 Modular Mind — two specialists that talk in latent space
**Two ~80M models trained completely separately** — 📖 **Language** on FineWeb-Edu, ➗ **Math** on
FineMath — that never saw each other's data. A coordinator **routes** your query to the right one,
and a trained **RecursiveLink** lets them **communicate through latent space**: Language can read
information straight out of Math's "mind." The **🔑 Bridge** tab proves it.

> ℹ️ *These specialists were trained only to demonstrate a **verifiable result** — clean routing and a
> provable latent-bridge ablation — **not** for production-quality output. The generated text is
> intentionally rough at this scale; the mechanism is the point.*"""

# Intro text for the question->answer demo (presumably its own tab; the usage is outside this view).
QA_INTRO = """### Ask ➗ Math a question — 📖 Language answers it without ever seeing it
Pick an arithmetic problem. It is shown **only to ➗ Math** (which stays frozen). 📖 Language
receives nothing but Math's 256-dim latent, passed through a RecursiveLink trained for
**question→answer** — and types out the answer digits. Language's only text input is the prompt
`ANS>`; the question itself crosses purely as a latent. The bridge has **memorized the whole table**
of two-digit problems (a lookup table transmitted through latent space, not learned arithmetic) —
cut the latent and Language has no question at all."""

# Intro text for the "Tell Math a secret" tab.
SECRET_INTRO = """### Tell ➗ Math a secret — then watch 📖 Language read it out of Math's mind
Type a 6-character code. It is shown **only to ➗ Math** — 📖 Language never sees your text.
Language must answer one question: *“what did Math just see?”* — reading **only Math's latent**
through the trained RecursiveLink. No text crosses between the models; the content arrives in
latent space, legibly, character by character. (The channel is noisy — random-looking mixes of
letters and digits transmit best.)"""

# Intro text for the key-recall "proof" tab.
BRIDGE_INTRO = """### The proof: two independent models, one latent channel
A random secret key is shown **only to ➗ Math**. 📖 Language never sees it — but by reading Math's
latent through the trained RecursiveLink, it **reproduces the key, character by character**. Zero out
the latent and it collapses to chance. That gap *is* the result: real information crossing between two
models that were trained on different data and never met. **Hit the button.**"""

# Intro text for the byte-level (non-SpikeWhale) accordion.
INTRO_BYTE = """## 🧩 Experiment — Modular Mind as a Mixture of Experts
Three tiny ~10M byte-level specialists (language, math, tool-use), each streamed-trained on its own
dataset. A coordinator **routes** your query to whichever expert is most fluent (perplexity-based MoE)
and fuses their latents through a **RecursiveLink**. Try a math problem vs. a sentence."""
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
def _routing_block():
    """Create the "route & generate" widgets inside the current Gradio context.

    Builds a prompt textbox and a token-count slider, a primary button, and an
    HTML output. Clicking the button calls ``moe_run`` with the prompt and the
    slider value and writes its result to the HTML component. Three example
    prompts are registered for the same two inputs.

    Returns nothing; the components are attached to whatever Gradio container
    is active when this is called.
    """
    # NOTE(review): the nesting below was reconstructed (the captured source
    # had no indentation). The textbox and slider carry `scale=` and so are
    # placed inside the Row; the button and output are assumed to sit below
    # it — confirm against the original file.
    with gr.Row():
        q = gr.Textbox(label="Your prompt", value="Solve for x: 2x + 3 = 11",
                       scale=4, placeholder="a sentence or a math problem…")
        n = gr.Slider(40, 300, value=80, step=20, label="generate tokens", scale=1)
    btn = gr.Button("🧭 Route & generate", variant="primary")
    out = gr.HTML()
    btn.click(moe_run, [q, n], out)
    gr.Examples(examples=[["The theory of evolution explains", 80],
                          ["Compute the derivative of x^2 + 3x", 80],
                          ["The history of the Roman Empire began", 80]],
                inputs=[q, n])
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
def build_moe_panel():
    """Create the MoE demo components inside the current gr.Blocks context.

    When ``_SPIKEWHALE`` is falsy, only a collapsed accordion containing the
    ``INTRO_BYTE`` text and the routing widgets is created, and the function
    returns early.

    Otherwise an open accordion is created with the ``HERO`` text and five
    tabs, in this order:

    1. latent-bridge proof   -> ``moe_key_recall(keys_to_test)``
    2. tell Math a secret    -> ``moe_secret(secret)``
    3. ask Math a question   -> ``moe_ask(a, op, b)``
    4. routing & generation  -> ``_routing_block()``
    5. MoE Modular Minds     -> ``moe_combine(prompt, n_tokens, mix, consult)``

    Each callback's result is rendered into that tab's ``gr.HTML`` output.
    Returns nothing.
    """
    # NOTE(review): all `with` nesting below was reconstructed (the captured
    # source had no indentation). Components carrying `scale=` are placed in
    # the preceding Row; outputs and event wiring are assumed to follow the
    # Row inside the same tab — confirm against the original file.
    if not _SPIKEWHALE:
        with gr.Accordion("🧩 Experiment: Modular Mind = Mixture of Experts (3 specialists)", open=False):
            gr.Markdown(INTRO_BYTE)
            _routing_block()
        return

    with gr.Accordion("🧩 Modular Mind — independent specialists communicating in latent space", open=True):
        gr.Markdown(HERO)
        with gr.Tabs():
            # The headline result, FIRST.
            with gr.Tab("🔑 The latent bridge — the proof"):
                gr.Markdown(BRIDGE_INTRO)
                with gr.Row():
                    kn = gr.Slider(4, 16, value=8, step=1, label="keys to test", scale=3)
                    kbtn = gr.Button("🔑 Run the proof", variant="primary", scale=1)
                kout = gr.HTML()
                kbtn.click(moe_key_recall, [kn], kout)

            # Interactive: the user's own secret crosses the bridge.
            with gr.Tab("📨 Tell Math a secret"):
                gr.Markdown(SECRET_INTRO)
                with gr.Row():
                    sq = gr.Textbox(label="Your 6-character secret (letters & digits)",
                                    value="Xy9Qz2", max_length=12, scale=3)
                    sbtn = gr.Button("📨 Show it ONLY to Math → let Language read it",
                                     variant="primary", scale=2)
                sout = gr.HTML()
                sbtn.click(moe_secret, [sq], sout)
                gr.Examples(examples=[["Xy9Qz2"], ["Tk7Bn2"], ["q0t0Mz"], ["gG5hH6"]], inputs=[sq])

            # Q->A: Language answers a question only Math ever saw.
            with gr.Tab("🧮 Ask Math a question"):
                gr.Markdown(QA_INTRO)
                with gr.Row():
                    qa_a = gr.Number(value=23, precision=0, label="a", scale=1)
                    qa_op = gr.Dropdown(["+", "−", "×"], value="+", label="op", scale=1)
                    qa_b = gr.Number(value=54, precision=0, label="b", scale=1)
                    qa_btn = gr.Button("🧮 Show ONLY Math the question → Language answers",
                                       variant="primary", scale=2)
                qa_out = gr.HTML()
                qa_btn.click(moe_ask, [qa_a, qa_op, qa_b], qa_out)
                gr.Examples(examples=[[23, "+", 54], [81, "−", 27], [7, "×", 8], [62, "+", 39]],
                            inputs=[qa_a, qa_op, qa_b])

            # Routing — the supporting act.
            with gr.Tab("🧭 Routing & generation"):
                gr.Markdown("Type a math problem vs. a sentence and watch the **route flip** — each "
                            "expert is most fluent (lowest bits/byte) on its own domain. Generation "
                            "streams in live.")
                _routing_block()

            # MoE Modular Minds — TWO ways to blend the specialists, compared side by side.
            with gr.Tab("🧬 MoE Modular Minds"):
                gr.Markdown(
                    "**Two ways to blend the two specialists**, shown side by side at the same mix ratio:\n"
                    "- **① Weight merge** — fuse the *parameters* into one model `(1-α)·Language + α·Math` "
                    "(works because they're the identical dense architecture).\n"
                    "- **② Output blend** — run both models separately and average their next-token "
                    "distributions (works because they share the 16k tokenizer).\n\n"
                    "Slide the mix, and tick *consult* to also route Reasoning's latent into each through the "
                    "trained bridge.")
                with gr.Row():
                    mq = gr.Textbox(label="Prompt", value="The water cycle works by", scale=4)
                    mn = gr.Slider(40, 160, value=70, step=10, label="generate tokens", scale=1)
                with gr.Row():
                    mblend = gr.Slider(0.0, 1.0, value=0.5, step=0.1,
                                       label="mix α: 0 = 📖 Language ⟷ 1 = ➗ Math", scale=3)
                    mconsult = gr.Checkbox(value=False, label="consult (inject Reasoning's latent)", scale=1)
                mbtn = gr.Button("🧬 Blend both ways (weight-merge vs output-blend)", variant="primary")
                mout = gr.HTML()
                mbtn.click(moe_combine, [mq, mn, mblend, mconsult], mout)
|