Spaces:
Running on Zero
Running on Zero
| """ | |
panel.py -- the Gradio section for the bottom of the boss app: a live demo of the
Modular-Mind mixture-of-experts.
For the SpikeWhale backend it leads with the *latent bridge* (the real result) and
organizes the demos into five tabs (bridge proof, secret relay, question->answer,
routing, and blending). Output is rendered as rich HTML (animated routing cards, a
latent-bus strip, character-diff key recovery, live token streaming) instead of
markdown tables. Every handler is a generator that yields an instant "loading"
notice first, so the first run never looks frozen while the ~80M models lazy-load.
Hot-reloads checkpoints.
| """ | |
| from __future__ import annotations | |
| import html as _h | |
| import os | |
| import sys | |
| import gradio as gr | |
# ZeroGPU: @spaces.GPU allocates a GPU only for the decorated call (CUDA is never touched at
# import/startup). When `spaces` cannot be imported (local / plain CPU) a pass-through
# decorator with the same call shapes takes its place.
# NOTE(review): nothing in the visible part of this file applies `_gpu` -- confirm the
# handlers are decorated where required.
try:
    import spaces
    _gpu = spaces.GPU
except Exception:
    def _gpu(fn=None, **kw):
        """No-op stand-in for `spaces.GPU`, usable bare or with keyword options."""
        if callable(fn):
            # Bare form: @_gpu
            return fn
        # Parameterised form: @_gpu(duration=...) -> identity decorator
        return lambda wrapped: wrapped
def _to_gpu(moe):
    """Call `moe.to_gpu_if_available()` when the object provides it; always return `moe`."""
    if not hasattr(moe, "to_gpu_if_available"):
        # Backends without the hook are passed through untouched.
        return moe
    moe.to_gpu_if_available()
    return moe
# Display label (emoji + name) per expert key; lookups fall back to the raw key.
EMOJI = {"language": "📖 Language", "math": "➗ Math", "tool": "🛠️ Tool-use"}
# Accent colour per expert key, used for card borders, bars and headings.
COLOR = {"language": "#6aa9ff", "math": "#58d68d", "tool": "#f5b041"}
# Device string passed to the MoE factory; MM_AGENTS_DEVICE overrides the "cpu" default.
DEVICE = os.environ.get("MM_AGENTS_DEVICE", "cpu")
# Self-contained SpikeWhale bundle that ships next to this file (agents/modmind/: the 80M
# specialists + bridge + inference code). If it's present we default to the SpikeWhale backend
# so the HuggingFace Space "just works" with no env config. Env vars still override.
_BUNDLED_MODMIND = os.path.join(os.path.dirname(os.path.abspath(__file__)), "modmind")
_DEFAULT_BACKEND = "spikewhale" if os.path.isdir(_BUNDLED_MODMIND) else "bytegpt"
# True when the selected backend is SpikeWhale/ModMind. Evaluated once, at import time.
_SPIKEWHALE = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower() in ("spikewhale", "modmind")
_WARMED = {"done": False}  # so the "loading the models" notice only shows on the first run
# Footer text appended under the routing result; wording depends on the backend in use.
_FOOTER = (
    "Two ~80M dense specialists — 📖 Language (FineWeb-Edu) and ➗ Math (FineMath) — sharing a "
    "16k length-max tokenizer. A coordinator routes by bits-per-byte, and a trained RecursiveLink "
    "lets them communicate in latent space (proven in the Bridge tab). Hot-reloads checkpoints."
    if _SPIKEWHALE else
    "Three byte-level ~10M specialists, streamed-trained on FineWeb-Edu / FineMath / "
    "glaive-function-calling. Tiny + early-trained, so generations are rough — the routing "
    "(which expert is most confident) is the point. It hot-reloads as training continues."
)
def _get_moe():
    """Return the `get_moe` factory of the selected backend.

    The backend name comes from MM_MOE_BACKEND, falling back to `_DEFAULT_BACKEND`
    (the bundled SpikeWhale specialists when agents/modmind/ exists, otherwise the
    byte-level ByteGPT experts). For the SpikeWhale backend, MODMIND_DIR may point at
    a different bundle directory.
    """
    chosen = os.environ.get("MM_MOE_BACKEND", _DEFAULT_BACKEND).lower()
    if chosen not in ("spikewhale", "modmind"):
        from orchestrator import get_moe
        return get_moe

    bundle_dir = os.environ.get("MODMIND_DIR", _BUNDLED_MODMIND)
    if bundle_dir and bundle_dir not in sys.path:
        # front: ModMind's model.py wins over agents/model.py
        sys.path.insert(0, bundle_dir)
    from moe_gradio import get_moe
    return get_moe
# ---- HTML rendering -------------------------------------------------------------
# Inline stylesheet that _wrap() prepends to every rendered fragment. All selectors are
# scoped under the .mmx root class; the two @keyframes drive the bar-fill and caret-blink
# animations referenced by .fill and .caret.
_CSS = """<style>
.mmx{font-family:system-ui,sans-serif;color:#dde;margin:4px 0}
.mmx .note{background:#14141c;border:1px solid #2a2a35;border-radius:10px;padding:12px 14px;color:#9bd;font-size:14px}
.mmx .h{font-size:17px;font-weight:800;margin:4px 0 8px}
.mmx .p{color:#8892a8}
.mmx .g{color:#eef2ff;font-weight:600}
.mmx .cards{display:flex;gap:10px;flex-wrap:wrap;margin:6px 0}
.mmx .card{flex:1;min-width:210px;background:#14141c;border:1px solid #2a2a35;border-radius:12px;padding:11px 13px;position:relative;overflow:hidden}
.mmx .card .nm{font-weight:800;font-size:15px}
.mmx .card .meta{color:#99a;font-size:11px;margin-top:2px}
.mmx .card .bar{height:10px;background:#23232e;border-radius:6px;margin-top:8px;overflow:hidden}
.mmx .card .fill{height:100%;border-radius:6px;animation:mmxw .7s ease}
.mmx .card .pct{font-size:12px;color:#bcd;margin-top:4px}
.mmx .badge{position:absolute;top:9px;right:10px;font-size:10px;font-weight:800;letter-spacing:.08em;padding:3px 8px;border-radius:99px;color:#0a1410}
@keyframes mmxw{from{width:0}}
.mmx .lat{display:flex;gap:2px;align-items:center;height:30px;background:#101018;border:1px solid #23232e;border-radius:8px;padding:3px 6px;margin:8px 0 2px}
.mmx .lat i{flex:1;border-radius:2px}
.mmx .cap{color:#778;font-size:11px;margin:2px 0 8px}
.mmx .gen{background:#101018;border:1px solid #2a2a35;border-radius:12px;padding:13px 15px;margin:10px 0;font-size:15px;line-height:1.6}
.mmx .caret{display:inline-block;width:9px;height:17px;border-radius:2px;background:#7ad1ff;margin-left:2px;vertical-align:text-bottom;animation:mmxb .8s steps(1) infinite}
@keyframes mmxb{50%{opacity:0}}
.mmx .stats{display:flex;gap:10px;flex-wrap:wrap;margin:10px 0}
.mmx .stat{flex:1;min-width:130px;text-align:center;background:#14141c;border:1px solid #2a2a35;border-radius:12px;padding:13px 8px}
.mmx .stat .v{font-size:30px;font-weight:800;line-height:1}
.mmx .stat .l{font-size:11px;color:#99a;margin-top:6px}
.mmx .krow{display:flex;gap:3px;align-items:center;margin:4px 0;flex-wrap:wrap}
.mmx .kc{width:27px;height:27px;border-radius:6px;display:inline-flex;align-items:center;justify-content:center;font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace;font-weight:700;font-size:14px}
.mmx .kc.k{background:#23232e;color:#aab}
.mmx .kc.g{background:#1f8a55;color:#fff}
.mmx .kc.r{background:#8a2f3d;color:#fff;opacity:.92}
.mmx .arr{color:#667;margin:0 8px;font-size:15px}
.mmx .klbl{min-width:240px;color:#99a;font-size:12px;text-align:right;margin-right:10px}
.mmx .duo{display:flex;gap:10px;flex-wrap:wrap;margin:8px 0}
.mmx .duo>div{flex:1;min-width:280px;background:#101018;border:1px solid #2a2a35;border-radius:12px;padding:12px 14px;font-size:14.5px;line-height:1.6}
.mmx .duo .hd{font-weight:800;font-size:13px;margin-bottom:7px}
.mmx .duo .with{border-color:#2e7d5b;box-shadow:0 0 12px rgba(46,204,113,.12)}
.mmx .mix{height:12px;border-radius:7px;background:linear-gradient(90deg,#6aa9ff,#58d68d);position:relative;margin:12px 2px 4px}
.mmx .mix b{position:absolute;top:-4px;width:4px;height:20px;border-radius:2px;background:#fff;box-shadow:0 0 8px #fff}
.mmx .sub{color:#889;font-size:12px;line-height:1.5;margin-top:8px}
</style>"""
def _wrap(body):
    """Prefix the shared stylesheet and enclose `body` (an HTML string) in the .mmx root div."""
    pieces = (_CSS, "<div class='mmx'>", body, "</div>")
    return "".join(pieces)
def _esc(s):
    """HTML-escape `s` (None/empty becomes "") and turn newlines into <br> tags."""
    text = s if s else ""
    escaped = _h.escape(text)
    return "<br>".join(escaped.split("\n"))
def _notice(action="Generating"):
    """Return the placeholder HTML a handler yields before doing any work.

    Once a handler has completed (`_WARMED["done"]` is set) this is just
    "⏳ <action>…". Before that it also raises a gr.Info toast and returns a longer
    message warning that the first run is slow.
    """
    if _WARMED["done"]:
        return _wrap(f"<div class='note'>⏳ {action}…</div>")

    # The toast is best-effort: any failure raising it is ignored so the in-place
    # message below is still shown.
    try:
        gr.Info("First run — loading the models (~20–40s on CPU). After this, it's quick.")
    except Exception:
        pass
    first_run = (f"<div class='note'>⏳ Loading the ~80M specialists + {action.lower()}… "
                 "first run can take ~20–40s on CPU; every run after is fast.</div>")
    return _wrap(first_run)
def _msg(title, body):
    """Render a note box with a bold `title` line above `body`; both are inserted as raw HTML."""
    note = f"<div class='note'><b>{title}</b><br>{body}</div>"
    return _wrap(note)
def _cards(winner, weights, bits, steps):
    """Render one card per expert in `weights`.

    Each card shows the expert label, its train-step count (0 when absent from
    `steps`), its bits/byte from `bits`, and a bar sized by the routing weight. The
    card whose key equals `winner` gets a coloured border/glow and a ROUTED badge.
    """
    def render(name, weight):
        colour = COLOR.get(name, "#9b59b6")
        if name == winner:
            style = f"border-color:{colour};box-shadow:0 0 16px {colour}40"
            badge = f"<span class='badge' style='background:{colour}'>ROUTED ✓</span>"
        else:
            style = ""
            badge = ""
        return (
            f"<div class='card' style='{style}'>{badge}"
            f"<div class='nm' style='color:{colour}'>{EMOJI.get(name, name)}</div>"
            f"<div class='meta'>{steps.get(name, 0):,} train steps · {bits[name]:.2f} bits/byte (lower = more fluent)</div>"
            f"<div class='bar'><div class='fill' style='width:{weight*100:.1f}%;background:{colour}'></div></div>"
            f"<div class='pct'>routing weight {weight*100:.1f}%</div></div>")

    cards = [render(name, weight) for name, weight in weights.items()]
    return "<div class='cards'>" + "".join(cards) + "</div>"
def _latent(shared, n=48):
    """Render the shared latent bus as a strip of signed bars.

    Args:
        shared: Iterable of numeric latent values, or None. Array-like objects whose
            truth value is ambiguous are accepted because only `None` is treated as
            "no latent".
        n: Maximum number of leading dimensions to draw.

    Returns:
        An HTML fragment (strip + caption), or "" when there is nothing to draw.
        Bar heights are scaled to the largest magnitude shown, with an 8% floor so
        near-zero values stay visible. The caption reports how many dimensions are
        shown out of the latent's actual length.
    """
    full = [] if shared is None else list(shared)
    vals = full[:n]
    if not vals:
        return ""
    # Guard against division by zero when every displayed value is 0.
    mx = max(1e-6, max(abs(v) for v in vals))
    cells = "".join(
        f"<i style='height:{max(8.0, abs(v) / mx * 100):.0f}%;"
        f"background:{'#5bbcdf' if v >= 0 else '#df7a5b'}'></i>" for v in vals)
    return (f"<div class='lat'>{cells}</div>"
            f"<div class='cap'>the shared latent bus — every expert's output latent, fused by the "
            f"RecursiveLink (first {len(vals)} of {len(full)} dims; blue = +, orange = −)</div>")
def _gen_box(prompt, gen, live=False):
    """Render the prompt and its generated continuation; `live` appends a blinking caret."""
    parts = [
        "<div class='gen'>",
        f"<span class='p'>{_esc(prompt)}</span>",
        f"<span class='g'>{_esc(gen)}</span>",
    ]
    if live:
        parts.append("<span class='caret'></span>")
    parts.append("</div>")
    return "".join(parts)
def _key_rows(examples):
    """Render one per-character diff row for each (key, recovered, ok) triple.

    Every character of the key gets a neutral tile, followed by an arrow and one tile
    per key position for the recovered text: green when the character matches, red
    otherwise, and a "·" placeholder where the recovered text is too short. Rows
    whose `ok` flag is truthy end with a check mark.
    """
    rendered = []
    for secret, recovered, exact in examples:
        secret_tiles = []
        guess_tiles = []
        for pos, ch in enumerate(secret):
            secret_tiles.append(f"<span class='kc k'>{_h.escape(ch)}</span>")
            if pos < len(recovered):
                css = "g" if recovered[pos] == ch else "r"
                shown = _h.escape(recovered[pos])
            else:
                css = "r"
                shown = "·"
            guess_tiles.append(f"<span class='kc {css}'>{shown}</span>")
        tick = " ✅" if exact else ""
        rendered.append(
            "<div class='krow'>" + "".join(secret_tiles)
            + "<span class='arr'>→</span>" + "".join(guess_tiles)
            + tick + "</div>")
    return "".join(rendered)
def _char_acc(examples):
    """Return the fraction of key characters reproduced at the same position.

    `examples` holds (key, recovered, ok) triples; the third item is ignored. Positions
    past the end of the recovered text count as misses. With no characters at all the
    result is 0.
    """
    matches = [
        pos < len(recovered) and recovered[pos] == ch
        for key, recovered, _ in examples
        for pos, ch in enumerate(key)
    ]
    return sum(matches) / max(1, len(matches))
| # ---- handlers --------------------------------------------------------------------- | |
def moe_run(query, max_new):
    """Route `query` to the most fluent expert and stream its generation as HTML.

    Generator handler for the routing UI. It first yields a loading notice. If the
    backend reports no experts it yields a help message and stops. Otherwise it yields
    one frame per item produced by `moe.generate_stream` (when the backend has it),
    then a final frame without the caret and with the footer appended.

    Args:
        query: Prompt text; None or blank falls back to "The".
        max_new: Number of tokens to generate (coerced with int()).
    """
    yield _notice("Routing & generating")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not moe.available():
        if _SPIKEWHALE:
            # The <domain> placeholder is entity-escaped: written literally it would be
            # parsed as an unknown HTML element and disappear from the rendered message.
            yield _msg("⏳ No SpikeWhale experts found",
                       "Set <code>MODMIND_DIR</code> to your ModMind folder and make sure "
                       "<code>&lt;domain&gt;/checkpoints/step_*.pt</code> exist (the panel hot-reloads them).")
        else:
            yield _msg("⏳ No experts trained yet",
                       "Run <code>python agents/train.py --expert language</code> (and <code>math</code>, <code>tool</code>).")
        return
    q = (query or "").strip() or "The"
    winner, weights, bits = moe.route(q)
    _, shared = moe.shared_latent(q)
    # Backends without a `steps` attribute (or with a falsy one) show 0 steps per card.
    steps = dict(getattr(moe, "steps", {}) or {})
    c = COLOR.get(winner, "#9b59b6")
    head = (f"<div class='h'>🧭 Routed to <span style='color:{c}'>{EMOJI.get(winner, winner)}</span>"
            f" — the expert most fluent on your text (lowest bits/byte) wins</div>"
            + _cards(winner, weights, bits, steps) + _latent(shared))
    gen = ""
    if hasattr(moe, "generate_stream"):  # live token streaming
        for _, gen in moe.generate_stream(q, winner, max_new=int(max_new)):
            yield _wrap(head + _gen_box(q, gen, live=True))
    else:
        r = moe.run(q, max_new=int(max_new))
        gen = r.get("generation", "")
    # A full run has completed, so later notices can skip the first-run warning.
    _WARMED["done"] = True
    yield _wrap(head + _gen_box(q, gen, live=False) + f"<div class='sub'>{_FOOTER}</div>")
def moe_key_recall(n):
    """THE PROOF: a random key shown only to the consultant; the asker reproduces it from the
    latent alone (with) vs ablated (without).

    Generator handler. Yields a loading notice, then either a "Bridge unavailable"
    message (when the backend lacks `key_recall_available` or it returns falsy) or a
    result page with three stat tiles and a per-character diff of the non-ablated run.

    Args:
        n: Number of keys to test (coerced with int()).
    """
    yield _notice("Running the proof")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "key_recall_available", lambda: False)():
        # The <asker>/<consultant> placeholders are entity-escaped: written literally they
        # would be parsed as unknown HTML elements and vanish from the displayed path.
        yield _msg("🔑 Bridge unavailable",
                   "Needs the <b>SpikeWhale</b> backend and a trained "
                   "<code>links/&lt;asker&gt;__from__&lt;consultant&gt;.pt</code> saved with the full asker.")
        return
    meta = moe.consult_meta()
    a = EMOJI.get(meta["asker"], meta["asker"])
    c = EMOJI.get(meta["consultant"], meta["consultant"])
    wr = moe.key_recall(n=int(n), ablate=False)  # with the latent
    ar = moe.key_recall(n=int(n), ablate=True)   # latent ablated
    _WARMED["done"] = True
    # Per-character accuracy (in percent) for the two runs.
    cw, ca = _char_acc(wr["examples"]) * 100, _char_acc(ar["examples"]) * 100
    stats = (
        "<div class='stats'>"
        f"<div class='stat' style='border-color:#2e7d5b;box-shadow:0 0 12px rgba(46,204,113,.12)'>"
        f"<div class='v' style='color:#58d68d'>{cw:.0f}%</div>"
        f"<div class='l'>secret characters recovered<br><b>WITH</b> the latent</div></div>"
        f"<div class='stat'><div class='v' style='color:#e07b8a'>{ca:.0f}%</div>"
        f"<div class='l'>recovered with the latent<br><b>CUT</b> (ablated to zero)</div></div>"
        f"<div class='stat'><div class='v' style='color:#99a'>1.6%</div>"
        f"<div class='l'>chance level<br>(1 in 62 per character)</div></div>"
        "</div>")
    yield _wrap(
        f"<div class='h'>🔑 {a} read {c}'s mind through the latent bridge</div>"
        f"<div class='sub'>A random secret key is shown <b>only to {c}</b>. {a} never sees it — "
        f"it must reproduce the key purely by reading {c}'s latent through the trained RecursiveLink.</div>"
        + stats
        + f"<div class='cap'>secret key (only {c} saw it) → what {a} recovered, character by character"
          f" · {wr['acc']*100:.0f}% of keys perfectly exact</div>"
        + _key_rows(wr["examples"])
        + "<div class='sub'>Cut the latent and recovery collapses to chance — that gap <i>is</i> the result: "
          "real information crossing between two models that were trained <b>separately, on different data</b>, "
          "and never met. Routing and generation are the supporting act.</div>")
def _tile_row(label, chars, classes):
    """Render a labelled row of character tiles.

    `chars` and `classes` are paired positionally (extra items on either side are
    dropped). An empty character is drawn as a "·" placeholder, anything else is
    HTML-escaped. `label` is inserted as-is.
    """
    tiles = []
    for ch, css in zip(chars, classes):
        shown = _h.escape(ch) if ch else "·"
        tiles.append(f"<span class='kc {css}'>{shown}</span>")
    row = "".join(tiles)
    return f"<div class='krow'><span class='klbl'>{label}</span>{row}</div>"
def moe_secret(secret):
    """Interactive bridge demo: relay the user's secret through the latent bridge.

    Generator handler. Yields a loading notice, then one of: a "Bridge unavailable"
    message (backend lacks `relay_secret`, or `key_recall_available` is missing/falsy),
    the backend's own error message for the input, or a result page with three tile
    rows: the secret, what was recovered with the latent, and what was recovered with
    the latent ablated.
    """
    yield _notice("Transmitting through the latent bridge")
    moe = _to_gpu(_get_moe()(DEVICE))
    has_relay = bool(getattr(moe, "relay_secret", None))
    if not has_relay or not getattr(moe, "key_recall_available", lambda: False)():
        yield _msg("📨 Bridge unavailable",
                   "Needs the <b>SpikeWhale</b> backend and a trained bridge saved with the full asker.")
        return

    meta = moe.consult_meta()
    a = EMOJI.get(meta["asker"], meta["asker"])
    c = EMOJI.get(meta["consultant"], meta["consultant"])

    with_latent = moe.relay_secret(secret, ablate=False)
    if with_latent.get("error"):
        yield _msg("📨 " + _h.escape(with_latent["error"]),
                   "Type exactly 6 characters, letters and digits only — e.g. <code>Xy9Qz2</code>.")
        return
    cut_latent = moe.relay_secret(secret, ablate=True)
    _WARMED["done"] = True

    s = with_latent["secret"]
    got = with_latent["recovered"]
    abl = cut_latent["recovered"]
    nok = sum(with_latent["ok"])

    def padded(text):
        # One cell per secret position; "" marks positions the recovered text lacks.
        return [text[i] if i < len(text) else "" for i in range(len(s))]

    def colours(flags):
        return ["g" if flag else "r" for flag in flags]

    rows = "".join((
        _tile_row(f"you told {c} (only {c} saw this):", list(s), ["k"] * len(s)),
        _tile_row(f"{a} read from {c}'s latent:", padded(got), colours(with_latent["ok"])),
        _tile_row("same question, latent cut:", padded(abl), colours(cut_latent["ok"])),
    ))

    if with_latent["aligned"]:
        align_note = ""
    else:
        align_note = (
            "<div class='sub'>⚠️ The tokenizer fused some of those characters into multi-character tokens "
            "the bridge never saw in training (it was trained on random-looking keys), so transmission "
            "degrades. Random-looking mixes of letters and digits — like <code>Xy9Qz2</code> — transmit best.</div>")

    headline = (
        f"<div class='h'>📨 {a} read your secret out of {c}'s mind — "
        f"{nok}/{len(s)} characters arrived intact</div>"
        f"<div class='sub'>{a} never saw your text. It answered one question — <i>“what did {c} just "
        f"see?”</i> — using only {c}'s latent, passed through the trained RecursiveLink.</div>")
    closing = (
        "<div class='sub'>The bridge is a noisy channel (~4–5 of 6 characters usually survive), but cut "
        "the latent and the answer collapses to gibberish — the content is genuinely crossing in latent "
        "space, never as text. Two models, trained separately on different data, sharing a thought.</div>")
    yield _wrap(headline + rows + align_note + closing)
def moe_ask(a, op, b):
    """The Q->A bridge: relay an arithmetic question through the latent and show the answer.

    Generator handler. Yields a loading notice, then one of: a "not trained yet"
    message (`qa_available` missing/falsy), an input-validation message (non-integer
    operands, or operands outside 2–12 for × / 10–99 for + and −), the backend's own
    error, or a result page comparing the answer digits with and without the latent.

    Args:
        a, b: Operands; coerced with int(). For subtraction they are swapped when
            a < b so the answer is non-negative.
        op: Operator symbol; "×"/"x" map to "*", "−" maps to "-".
    """
    yield _notice("Asking Math through the bridge")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "qa_available", lambda: False)():
        yield _msg("🧮 The question→answer bridge isn't trained yet",
                   "Run <code>python agents/modmind/train_qa_link.py</code> — the panel "
                   "hot-reloads the result as soon as a checkpoint is saved.")
        return

    # Normalise the display symbols from the dropdown to the ASCII operators.
    symbol = str(op)
    op = {"×": "*", "−": "-", "x": "*"}.get(symbol, symbol)
    try:
        a, b = int(a), int(b)
    except (TypeError, ValueError):
        yield _msg("🧮 Need two whole numbers", "Pick a and b first.")
        return

    def both_within(lo, hi):
        return lo <= a <= hi and lo <= b <= hi

    if op == "*" and not both_within(2, 12):
        yield _msg("🧮 Outside the trained range", "Multiplication was trained on 2–12 × 2–12.")
        return
    if op in ("+", "-") and not both_within(10, 99):
        yield _msg("🧮 Outside the trained range", "Addition and subtraction were trained on 10–99.")
        return
    if op == "-" and a < b:
        a, b = b, a  # trained on non-negative answers

    with_latent = moe.ask_math(a, op, b)
    if with_latent.get("error"):
        yield _msg("🧮 " + _h.escape(with_latent["error"]), "Try a different problem.")
        return
    cut_latent = moe.ask_math(a, op, b, ablate=True)
    _WARMED["done"] = True

    info = moe.qa_info() or {}
    A = EMOJI.get(info.get("asker", "language"), "📖 Language")
    C = EMOJI.get(info.get("consultant", "math"), "➗ Math")
    acc = info.get("holdout_exact", float("nan")) * 100
    mode = info.get("mode", "memorize")
    opd = {"+": "+", "-": "−", "*": "×"}[op]
    if with_latent["exact"]:
        verdict = "✅ correct"
    else:
        verdict = f"❌ not quite (it's {with_latent['truth']})"

    if mode != "memorize":
        scorecard = (
            f"Honest scorecard: this bridge solves <b>{acc:.0f}%</b> of problems it has <i>never seen "
            f"in training</i> exactly (held-out validation — generalization). {C} stays frozen; the "
            f"arithmetic skill lives in the bridge + {A}'s fine-tune, and the question only ever "
            f"travels in latent space. Cut the latent and {A} has no question at all.")
    else:
        scorecard = (
            f"Honest scorecard: this bridge was trained on the <b>whole</b> "
            f"table of two-digit problems (10–99 for + and −, 2–12 for ×) and answers "
            f"<b>~{acc:.0f}%</b> of them correctly. It's a <i>lookup table transmitted through the "
            f"latent</i>, not learned arithmetic — {C} stays frozen and never computes; the bridge + "
            f"{A}'s fine-tune memorized every answer and the question only ever travels in latent "
            f"space. Cut the latent and {A} has no question at all.")

    want = with_latent["want"]
    width = len(want)

    def padded(digits):
        # One cell per expected digit; "" marks positions the model's output lacks.
        return [digits[i] if i < len(digits) else "" for i in range(width)]

    def colours(flags):
        return ["g" if flag else "r" for flag in flags]

    rows = "".join((
        _tile_row("the right answer (never shown to anyone):", list(want), ["k"] * width),
        _tile_row(f"{A} answered, reading {C}'s latent:",
                  padded(with_latent["digits"]), colours(with_latent["ok"])),
        _tile_row("same prompt, latent cut:",
                  padded(cut_latent["digits"]), colours(cut_latent["ok"])),
    ))

    headline = (
        f"<div class='h'>🧮 Only {C} saw <code>{a} {opd} {b}</code> — "
        f"{A} answered <b>{_h.escape(with_latent['answer'])}</b> · {verdict}</div>"
        f"<div class='sub'>{A}'s entire input was the prompt <code>ANS></code>. The question "
        f"existed only in {C}'s mind — it crossed to {A} as a 256-dim latent through a RecursiveLink "
        f"trained for question→answer (zero-padded to {width} digits).</div>")
    yield _wrap(headline + rows + f"<div class='sub'>{scorecard}</div>")
def moe_combine(query, max_new, blend, consult):
    """Compare two blends at the same mix ratio: weight-merge vs output-blend.

    Generator handler. Yields a loading notice, then either an "Unavailable" message
    (`merge_available` missing/falsy) or a side-by-side page with the text from
    `moe.merge_generate` and from `moe.combine`.

    Args:
        query: Prompt; None or blank falls back to "The water cycle works by".
        max_new: Tokens to generate (coerced with int()).
        blend: Mix ratio, coerced with float(); the caption shows it as the Math share.
        consult: Truthy to pass consult=True to both backend calls.
    """
    yield _notice("Building merge + blending")
    moe = _to_gpu(_get_moe()(DEVICE))
    if not getattr(moe, "merge_available", lambda: False)():
        yield _msg("🧬 Unavailable", "Needs both specialists loaded.")
        return

    q = (query or "").strip() or "The water cycle works by"
    alpha = float(blend)
    budget = int(max_new)
    use_consult = bool(consult)
    merged_gen = moe.merge_generate(q, alpha=alpha, max_new=budget, consult=use_consult)
    blend_gen = moe.combine(q, max_new=budget, blend=alpha, consult=use_consult)
    _WARMED["done"] = True

    extra = " · +Reasoning's latent (consult)" if use_consult else ""
    language_share = int(round((1 - alpha) * 100))
    math_share = int(round(alpha * 100))
    sections = [
        "<div class='h'>🧬 MoE Modular Minds — two ways to blend</div>",
        # Marker position on the gradient bar tracks the mix ratio.
        f"<div class='mix'><b style='left:calc({alpha*100:.0f}% - 2px)'></b></div>",
        f"<div class='cap'>{language_share}% 📖 Language ⟷ "
        f"{math_share}% ➗ Math{extra}</div>",
        "<div class='duo'>",
        "<div><div class='hd' style='color:#bfa8ff'>① Weight merge — ONE model whose weights are "
        "(1−α)·Language + α·Math</div>"
        f"<span class='p'>{_esc(q)}</span> <span class='g'>{_esc(merged_gen)}</span></div>",
        "<div><div class='hd' style='color:#8fd3c7'>② Output blend — both models run, next-token "
        "distributions averaged each step</div>"
        f"<span class='p'>{_esc(q)}</span> <span class='g'>{_esc(blend_gen)}</span></div>",
        "</div>",
        "<div class='sub'>Same mix ratio, two different mechanisms. <b>Weight merge</b> fuses the actual "
        "parameters into one network (only possible because they're the identical dense architecture); "
        "<b>output blend</b> is an inference-time ensemble of two separate models (only possible because "
        "they share the 16k tokenizer). Tick <i>consult</i> to also route Reasoning's latent into each "
        "through the trained bridge. Exploratory — generations are rough at this scale.</div>",
    ]
    yield _wrap("".join(sections))
# Markdown shown at the top of the SpikeWhale accordion (see build_moe_panel).
HERO = """# 🧩 Modular Mind — two specialists that talk in latent space
**Two ~80M models trained completely separately** — 📖 **Language** on FineWeb-Edu, ➗ **Math** on
FineMath — that never saw each other's data. A coordinator **routes** your query to the right one,
and a trained **RecursiveLink** lets them **communicate through latent space**: Language can read
information straight out of Math's "mind." The **🔑 Bridge** tab proves it.
> ℹ️ *These specialists were trained only to demonstrate a **verifiable result** — clean routing and a
> provable latent-bridge ablation — **not** for production-quality output. The generated text is
> intentionally rough at this scale; the mechanism is the point.*"""
# Intro markdown for the "Ask Math a question" tab.
QA_INTRO = """### Ask ➗ Math a question — 📖 Language answers it without ever seeing it
Pick an arithmetic problem. It is shown **only to ➗ Math** (which stays frozen). 📖 Language
receives nothing but Math's 256-dim latent, passed through a RecursiveLink trained for
**question→answer** — and types out the answer digits. Language's only text input is the prompt
`ANS>`; the question itself crosses purely as a latent. The bridge has **memorized the whole table**
of two-digit problems (a lookup table transmitted through latent space, not learned arithmetic) —
cut the latent and Language has no question at all."""
# Intro markdown for the "Tell Math a secret" tab.
SECRET_INTRO = """### Tell ➗ Math a secret — then watch 📖 Language read it out of Math's mind
Type a 6-character code. It is shown **only to ➗ Math** — 📖 Language never sees your text.
Language must answer one question: *“what did Math just see?”* — reading **only Math's latent**
through the trained RecursiveLink. No text crosses between the models; the content arrives in
latent space, legibly, character by character. (The channel is noisy — random-looking mixes of
letters and digits transmit best.)"""
# Intro markdown for the "latent bridge — the proof" tab.
BRIDGE_INTRO = """### The proof: two independent models, one latent channel
A random secret key is shown **only to ➗ Math**. 📖 Language never sees it — but by reading Math's
latent through the trained RecursiveLink, it **reproduces the key, character by character**. Zero out
the latent and it collapses to chance. That gap *is* the result: real information crossing between two
models that were trained on different data and never met. **Hit the button.**"""
# Intro markdown for the byte-level (non-SpikeWhale) backend's accordion.
INTRO_BYTE = """## 🧩 Experiment — Modular Mind as a Mixture of Experts
Three tiny ~10M byte-level specialists (language, math, tool-use), each streamed-trained on its own
dataset. A coordinator **routes** your query to whichever expert is most fluent (perplexity-based MoE)
and fuses their latents through a **RecursiveLink**. Try a math problem vs. a sentence."""
def _routing_block():
    """Create the routing widgets (prompt, token slider, button, HTML output, examples)
    in the current Gradio layout context and wire the button to `moe_run`."""
    # NOTE(review): the prompt and slider are assumed to share one row (they carry
    # scale= hints); confirm the nesting against the rendered layout.
    with gr.Row():
        q = gr.Textbox(label="Your prompt", value="Solve for x: 2x + 3 = 11",
                       scale=4, placeholder="a sentence or a math problem…")
        n = gr.Slider(40, 300, value=80, step=20, label="generate tokens", scale=1)
    btn = gr.Button("🧭 Route & generate", variant="primary")
    out = gr.HTML()
    # moe_run is a generator, so each yielded frame replaces the HTML output in turn.
    btn.click(moe_run, [q, n], out)
    gr.Examples(examples=[["The theory of evolution explains", 80],
                          ["Compute the derivative of x^2 + 3x", 80],
                          ["The history of the Roman Empire began", 80]],
                inputs=[q, n])
def build_moe_panel():
    """Create the MoE demo components inside the current gr.Blocks context.

    With the byte-level backend this is a single collapsed accordion holding the
    routing block. With the SpikeWhale backend it is an open accordion with the hero
    text and five tabs, each wiring one button to its generator handler.
    """
    # NOTE(review): row/tab nesting below is inferred from the scale= hints and the
    # order in which components are created; confirm against the rendered layout.
    if not _SPIKEWHALE:
        with gr.Accordion("🧩 Experiment: Modular Mind = Mixture of Experts (3 specialists)", open=False):
            gr.Markdown(INTRO_BYTE)
            _routing_block()
        return
    with gr.Accordion("🧩 Modular Mind — independent specialists communicating in latent space", open=True):
        gr.Markdown(HERO)
        with gr.Tabs():
            # The headline result, FIRST.
            with gr.Tab("🔑 The latent bridge — the proof"):
                gr.Markdown(BRIDGE_INTRO)
                with gr.Row():
                    kn = gr.Slider(4, 16, value=8, step=1, label="keys to test", scale=3)
                    kbtn = gr.Button("🔑 Run the proof", variant="primary", scale=1)
                kout = gr.HTML()
                kbtn.click(moe_key_recall, [kn], kout)
            # Interactive: the user's own secret crosses the bridge.
            with gr.Tab("📨 Tell Math a secret"):
                gr.Markdown(SECRET_INTRO)
                with gr.Row():
                    sq = gr.Textbox(label="Your 6-character secret (letters & digits)",
                                    value="Xy9Qz2", max_length=12, scale=3)
                    sbtn = gr.Button("📨 Show it ONLY to Math → let Language read it",
                                     variant="primary", scale=2)
                sout = gr.HTML()
                sbtn.click(moe_secret, [sq], sout)
                gr.Examples(examples=[["Xy9Qz2"], ["Tk7Bn2"], ["q0t0Mz"], ["gG5hH6"]], inputs=[sq])
            # Q->A: Language answers a question only Math ever saw.
            with gr.Tab("🧮 Ask Math a question"):
                gr.Markdown(QA_INTRO)
                with gr.Row():
                    qa_a = gr.Number(value=23, precision=0, label="a", scale=1)
                    # The display symbols are mapped to ASCII operators inside moe_ask.
                    qa_op = gr.Dropdown(["+", "−", "×"], value="+", label="op", scale=1)
                    qa_b = gr.Number(value=54, precision=0, label="b", scale=1)
                    qa_btn = gr.Button("🧮 Show ONLY Math the question → Language answers",
                                       variant="primary", scale=2)
                qa_out = gr.HTML()
                qa_btn.click(moe_ask, [qa_a, qa_op, qa_b], qa_out)
                gr.Examples(examples=[[23, "+", 54], [81, "−", 27], [7, "×", 8], [62, "+", 39]],
                            inputs=[qa_a, qa_op, qa_b])
            # Routing — the supporting act.
            with gr.Tab("🧭 Routing & generation"):
                gr.Markdown("Type a math problem vs. a sentence and watch the **route flip** — each "
                            "expert is most fluent (lowest bits/byte) on its own domain. Generation "
                            "streams in live.")
                _routing_block()
            # MoE Modular Minds — TWO ways to blend the specialists, compared side by side.
            with gr.Tab("🧬 MoE Modular Minds"):
                gr.Markdown(
                    "**Two ways to blend the two specialists**, shown side by side at the same mix ratio:\n"
                    "- **① Weight merge** — fuse the *parameters* into one model `(1-α)·Language + α·Math` "
                    "(works because they're the identical dense architecture).\n"
                    "- **② Output blend** — run both models separately and average their next-token "
                    "distributions (works because they share the 16k tokenizer).\n\n"
                    "Slide the mix, and tick *consult* to also route Reasoning's latent into each through the "
                    "trained bridge.")
                with gr.Row():
                    mq = gr.Textbox(label="Prompt", value="The water cycle works by", scale=4)
                    mn = gr.Slider(40, 160, value=70, step=10, label="generate tokens", scale=1)
                with gr.Row():
                    mblend = gr.Slider(0.0, 1.0, value=0.5, step=0.1,
                                       label="mix α: 0 = 📖 Language ⟷ 1 = ➗ Math", scale=3)
                    mconsult = gr.Checkbox(value=False, label="consult (inject Reasoning's latent)", scale=1)
                mbtn = gr.Button("🧬 Blend both ways (weight-merge vs output-blend)", variant="primary")
                mout = gr.HTML()
                mbtn.click(moe_combine, [mq, mn, mblend, mconsult], mout)