Spaces:
Running
Running
<!-- The doctype keeps browsers in standards mode; without it the page is laid out in quirks mode. -->
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>External Grounding · Second Loop</title>
<!-- Warm up the two Google Fonts origins before the stylesheet request below. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Playfair+Display:ital,wght@1,700&amp;family=Inter:wght@400;500;700;800&amp;family=JetBrains+Mono:wght@500;700&amp;display=swap" rel="stylesheet">
<!-- Loaded synchronously on purpose: the inline script at the end of the body calls Plotly. -->
<script src="https://cdn.plot.ly/plotly-2.35.2.min.js" charset="utf-8"></script>
| <style> | |
| :root{ | |
| --bg:#000000; --bg-card:#0A0A0A; --bg-elev:#141414; | |
| --border:#1F1F1F; --border-strong:#2A2A2A; | |
| --text:#FFFFFF; --text-mute:#A8A8A8; --text-dim:#6B6B6B; | |
| --gold:#D4AF37; --gold-hi:#E8C84A; | |
| --red:#FF2A2A; --green:#1FD160; --orange:#FF8C42; --yellow:#E8C84A; | |
| --mono:'JetBrains Mono',ui-monospace,monospace; | |
| --serif:'Playfair Display',serif; | |
| --sans:'Inter',system-ui,sans-serif; | |
| } | |
| *{box-sizing:border-box;} | |
| html,body{margin:0;background:var(--bg);color:var(--text);font-family:var(--sans);} | |
| .wrap{max-width:1180px;margin:0 auto;padding:22px 24px 64px 24px;} | |
| a{color:var(--gold);text-decoration:none;border-bottom:1px dotted var(--gold);} | |
| a:hover{color:var(--gold-hi);} | |
| /* ---------- header ---------- */ | |
| .head{border:1px solid var(--border-strong);border-radius:14px; | |
| padding:26px 30px 22px 30px;background:linear-gradient(180deg,#0A0A0A 0%,#050505 100%);} | |
| .head-top{display:flex;align-items:flex-start;justify-content:space-between;gap:24px; | |
| padding-bottom:16px;border-bottom:1px solid var(--border);margin-bottom:16px;} | |
| .head-brand{display:flex;align-items:center;gap:16px;} | |
| .head-icon{width:52px;height:52px;border:1px solid var(--border-strong);border-radius:12px; | |
| display:flex;align-items:center;justify-content:center; | |
| background:radial-gradient(60% 60% at 50% 40%,#1A1A1A 0%,#050505 100%);} | |
| .head-title{font-family:var(--serif);font-style:italic;font-weight:700;font-size:34px; | |
| color:var(--text);line-height:1;margin:2px 0 7px 0;} | |
| .head-subtitle{font-family:var(--mono);font-size:11.5px;letter-spacing:.14em; | |
| color:var(--text-mute);text-transform:uppercase;} | |
| .head-right{text-align:right;white-space:nowrap;} | |
| .submitted-label{font-family:var(--mono);font-size:10px;letter-spacing:.22em; | |
| color:var(--text-dim);text-transform:uppercase;display:block;margin-bottom:4px;} | |
| .submitted-name{font-family:var(--serif);font-style:italic;font-weight:700;font-size:20px;color:var(--text);} | |
| .status-pill{display:inline-flex;align-items:center;gap:6px;margin-top:10px;padding:5px 12px; | |
| border-radius:999px;background:rgba(31,209,96,.08);border:1px solid rgba(31,209,96,.5); | |
| font-family:var(--mono);font-size:10px;letter-spacing:.18em;color:var(--green);text-transform:uppercase;} | |
| .status-dot{width:7px;height:7px;border-radius:50%;background:var(--green);} | |
| .head-tag{text-align:center;margin:16px 0 0 0; | |
| font-family:var(--mono);font-size:11px;letter-spacing:.22em;color:var(--gold);text-transform:uppercase;} | |
| .head-meta{display:grid;grid-template-columns:repeat(4,1fr);gap:12px 24px;margin-top:16px;} | |
| .head-meta .item{display:flex;flex-direction:column;gap:3px;} | |
| .head-meta .k{font-family:var(--mono);font-size:9.5px;letter-spacing:.2em;color:var(--text-dim);text-transform:uppercase;} | |
| .head-meta .v{font-family:var(--sans);font-size:13.5px;font-weight:700;color:var(--text);} | |
| /* ---------- three honest badges ---------- */ | |
| .badges{display:grid;grid-template-columns:repeat(3,1fr);gap:12px;margin:18px 0 4px 0;} | |
| .badge{background:var(--bg-card);border:1px solid var(--border-strong);border-radius:12px;padding:16px 20px;} | |
| .badge .label{font-family:var(--mono);font-size:9.5px;letter-spacing:.2em;color:var(--text-dim); | |
| text-transform:uppercase;margin-bottom:8px;} | |
| .badge .value{font-family:var(--serif);font-style:italic;font-weight:700;font-size:26px;color:var(--text);} | |
| .badge .sub{font-family:var(--mono);font-size:10px;letter-spacing:.1em;color:var(--text-mute); | |
| text-transform:uppercase;margin-top:4px;} | |
| .badge.red{border-color:rgba(255,42,42,.5);} .badge.red .value{color:var(--red);} | |
| .badge.orange{border-color:rgba(255,140,66,.5);} .badge.orange .value{color:var(--orange);} | |
| .badge.green{border-color:rgba(31,209,96,.5);} .badge.green .value{color:var(--green);} | |
| /* ---------- exhibit (the museum lever) ---------- */ | |
| .exhibit{margin-top:18px;border:1px solid var(--border-strong);border-radius:14px; | |
| background:var(--bg-card);padding:22px 24px 26px 24px;} | |
| .exhibit-title{font-family:var(--serif);font-style:italic;font-weight:700;font-size:22px;margin:0 0 2px 0;} | |
| .exhibit-lede{font-family:var(--mono);font-size:11px;letter-spacing:.1em;color:var(--text-mute); | |
| text-transform:uppercase;margin-bottom:18px;} | |
| .exhibit-grid{display:grid;grid-template-columns:1fr 320px;gap:22px;align-items:stretch;} | |
| #chart{width:100%;height:430px;} | |
| .readout{border:1px solid var(--border-strong);border-radius:12px;background:#050505; | |
| padding:20px 22px;display:flex;flex-direction:column;gap:2px;transition:border-color 160ms ease;} | |
| .readout .ro-label{font-family:var(--mono);font-size:9.5px;letter-spacing:.2em;color:var(--text-dim);text-transform:uppercase;} | |
| .readout .ro-name{font-family:var(--serif);font-style:italic;font-weight:700;font-size:25px;line-height:1.08;margin:4px 0 2px 0;} | |
| .readout .ro-stageno{font-family:var(--mono);font-size:10.5px;letter-spacing:.08em;color:var(--text-mute);text-transform:uppercase;} | |
| .ro-pct-label{font-family:var(--mono);font-size:9.5px;letter-spacing:.2em;color:var(--text-dim); | |
| text-transform:uppercase;margin-top:20px;} | |
| .ro-pct{font-family:var(--serif);font-style:italic;font-weight:700;font-size:56px;line-height:1;color:var(--green);transition:color 160ms ease;} | |
| .ro-count{font-family:var(--mono);font-size:12px;letter-spacing:.06em;color:var(--text-mute);margin-top:2px;} | |
| .ro-tag{align-self:flex-start;margin-top:10px;padding:4px 10px;border-radius:999px; | |
| font-family:var(--mono);font-size:9.5px;letter-spacing:.16em;text-transform:uppercase; | |
| border:1px solid var(--border-strong);color:var(--text-mute);} | |
| .ro-tag.red{background:rgba(255,42,42,.1);border-color:rgba(255,42,42,.55);color:var(--red);} | |
| .ro-tag.orange{background:rgba(255,140,66,.1);border-color:rgba(255,140,66,.55);color:var(--orange);} | |
| .ro-tag.green{background:rgba(31,209,96,.1);border-color:rgba(31,209,96,.55);color:var(--green);} | |
| .ro-aux{margin-top:18px;border-top:1px solid var(--border);padding-top:14px;display:flex;flex-direction:column;gap:9px;} | |
| .ro-aux .row{display:flex;align-items:baseline;justify-content:space-between;gap:10px;} | |
| .ro-aux .rk{font-family:var(--mono);font-size:10px;letter-spacing:.06em;color:var(--text-dim);text-transform:uppercase;} | |
| .ro-aux .rv{font-family:var(--mono);font-size:13px;font-weight:700;} | |
| .rv .fx{color:var(--green);} .rv .bk{color:var(--red);} .rv.mute{color:var(--text-mute);} | |
| /* ---------- slider (the lever) ---------- */ | |
| .lever{margin-top:24px;} | |
| .lever-head{display:flex;align-items:baseline;justify-content:space-between;margin-bottom:14px;} | |
| .lever-head .t{font-family:var(--mono);font-size:11px;letter-spacing:.18em;color:var(--text);text-transform:uppercase;} | |
| .lever-head .h{font-family:var(--mono);font-size:10px;letter-spacing:.1em;color:var(--text-dim);text-transform:uppercase;} | |
| .slider-area{position:relative;padding:0 4px;} | |
| input[type=range].lever-input{ | |
| -webkit-appearance:none;appearance:none;width:100%;height:6px;border-radius:999px;margin:0; | |
| background:linear-gradient(90deg,var(--red) 0%,var(--orange) 45%,var(--yellow) 80%,var(--green) 100%); | |
| outline:none;cursor:pointer;} | |
| input[type=range].lever-input::-webkit-slider-thumb{ | |
| -webkit-appearance:none;appearance:none;width:26px;height:26px;border-radius:50%; | |
| background:#0A0A0A;border:2px solid var(--gold);box-shadow:0 0 0 4px rgba(212,175,55,.18); | |
| cursor:grab;margin-top:-10px;transition:border-color 120ms ease,box-shadow 120ms ease;} | |
| input[type=range].lever-input::-webkit-slider-thumb:active{cursor:grabbing;} | |
| input[type=range].lever-input::-moz-range-thumb{width:26px;height:26px;border-radius:50%; | |
| background:#0A0A0A;border:2px solid var(--gold);box-shadow:0 0 0 4px rgba(212,175,55,.18);cursor:grab;} | |
| input[type=range].lever-input::-moz-range-track{height:6px;border-radius:999px;background:transparent;} | |
| input[type=range].lever-input:focus-visible::-webkit-slider-thumb{box-shadow:0 0 0 4px rgba(212,175,55,.20),0 0 0 7px var(--gold-hi);} | |
| input[type=range].lever-input:focus-visible::-moz-range-thumb{box-shadow:0 0 0 4px rgba(212,175,55,.20),0 0 0 7px var(--gold-hi);} | |
| .ticks{position:relative;height:10px;margin-top:9px;} | |
| .tick{position:absolute;top:0;width:1px;height:6px;background:var(--border-strong);transform:translateX(-50%);} | |
| .tick.on{background:var(--gold);height:9px;} | |
| .ticklabels{position:relative;height:32px;margin-top:2px;} | |
| .tlabel{position:absolute;top:0;font-family:var(--mono);font-size:9.5px;letter-spacing:.02em; | |
| color:var(--text-dim);white-space:nowrap;line-height:1.2;text-align:center;cursor:pointer; | |
| transition:color 120ms ease;} | |
| .tlabel .pct{display:block;font-size:8.5px;color:var(--text-dim);} | |
| .tlabel.on{color:var(--text);font-weight:700;} | |
| .tlabel.on .pct{color:var(--gold);} | |
| .lever-foot{margin-top:18px;display:flex;justify-content:space-between; | |
| font-family:var(--mono);font-size:10px;letter-spacing:.12em;color:var(--text-dim);text-transform:uppercase;} | |
| /* ---------- trap grid ---------- */ | |
| .grid-wrap{margin-top:22px;border:1px solid var(--border-strong);border-radius:14px;background:var(--bg-card);padding:20px 22px;} | |
| .grid-cap{display:flex;align-items:baseline;justify-content:space-between;gap:12px;margin-bottom:14px;flex-wrap:wrap;} | |
| .grid-cap .gc-t{font-family:var(--mono);font-size:10px;letter-spacing:.16em;color:var(--text-dim);text-transform:uppercase;} | |
| .grid-cap .gc-n{font-family:var(--mono);font-size:11px;letter-spacing:.08em;color:var(--text-mute);text-transform:uppercase;} | |
| .grid-cap .gc-n b{color:var(--green);} | |
| .chips{display:flex;flex-wrap:wrap;gap:8px;margin-bottom:14px;} | |
| .chip{cursor:pointer;border:1px solid var(--border-strong);background:#050505;border-radius:999px; | |
| padding:6px 13px;font-family:var(--mono);font-size:10px;letter-spacing:.08em;text-transform:uppercase; | |
| color:var(--text-mute);transition:border-color 120ms ease,color 120ms ease;} | |
| .chip:hover{border-color:var(--gold);color:var(--text);} | |
| .chip.active{border-color:var(--gold);color:var(--text);background:rgba(212,175,55,.07);} | |
| .grid12{display:grid;grid-template-columns:1fr;gap:10px;} | |
| @media(min-width:720px){.grid12{grid-template-columns:1fr 1fr;}} | |
| @media(min-width:1020px){.grid12{grid-template-columns:1fr 1fr 1fr;}} | |
| .gcard{cursor:pointer;border:1px solid var(--border-strong);border-radius:11px;background:#080808; | |
| padding:13px 15px;border-left-width:3px;transition:border-color 140ms ease,background 140ms ease;} | |
| .gcard:hover{border-color:var(--gold);} | |
| .gcard.sel{background:rgba(212,175,55,.06);} | |
| .gcard.now-correct{border-left-color:var(--green);} | |
| .gcard.now-wrong{border-left-color:var(--red);} | |
| .gc-head{display:flex;align-items:center;gap:8px;margin-bottom:8px;} | |
| .gc-id{font-family:var(--mono);font-size:10px;font-weight:700;color:var(--gold);letter-spacing:.08em;} | |
| .gc-cat{font-family:var(--mono);font-size:8.5px;letter-spacing:.1em;color:var(--text-dim);text-transform:uppercase; | |
| padding:2px 7px;border:1px solid var(--border-strong);border-radius:999px;} | |
| .gc-now{margin-left:auto;font-family:var(--mono);font-size:8.5px;letter-spacing:.1em;font-weight:700;text-transform:uppercase;} | |
| .gc-now.c{color:var(--green);} .gc-now.w{color:var(--red);} | |
| .gc-q{font-family:var(--sans);font-size:12.5px;line-height:1.45;color:var(--text-mute);margin-bottom:10px;min-height:36px;} | |
| .traj{display:flex;gap:5px;align-items:center;} | |
| .tdot{width:13px;height:13px;border-radius:50%;border:1px solid var(--border-strong);position:relative;} | |
| .tdot.c{background:rgba(31,209,96,.85);border-color:var(--green);} | |
| .tdot.w{background:rgba(255,42,42,.7);border-color:var(--red);} | |
| .tdot.cur{box-shadow:0 0 0 2px var(--gold);} | |
| .traj-axis{display:flex;gap:5px;margin-top:5px;} | |
| .traj-axis span{width:13px;font-family:var(--mono);font-size:7px;color:var(--text-dim);text-align:center;letter-spacing:0;} | |
| /* ---------- spotlight ---------- */ | |
| .spot{margin-top:22px;border:1px solid var(--border-strong);border-radius:14px;background:var(--bg-card);padding:22px 24px;} | |
| .spot-top{display:flex;align-items:baseline;gap:10px;flex-wrap:wrap;margin-bottom:6px;} | |
| .spot-id{font-family:var(--mono);font-size:11px;font-weight:700;color:var(--gold);letter-spacing:.1em;} | |
| .spot-cat{font-family:var(--mono);font-size:9px;letter-spacing:.14em;color:var(--text-dim);text-transform:uppercase; | |
| padding:2px 8px;border:1px solid var(--border-strong);border-radius:999px;} | |
| .spot-q{font-family:var(--serif);font-style:italic;font-weight:700;font-size:21px;line-height:1.32;margin:2px 0 16px 0;} | |
| .spot-cols{display:grid;grid-template-columns:1fr 1fr;gap:16px 28px;} | |
| @media(max-width:760px){.spot-cols{grid-template-columns:1fr;}} | |
| .row-block{margin-bottom:12px;} | |
| .row-block .label{font-family:var(--mono);font-size:9px;letter-spacing:.2em;color:var(--text-dim);text-transform:uppercase;margin-bottom:4px;} | |
| .row-block .value{font-family:var(--sans);font-size:13.5px;line-height:1.5;border-left:2px solid var(--border-strong);padding:4px 0 4px 12px;} | |
| .value.correct{border-left-color:var(--green);} | |
| .value.wrong{border-left-color:var(--red);color:var(--text-mute);} | |
| .value.final{border-left-color:var(--green);} | |
| .value.meta{border-left-color:var(--gold);color:var(--text-mute);font-family:var(--mono);font-size:12px;} | |
| /* trajectory strip in spotlight */ | |
| .spot-traj{margin-top:6px;} | |
| .spot-traj .label{font-family:var(--mono);font-size:9px;letter-spacing:.2em;color:var(--text-dim);text-transform:uppercase;margin-bottom:10px;} | |
| .straj{display:grid;grid-template-columns:repeat(6,1fr);gap:6px;} | |
| .scell{text-align:center;border:1px solid var(--border-strong);border-radius:9px;padding:9px 4px 7px 4px;background:#050505;} | |
| .scell.c{border-color:rgba(31,209,96,.45);} .scell.w{border-color:rgba(255,42,42,.45);} | |
| .scell.cur{box-shadow:0 0 0 2px var(--gold);} | |
| .scell .mk{font-family:var(--mono);font-size:15px;font-weight:700;line-height:1;} | |
| .scell.c .mk{color:var(--green);} .scell.w .mk{color:var(--red);} | |
| .scell .sn{font-family:var(--mono);font-size:8px;letter-spacing:.04em;color:var(--text-dim);margin-top:5px;text-transform:uppercase;} | |
| .scell .sp{font-family:var(--mono);font-size:8px;color:var(--text-dim);margin-top:2px;} | |
| .trust-row{display:flex;gap:18px;margin-top:8px;} | |
| .trust-row .ti{font-family:var(--mono);font-size:10px;letter-spacing:.06em;color:var(--text-dim);text-transform:uppercase;} | |
| .trust-row .ti b{color:var(--text-mute);} | |
| /* ---------- about ---------- */ | |
| .foot{margin-top:24px;padding:22px 24px;border:1px solid var(--border);border-radius:12px;background:var(--bg-card);} | |
| .foot .ftitle{font-family:var(--mono);font-size:10px;letter-spacing:.2em;color:var(--text-dim);text-transform:uppercase;margin-bottom:10px;} | |
| .foot .body{font-family:var(--sans);font-size:13px;color:var(--text-mute);line-height:1.62;} | |
| .foot code{font-family:var(--mono);font-size:12px;color:var(--text);background:var(--bg-elev);padding:1px 5px;border-radius:4px;border:1px solid var(--border);} | |
| .attrib{margin-top:14px;padding-top:14px;border-top:1px solid var(--border); | |
| font-family:var(--mono);font-size:11px;letter-spacing:.04em;color:var(--text-dim);line-height:1.7;} | |
| .databanner{border:1px solid rgba(255,42,42,.5);background:rgba(255,42,42,.06);color:var(--red); | |
| font-family:var(--mono);font-size:12px;letter-spacing:.04em;padding:14px 16px;border-radius:10px;margin:4px 0 16px 0;line-height:1.5;} | |
| @media(max-width:860px){ | |
| .exhibit-grid{grid-template-columns:1fr;} | |
| .head-meta,.badges{grid-template-columns:repeat(2,1fr);} | |
| .head-top{flex-direction:column;} .head-right{text-align:left;} | |
| } | |
/* Honour the OS "reduce motion" setting. The universal selector does not match
   pseudo-elements, so they are listed explicitly. The WebKit slider thumb gets its
   own rule: in a shared selector list, a pseudo-element an engine does not
   recognise would invalidate the whole rule. */
@media (prefers-reduced-motion:reduce){
  *,*::before,*::after{transition:none!important;animation:none!important;}
  input[type=range].lever-input::-webkit-slider-thumb{transition:none!important;}
}
| </style> | |
| </head> | |
| <body> | |
| <main class="wrap"> | |
| <!-- HEADER --> | |
<header class="head">
  <div class="head-top">
    <div class="head-brand">
      <div class="head-icon">
        <!-- Decorative shield mark; hidden from assistive technology. -->
        <svg width="30" height="30" viewBox="0 0 30 30" fill="none" aria-hidden="true">
          <path d="M15 3.5 L24.5 7 V14 C24.5 20.5 20.3 24.5 15 26.5 C9.7 24.5 5.5 20.5 5.5 14 V7 Z"
                stroke="#2A2A2A" stroke-width="1.3" fill="none"/>
          <polyline points="9.5,18 12.5,15.5 16,12.5 20.5,9.5" stroke="#1FD160" stroke-width="1.7" fill="none"/>
          <circle cx="20.5" cy="9.5" r="1.5" fill="#D4AF37"/>
        </svg>
      </div>
      <div>
        <!-- The page's single top-level heading. .head-title sets font size, weight,
             line-height and margins explicitly, so the rendering matches the former div. -->
        <h1 class="head-title">External Grounding</h1>
        <div class="head-subtitle">Raising self-correction from 50% to 100% under a noisy notebook</div>
      </div>
    </div>
    <div class="head-right">
      <span class="submitted-label">Submitted by</span>
      <!-- #author is overwritten from data.json (meta.author) when present. -->
      <div class="submitted-name" id="author">Serghei Brinza</div>
      <div class="status-pill"><span class="status-dot"></span>Static demo · no live model</div>
    </div>
  </div>
  <div class="head-tag">★ Second Loop · Part 2 of 3 ★</div>
  <div class="head-meta">
    <div class="item"><span class="k">Subject model</span><span class="v">Qwen2.5-3B-Instruct (frozen)</span></div>
    <div class="item"><span class="k">Arbiter v1</span><span class="v">Qwen2.5-7B (same-family clone)</span></div>
    <div class="item"><span class="k">Arbiter v2</span><span class="v">Wikipedia + 7B reader</span></div>
    <!-- #m-license is overwritten from data.json (meta.license) when present. -->
    <div class="item"><span class="k">License</span><span class="v" id="m-license">MIT</span></div>
  </div>
</header>
| <!-- THREE HONEST BADGES --> | |
| <section class="badges"> | |
| <div class="badge red"><div class="label">No defense — raw 3B</div><div class="value" id="b-sick">50.0%</div><div class="sub" id="b-sick-sub">6 / 12 on the noisy notebook</div></div> | |
| <div class="badge orange"><div class="label">Clone-arbiter ceiling</div><div class="value" id="b-ceil">66.7%</div><div class="sub">three versions stuck here</div></div> | |
| <div class="badge green"><div class="label">Calibrated — Guardian 2.3</div><div class="value" id="b-final">100%</div><div class="sub" id="b-final-sub">12 / 12, external grounding</div></div> | |
| </section> | |
| <!-- EXHIBIT: the museum lever --> | |
<section class="exhibit">
  <!-- Section heading. .exhibit-title sets size, weight and margins, so the look is unchanged. -->
  <h2 class="exhibit-title">Drag the guardian from sick to calibrated.</h2>
  <div class="exhibit-lede">Move the lever through six guardian versions and watch corrected answers climb 50% → 100%</div>
  <div class="exhibit-grid">
    <!-- Plotly draws into this container. It is exposed as one labelled image; the
         per-stage percentages are also written as text on the slider labels below. -->
    <div id="chart" role="img"
         aria-label="Bar chart: share of the traps answered correctly at each guardian stage"></div>
    <!-- Readout for the currently selected stage; the ids below are filled in by script. -->
    <div class="readout" id="readout">
      <div class="ro-label">Current guardian stage</div>
      <div class="ro-name" id="ro-name">Sick (no defense)</div>
      <div class="ro-stageno" id="ro-stageno">stage 1 of 6</div>
      <div class="ro-pct-label">Corrected answers · higher is better</div>
      <div class="ro-pct" id="ro-pct">50.0%</div>
      <div class="ro-count" id="ro-count">6 / 12 traps correct</div>
      <div class="ro-tag" id="ro-tag">No external grounding</div>
      <div class="ro-aux">
        <div class="row"><span class="rk">vs previous stage</span><span class="rv mute" id="ro-churn">start</span></div>
        <div class="row"><span class="rk">net change</span><span class="rv mute" id="ro-net">—</span></div>
      </div>
    </div>
  </div>
  <div class="lever">
    <div class="lever-head">
      <span class="t">The lever — guardian version</span>
      <span class="h">← weaker · stronger →</span>
    </div>
    <div class="slider-area">
      <!-- Six discrete positions (0..5), one per guardian stage. -->
      <input id="slider" class="lever-input" type="range" min="0" max="5" step="1" value="0"
             aria-label="Guardian version">
      <!-- Tick marks and clickable labels are generated by buildTicks(). -->
      <div class="ticks" id="ticks"></div>
      <div class="ticklabels" id="ticklabels"></div>
    </div>
    <div class="lever-foot">
      <span>No defense</span>
      <span>66.7% plateau</span>
      <span>Calibrated</span>
    </div>
  </div>
</section>
| <!-- TRAP GRID --> | |
| <section class="grid-wrap"> | |
| <div class="grid-cap"> | |
| <span class="gc-t">All 12 traps — colour = verdict at the current stage · dots = full trajectory · click to inspect</span> | |
| <span class="gc-n" id="grid-n">Stage: <b id="grid-n-v">6 / 12 correct</b></span> | |
| </div> | |
| <div class="chips" id="chips"></div> | |
| <div class="grid12" id="grid12"></div> | |
| </section> | |
| <!-- SPOTLIGHT --> | |
| <section class="spot" id="spot"> | |
| <div class="spot-top"> | |
| <span class="spot-id" id="sp-id">#46</span> | |
| <span class="spot-cat" id="sp-cat">science-number</span> | |
| </div> | |
| <div class="spot-q" id="sp-q">—</div> | |
| <div class="spot-cols"> | |
| <div> | |
| <div class="row-block"><div class="label">Correct answer</div><div class="value correct" id="sp-correct">—</div></div> | |
| <div class="row-block"><div class="label">Memorized wrong answer</div><div class="value wrong" id="sp-wrong">—</div></div> | |
| <div class="row-block"><div class="label">Final answer · Guardian 2.3</div><div class="value final" id="sp-final">—</div></div> | |
| <div class="row-block"><div class="label">Wikipedia retrieval — top sources</div><div class="value meta" id="sp-wiki">—</div></div> | |
| <div class="trust-row"> | |
| <span class="ti">trust v2.2 <b id="sp-t22">—</b></span> | |
| <span class="ti">trust v2.3 <b id="sp-t23">—</b></span> | |
| </div> | |
| </div> | |
| <div class="spot-traj"> | |
| <div class="label">Verdict across the six guardian stages</div> | |
| <div class="straj" id="sp-traj"></div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- ABOUT --> | |
| <footer class="foot"> | |
| <div class="ftitle">About this demo</div> | |
| <div class="body"> | |
| Twelve questions where a frozen <b>Qwen2.5-3B-Instruct</b> has a confidently memorized | |
| <i>wrong</i> answer are run through a correction notebook whose external entries are | |
| <b>noisy</b> — some verified facts, some unreliable look-alikes. The lever steps through six | |
| guardian versions, each deciding what the notebook is allowed to absorb, and the score is the | |
| share of the twelve answered correctly. With no guardian the model sits at | |
| <b id="t-sick">50%</b> (6 / 12). | |
| <br/><br/> | |
| The honest part is the middle. Guardian 1.0 uses a <b>same-family clone</b> as arbiter — it | |
| shares the subject's blind spots, so it caps at <b id="t-ceil">66.7%</b>. Guardian 2.0 | |
| (live Wikipedia retrieval) and Guardian 2.1 (more retrieval) <i>also</i> land on 66.7%: | |
| three different attempts, one ceiling. And that plateau is not stagnation — under the hood each | |
| step fixes some traps while breaking others (move the lever and read “+fixed / −broken”). | |
| Only Guardian 2.2 (three targeted fixes — verbatim-quote check, namesake relevance gate, soft | |
| threshold) reaches <b id="t-22">91.7%</b>, and Guardian 2.3 (final calibration) reaches | |
| <b id="t-23">100%</b>. Several traps regress along the way — Venus (#46) is correct, broken, | |
| fixed, broken twice more, then finally held: <code>C → X → C → X → X → C</code>. | |
| <br/><br/> | |
| No live model runs in this Space. Every verdict, percentage and retrieval source is verbatim | |
| from the original experimental run, bundled into <code>data.json</code>. An independent | |
| Qwen2.5-7B reader/judge with Wikipedia adjudicated the v2 stages. Full code, raw per-stage | |
| results and methodology: | |
| <a id="repo-link" href="https://github.com/SergheiBrinza/external-grounding" target="_blank" rel="noopener">github.com/SergheiBrinza/external-grounding</a>. | |
| </div> | |
| <div class="attrib"> | |
| Subject model Qwen2.5-3B-Instruct · arbiters Qwen2.5-7B-Instruct (same-family clone) and | |
| Wikipedia retrieval + 7B reader/judge (both Apache-2.0, Alibaba Cloud). Wikipedia content | |
| © its authors, CC BY-SA. Run on a single RTX 3090. No model weights are redistributed here — | |
| only aggregate verdicts and counts. Demo code & data: MIT. | |
| </div> | |
| </footer> | |
| </main> | |
| <script> | |
/* Chart palette. The hex values repeat the CSS custom properties declared in :root. */
const COL = {
  card: '#0A0A0A',
  border: '#1F1F1F',
  borderS: '#2A2A2A',
  text: '#FFFFFF',
  mute: '#A8A8A8',
  dim: '#6B6B6B',
  gold: '#D4AF37',
  red: '#FF2A2A',
  green: '#1FD160',
  orange: '#FF8C42',
  yellow: '#E8C84A'
};
/* Lookup from a stage's `color` name to its bar colour. */
const COLOR = { red: COL.red, orange: COL.orange, yellow: COL.yellow, green: COL.green };
/* Shorthand for document.getElementById. */
const $ = (id) => document.getElementById(id);
/* Formats a number with exactly one digit after the decimal point. */
const f1 = (v) => v.toFixed(1);
/* Page state. The first six are assigned when data.json has loaded. */
let META = {};
let STAGES = [];
let TRAPS = [];
let BYID = {};      // trap id -> trap object
let SHOWCASE = [];  // trap ids offered as chips
let COUNTS = [];    // number of correct traps per stage
let cur = 0;        // index of the stage currently shown
let selTrap = 46;   // id of the trap shown in the spotlight
/* Short axis/tick label for each stage key. */
const SHORT = { sick: 'sick', gk1: '1.0', gk2: '2.0', gk21: '2.1', gk22: '2.2', gk23: '2.3' };
/* Plot margins in pixels, passed to the Plotly layout. */
const PLOT_MARGIN = { l: 40, r: 16, t: 16, b: 34 };
/*
 * Bootstrap. Loads data.json, copies it into the page-level state, fills in the
 * static figures (badges and About text), builds the widgets, and finally applies
 * an optional deep link: ?stage=0..5&trap=<id>.
 * Any failure along the way reaches the catch handler, which logs it and inserts
 * a banner at the top of the exhibit section.
 */
fetch('data.json').then(r=>{
  // fetch() only rejects on network failure. Without this check a 404 error page
  // would reach r.json() and surface as a misleading JSON syntax error.
  if(!r.ok) throw new Error('HTTP '+r.status+(r.statusText?' '+r.statusText:''));
  return r.json();
}).then(D=>{
  META=D.meta||{}; STAGES=D.stages||[]; TRAPS=D.traps||[];
  // Stage indices 0, 1, 4 and 5 are read below; fail with a readable message
  // rather than a TypeError when the data holds fewer stages.
  if(STAGES.length<6) throw new Error('expected at least 6 stages, found '+STAGES.length);
  TRAPS.forEach(t=>{BYID[t.id]=t;});
  // Fall back to the first six traps when no showcase list is supplied.
  SHOWCASE=(D.showcase&&D.showcase.length)?D.showcase:TRAPS.slice(0,6).map(t=>t.id);
  COUNTS=STAGES.map(s=>TRAPS.filter(t=>t.stages[s.key]==='correct').length);
  if(META.author) $('author').textContent=META.author;
  if(META.license) $('m-license').textContent=String(META.license).toUpperCase();

  const n=TRAPS.length;
  const one=p=>f1(p)+'%';
  // Whole numbers drop the decimal so the final stage reads "100%", not "100.0%".
  const whole=p=>(p%1===0?p.toFixed(0):f1(p))+'%';
  $('b-sick').textContent=one(STAGES[0].pct);
  $('b-sick-sub').textContent=COUNTS[0]+' / '+n+' on the noisy notebook';
  $('b-ceil').textContent=one(STAGES[1].pct);
  $('b-final').textContent=whole(STAGES[5].pct);
  $('b-final-sub').textContent=COUNTS[5]+' / '+n+', external grounding';
  $('t-sick').textContent=one(STAGES[0].pct);
  $('t-ceil').textContent=one(STAGES[1].pct);
  $('t-22').textContent=one(STAGES[4].pct);
  $('t-23').textContent=whole(STAGES[5].pct);

  // Accept only an absolute http(s) URL, so a malformed value cannot turn the
  // repository link into a script: or relative target.
  if(META.repo&&/^https?:\/\//i.test(META.repo)){
    const rl=$('repo-link');
    rl.href=META.repo;
    rl.textContent=String(META.repo).replace(/^https?:\/\//i,'');
  }

  buildTicks(); buildChips(); buildGrid(); drawChart();

  // deep-link: ?stage=0..5 & trap=<id>  (5 matches the slider's max attribute)
  const q=new URLSearchParams(location.search);
  const st=parseInt(q.get('stage'),10);
  const start=(Number.isFinite(st)&&st>=0&&st<=5)?st:0;
  const tp=parseInt(q.get('trap'),10);
  // Prefer the linked trap, else keep the default if it exists, else the first showcase id.
  if(BYID[tp]) selTrap=tp; else if(!BYID[selTrap]) selTrap=SHOWCASE[0];
  $('slider').value=start;
  $('slider').addEventListener('input',e=>setStage(+e.target.value));
  renderSpot(); setStage(start);
}).catch(err=>{
  console.error('external-grounding: could not load data.json —',err);
  const ex=document.querySelector('.exhibit');
  if(ex){
    const b=document.createElement('div'); b.className='databanner';
    b.textContent='Data unavailable — could not load data.json ('+err+'). This static demo needs data.json served alongside the page.';
    ex.insertBefore(b,ex.firstChild);
  }
});
/* Returns how many traps carry the verdict 'correct' for the stage named `key`. */
function correctAt(key){
  let hits=0;
  for(const trap of TRAPS){
    if(trap.stages[key]==='correct') hits+=1;
  }
  return hits;
}
/*
 * Compares stage `i` with the stage before it.
 * Returns null for the first stage (i <= 0); otherwise {fixed, broken, net}, where
 * `fixed` counts traps that went from not-correct to correct, `broken` counts the
 * reverse, and `net` is fixed minus broken.
 */
function churn(i){
  if(i<=0) return null;
  const prevKey=STAGES[i-1].key;
  const thisKey=STAGES[i].key;
  let fixed=0;
  let broken=0;
  for(const trap of TRAPS){
    const before=trap.stages[prevKey]==='correct';
    const after=trap.stages[thisKey]==='correct';
    if(after&&!before){
      fixed+=1;
    }else if(before&&!after){
      broken+=1;
    }
  }
  return {fixed:fixed,broken:broken,net:fixed-broken};
}
/*
 * Rebuilds the tick marks (#ticks) and the clickable stage labels (#ticklabels)
 * under the slider, one of each per entry in STAGES, spaced evenly from 0% to 100%.
 * Each label shows the stage's short name with its percentage beneath, and moves
 * the slider to that stage on click, Enter or Space.
 */
function buildTicks(){
  const T=$('ticks'), L=$('ticklabels'); T.innerHTML=''; L.innerHTML='';
  const last=STAGES.length-1;
  STAGES.forEach((s,i)=>{
    // A single-stage dataset would otherwise divide by zero and yield "NaN%".
    const pct=last>0?i/last*100:0;
    const t=document.createElement('div'); t.className='tick'; t.dataset.i=i; t.style.left=pct+'%'; T.appendChild(t);

    const name=String(SHORT[s.key]||s.key);
    const pctText=f1(s.pct)+'%';
    const l=document.createElement('div'); l.className='tlabel'; l.dataset.i=i;
    // Built with text nodes rather than innerHTML: s.key comes from data.json and
    // must not be parsed as markup.
    l.textContent=name;
    const sub=document.createElement('span'); sub.className='pct'; sub.textContent=pctText; l.appendChild(sub);
    l.style.left=pct+'%';
    // The end labels are anchored to the track edges; the rest are centred on their tick.
    if(i===0){l.style.transform='translateX(0)';l.style.textAlign='left';}
    else if(i===last){l.style.transform='translateX(-100%)';l.style.textAlign='right';}
    else{l.style.transform='translateX(-50%)';}
    // The label is a focusable control, so tell assistive technology what it does.
    l.tabIndex=0;
    l.setAttribute('role','button');
    l.setAttribute('aria-label','Go to stage '+name+', '+pctText);
    const go=()=>{$('slider').value=i;setStage(i);};
    l.addEventListener('click',go);
    l.addEventListener('keydown',e=>{if(e.key==='Enter'||e.key===' '){e.preventDefault();go();}});
    L.appendChild(l);
  });
}
/*
 * Rebuilds the shortcut chips (#chips), one per id in SHOWCASE that resolves to a
 * known trap. Activating a chip (click, Enter or Space) selects that trap,
 * re-renders the spotlight, refreshes the selection highlight and scrolls the
 * spotlight into view.
 */
function buildChips(){
  const c=$('chips'); c.innerHTML='';
  SHOWCASE.forEach(id=>{
    const t=BYID[id];
    if(!t) return; // showcase id with no matching trap: skip it
    const el=document.createElement('div'); el.className='chip'; el.dataset.id=id; el.tabIndex=0;
    // A focusable div is announced as plain text unless it declares a role.
    el.setAttribute('role','button');
    el.textContent=chipLabel(t);
    const pick=()=>{selTrap=id;renderSpot();updateGridSel();scrollSpot();};
    el.addEventListener('click',pick);
    el.addEventListener('keydown',e=>{if(e.key==='Enter'||e.key===' '){e.preventDefault();pick();}});
    c.appendChild(el);
  });
}
/*
 * Returns the human-friendly chip caption for trap `t`, looked up by `t.id`.
 * Traps without a caption fall back to '#' followed by the id.
 */
function chipLabel(t){
  const captions={
    16:'Tongue map',
    20:'Tallest mountain',
    27:'Darth Vader',
    28:'Magic mirror',
    34:'First to circle globe',
    46:'Venus day & year'
  };
  const caption=captions[t.id];
  if(caption){
    return caption;
  }
  return '#'+t.id;
}
/*
 * Rebuilds the trap grid (#grid12): one card per trap with its id, category,
 * question, a row of verdict dots (one per stage) and a matching row of stage
 * labels. Activating a card (click, Enter or Space) selects the trap, re-renders
 * the spotlight, refreshes the selection highlight and scrolls to the spotlight.
 */
function buildGrid(){
  const g=$('grid12'); g.innerHTML='';
  // Same fallback as the slider ticks and chart axis: unknown keys show the raw key
  // instead of the string "undefined".
  const shortOf=s=>String(SHORT[s.key]||s.key);
  TRAPS.forEach(t=>{
    const el=document.createElement('div'); el.className='gcard'; el.dataset.id=t.id; el.tabIndex=0;
    // A focusable div is announced as plain text unless it declares a role.
    el.setAttribute('role','button');
    el.innerHTML=
      '<div class="gc-head"><span class="gc-id">#'+esc(String(t.id).padStart(2,'0'))+'</span>'+
      '<span class="gc-cat">'+esc(t.category)+'</span>'+
      '<span class="gc-now" data-now></span></div>'+
      '<div class="gc-q">'+esc(t.question)+'</div>';

    // Dots and axis labels are created as DOM nodes, so data-derived stage keys
    // never pass through HTML or attribute parsing.
    const traj=document.createElement('div'); traj.className='traj';
    const axis=document.createElement('div'); axis.className='traj-axis';
    const spoken=[];
    STAGES.forEach((s,i)=>{
      const ok=t.stages[s.key]==='correct';
      const verdict=ok?'correct':'wrong';
      const name=shortOf(s);
      const dot=document.createElement('span');
      dot.className='tdot '+(ok?'c':'w'); dot.dataset.i=i; dot.title=name+': '+verdict;
      traj.appendChild(dot);
      const lab=document.createElement('span'); lab.textContent=name; axis.appendChild(lab);
      spoken.push(name+' '+verdict);
    });
    // The dots encode the verdict by colour only; give the row a text equivalent.
    traj.setAttribute('role','img');
    traj.setAttribute('aria-label','Verdict by stage: '+spoken.join(', '));
    // The axis repeats stage names already present in the label above.
    axis.setAttribute('aria-hidden','true');
    el.appendChild(traj); el.appendChild(axis);

    const pick=()=>{selTrap=t.id;renderSpot();updateGridSel();scrollSpot();};
    el.addEventListener('click',pick);
    el.addEventListener('keydown',e=>{if(e.key==='Enter'||e.key===' '){e.preventDefault();pick();}});
    g.appendChild(el);
  });
}
/*
 * Scrolls the spotlight panel (#spot) into view, moving only as far as needed.
 * The scroll is animated unless the user has asked the OS for reduced motion,
 * matching the stylesheet, which disables transitions under the same setting.
 */
function scrollSpot(){
  const s=$('spot');
  if(!s) return;
  const calm=typeof window.matchMedia==='function'&&
    window.matchMedia('(prefers-reduced-motion: reduce)').matches;
  s.scrollIntoView({behavior:calm?'auto':'smooth',block:'nearest'});
}
/*
 * Marks the grid card and the chip that belong to the selected trap (selTrap):
 * cards get the 'sel' class, chips get 'active', and both get aria-current so the
 * selection is not conveyed by colour alone.
 */
function updateGridSel(){
  // dataset values are always strings; compare as strings so ids match whether
  // data.json stores them as numbers or as text.
  const want=String(selTrap);
  const mark=(selector,cls)=>{
    document.querySelectorAll(selector).forEach(node=>{
      const on=node.dataset.id===want;
      node.classList.toggle(cls,on);
      if(on) node.setAttribute('aria-current','true');
      else node.removeAttribute('aria-current');
    });
  };
  mark('.gcard','sel');
  mark('.chip','active');
}
| /* ---------- chart ---------- */ | |
/*
 * Draws the bar chart into #chart: one bar per stage, coloured by the stage's
 * `color` name (gold when the name is unknown), labelled with its percentage,
 * and outlined in white when it is the current stage (`cur`).
 */
function drawChart(){
  const MONO="'JetBrains Mono',monospace";
  const xs=[], ys=[], fills=[], captions=[], outlines=[], names=[];
  STAGES.forEach((stage,k)=>{
    xs.push(k);
    ys.push(stage.pct);
    fills.push(COLOR[stage.color]||COL.gold);
    captions.push(f1(stage.pct)+'%');
    outlines.push(k===cur?2.5:0); // only the current bar gets an outline
    names.push(stage.label);
  });
  const bars={
    type:'bar',
    x:xs,
    y:ys,
    width:0.62,
    marker:{color:fills,line:{color:'#FFFFFF',width:outlines}},
    text:captions,
    textposition:'outside',
    textfont:{family:MONO,size:11,color:COL.mute},
    cliponaxis:false,
    customdata:names,
    hovertemplate:'%{customdata}<br>%{y:.1f}%<extra></extra>'
  };
  const options={displayModeBar:false,responsive:true};
  Plotly.newPlot('chart',[bars],baseLayout(),options);
}
/**
 * Build the Plotly layout object for the stage chart: card-coloured
 * background, a categorical x axis labelled with the short stage names, a
 * fixed 0–112 percent y axis, and the shapes/annotations for stage `cur`.
 */
function baseLayout(){
  const mono="'JetBrains Mono',monospace";

  const xaxis={
    tickmode:'array',
    tickvals:STAGES.map((_,k)=>k),
    ticktext:STAGES.map(st=>SHORT[st.key]||st.key),
    tickfont:{family:mono,size:11,color:COL.dim},
    gridcolor:COL.border,
    zeroline:false,
    range:[-0.5,STAGES.length-0.5],
    fixedrange:true
  };

  const yaxis={
    title:{text:'% corrected (higher = better)',font:{size:11,color:COL.mute}},
    tickfont:{family:mono,size:10,color:COL.dim},
    ticksuffix:'%',
    gridcolor:COL.border,
    zeroline:false,
    range:[0,112],
    fixedrange:true
  };

  return {
    paper_bgcolor:COL.card,
    plot_bgcolor:COL.card,
    margin:PLOT_MARGIN,
    height:430,
    font:{family:mono,color:COL.mute},
    showlegend:false,
    bargap:0.38,
    hoverlabel:{bgcolor:'#141414',bordercolor:COL.borderS,font:{family:mono,color:COL.text,size:12}},
    xaxis:xaxis,
    yaxis:yaxis,
    shapes:shapesFor(cur),
    annotations:annsFor(cur)
  };
}
/**
 * Layout shapes for the chart when stage `i` is current: a dotted orange
 * line at STAGES[1].pct, a dotted green line at 100, and a faint vertical
 * line through x = i. All are drawn below the bars.
 */
function shapesFor(i){
  const ceilingPct=STAGES[1].pct;
  // full-width dotted horizontal rule at data value yv
  const hline=(yv,color)=>({
    type:'line',xref:'paper',x0:0,x1:1,yref:'y',y0:yv,y1:yv,
    line:{color:color,width:1,dash:'dot'},layer:'below'
  });
  const cursor={
    type:'line',xref:'x',x0:i,x1:i,yref:'paper',y0:0,y1:1,
    line:{color:'rgba(255,255,255,0.14)',width:1},layer:'below'
  };
  return [hline(ceilingPct,COL.orange),hline(100,COL.green),cursor];
}
/**
 * Layout annotations for the chart: a single orange caption sitting on the
 * STAGES[1].pct line. The stage index `i` is accepted but not used.
 */
function annsFor(i){
  const ceilingPct=STAGES[1].pct;
  const caption={
    xref:'paper',
    yref:'y',
    x:0.014,
    y:ceilingPct,
    xanchor:'left',
    yanchor:'bottom',
    text:'clone-arbiter ceiling '+f1(ceilingPct)+'%',
    showarrow:false,
    font:{family:"'JetBrains Mono',monospace",size:9.5,color:COL.orange}
  };
  return [caption];
}
| /* ---------- state ---------- */ | |
/**
 * Make stage `i` the current stage and bring every dependent view in line.
 *
 * Updates, in order: the chart (bar outline, shapes, annotations), the
 * readout (name, stage number, percentage, count), the status tag, the
 * churn / net figures, the slider's aria-valuetext, the tick and label
 * highlights, each grid card's correct/wrong state and current-dot ring,
 * the grid counter, and the spotlight's current-stage ring.
 *
 * @param {number} i Index into STAGES (also used to index COUNTS).
 */
function setStage(i){
  cur=i;
  const s=STAGES[i], n=TRAPS.length, c=COUNTS[i], col=COLOR[s.color]||COL.gold;

  // chart: outline only the active bar, then move the cursor line
  Plotly.restyle('chart',{'marker.line.width':[STAGES.map((_,k)=>k===i?2.5:0)]},[0]);
  Plotly.relayout('chart',{shapes:shapesFor(i),annotations:annsFor(i)});

  // readout
  $('ro-name').textContent=s.label;
  $('ro-stageno').textContent='stage '+(i+1)+' of '+STAGES.length;
  const roPct=$('ro-pct');
  roPct.textContent=f1(s.pct)+'%';
  roPct.style.color=col;
  $('ro-count').textContent=c+' / '+n+' traps correct';

  // tag
  let tt='', tc='';
  if(i===0){tt='No external grounding';tc='red';}
  // any stage sitting exactly on the stage-1 percentage counts as the plateau
  else if(s.pct===STAGES[1].pct){tt='Plateau · '+f1(s.pct)+'% ceiling';tc='orange';}
  else if(i===STAGES.length-1){tt='Calibrated · '+f1(s.pct)+'%';tc='green';}
  else {tt='Breakthrough';tc='green';}
  const tag=$('ro-tag');
  tag.textContent=tt;
  tag.className='ro-tag '+tc;

  // churn
  const ch=churn(i);
  const roChurn=$('ro-churn'), roNet=$('ro-net');
  if(!ch){
    roChurn.innerHTML='start';
    roChurn.className='rv mute';
    roNet.textContent='—';
    roNet.className='rv mute';
    // Drop the inline colour a previously shown stage left behind; without
    // this the placeholder dash keeps that stage's green or red.
    roNet.style.color='';
  }
  else{
    roChurn.innerHTML='<span class="fx">+'+ch.fixed+' fixed</span> · <span class="bk">−'+ch.broken+' broken</span>';
    roChurn.className='rv';
    const nt=(ch.net>0?'+':ch.net<0?'−':'±')+Math.abs(ch.net)+' net';
    roNet.textContent=nt;
    // only a zero (or non-numeric) net is rendered muted
    roNet.className='rv '+((ch.net>0||ch.net<0)?'':'mute');
    roNet.style.color=ch.net>0?COL.green:ch.net<0?COL.red:COL.mute;
  }

  // slider aria
  $('slider').setAttribute('aria-valuetext',s.label+' — '+f1(s.pct)+'% corrected');

  // ticks/labels
  document.querySelectorAll('.tick').forEach(t=>t.classList.toggle('on',+t.dataset.i===i));
  document.querySelectorAll('.tlabel').forEach(l=>l.classList.toggle('on',+l.dataset.i===i));

  // grid recolor at current stage + current-dot ring
  const key=s.key;
  document.querySelectorAll('.gcard').forEach(card=>{
    const t=BYID[+card.dataset.id], ok=t.stages[key]==='correct';
    card.classList.toggle('now-correct',ok);
    card.classList.toggle('now-wrong',!ok);
    const now=card.querySelector('[data-now]');
    now.textContent=ok?'correct':'wrong';
    now.className='gc-now '+(ok?'c':'w');
    card.querySelectorAll('.tdot').forEach(d=>d.classList.toggle('cur',+d.dataset.i===i));
  });
  $('grid-n-v').textContent=c+' / '+n+' correct';

  // spotlight current-stage ring
  document.querySelectorAll('#sp-traj .scell').forEach(cell=>cell.classList.toggle('cur',+cell.dataset.i===i));
}
| /* ---------- spotlight ---------- */ | |
/**
 * Fill the spotlight panel with the trap selected by selTrap: its text
 * fields, then one trajectory cell per stage (✓ / ✗, short stage name and the
 * stage percentage, with the cell at index `cur` ringed). Does nothing when
 * BYID has no entry for selTrap. Finishes by refreshing the grid selection.
 */
function renderSpot(){
  const trap=BYID[selTrap];
  if(!trap) return;

  const put=(id,value)=>{$(id).textContent=value;};
  put('sp-id','#'+String(trap.id).padStart(2,'0'));
  put('sp-cat',trap.category);
  put('sp-q',trap.question);
  put('sp-correct',trap.correct_answer);
  put('sp-wrong',trap.memorized_wrong);
  put('sp-final',trap.final_answer);
  put('sp-wiki',(trap.wiki_titles||[]).join(' · '));
  put('sp-t22',trap.v22_trust||'—');
  put('sp-t23',trap.v23_trust||'—');

  const row=$('sp-traj');
  row.innerHTML='';
  for(let k=0;k<STAGES.length;k++){
    const stage=STAGES[k];
    const hit=trap.stages[stage.key]==='correct';
    const box=document.createElement('div');
    box.className='scell '+(hit?'c':'w')+(k===cur?' cur':'');
    box.dataset.i=k;
    box.innerHTML=
      '<div class="mk">'+(hit?'✓':'✗')+'</div>'+
      '<div class="sn">'+(SHORT[stage.key]||stage.key)+'</div>'+
      '<div class="sp">'+f1(stage.pct)+'%</div>';
    row.appendChild(box);
  }

  updateGridSel();
}
/**
 * Escape a value for insertion into HTML built by string concatenation.
 *
 * The argument is coerced with String() and the characters & < > " ' are
 * replaced by their entities, so the result is safe both as element text and
 * inside a quoted attribute value.
 *
 * @param {*} s Value to escape.
 * @returns {string} The escaped text.
 */
function esc(s){
  // Each special character must map to its entity; mapping a character to
  // itself would leave the markup unescaped.
  const ENTITIES={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  return String(s).replace(/[&<>"']/g,c=>ENTITIES[c]);
}
| </script> | |
| </body> | |
| </html> | |