Spaces:
Paused
Paused
<!--
  Case-study widget. The .cs-tog, .cs-scen and .cs-grid containers start empty
  and are filled in by the inline script further down.

  role="figure" is used instead of role="img": the img role treats every
  descendant as presentational, which would hide the toggle buttons and the
  rendered scenario text from assistive technology.
-->
<div id="ob-cs" role="figure" aria-label="Three real scenarios elicited from an optimistic model (GPT-5.4) and a pessimistic model (Sonnet 4.6). On the same scenario the two models split in opposite directions: GPT leaves positive probability mass uncovered, Sonnet leaves negative mass uncovered.">
  <div class="cs-head">
    <span class="cs-title">Same scenario, opposite tilt</span>
    <!-- One toggle button per case is inserted here. -->
    <span class="cs-tog"></span>
  </div>
  <!-- Selected scenario: domain, text and the two question framings. -->
  <div class="cs-scen"></div>
  <!-- One card per model. -->
  <div class="cs-grid"></div>
  <div class="cs-cap">Two framings of one question. A coherent judge has them sum to 100; the leftover is Skew. GPT-5.4 overcounts the good side, Sonnet 4.6 the bad side, on identical text.</div>
</div>
<style>
  /* ---- Widget shell ---- */
  #ob-cs { font: 13px/1.45 system-ui, sans-serif; color: var(--text-color, #222); width: 100%; position: relative; }

  /* ---- Header row: title plus case toggle buttons ---- */
  #ob-cs .cs-head { display: flex; align-items: baseline; gap: 10px; margin-bottom: 10px; }
  #ob-cs .cs-title { font-weight: 650; font-size: 15px; }
  #ob-cs .cs-tog { display: inline-flex; gap: 6px; margin-left: auto; flex-wrap: wrap; }
  #ob-cs .cs-btn { font: inherit; font-size: 12px; cursor: pointer; padding: 4px 10px; border-radius: 7px; border: 1px solid var(--border-color, #ccc); background: transparent; color: var(--muted-color, #666); }
  #ob-cs .cs-btn:hover { color: var(--text-color, #222); }
  #ob-cs .cs-btn[aria-pressed="true"] { background: var(--primary-color, #2d2926); color: #fff; border-color: var(--primary-color, #2d2926); }

  /* ---- Scenario box ---- */
  #ob-cs .cs-scen { background: var(--surface-bg, #faf8f6); border: 1px solid var(--border-color, #e7e2dc); border-radius: 10px; padding: 12px 14px; margin-bottom: 14px; }
  #ob-cs .cs-scen .cs-dom { font-size: 11px; font-weight: 700; letter-spacing: .04em; text-transform: uppercase; color: var(--muted-color, #888); }
  #ob-cs .cs-scen .cs-txt { margin-top: 5px; font-size: 13.5px; line-height: 1.5; }
  #ob-cs .cs-scen .cs-qs { margin-top: 9px; display: flex; gap: 16px; flex-wrap: wrap; font-size: 12px; color: var(--muted-color, #666); }
  #ob-cs .cs-scen .cs-qs .cs-qpos b,
  #ob-cs .cs-scen .cs-qs .cs-qneg b { font-weight: 650; }

  /* ---- Model card grid: two columns, one column at 560px and below ---- */
  #ob-cs .cs-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; }
  @media (max-width: 560px) {
    #ob-cs .cs-grid { grid-template-columns: 1fr; }
  }

  /* ---- Model card ---- */
  #ob-cs .cs-card { border: 1px solid var(--border-color, #e7e2dc); border-radius: 10px; padding: 12px 14px 14px; }
  #ob-cs .cs-mhead { display: flex; align-items: center; gap: 8px; margin-bottom: 11px; }
  #ob-cs .cs-dot { width: 9px; height: 9px; border-radius: 50%; flex: none; }
  #ob-cs .cs-mname { font-weight: 650; font-size: 13.5px; }
  #ob-cs .cs-mtag { font-size: 11px; color: var(--muted-color, #888); margin-left: auto; }

  /* ---- Probability bars ---- */
  #ob-cs .cs-bar { margin: 8px 0; }
  #ob-cs .cs-blab { display: flex; justify-content: space-between; font-size: 11.5px; margin-bottom: 3px; }
  #ob-cs .cs-blab span:last-child { font-variant-numeric: tabular-nums; font-weight: 600; }
  #ob-cs .cs-track { height: 8px; border-radius: 5px; background: var(--border-color, #ece7e1); overflow: hidden; }
  #ob-cs .cs-fill { height: 100%; border-radius: 5px; }

  /* ---- Card footer: sum text and skew badge ---- */
  #ob-cs .cs-foot { margin-top: 11px; padding-top: 10px; border-top: 1px dashed var(--border-color, #e7e2dc); display: flex; align-items: baseline; gap: 8px; }
  #ob-cs .cs-sum { font-size: 11.5px; color: var(--muted-color, #777); font-variant-numeric: tabular-nums; }
  #ob-cs .cs-badge { margin-left: auto; font-size: 12px; font-weight: 700; padding: 2px 9px; border-radius: 999px; color: #fff; font-variant-numeric: tabular-nums; }

  /* ---- Caption ---- */
  #ob-cs .cs-cap { margin-top: 13px; font-size: 12px; color: var(--muted-color, #666); }
</style>
| <script> | |
| (function () { | |
| var root = document.getElementById("ob-cs"); | |
| if (!root) { return; } | |
| var togEl = root.querySelector(".cs-tog"), scenEl = root.querySelector(".cs-scen"), gridEl = root.querySelector(".cs-grid"); | |
/**
 * Read a CSS custom property from the document root element.
 *
 * @param {string} n  Property name, e.g. "--opt".
 * @param {*} fb      Value returned when the property is unset or blank.
 * @returns {*} The trimmed computed value, or `fb` when that value is empty.
 */
function cssVar(n, fb) {
  var raw = getComputedStyle(document.documentElement).getPropertyValue(n);
  var trimmed = raw ? raw.trim() : "";
  if (trimmed) {
    return trimmed;
  }
  return fb;
}
// Colour keyed by model provider; draw() uses it for the card dot and the
// P(positive) bar, falling back to the muted colour for unknown providers.
var PROV = {
  OpenAI: "#2a8f82",
  Anthropic: "#a66e4e",
  Google: "#3d5a80",
  Mistral: "#c4553a"
};
// Parsed data/cases.json payload; stays null until the fetch succeeds.
var DATA = null;
// Index into DATA.cases of the currently selected case.
var SEL = 0;
/**
 * Escape a value for insertion into HTML that is assembled by string
 * concatenation and assigned to innerHTML.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time. Quotes are escaped as well, so the result
 * is also safe inside a quoted attribute value.
 *
 * @param {*} s Value to escape; coerced with String().
 * @returns {string} The text with & < > " ' replaced by HTML entities.
 */
function esc(s) {
  return String(s)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
/**
 * Build the markup for one labelled percentage bar.
 *
 * @param {string} label Text shown on the left of the label row (escaped).
 * @param {number|string} val Percentage. Numeric strings are parsed; a value
 *   that is missing or not a finite number renders as "n/a" with an empty
 *   track instead of throwing.
 * @param {string} color CSS colour used for the filled part of the track.
 * @returns {string} HTML for a `.cs-bar` element.
 */
function bar(label, val, color) {
  var pct = typeof val === "number" ? val : parseFloat(val);
  var known = isFinite(pct);
  var shown = known ? pct.toFixed(0) + "%" : "n/a";
  // The printed number is left as-is; only the drawn width is clamped to 0-100.
  var width = known ? Math.max(0, Math.min(100, pct)) : 0;
  return '<div class="cs-bar"><div class="cs-blab"><span>' + esc(label) + '</span><span>' + shown + '</span></div>' +
    '<div class="cs-track"><div class="cs-fill" style="width:' + width + '%;background:' + esc(color) + '"></div></div></div>';
}
/**
 * Render the widget from DATA and SEL: the case toggle buttons, the selected
 * scenario box, and one card per model.
 *
 * Does nothing until DATA is loaded or when there are no cases. An
 * out-of-range SEL is reset to 0. A model that has no usable numbers for the
 * selected case gets a placeholder card instead of aborting the whole render.
 * Extra arguments are ignored, so the function can be passed directly as a
 * callback.
 */
function draw() {
  if (!DATA) { return; }
  var cases = DATA.cases || [];
  var models = DATA.models || [];
  if (!cases.length) { return; }
  if (!(SEL >= 0 && SEL < cases.length)) { SEL = 0; }

  var OPT = cssVar("--opt", "#e07a5f");
  var PES = cssVar("--pes", "#3d5a80");
  var MUT = cssVar("--muted-color", "#888");

  // The buttons are recreated on every draw, which would drop keyboard focus;
  // remember which one had it so it can be restored afterwards.
  var active = document.activeElement;
  var focusedIdx = active && togEl.contains(active) ? active.getAttribute("data-i") : null;

  // tabs
  togEl.innerHTML = cases.map(function (c, i) {
    // type="button" keeps the toggle from submitting an enclosing form.
    return '<button type="button" class="cs-btn" data-i="' + i + '" aria-pressed="' + (i === SEL ? "true" : "false") + '">' + esc(c.domain) + "</button>";
  }).join("");
  togEl.querySelectorAll(".cs-btn").forEach(function (b) {
    b.addEventListener("click", function () { SEL = +b.getAttribute("data-i"); draw(); });
  });
  if (focusedIdx !== null) {
    var again = togEl.querySelector('.cs-btn[data-i="' + focusedIdx + '"]');
    if (again) { again.focus(); }
  }

  var c = cases[SEL];
  scenEl.innerHTML = '<div class="cs-dom">' + esc(c.domain) + '</div><div class="cs-txt">' + esc(c.scenario) + '</div>' +
    '<div class="cs-qs"><span class="cs-qpos"><b>Positive:</b> ' + esc(c.pos) + '</span><span class="cs-qneg"><b>Negative:</b> ' + esc(c.neg) + '</span></div>';

  gridEl.innerHTML = models.map(function (m) {
    var d = c[m.name];
    var col = PROV[m.provider] || MUT;
    var head = '<div class="cs-mhead"><span class="cs-dot" style="background:' + col + '"></span><span class="cs-mname">' + esc(m.name) + '</span><span class="cs-mtag">' + esc(m.dir) + '</span></div>';

    // Coerce before adding so string values cannot turn the sum into a concatenation.
    var good = d ? parseFloat(d.good) : NaN;
    var bad = d ? parseFloat(d.bad) : NaN;
    if (!isFinite(good) || !isFinite(bad)) {
      return '<div class="cs-card">' + head +
        '<div class="cs-foot"><span class="cs-sum">No data for this model.</span></div>' +
        '</div>';
    }

    // Skew is how far the two answers are from summing to 100.
    var sum = good + bad;
    var skew = sum - 100;
    var optimistic = skew >= 0;
    var pts = Math.abs(skew).toFixed(0);
    var badgeC = optimistic ? OPT : PES;
    var badgeT = (optimistic ? "+" : "−") + pts + " " + (optimistic ? "optimistic" : "pessimistic");
    var missTxt = optimistic
      ? pts + " pts double-counted toward the good outcome"
      : pts + " pts double-counted toward the bad outcome";

    return '<div class="cs-card">' +
      head +
      bar("P(positive)", good, col) +
      bar("P(negative)", bad, MUT) +
      '<div class="cs-foot"><span class="cs-sum">sum ' + sum.toFixed(0) + ' · ' + esc(missTxt) + '</span>' +
      '<span class="cs-badge" style="background:' + badgeC + '">' + badgeT + '</span></div>' +
      '</div>';
  }).join("");
}
// Load the case data and render it. Any failure (network error, non-2xx
// status, invalid JSON, unexpected payload shape, or an exception thrown while
// rendering) is logged to the console and reported in the scenario box.
fetch("data/cases.json")
  .then(function (r) {
    if (!r.ok) { throw new Error("HTTP " + r.status + " while loading data/cases.json"); }
    return r.json();
  })
  .then(function (j) {
    // draw() maps over both arrays, so reject a payload that lacks them.
    if (!j || !Array.isArray(j.cases) || !Array.isArray(j.models)) {
      throw new Error("data/cases.json must contain `cases` and `models` arrays");
    }
    DATA = j;
    draw();
  })
  .catch(function (err) {
    console.error("ob-cs: failed to load or render data/cases.json", err);
    scenEl.innerHTML = '<div class="cs-txt">Could not load data/cases.json.</div>';
  });
// Redraw whenever the data-theme attribute on the root element changes, so
// the colours draw() reads through cssVar() are looked up again.
var themeWatcher = new MutationObserver(draw);
themeWatcher.observe(document.documentElement, {
  attributes: true,
  attributeFilter: ["data-theme"]
});
| })(); | |
| </script> | |