OptimismBench / app /src /content /embeds /case-study.html
seonglae's picture
article: icon hero + dataset explorer + audit pass
3b32f92
Raw
History Blame Contribute Delete
6.79 kB
<!-- Case-study embed shell. The inline script further down fills .cs-tog (scenario buttons), .cs-scen (scenario text) and .cs-grid (one card per model). -->
<!-- role="group" rather than "img": the img role treats every descendant as presentational, which would hide the injected toggle buttons and text from assistive technology. -->
<div id="ob-cs" role="group" aria-label="Three real scenarios elicited from an optimistic model (GPT-5.4) and a pessimistic model (Sonnet 4.6). On the same scenario the two models split in opposite directions: GPT leaves positive probability mass uncovered, Sonnet leaves negative mass uncovered.">
  <div class="cs-head">
    <span class="cs-title">Same scenario, opposite tilt</span>
    <span class="cs-tog"></span>
  </div>
  <div class="cs-scen"></div>
  <div class="cs-grid"></div>
  <div class="cs-cap">Two framings of one question. A coherent judge has them sum to 100; the leftover is Skew. GPT-5.4 overcounts the good side, Sonnet 4.6 the bad side, on identical text.</div>
</div>
<style>
/*
 * Styles for the case-study embed. Every selector is scoped under #ob-cs.
 * Colours come from custom properties (--text-color, --muted-color,
 * --border-color, --primary-color, --surface-bg); each var() carries a
 * literal fallback that applies when the property is not defined.
 */

/* ---- Container ---- */
#ob-cs {
  font: 13px/1.45 system-ui, sans-serif;
  color: var(--text-color, #222);
  width: 100%;
  position: relative;
}

/* ---- Header row: title on the left, scenario toggles pushed right ---- */
#ob-cs .cs-head {
  display: flex;
  align-items: baseline;
  gap: 10px;
  margin-bottom: 10px;
}
#ob-cs .cs-title {
  font-weight: 650;
  font-size: 15px;
}
#ob-cs .cs-tog {
  display: inline-flex;
  gap: 6px;
  margin-left: auto;
  flex-wrap: wrap;
}

/* ---- Toggle buttons; the pressed state is keyed off aria-pressed ---- */
#ob-cs .cs-btn {
  font: inherit;
  font-size: 12px;
  cursor: pointer;
  padding: 4px 10px;
  border-radius: 7px;
  border: 1px solid var(--border-color, #ccc);
  background: transparent;
  color: var(--muted-color, #666);
}
#ob-cs .cs-btn:hover {
  color: var(--text-color, #222);
}
#ob-cs .cs-btn[aria-pressed="true"] {
  background: var(--primary-color, #2d2926);
  color: #fff;
  border-color: var(--primary-color, #2d2926);
}

/* ---- Scenario panel: domain label, scenario text, the two questions ---- */
#ob-cs .cs-scen {
  background: var(--surface-bg, #faf8f6);
  border: 1px solid var(--border-color, #e7e2dc);
  border-radius: 10px;
  padding: 12px 14px;
  margin-bottom: 14px;
}
#ob-cs .cs-scen .cs-dom {
  font-size: 11px;
  font-weight: 700;
  letter-spacing: .04em;
  text-transform: uppercase;
  color: var(--muted-color, #888);
}
#ob-cs .cs-scen .cs-txt {
  margin-top: 5px;
  font-size: 13.5px;
  line-height: 1.5;
}
#ob-cs .cs-scen .cs-qs {
  margin-top: 9px;
  display: flex;
  gap: 16px;
  flex-wrap: wrap;
  font-size: 12px;
  color: var(--muted-color, #666);
}
#ob-cs .cs-scen .cs-qs .cs-qpos b,
#ob-cs .cs-scen .cs-qs .cs-qneg b {
  font-weight: 650;
}

/* ---- Model cards: two columns, collapsing to one at 560px and below ---- */
#ob-cs .cs-grid {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 14px;
}
@media (max-width: 560px) {
  #ob-cs .cs-grid {
    grid-template-columns: 1fr;
  }
}
#ob-cs .cs-card {
  border: 1px solid var(--border-color, #e7e2dc);
  border-radius: 10px;
  padding: 12px 14px 14px;
}

/* ---- Card header: colour dot, model name, right-aligned tag ---- */
#ob-cs .cs-mhead {
  display: flex;
  align-items: center;
  gap: 8px;
  margin-bottom: 11px;
}
#ob-cs .cs-dot {
  width: 9px;
  height: 9px;
  border-radius: 50%;
  flex: none;
}
#ob-cs .cs-mname {
  font-weight: 650;
  font-size: 13.5px;
}
#ob-cs .cs-mtag {
  font-size: 11px;
  color: var(--muted-color, #888);
  margin-left: auto;
}

/* ---- Probability bars: label row above a rounded track with a fill ---- */
#ob-cs .cs-bar {
  margin: 8px 0;
}
#ob-cs .cs-blab {
  display: flex;
  justify-content: space-between;
  font-size: 11.5px;
  margin-bottom: 3px;
}
#ob-cs .cs-blab span:last-child {
  font-variant-numeric: tabular-nums;
  font-weight: 600;
}
#ob-cs .cs-track {
  height: 8px;
  border-radius: 5px;
  background: var(--border-color, #ece7e1);
  overflow: hidden;
}
#ob-cs .cs-fill {
  height: 100%;
  border-radius: 5px;
}

/* ---- Card footer: sum text on the left, skew badge on the right ---- */
#ob-cs .cs-foot {
  margin-top: 11px;
  padding-top: 10px;
  border-top: 1px dashed var(--border-color, #e7e2dc);
  display: flex;
  align-items: baseline;
  gap: 8px;
}
#ob-cs .cs-sum {
  font-size: 11.5px;
  color: var(--muted-color, #777);
  font-variant-numeric: tabular-nums;
}
#ob-cs .cs-badge {
  margin-left: auto;
  font-size: 12px;
  font-weight: 700;
  padding: 2px 9px;
  border-radius: 999px;
  color: #fff;
  font-variant-numeric: tabular-nums;
}

/* ---- Caption under the grid ---- */
#ob-cs .cs-cap {
  margin-top: 13px;
  font-size: 12px;
  color: var(--muted-color, #666);
}
</style>
<script>
(function () {
var root = document.getElementById("ob-cs");
if (!root) { return; }
var togEl = root.querySelector(".cs-tog"), scenEl = root.querySelector(".cs-scen"), gridEl = root.querySelector(".cs-grid");
function cssVar(n, fb) { var v = getComputedStyle(document.documentElement).getPropertyValue(n); return (v && v.trim()) || fb; }
var PROV = { OpenAI: "#2a8f82", Anthropic: "#a66e4e", Google: "#3d5a80", Mistral: "#c4553a" };
var DATA = null, SEL = 0;
/**
 * Escape a value for interpolation into HTML built by string concatenation.
 *
 * Escapes & < > as before, and additionally " and ' so the result is also
 * safe inside a quoted attribute value. In element content the extra entities
 * render exactly like the raw characters, so existing output looks the same.
 *
 * @param {*} s Value to escape; coerced with String(). null/undefined yield ""
 *   instead of the literal words "null"/"undefined".
 * @returns {string} HTML-safe text.
 */
function esc(s) {
  if (s === null || s === undefined) { return ""; }
  var entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return String(s).replace(/[&<>"']/g, function (ch) { return entities[ch]; });
}
/**
 * Build the markup for one labelled percentage bar.
 *
 * @param {string} label Text shown on the left of the label row (escaped here).
 * @param {number} val Percentage; shown rounded to an integer, while the fill
 *   width is clamped to the 0-100 range.
 * @param {string} color CSS colour for the fill; inserted as-is into the
 *   inline style.
 * @returns {string} HTML for a .cs-bar element.
 */
function bar(label, val, color) {
  var widthPct = Math.min(100, Math.max(0, val));
  var labelRow = '<div class="cs-blab"><span>' + esc(label) + '</span><span>' + val.toFixed(0) + '%</span></div>';
  var track = '<div class="cs-track"><div class="cs-fill" style="width:' + widthPct + '%;background:' + color + '"></div></div>';
  return '<div class="cs-bar">' + labelRow + track + '</div>';
}
/**
 * Re-render the whole widget from DATA for the selected case (SEL).
 *
 * Reads DATA.cases[i].{domain, scenario, pos, neg}, the per-model entry
 * DATA.cases[i][model.name].{good, bad}, and DATA.models[i].{name, provider,
 * dir}. Writes the toggle buttons, the scenario panel and one card per model.
 * Theme colours are re-read on every call, so calling it again after a theme
 * change refreshes them. Does nothing until DATA has been loaded.
 */
function draw() {
  if (!DATA) { return; }
  var OPT = cssVar("--opt", "#e07a5f"), PES = cssVar("--pes", "#3d5a80"), MUT = cssVar("--muted-color", "#888");

  // Scenario toggles. They are rebuilt on every draw, which destroys the
  // focused element; remember whether focus was inside so it can be restored.
  var hadFocus = togEl.contains(document.activeElement);
  togEl.innerHTML = DATA.cases.map(function (c, i) {
    // type="button" so the toggle never acts as a submit button if the embed
    // ends up inside a form.
    return '<button type="button" class="cs-btn" data-i="' + i + '" aria-pressed="' + (i === SEL ? "true" : "false") + '">' + esc(c.domain) + "</button>";
  }).join("");
  togEl.querySelectorAll(".cs-btn").forEach(function (b) {
    b.addEventListener("click", function () { SEL = +b.getAttribute("data-i"); draw(); });
  });
  if (hadFocus) {
    // Keep keyboard users on the toggle they just activated.
    var current = togEl.querySelector('.cs-btn[aria-pressed="true"]');
    if (current) { current.focus(); }
  }

  // Scenario panel for the selected case.
  var c = DATA.cases[SEL];
  scenEl.innerHTML = '<div class="cs-dom">' + esc(c.domain) + '</div><div class="cs-txt">' + esc(c.scenario) + '</div>' +
    '<div class="cs-qs"><span class="cs-qpos"><b>Positive:</b> ' + esc(c.pos) + '</span><span class="cs-qneg"><b>Negative:</b> ' + esc(c.neg) + '</span></div>';

  // One card per model.
  gridEl.innerHTML = DATA.models.map(function (m) {
    var d = c[m.name], col = PROV[m.provider] || MUT;
    var head = '<div class="cs-mhead"><span class="cs-dot" style="background:' + col + '"></span><span class="cs-mname">' + esc(m.name) + '</span><span class="cs-mtag">' + esc(m.dir) + '</span></div>';

    // A case without a numeric entry for this model gets a placeholder card
    // instead of throwing and taking the whole grid down.
    if (!d || typeof d.good !== "number" || typeof d.bad !== "number") {
      return '<div class="cs-card">' + head + '<div class="cs-sum">No data for this scenario.</div></div>';
    }

    // Skew is how far the two answers are from summing to 100.
    var sum = d.good + d.bad, skew = sum - 100;
    var badgeC = skew >= 0 ? OPT : PES;
    var badgeT = (skew >= 0 ? "+" : "−") + Math.abs(skew).toFixed(0) + " " + (skew >= 0 ? "optimistic" : "pessimistic");
    var missTxt = skew >= 0
      ? Math.abs(skew).toFixed(0) + " pts double-counted toward the good outcome"
      : Math.abs(skew).toFixed(0) + " pts double-counted toward the bad outcome";
    return '<div class="cs-card">' +
      head +
      bar("P(positive)", d.good, col) +
      bar("P(negative)", d.bad, MUT) +
      '<div class="cs-foot"><span class="cs-sum">sum ' + sum.toFixed(0) + ' &middot; ' + esc(missTxt) + '</span>' +
      '<span class="cs-badge" style="background:' + badgeC + '">' + badgeT + '</span></div>' +
      '</div>';
  }).join("");
}
// Fetch the dataset, store it in DATA and render. Any failure replaces the
// scenario panel with an error line. The catch sits at the end of the chain,
// so it also receives exceptions thrown by draw() on this first render.
fetch("data/cases.json")
  .then(function (res) {
    if (res.ok) { return res.json(); }
    throw new Error("x");
  })
  .then(function (payload) {
    DATA = payload;
    draw();
  })
  .catch(function () {
    scenEl.innerHTML = '<div class="cs-txt">Could not load data/cases.json.</div>';
  });

// Re-render whenever the data-theme attribute on the root element changes,
// so colours read through cssVar() are picked up again.
var themeWatcher = new MutationObserver(draw);
themeWatcher.observe(document.documentElement, { attributes: true, attributeFilter: ["data-theme"] });
})();
</script>