<!-- Repository page residue kept as a comment so no text precedes the doctype:
     worldmodel-bench / index.html
     SeaWolf-AI's picture · Update index.html · f68ed9b verified -->
<!DOCTYPE html>
<!-- lang="en": every visible string on the page is English.
     NOTE(review): confirm that no later tab is authored in Korean before relying on this. -->
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>World Model Bench (WM Bench) 2026 — Embodied AI Benchmark</title>
<meta name="description" content="The first benchmark measuring cognitive abilities of World Models. Beyond FID — Measuring Intelligence, Not Just Motion. By VIDRAFT / FINAL Bench Family.">
<meta name="keywords" content="world model benchmark, embodied AI, WM Bench, VIDRAFT, PROMETHEUS, cognitive AI, embodied intelligence, motion generation, AI benchmark 2026">
<!-- Chart.js is loaded synchronously on purpose here: presumably inline scripts further down
     call `Chart` while the page is still parsing. Verify that before adding `defer`. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.min.js"></script>
<!-- Warm up the two origins Google Fonts needs (stylesheet host + font-file host). -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Sora:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600;700&display=swap" rel="stylesheet">
<style>
/* Single stylesheet for the whole page.
   The original nested a second <style> tag inside this element; CSS treated the literal
   "<style>" text as part of the next selector and silently dropped `.wm-radio{display:none}`,
   which left the tab radio buttons visible. The nested tag and the stray extra </style>
   are removed; every rule is otherwise unchanged. */
/* RESET + DESIGN TOKENS */
*{margin:0;padding:0;box-sizing:border-box;}
:root{
--bg:#f8f9fc;--bg2:#f0f2f8;--surface:#ffffff;--surface-alt:#f5f6fa;
--border:#e2e5f0;--border-hover:#c7cce0;
--shadow-sm:0 1px 3px rgba(15,23,42,.04),0 1px 2px rgba(15,23,42,.06);
--shadow:0 4px 16px rgba(15,23,42,.06),0 1px 3px rgba(15,23,42,.08);
--shadow-lg:0 12px 40px rgba(15,23,42,.08),0 4px 12px rgba(15,23,42,.06);
--text:#0f172a;--text-sec:#475569;--text-muted:#94a3b8;
--ac:#e8593c;--ac2:#c94a2e;--ac-bg:rgba(232,89,60,.06);
--p1:#7b8fd4;--p2:#e8593c;--p3:#d4a044;
--teal:#0d9488;--green:#16a34a;--rose:#e11d48;--amber:#d97706;
--radius:16px;--radius-sm:10px;
--font:'Sora',sans-serif;--mono:'JetBrains Mono',monospace;
--tr:0.22s cubic-bezier(0.4,0,0.2,1);
}
/* PAGE BASE */
html{scroll-behavior:smooth;}
body{font-family:var(--font);background:var(--bg);color:var(--text);min-height:100vh;-webkit-font-smoothing:antialiased;font-size:13px;}
::-webkit-scrollbar{width:5px;height:4px;}
::-webkit-scrollbar-track{background:transparent;}
::-webkit-scrollbar-thumb{background:rgba(232,89,60,.2);border-radius:10px;}
::-webkit-scrollbar-thumb:hover{background:rgba(232,89,60,.4);}
.mn a:hover{color:var(--ac)!important;text-decoration:underline!important;}
::selection{background:rgba(232,89,60,.12);}
/* Fixed decorative gradient behind the content; pointer-events:none keeps it click-through. */
body::before{content:"";position:fixed;inset:0;z-index:0;pointer-events:none;
background:radial-gradient(ellipse 70% 45% at 15% 8%,rgba(232,89,60,.04),transparent 55%),
radial-gradient(ellipse 55% 35% at 85% 92%,rgba(212,160,68,.04),transparent 50%);}
/* z-index:1 lifts the content above the body::before layer. */
.wrap{position:relative;z-index:1;max-width:1400px;margin:0 auto;padding:22px 16px 70px;}
/* HEADER */
header{text-align:center;margin-bottom:20px;animation:fadeIn .6s ease-out;}
@keyframes fadeIn{from{opacity:0;transform:translateY(-10px)}to{opacity:1;transform:translateY(0)}}
.badge-row{display:flex;align-items:center;justify-content:center;gap:8px;margin-bottom:10px;flex-wrap:wrap;}
.badge{display:inline-flex;align-items:center;gap:6px;background:var(--surface);border:1px solid var(--border);border-radius:100px;padding:4px 14px;font-family:var(--mono);font-size:9px;font-weight:600;letter-spacing:2px;text-transform:uppercase;color:var(--ac);box-shadow:var(--shadow-sm);}
.pulse{width:5px;height:5px;border-radius:50%;background:var(--ac);animation:p 2s infinite;}
@keyframes p{0%,100%{opacity:1;transform:scale(1)}50%{opacity:.4;transform:scale(.8)}}
/* Gradient-filled heading text: the background is clipped to the glyphs and the fill made transparent. */
h1{font-size:clamp(20px,3vw,38px);font-weight:800;line-height:1.1;letter-spacing:-1.5px;margin-bottom:6px;
background:linear-gradient(135deg,#1e1b4b 10%,#e8593c 50%,#d4a044 90%);background-size:200%;
-webkit-background-clip:text;-webkit-text-fill-color:transparent;animation:shimmer 6s ease-in-out infinite;}
@keyframes shimmer{0%,100%{background-position:0%}50%{background-position:100%}}
.sub{color:var(--text-muted);font-size:10px;line-height:1.8;}
.sub b{color:var(--text-sec);font-weight:600;-webkit-text-fill-color:var(--text-sec);}
/* STATS */
.stats{display:flex;flex-wrap:wrap;gap:7px;justify-content:center;margin-bottom:16px;}
.st{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:8px 14px;text-align:center;min-width:80px;box-shadow:var(--shadow-sm);transition:var(--tr);}
.st:hover{box-shadow:var(--shadow);border-color:var(--border-hover);}
.stn{font-family:var(--mono);font-size:15px;font-weight:700;color:var(--ac);}
.stl{font-size:8.5px;color:var(--text-muted);margin-top:2px;text-transform:uppercase;letter-spacing:.5px;}
/* TABS */
.tab-bar{display:flex;gap:0;border-bottom:1px solid var(--border);background:var(--surface);border-radius:var(--radius-sm) var(--radius-sm) 0 0;overflow-x:auto;box-shadow:var(--shadow-sm);}
.tab{padding:10px 20px;font-size:10.5px;font-family:var(--mono);font-weight:600;color:var(--text-muted);cursor:pointer;border-bottom:2px solid transparent;transition:var(--tr);user-select:none;white-space:nowrap;letter-spacing:.3px;flex-shrink:0;}
.tab:hover{color:var(--text);background:var(--ac-bg);}
.tab.on{color:var(--ac);border-bottom-color:var(--ac);background:var(--ac-bg);}
.tpane{padding-top:14px;}
/* TABLE */
.tw{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);overflow-x:auto;box-shadow:var(--shadow);}
table{width:100%;border-collapse:collapse;font-size:11px;}
thead{background:var(--surface-alt);}
thead tr:last-child{border-bottom:2px solid var(--border);}
th{padding:8px 6px;text-align:center;font-size:8px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.3px;color:var(--text-muted);white-space:nowrap;cursor:pointer;user-select:none;vertical-align:bottom;line-height:1.6;font-weight:600;}
/* The model column stays pinned while the table scrolls horizontally. */
th.c-model{text-align:left;padding-left:12px;min-width:180px;position:sticky;left:0;background:var(--surface-alt);z-index:2;}
th:hover,th.on{color:var(--ac);}
.sa{opacity:.5;font-size:6px;margin-left:2px;}
tbody tr{border-bottom:1px solid var(--border);transition:background var(--tr);}
tbody tr:last-child{border-bottom:none;}
tbody tr:hover{background:rgba(232,89,60,.02);}
tbody tr.hl{background:rgba(22,163,74,.025);}
tbody tr.hidden{display:none;}
td{padding:8px 6px;text-align:center;vertical-align:middle;}
td.c-model{text-align:left;padding-left:12px;position:sticky;left:0;background:var(--surface);z-index:1;}
tbody tr:hover td.c-model{background:rgba(232,89,60,.02);}
/* MODEL CELL */
.mc{display:flex;flex-direction:column;gap:2px;}
.mn{font-weight:700;font-size:12px;color:var(--text);display:flex;align-items:center;gap:4px;}
.mp{font-size:8px;color:var(--text-muted);font-family:var(--mono);}
/* SCORE CELL */
.sc{display:flex;flex-direction:column;align-items:center;gap:2px;}
.sn{font-family:var(--mono);font-size:11px;font-weight:700;}
.sb{width:36px;height:3px;background:var(--border);border-radius:2px;overflow:hidden;}
.sf{height:100%;border-radius:2px;}
/* GRADE BADGE */
.gr{display:inline-block;padding:2px 7px;border-radius:5px;font-family:var(--mono);font-size:9px;font-weight:800;}
.gr-S{background:rgba(255,215,0,.15);color:#b8860b;border:1px solid rgba(255,215,0,.4);}
.gr-A{background:rgba(192,192,192,.15);color:#708090;border:1px solid rgba(192,192,192,.4);}
.gr-B{background:rgba(205,127,50,.15);color:#8b6914;border:1px solid rgba(205,127,50,.4);}
.gr-C{background:rgba(99,102,241,.1);color:#4f46e5;border:1px solid rgba(99,102,241,.2);}
.gr-D{background:rgba(100,116,139,.1);color:#64748b;border:1px solid rgba(100,116,139,.2);}
.gr-F{background:rgba(225,29,72,.1);color:#e11d48;border:1px solid rgba(225,29,72,.2);}
/* TRACK BADGE */
.tb{display:inline-block;padding:1.5px 6px;border-radius:4px;font-family:var(--mono);font-size:8px;font-weight:700;}
.tb-C{background:rgba(22,163,74,.1);color:#16a34a;border:1px solid rgba(22,163,74,.2);}
.tb-B{background:rgba(99,102,241,.1);color:#6366f1;border:1px solid rgba(99,102,241,.2);}
.tb-A{background:rgba(100,116,139,.1);color:#64748b;border:1px solid rgba(100,116,139,.2);}
/* PILLAR COLS */
.p1-col{background:rgba(123,143,212,.03);}
.p2-col{background:rgba(232,89,60,.03);}
.p3-col{background:rgba(212,160,68,.03);}
/* CARDS */
.card{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);transition:var(--tr);}
.card:hover{box-shadow:var(--shadow);border-color:var(--border-hover);}
.card h3{font-size:11px;font-family:var(--mono);font-weight:700;color:var(--ac);text-transform:uppercase;letter-spacing:.8px;margin-bottom:4px;}
.card p{font-size:9.5px;color:var(--text-muted);line-height:1.7;margin-bottom:10px;}
.grid2{display:grid;grid-template-columns:1fr 1fr;gap:12px;}
.grid3{display:grid;grid-template-columns:1fr 1fr 1fr;gap:12px;}
/* BAR CHARTS */
.bar-row{display:flex;align-items:center;gap:8px;margin-bottom:6px;}
.bar-label{font-size:9px;font-family:var(--mono);color:var(--text-sec);width:180px;flex-shrink:0;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;}
.bar-track{flex:1;height:8px;background:var(--bg2);border-radius:4px;overflow:hidden;}
.bar-fill{height:100%;border-radius:4px;transition:width .8s cubic-bezier(.4,0,.2,1);}
.bar-val{font-size:9px;font-family:var(--mono);font-weight:700;color:var(--text);width:36px;text-align:right;flex-shrink:0;}
/* PILLAR BOXES */
.pillar-box{border-radius:var(--radius-sm);padding:14px 16px;border:1px solid var(--border);}
.pillar-p1{background:linear-gradient(135deg,rgba(123,143,212,.08),rgba(123,143,212,.03));border-color:rgba(123,143,212,.25);}
.pillar-p2{background:linear-gradient(135deg,rgba(232,89,60,.08),rgba(232,89,60,.03));border-color:rgba(232,89,60,.25);}
.pillar-p3{background:linear-gradient(135deg,rgba(212,160,68,.08),rgba(212,160,68,.03));border-color:rgba(212,160,68,.25);}
/* LEGEND */
.leg{margin-top:12px;display:flex;flex-wrap:wrap;gap:10px;align-items:center;}
.lt{font-size:8.5px;font-family:var(--mono);color:var(--text-muted);text-transform:uppercase;letter-spacing:.8px;font-weight:600;}
.li{display:flex;align-items:center;gap:3px;font-size:9.5px;color:var(--text-sec);}
.ld{width:7px;height:7px;border-radius:50%;}
/* INSIGHT BOX */
.insight{padding:10px 14px;background:var(--ac-bg);border-radius:8px;border-left:3px solid var(--ac);font-size:9px;color:var(--text-sec);line-height:1.7;}
.insight b{color:var(--ac);}
/* MODAL */
.modal-overlay{display:none;position:fixed;inset:0;background:rgba(15,23,42,.5);z-index:1000;justify-content:center;align-items:center;backdrop-filter:blur(4px);}
.modal-overlay.open{display:flex;}
.modal-box{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:24px;max-width:640px;width:92%;max-height:85vh;overflow-y:auto;box-shadow:var(--shadow-lg);position:relative;animation:modalIn .2s ease;}
@keyframes modalIn{from{opacity:0;transform:scale(.95)}to{opacity:1;transform:scale(1)}}
.modal-close{position:absolute;top:12px;right:16px;background:none;border:none;font-size:20px;cursor:pointer;color:var(--text-muted);}
.modal-close:hover{color:var(--text);}
/* DARK MODE: re-declares the colour tokens on body.dark so every var() consumer follows. */
body.dark{--bg:#0f172a;--bg2:#1e293b;--surface:#1e293b;--surface-alt:#334155;
--border:#334155;--border-hover:#475569;--text:#e2e8f0;--text-sec:#94a3b8;--text-muted:#64748b;
--shadow-sm:0 1px 3px rgba(0,0,0,.3);--shadow:0 4px 16px rgba(0,0,0,.3);--shadow-lg:0 12px 40px rgba(0,0,0,.4);}
body.dark th.c-model,body.dark td.c-model{background:var(--surface)!important;}
body.dark thead{background:var(--surface-alt)!important;}
/* MOBILE */
@media(max-width:768px){
.wrap{padding:10px 8px 40px;}
h1{font-size:20px!important;}
.tab-bar{flex-wrap:nowrap;}
.grid2,.grid3{grid-template-columns:1fr!important;}
th.c-model,td.c-model{position:static!important;}
}
/* CSS-ONLY TABS
   The .wm-radio inputs are hidden and toggled through their <label for> elements.
   The general-sibling (~) selectors below only match when the radios come BEFORE
   .wm-bar and the .wm-pN panes under the same parent, so keep that markup order. */
.wm-radio{display:none;}
.wm-label{padding:10px 18px;font-size:10.5px;font-family:var(--mono);font-weight:600;color:var(--text-muted);cursor:pointer;border-bottom:2px solid transparent;transition:var(--tr);user-select:none;white-space:nowrap;letter-spacing:.3px;flex-shrink:0;display:inline-block;}
.wm-label:hover{color:var(--text);background:var(--ac-bg);}
/* Every pane is hidden by default; only the pane paired with the checked radio is shown. */
.wm-p1,.wm-p2,.wm-p3,.wm-p4,.wm-p5{display:none;}
#wm-r1:checked ~ .wm-p1,
#wm-r2:checked ~ .wm-p2,
#wm-r3:checked ~ .wm-p3,
#wm-r4:checked ~ .wm-p4,
#wm-r5:checked ~ .wm-p5{display:block;}
/* Highlight the label belonging to the checked radio. */
#wm-r1:checked ~ .wm-bar label[for=wm-r1],
#wm-r2:checked ~ .wm-bar label[for=wm-r2],
#wm-r3:checked ~ .wm-bar label[for=wm-r3],
#wm-r4:checked ~ .wm-bar label[for=wm-r4],
#wm-r5:checked ~ .wm-bar label[for=wm-r5]{color:var(--ac);border-bottom-color:var(--ac);background:var(--ac-bg);}
</style>
</head>
<body>
<div class="wrap">
<!-- ===== HEADER ===== -->
<!-- Page header: status badge, dark-mode toggle, external resource links, title and score legend. -->
<header>
<div class="badge-row">
<div class="badge"><div class="pulse"></div>LIVE · 2026.03 · v1.0</div>
<!-- type="button" makes the toggle an explicit in-page action (the default type is "submit"). -->
<button id="WM_DARK_BTN" type="button"
style="background:var(--surface);border:1px solid var(--border);border-radius:20px;padding:4px 14px;font-size:10px;font-family:var(--mono);color:var(--text-sec);cursor:pointer;font-weight:700;transition:all .2s;box-shadow:var(--shadow-sm)">🌙 Dark</button>
<!-- External links open in a new tab; rel="noopener noreferrer" keeps the opened page from reaching window.opener. -->
<div style="display:flex;gap:4px;flex-wrap:wrap">
<a href="https://huggingface.co/datasets/FINAL-Bench/World-Model" target="_blank" rel="noopener noreferrer"
style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#ff9d00,#ffcd00);color:#1a1a2e;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none;box-shadow:0 1px 3px rgba(255,157,0,.3)">🤗 Dataset</a>
<a href="https://huggingface.co/spaces/FINAL-Bench/World-Model" target="_blank" rel="noopener noreferrer"
style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#e55d2b,#f59e0b);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none;box-shadow:0 1px 3px rgba(229,93,43,.3)">🌍 World Model</a>
<a href="https://huggingface.co/blog/FINAL-Bench/world-model" target="_blank" rel="noopener noreferrer"
style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#1d4ed8,#6366f1);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none;box-shadow:0 1px 3px rgba(99,102,241,.3)">📝 Article</a>
<a href="https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard" target="_blank" rel="noopener noreferrer"
style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#7c3aed,#6366f1);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none">🏆 ALL Bench</a>
<a href="https://huggingface.co/spaces/FINAL-Bench/Leaderboard" target="_blank" rel="noopener noreferrer"
style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#0d9488,#059669);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none">🧬 FINAL Bench</a>
</div>
</div>
<h1>🔥 World Model Bench 2026</h1>
<p class="sub" style="margin-bottom:8px">
<b>Beyond FID — Measuring Intelligence, Not Just Motion.</b> The first benchmark for evaluating <b>cognitive abilities</b> of World Models in Embodied Intelligence.<br>
3 Pillars · 10 Categories · 100 Scenarios · Automatic Scoring · Part of <b>FINAL Bench Family</b> by VIDRAFT
</p>
<!-- Score formula and grade thresholds; the pillar maxima (250 + 450 + 300) sum to the 1000-point scale. -->
<div style="display:inline-flex;align-items:center;gap:8px;background:linear-gradient(135deg,rgba(232,89,60,.06),rgba(212,160,68,.04));border:1px solid var(--border);border-radius:10px;padding:6px 14px;margin-bottom:8px;font-size:9px;line-height:1.6">
<span style="font-family:var(--mono);font-weight:700;color:var(--ac)">WM Score = P1(250) + P2(450) + P3(300)</span>
<span style="color:var(--text-muted)">│</span>
<span style="color:var(--text-sec)">👁 Perception · 🧠 Cognition · 🔥 Embodiment</span>
<span style="color:var(--text-muted)">│</span>
<span>
<span style="color:#b8860b;font-weight:700">S</span><span style="color:var(--text-muted)">≥900</span> &nbsp;
<span style="color:#708090;font-weight:700">A</span><span style="color:var(--text-muted)">≥750</span> &nbsp;
<span style="color:#8b6914;font-weight:700">B</span><span style="color:var(--text-muted)">≥600</span> &nbsp;
<span style="color:#4f46e5;font-weight:700">C</span><span style="color:var(--text-muted)">≥400</span>
</span>
</div>
</header>
<!-- ===== STATS ===== -->
<!-- Headline figures: one .st tile per figure, value in .stn and caption in .stl. -->
<div class="stats">
  <div class="st">
    <div class="stn">26</div>
    <div class="stl">Models</div>
  </div>
  <div class="st">
    <div class="stn">3</div>
    <div class="stl">Pillars</div>
  </div>
  <div class="st">
    <div class="stn">10</div>
    <div class="stl">Categories</div>
  </div>
  <div class="st">
    <div class="stn">100</div>
    <div class="stl">Scenarios</div>
  </div>
  <!-- The top score is the only value tinted green instead of the accent colour. -->
  <div class="st">
    <div class="stn" style="color:var(--green)">726</div>
    <div class="stl">Top Score</div>
  </div>
  <div class="st">
    <div class="stn">1000</div>
    <div class="stl">Max Score</div>
  </div>
</div>
<!-- ===== TABS ===== -->
<!-- CSS-only tab switcher: one radio per tab, toggled through the matching <label for>.
     The stylesheet's `#wm-rN:checked ~ …` selectors need these inputs to stay BEFORE the
     tab bar and the panes, as siblings under the same parent. -->
<input class="wm-radio" type="radio" id="wm-r1" name="wm-tab" checked>
<input class="wm-radio" type="radio" id="wm-r2" name="wm-tab">
<input class="wm-radio" type="radio" id="wm-r3" name="wm-tab">
<input class="wm-radio" type="radio" id="wm-r4" name="wm-tab">
<input class="wm-radio" type="radio" id="wm-r5" name="wm-tab">
<div class="tab-bar wm-bar" id="WM_TAB_BAR">
<label class="wm-label" for="wm-r1">🏆 Leaderboard</label>
<label class="wm-label" for="wm-r2">📋 Categories</label>
<label class="wm-label" for="wm-r3">📐 Structure</label>
<label class="wm-label" for="wm-r4">📝 Submit</label>
<label class="wm-label" for="wm-r5">ℹ️ About</label>
</div>
<!-- ===== TAB: LEADERBOARD ===== -->
<!-- Leaderboard pane (tab 1): ranking table, grade/track legends, and five chart canvases. -->
<div id="lb" class="tpane wm-p1">
<div class="tw">
<table>
<thead>
<tr>
<!-- scope="col" ties each header to its column for assistive technology. -->
<th class="c-model" scope="col" style="text-align:left">Model</th>
<th scope="col" title="WM Score (0~1000)">WM Score<span class="sa">↕</span></th>
<th scope="col">Grade</th>
<th class="p1-col" scope="col" title="👁 Perception (0~250)">👁 Perception<span class="sa">↕</span></th>
<th class="p2-col" scope="col" title="🧠 Cognition (0~450)">🧠 Cognition<span class="sa">↕</span></th>
<th class="p3-col" scope="col" title="🔥 Embodiment (0~300)">🔥 Embodiment<span class="sa">↕</span></th>
<th scope="col" title="FPS">FPS<span class="sa">↕</span></th>
<th scope="col" title="Cognitive Latency (ms)">Lat(ms)<span class="sa">↕</span></th>
<th scope="col">Track</th>
<th scope="col">Brain</th>
<th scope="col">Motion</th>
<th scope="col">GPU</th>
</tr>
</thead>
<!-- Empty in the markup; presumably filled by script via the LB_BODY id (script not visible here). -->
<tbody id="LB_BODY"></tbody>
</table>
</div>
<div class="leg">
<span class="lt">Grade:</span>
<div class="li"><div class="ld" style="background:#b8860b"></div>S≥900</div>
<div class="li"><div class="ld" style="background:#708090"></div>A≥750</div>
<div class="li"><div class="ld" style="background:#8b6914"></div>B≥600</div>
<div class="li"><div class="ld" style="background:#4f46e5"></div>C≥400</div>
<div class="li"><div class="ld" style="background:#64748b"></div>D≥200</div>
<div class="li"><div class="ld" style="background:#e11d48"></div>F&lt;200</div>
<span style="font-family:var(--mono);font-size:8.5px;color:var(--green);margin-left:10px">✓ Track C = Live Demo Verified</span>
</div>
<div style="margin-top:16px;padding:12px 16px;background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);font-size:9px;color:var(--text-sec);line-height:1.7">
<b style="color:var(--ac)">Track A</b> = Text-Only · max 750 pts &nbsp;│&nbsp;
<b style="color:#6366f1">Track B</b> = Text + Performance · max 1000 pts &nbsp;│&nbsp;
<b style="color:var(--green)">Track C</b> = Live Demo + Verified · max 1000 pts + ✓
</div>
<!-- Charts: a <canvas> has no accessible content of its own, so each one gets role="img" and a short aria-label. -->
<div style="margin-top:20px;">
<div style="display:grid;grid-template-columns:1fr 1fr;gap:14px;margin-bottom:14px;">
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">🏆 WM Score Ranking</div>
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">✓ = officially verified &nbsp;·&nbsp; est. = estimated from published data</p>
<canvas id="cBar" height="240" role="img" aria-label="WM Score ranking chart"></canvas>
<div style="margin-top:10px;padding:8px 10px;background:var(--ac-bg);border-radius:6px;font-size:8.5px;color:var(--text-sec);line-height:1.7;border-left:2px solid var(--ac)"><b>PROMETHEUS</b> is the only officially verified Track C model (726/1000 · Grade B). Others are estimates based on published data.</div>
</div>
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">🕸️ Pillar Radar — Top 5</div>
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">Normalized % per pillar (100 = full marks for that pillar)</p>
<canvas id="cRadar" height="240" role="img" aria-label="Pillar radar chart for the top 5 models"></canvas>
</div>
</div>
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);margin-bottom:14px;">
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">📊 Category Breakdown — Scored Models × 10 Categories</div>
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">PROMETHEUS leads C04 Threat Diff · C05 Emotion Escalation by a wide margin. V-JEPA 2 strong on C03. GAIA-3 leads C01 from driving data.</p>
<!-- The canvas keeps a 780px minimum width; the wrapper scrolls horizontally on narrow screens. -->
<div style="overflow-x:auto;"><canvas id="cCats" style="min-width:780px;height:260px;" role="img" aria-label="Category breakdown chart: scored models across 10 categories"></canvas></div>
<div style="margin-top:10px;padding:8px 10px;background:rgba(212,160,68,.07);border-radius:6px;font-size:8.5px;color:var(--text-sec);line-height:1.7;border-left:2px solid var(--p3)"><b>Key insight:</b> C05·C10 have zero prior research. DreamerV3 excels at C06 memory. V-JEPA 2 leads C10 body-swap (zero-shot robot).</div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:14px;">
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">🧠 Cognition Gap (P2 · 450 pts)</div>
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">The core differentiator of WM Bench — PROMETHEUS leads by a wide margin</p>
<canvas id="cCognition" height="200" role="img" aria-label="Cognition (P2) score comparison chart"></canvas>
</div>
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">🌐 Perception vs Cognition</div>
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">Scatter plot — upper-right (high perception + high cognition) is ideal</p>
<canvas id="cScatter" height="200" role="img" aria-label="Scatter plot of perception versus cognition scores"></canvas>
</div>
</div>
</div></div>
<div id="cats" class="tpane wm-p2">
<!-- P1 Header -->
<div style="display:flex;align-items:center;gap:10px;margin-bottom:12px;padding:14px 16px;background:rgba(123,143,212,.06);border:1px solid rgba(123,143,212,.2);border-radius:var(--radius-sm);">
<span style="font-size:1.8rem">👁</span>
<div style="flex:1">
<div style="font-size:12px;font-weight:800;color:var(--p1)">P1 · Perception — 25% · 250 pts</div>
<div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);margin-top:2px">How accurately does the model perceive its environment? · Covers areas analogous to existing metrics (Occupancy Grid, BABEL)</div>
</div>
<div style="text-align:right;flex-shrink:0">
<div style="font-family:var(--mono);font-size:24px;font-weight:800;color:var(--p1)">140<span style="font-size:11px;color:var(--text-muted)">/250</span></div>
<div style="font-size:8px;color:var(--text-muted)">PROMETHEUS</div>
</div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:20px;">
<!-- C01 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p1);">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;">
<span style="font-family:var(--mono);font-weight:800;color:var(--p1);font-size:13px">C01</span>
<span style="font-weight:700;font-size:11px">Environmental Awareness</span>
<span style="margin-left:auto;font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
Measures whether the model correctly identifies <b>walls, obstacles, and terrain</b> in all four directions (left, right, forward, back). Unlike occupancy grids which only check if space is free, WM Bench requires understanding of <b>distance-aware danger classification</b>.
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px">
<div style="color:var(--text-muted)">Example scene:</div>
<div>walls: front=3.0m, left=null, right=null</div>
<div style="color:var(--p1)">Expected → fwd=danger(wall), others=safe</div>
</div>
<div style="display:flex;align-items:center;justify-content:space-between">
<span style="font-size:8px;color:var(--text-muted)">Analogous: Occupancy Grid evaluation</span>
<span style="font-family:var(--mono);font-weight:700;color:var(--p1);font-size:11px">PROM: 65/100</span>
</div>
</div>
<!-- C02 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p1);">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;">
<span style="font-family:var(--mono);font-weight:800;color:var(--p1);font-size:13px">C02</span>
<span style="font-weight:700;font-size:11px">Entity Recognition & Classification</span>
<span style="margin-left:auto;font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
Tests whether the model correctly classifies <b>NPC type (beast / woman / man)</b>, behavior state (stop / approach / charge / wander), and translates this into appropriate danger assessment. A beast charging from 3m vs a woman waving from 3m must produce <b>completely different responses</b>.
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px">
<div style="color:var(--text-muted)">Example scene:</div>
<div>npc_type:"beast", behavior:"charge", dist:3.0m</div>
<div style="color:var(--p1)">Expected → fwd=danger(beast), sprint away</div>
</div>
<div style="display:flex;align-items:center;justify-content:space-between">
<span style="font-size:8px;color:var(--text-muted)">Analogous: BABEL action recognition</span>
<span style="font-family:var(--mono);font-weight:700;color:var(--p1);font-size:11px">PROM: 75/100</span>
</div>
</div>
</div>
<!-- P2 Header -->
<div style="display:flex;align-items:center;gap:10px;margin-bottom:12px;padding:14px 16px;background:rgba(232,89,60,.05);border:1px solid rgba(232,89,60,.2);border-radius:var(--radius-sm);">
<span style="font-size:1.8rem">🧠</span>
<div style="flex:1">
<div style="font-size:12px;font-weight:800;color:var(--p2)">P2 · Cognition — 45% · 450 pts · Core Differentiator</div>
<div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);margin-top:2px">Does the model judge intelligently? · ALL 5 categories are first-ever definitions — no prior benchmark measures these</div>
</div>
<div style="text-align:right;flex-shrink:0">
<div style="font-family:var(--mono);font-size:24px;font-weight:800;color:var(--p2)">390<span style="font-size:11px;color:var(--text-muted)">/450</span></div>
<div style="font-size:8px;color:var(--text-muted)">PROMETHEUS</div>
</div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:20px;">
<!-- C03 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;">
<span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C03</span>
<span style="font-weight:700;font-size:11px">Prediction-Based Reasoning</span>
<span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦ NEW</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
Tests <b>4-directional future state prediction</b>. Given a scene, the model must predict which directions will become dangerous and choose the optimal escape route. This requires understanding of <b>NPC movement trajectories, wall proximity over time, and compound threat interactions</b>. No existing benchmark evaluates this.
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px">
<div style="color:var(--text-muted)">Example — approaching beast from left + wall on right:</div>
<div style="color:var(--ac)">PREDICT: left=danger(beast), right=danger(wall), fwd=safe, back=safe</div>
<div style="color:var(--p3)">MOTION: a person sprinting forward in fear</div>
</div>
<div style="display:flex;align-items:center;justify-content:space-between">
<span style="font-size:8px;color:var(--ac);font-weight:600">✦ World first — no prior benchmark</span>
<span style="font-family:var(--mono);font-weight:700;color:var(--ac);font-size:11px">PROM: 85/100</span>
</div>
</div>
<!-- C04 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;">
<span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C04</span>
<span style="font-weight:700;font-size:11px">Threat-Type Differentiated Response</span>
<span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦ NEW</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
A charging beast and a charging human at equal distance are <b>fundamentally different threats</b>. This category measures whether the model responds with <b>proportional, context-aware reactions</b>: sprint from a beast, cautiously step back from a human. Generic danger detection is insufficient — the quality of differentiation is scored.
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px">
<div>beast charge → <span style="color:var(--ac)">sprint in desperate terror</span></div>
<div>human charge → <span style="color:var(--p1)">dodge sideways, defensive posture</span></div>
</div>
<div style="display:flex;align-items:center;justify-content:space-between">
<span style="font-size:8px;color:var(--ac);font-weight:600">✦ World first — no prior benchmark</span>
<span style="font-family:var(--mono);font-weight:700;color:var(--ac);font-size:11px">PROM: 90/100</span>
</div>
</div>
<!-- C05 -->
<div style="background:var(--surface);border:1px solid rgba(217,119,6,.3);border-radius:var(--radius-sm);padding:14px;border-left:3px solid #d97706;">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;">
<span style="font-family:var(--mono);font-weight:800;color:#d97706;font-size:13px">C05</span>
<span style="font-weight:700;font-size:11px">Autonomous Emotion Escalation</span>
<span style="margin-left:auto;font-size:7.5px;background:rgba(217,119,6,.15);color:#d97706;padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦✦ NO PRIOR RESEARCH</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
As a threat persists and closes in, the character's emotional state must <b>autonomously escalate</b>: alert → fear → panic → despair. This is not programmed animation switching — the model must infer emotional intensity from scene context and express it through increasingly urgent motion. <b>Zero prior benchmark or paper has attempted to measure this.</b>
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px">
<div>dist 12m → <span style="color:var(--p1)">cautious alert stance</span></div>
<div>dist 6m → <span style="color:#d97706">backing away in fear</span></div>
<div>dist 2m → <span style="color:var(--ac)">sprinting in full panic</span></div>
</div>
<div style="display:flex;align-items:center;justify-content:space-between">
<span style="font-size:8px;color:#d97706;font-weight:700">✦✦ No prior research exists anywhere</span>
<span style="font-family:var(--mono);font-weight:700;color:#d97706;font-size:11px">PROM: 85/100</span>
</div>
</div>
<!-- C06 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;">
<span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C06</span>
<span style="font-weight:700;font-size:11px">Contextual Memory Utilization</span>
<span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦ NEW</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
The model receives <b>recent_decisions[]</b> — a short history of past actions — and must incorporate this into its current judgment. If the model previously hit a wall going left, it should avoid that direction. If a beast repeatedly attacked from the front, it should pre-emptively guard that angle. <b>Stateless models will fail this entirely.</b>
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px">
<div style="color:var(--text-muted)">recent_decisions: ["hit_wall_front", "turned_right"]</div>
<div style="color:var(--ac)">Expected: avoid front, continue right — not reset</div>
</div>
<div style="display:flex;align-items:center;justify-content:space-between">
<span style="font-size:8px;color:var(--ac);font-weight:600">✦ World first — no prior benchmark</span>
<span style="font-family:var(--mono);font-weight:700;color:var(--ac);font-size:11px">PROM: 60/100</span>
</div>
</div>
<!-- C07 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);grid-column:1/-1;">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;">
<span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C07</span>
<span style="font-weight:700;font-size:11px">Post-Threat Adaptive Recovery</span>
<span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦ NEW</span>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:14px">
<div style="font-size:9px;color:var(--text-sec);line-height:1.7">
When a threat disappears, the model must <b>gradually de-escalate</b> — not instantly reset to neutral. A character that was sprinting in panic should slow to a cautious jog, scan the surroundings, then gradually relax over multiple frames. Abrupt state resets are penalized. The recovery curve must be proportional to prior threat intensity.
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.9">
<div>threat gone → <span style="color:var(--p3)">slow jog, scan surroundings</span></div>
<div>2s later → <span style="color:var(--p1)">walk cautiously, still alert</span></div>
<div>5s later → <span style="color:var(--green)">relaxed walk, recovered</span></div>
<div style="color:var(--ac);margin-top:4px">PROM: 70/100</div>
</div>
</div>
</div>
</div>
<!-- P3 Header -->
<div style="display:flex;align-items:center;gap:10px;margin-bottom:12px;padding:14px 16px;background:rgba(212,160,68,.05);border:1px solid rgba(212,160,68,.25);border-radius:var(--radius-sm);">
<span style="font-size:1.8rem">🔥</span>
<div style="flex:1">
<div style="font-size:12px;font-weight:800;color:var(--p3)">P3 · Embodiment — 30% · 300 pts</div>
<div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);margin-top:2px">Does judgment translate naturally into physical expression? · C08 (new) · C09 (existing/FVD) · C10 (new, no prior research)</div>
</div>
<div style="text-align:right;flex-shrink:0">
<div style="font-family:var(--mono);font-size:24px;font-weight:800;color:var(--p3)">196<span style="font-size:11px;color:var(--text-muted)">/300</span></div>
<div style="font-size:8px;color:var(--text-muted)">PROMETHEUS</div>
</div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:12px;margin-bottom:20px;">
<!-- C08 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p3);">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap;">
<span style="font-family:var(--mono);font-weight:800;color:var(--p3);font-size:13px">C08</span>
<span style="font-weight:700;font-size:11px">Motion-Emotion Expression</span>
<span style="font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦ NEW</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
The MOTION line must convey <b>emotional richness proportional to the scene</b>. "A person walks" scores 0. "A person sprinting right, arms flailing in desperate terror" scores 100. Scored against a keyword taxonomy of 80+ motion-emotion descriptors mapped to each scenario type.
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8">
<div style="color:var(--text-muted)">Low: "a person moves left"</div>
<div style="color:var(--p3)">High: "a person lunging left in blind panic"</div>
</div>
<div style="margin-top:8px;text-align:right"><span style="font-family:var(--mono);font-weight:700;color:var(--p3);font-size:11px">PROM: 80/100</span></div>
</div>
<!-- C09 -->
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p3);">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap;">
<span style="font-family:var(--mono);font-weight:800;color:var(--p3);font-size:13px">C09</span>
<span style="font-weight:700;font-size:11px">Real-Time Cognitive Performance</span>
<span style="font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
Measures <b>inference latency and FPS</b> under cognitive load. A model that thinks correctly but takes 10 seconds per frame cannot power a real-time agent. Track B/C submitters report measured FPS and latency; Track A submitters receive N/A for this category (max 750 pts).
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8">
<div>≥30 FPS → full marks</div>
<div>&lt;1 FPS → 0 pts</div>
<div style="color:var(--p3)">PROMETHEUS: 47 FPS ✓</div>
</div>
<div style="margin-top:8px;text-align:right"><span style="font-family:var(--mono);font-weight:700;color:var(--p3);font-size:11px">PROM: 85/100</span></div>
</div>
<!-- C10 -->
<div style="background:var(--surface);border:1px solid rgba(217,119,6,.3);border-radius:var(--radius-sm);padding:14px;border-left:3px solid #d97706;">
<div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap;">
<span style="font-family:var(--mono);font-weight:800;color:#d97706;font-size:13px">C10</span>
<span style="font-weight:700;font-size:11px">Body-Swap Extensibility</span>
<span style="font-size:7.5px;background:rgba(217,119,6,.15);color:#d97706;padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦✦ NO PRIOR RESEARCH</span>
</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px">
The <b>same cognitive brain must drive different body types</b> without retraining: humanoid, quadruped, robotic arm, winged body. Cognitive decisions (left=danger) must translate into body-appropriate motion (bipedal sidestep vs quadruped pivot). This is the key capability gap for real-world robot deployment.
</div>
<div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8">
<div>human body β†’ "sidestep right"</div>
<div>robot body β†’ "servo-driven pivot right"</div>
<div style="color:#d97706">PROMETHEUS: 35/100 (Phase 3 target)</div>
</div>
<div style="margin-top:8px;text-align:right"><span style="font-family:var(--mono);font-weight:700;color:#d97706;font-size:11px">PROM: 35/100</span></div>
</div>
</div>
</div>
<!-- ===== TAB: STRUCTURE ===== -->
<div id="structure" class="tpane wm-p3">
<div class="grid2" style="margin-bottom:14px">
<div class="card">
<h3>Input / Output Format</h3>
<p>All models are evaluated via the same text interface. No 3D environment required.</p>
<div style="background:var(--bg2);border-radius:8px;padding:12px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.9">
<div style="color:var(--text-muted);margin-bottom:4px">INPUT — scene_context JSON</div>
<div style="color:var(--text)">{</div>
<div style="color:var(--text);padding-left:12px">"walls": {"left": 3.0, "right": null},</div>
<div style="color:var(--text);padding-left:12px">"npc_type": "beast", "npc_distance": 4.5</div>
<div style="color:var(--text)">}</div>
<div style="margin-top:8px;color:var(--text-muted)">OUTPUT — 2 lines required</div>
<div style="color:var(--p2)">PREDICT: left=danger(wall), right=safe, fwd=danger(beast)</div>
<div style="color:var(--p3)">MOTION: a person sprinting right in desperate terror</div>
</div>
</div>
<div class="card">
<h3>Scoring Principles</h3>
<p>All scoring is quantitative and deterministic. Zero subjective judgment.</p>
<div style="display:flex;flex-direction:column;gap:7px">
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
<span style="color:var(--green);font-weight:700;flex-shrink:0">✓</span>
<span style="color:var(--text-sec)"><b>Quantitative</b> — keyword parsing + numeric comparison, no human judgment</span>
</div>
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
<span style="color:var(--green);font-weight:700;flex-shrink:0">✓</span>
<span style="color:var(--text-sec)"><b>Deterministic</b> — same input → same score (temperature=0.0)</span>
</div>
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
<span style="color:var(--green);font-weight:700;flex-shrink:0">✓</span>
<span style="color:var(--text-sec)"><b>Third-party reproducible</b> — full scoring code published</span>
</div>
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
<span style="color:var(--green);font-weight:700;flex-shrink:0">✓</span>
<span style="color:var(--text-sec)"><b>No 3D needed</b> — any model can participate via API</span>
</div>
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
<span style="color:var(--green);font-weight:700;flex-shrink:0">✓</span>
<span style="color:var(--text-sec)"><b>Not self-evaluated</b> — our scoring engine makes the call</span>
</div>
</div>
</div>
</div>
<!-- Existing vs New comparison summary -->
<!-- Two-column summary: categories with an existing analogous metric (left) vs. categories newly defined by WM Bench (right).
     C08 is listed on the right so this summary agrees with the C08 card ("NEW"), the P3 header ("C08 (new)") and CAT_DATA (type "new"). -->
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:14px;">
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;">
<div style="font-size:10px;font-weight:800;color:#64748b;margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">📐 Existing Benchmark Domains · 3 categories</div>
<div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">Covers areas analogous to FID · FVD · HumanML3D · BABEL</div>
<div style="display:flex;flex-direction:column;gap:5px;">
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C01</span>Env. Awareness — analogous to Occupancy Grid</div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C02</span>Entity Recognition — analogous to BABEL</div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C09</span>Real-Time Performance — analogous to FVD</div>
</div>
</div>
<div style="background:linear-gradient(135deg,rgba(232,89,60,.05),rgba(212,160,68,.03));border:1px solid rgba(232,89,60,.2);border-radius:var(--radius-sm);padding:14px;">
<div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">⚑ VIDRAFT New Definitions · 7 categories</div>
<div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">Capabilities no existing benchmark has ever measured</div>
<div style="display:flex;flex-direction:column;gap:5px;">
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C03</span>Prediction-Based Reasoning <span style="color:var(--ac);font-size:8px">✦ newly defined</span></div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C04</span>Threat-Type Differentiated Response <span style="color:var(--ac);font-size:8px">✦ newly defined</span></div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C05</span>Autonomous Emotion Escalation <span style="color:#d97706;font-size:8px">✦✦ no prior research</span></div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C06</span>Contextual Memory Utilization <span style="color:var(--ac);font-size:8px">✦ newly defined</span></div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C07</span>Post-Threat Adaptive Recovery <span style="color:var(--ac);font-size:8px">✦ newly defined</span></div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C08</span>Motion-Emotion Expression <span style="color:var(--ac);font-size:8px">✦ newly defined</span></div>
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C10</span>Body-Swap Extensibility <span style="color:#d97706;font-size:8px">✦✦ no prior research</span></div>
</div>
</div>
</div>
<!-- 10 Categories table -->
<!-- Category table. The tbody is empty in markup; rows are presumably injected into #CAT_TABLE by the page script (confirm against the renderer).
     Header cells carry scope="col" so assistive technology can associate each data cell with its column. -->
<div class="tw" style="margin-bottom:14px">
<table>
<thead>
<tr>
<th scope="col" style="text-align:left;padding-left:12px;min-width:50px">Cat</th>
<th scope="col" style="text-align:left;min-width:200px">Category / Description</th>
<th scope="col" style="text-align:left">Pillar</th>
<th scope="col" style="text-align:center;min-width:70px">Type</th>
<th scope="col" style="text-align:left;min-width:160px">Analogous Metric</th>
<th scope="col" style="text-align:center;min-width:120px">Definition Status</th>
<th scope="col" style="text-align:center">Max</th>
</tr>
</thead>
<tbody id="CAT_TABLE"></tbody>
</table>
</div>
<!-- FINAL Bench Family -->
<!-- FINAL Bench Family card: sibling benchmarks, with the current page marked "You are here".
     The external link opens in a new tab, so it carries rel="noopener noreferrer" to keep the opened page from reaching window.opener. -->
<div class="card">
<h3>FINAL Bench Family</h3>
<div style="display:flex;gap:12px;flex-wrap:wrap">
<div style="flex:1;min-width:200px;padding:12px;background:var(--bg);border-radius:8px;border:1px solid var(--border)">
<div style="font-size:10px;font-weight:800;color:#7c3aed;margin-bottom:4px">🧬 FINAL Bench</div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.6">Text AGI measurement · HF Global Dataset Top 5<br>Covered by 4 press outlets (2026.02)</div>
<a href="https://huggingface.co/datasets/FINAL-Bench/Metacognitive" target="_blank" rel="noopener noreferrer" style="font-size:8px;color:#7c3aed;font-family:var(--mono)">↗ Visit</a>
</div>
<div style="flex:1;min-width:200px;padding:12px;background:linear-gradient(135deg,rgba(232,89,60,.06),rgba(212,160,68,.04));border-radius:8px;border:1px solid rgba(232,89,60,.2)">
<div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:4px">🔥 WM Bench <span style="font-size:7px;background:var(--ac);color:#fff;padding:1px 5px;border-radius:4px">NEW</span></div>
<div style="font-size:9px;color:var(--text-sec);line-height:1.6">Embodied AGI (world models) · World's first<br>Quantitative cognitive evaluation</div>
<span style="font-size:8px;color:var(--ac);font-family:var(--mono)">← You are here</span>
</div>
</div>
</div>
</div>
<!-- ===== TAB: SUBMIT ===== -->
<div id="submit" class="tpane wm-p4">
<div class="grid2" style="margin-bottom:14px">
<div class="card">
<h3>📀 Track A — Text Only</h3>
<p>Simplest entry. LLMs, rule-based systems, any API-compatible model. Max 750 pts.</p>
<ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px">
<li>Prepare an OpenAI-compatible API endpoint</li>
<li>Run your model on all 100 scenarios in <code style="background:var(--bg2);padding:1px 4px;border-radius:3px;font-family:var(--mono)">wm_bench_dataset.json</code></li>
<li>Output the 2-line PREDICT + MOTION format</li>
<li>Submit your result JSON to the HF Discussion board</li>
</ol>
</div>
<div class="card">
<h3>🎯 Track B/C — Full Evaluation</h3>
<p>Track A + performance metrics or live demo. Max 1000 pts.</p>
<ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px">
<li>Complete Track A</li>
<li>Measure FPS, Latency, and GPU metrics</li>
<li>Track C: include a working demo URL</li>
<li>Submit full JSON to HF Discussion board</li>
</ol>
</div>
</div>
<div class="card" style="margin-bottom:14px">
<h3>Submission JSON Format</h3>
<div style="background:var(--bg2);border-radius:8px;padding:14px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.8;overflow-x:auto">
<pre style="margin:0">{
"benchmark": "WM Bench v1.0",
"model_name": "YourModel v1.0",
"organization": "YourOrg",
"track": "A",
"wm_score": 0,
"grade": "?",
"fps": 0,
"cognitive_latency_ms": 0,
"gpu": "NVIDIA A100",
"pillar_scores": {
"P1_perception": 0,
"P2_cognition": 0,
"P3_embodiment": 0
},
"category_scores": {
"C01":0,"C02":0,"C03":0,"C04":0,"C05":0,
"C06":0,"C07":0,"C08":0,"C09":0,"C10":0
},
"paper_url": "",
"demo_url": ""
}</pre>
</div>
</div>
<!-- Submission call-to-action. Opens the HF discussion board in a new tab; rel="noopener noreferrer" keeps the opened page from reaching window.opener. -->
<div style="text-align:center;padding:16px">
<a href="https://huggingface.co/datasets/FINAL-Bench/World-Model/discussions" target="_blank" rel="noopener noreferrer"
style="display:inline-flex;align-items:center;gap:6px;background:linear-gradient(135deg,var(--ac),var(--ac2));color:#fff;font-family:var(--mono);font-size:11px;font-weight:800;padding:10px 24px;border-radius:20px;text-decoration:none;box-shadow:0 4px 14px rgba(232,89,60,.3)">
📝 Submit Your Model →
</a>
</div>
</div>
<!-- ===== TAB: ABOUT ===== -->
<div id="about" class="tpane wm-p5">
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px">
<div class="card">
<h3>🔥 What is WM Bench?</h3>
<p>Existing benchmarks (HumanML3D, BABEL) measure only motion quality (FID). WM Bench is the world's first benchmark to evaluate <b>cognitive capabilities</b> of world models.</p>
</div>
<div class="card">
<h3>🧬 First-Ever Measurements</h3>
<p>C05 Autonomous Emotion Escalation and C10 Body-Swap Extensibility have zero prior research. C03·C04·C06·C07·C08 are also first defined by WM Bench.</p>
</div>
<div class="card">
<h3>📊 VIDRAFT PROMETHEUS</h3>
<p>Current baseline. Open LLM brain (any LLM pluggable) + FloodDiffusion-VIDRAFT motion engine. RTX5070 (local/16GB). 47 FPS. WM Score 726/1000 (Grade B).</p>
</div>
<div class="card">
<h3>📋 Version History</h3>
<p style="font-family:var(--mono);font-size:9px;line-height:2;color:var(--text-sec)">
v1.0 (2026.03) — Initial release<br>
100 scenarios · Auto-scored<br>
3 Tracks · 10 Categories<br>
PROMETHEUS baseline registered
</p>
</div>
<div class="card">
<h3>📄 Citation</h3>
<div style="background:var(--bg2);border-radius:6px;padding:10px;font-family:var(--mono);font-size:8.5px;color:var(--text-sec);line-height:1.8">
@dataset{wmbench2026,<br>
&nbsp; title={World Model Bench},<br>
&nbsp; author={Kim Taebong},<br>
&nbsp; year={2026},<br>
&nbsp; publisher={VIDRAFT}<br>
}
</div>
</div>
<div class="card">
<h3>⚖️ License</h3>
<p>Dataset: <b>CC-BY-SA-4.0</b><br>Scoring code: <b>Apache 2.0</b><br>Free to use and cite. Attribution required.</p>
</div>
</div>
</div>
<!-- /wrap -->
<!-- ===== DATA & LOGIC ===== -->
<script>
(function(){
/* ── Data ── */
/*
 * Leaderboard rows, one object per model.
 *   model/org/date  – display name, organization, "YYYY-MM" date string
 *   wm              – total WM score; for every scored row it equals p1 + p2 + p3
 *   grade           – letter grade, "?" when the row has no score
 *   p1/p2/p3        – pillar sub-scores (null when not evaluated)
 *   fps, lat        – reported frame rate and latency (null when unknown);
 *                     lat is presumably milliseconds, matching cognitive_latency_ms
 *                     in the submission format — confirm against the renderer
 *   track           – "A" / "C", or "-" for rows that have not been evaluated
 *   link/brain/motion/gpu – free-text metadata shown alongside the entry
 *   est             – false only for the verified PROMETHEUS row; presumably marks
 *                     estimated (not self-submitted) figures — confirm
 *   cats            – per-category scores C01..C10 (null when not evaluated)
 */
var LB_DATA = [
// ── TRACK C VERIFIED ──────────────────────────────────────
{model:"PROMETHEUS v1.0",org:"VIDRAFT",date:"2026-03",wm:726,grade:"B",p1:140,p2:390,p3:196,fps:47.0,lat:3100,track:"C",link:"https://huggingface.co/spaces/FINAL-Bench/world-model",brain:"Any LLM (Open)",motion:"FloodDiffusion-VIDRAFT",gpu:"RTX5070 (local/16GB)",est:false,
cats:{C01:65,C02:75,C03:85,C04:90,C05:85,C06:60,C07:70,C08:80,C09:85,C10:35}},
// ── TRACK A ESTIMATED — EMBODIED / ROBOTICS ───────────────
{model:"Meta V-JEPA 2-AC",org:"Meta AI",date:"2025-06",wm:554,grade:"C",p1:200,p2:214,p3:140,fps:null,lat:null,track:"A",link:"https://ai.meta.com/blog/v-jepa-2-world-model-benchmarks/",brain:"ViT-g (1.2B)",motion:"Latent JEPA",gpu:"Multi-A100",est:true,
cats:{C01:82,C02:78,C03:88,C04:35,C05:5,C06:72,C07:38,C08:15,C09:70,C10:55}},
{model:"Wayve GAIA-3",org:"Wayve",date:"2025-12",wm:550,grade:"C",p1:206,p2:221,p3:123,fps:null,lat:null,track:"A",link:"https://wayve.ai/thinking/gaia-3/",brain:"Proprietary",motion:"Multi-cam Video",gpu:"H100 cluster",est:true,
cats:{C01:85,C02:80,C03:82,C04:42,C05:8,C06:65,C07:48,C08:10,C09:68,C10:45}},
{model:"NC AI WFM v1.0",org:"NC AI",date:"2026-03",wm:522,grade:"C",p1:150,p2:252,p3:120,fps:null,lat:null,track:"A",link:"https://en.sedaily.com/technology/2026/03/16/nc-ai-demonstrates-world-foundation-model-for-robot",brain:"NC WFM",motion:"Latent-Action",gpu:"A100 (25% SOTA)",est:true,
cats:{C01:74,C02:76,C03:70,C04:60,C05:22,C06:52,C07:48,C08:40,C09:65,C10:15}},
{model:"NVIDIA Cosmos v1.0",org:"NVIDIA",date:"2025-01",wm:498,grade:"C",p1:158,p2:222,p3:118,fps:null,lat:null,track:"A",link:"https://www.nvidia.com/en-us/ai/cosmos/",brain:"Cosmos Tokenizer",motion:"Video Diffusion",gpu:"H100 cluster",est:true,
cats:{C01:78,C02:80,C03:72,C04:38,C05:8,C06:62,C07:42,C08:18,C09:72,C10:28}},
{model:"NAVER LABS SWM",org:"NAVER LABS Europe",date:"2025-06",wm:470,grade:"C",p1:165,p2:198,p3:107,fps:null,lat:null,track:"A",link:"https://europe.naverlabs.com/updates/structured-world-models-for-robotic-manipulation-rss-2025/",brain:"Foundation Model",motion:"3D Navigation",gpu:"Research cluster",est:true,
cats:{C01:80,C02:85,C03:72,C04:22,C05:5,C06:68,C07:35,C08:10,C09:62,C10:18}},
{model:"DeepMind Genie 2",org:"Google DeepMind",date:"2024-12",wm:449,grade:"C",p1:179,p2:140,p3:130,fps:24,lat:null,track:"A",link:"https://deepmind.google/discover/blog/genie-2-a-large-scale-foundation-world-model/",brain:"Foundation WM",motion:"3D Video Gen",gpu:"TPU v5",est:true,
cats:{C01:75,C02:68,C03:60,C04:15,C05:5,C06:45,C07:30,C08:20,C09:72,C10:38}},
{model:"DreamerV3 XL",org:"Google DeepMind",date:"2025-04",wm:441,grade:"C",p1:132,p2:229,p3:80,fps:null,lat:null,track:"A",link:"https://github.com/danijar/dreamerv3",brain:"RSSM (200M)",motion:"Latent Rollout",gpu:"A100",est:true,
cats:{C01:70,C02:62,C03:80,C04:28,C05:6,C06:75,C07:40,C08:12,C09:60,C10:8}},
// ── TRACK A ESTIMATED — GENERAL / VIDEO ──────────────────
{model:"OpenAI Sora 2",org:"OpenAI",date:"2025-09",wm:381,grade:"D",p1:175,p2:85,p3:121,fps:null,lat:null,track:"A",link:"https://openai.com/sora",brain:"Diffusion Transformer",motion:"Video Diffusion",gpu:"Proprietary",est:true,
cats:{C01:72,C02:68,C03:40,C04:10,C05:5,C06:25,C07:20,C08:35,C09:55,C10:31}},
{model:"World Labs Marble",org:"World Labs",date:"2025-11",wm:362,grade:"D",p1:180,p2:72,p3:110,fps:null,lat:null,track:"A",link:"https://www.worldlabs.ai/",brain:"Spatial Intelligence",motion:"3D Gen",gpu:"Proprietary",est:true,
cats:{C01:88,C02:72,C03:35,C04:8,C05:5,C06:22,C07:18,C08:28,C09:60,C10:24}},
{model:"UniSim",org:"Google Research",date:"2024-01",wm:338,grade:"D",p1:148,p2:118,p3:72,fps:null,lat:null,track:"A",link:"https://universal-simulator.github.io/unisim/",brain:"Diffusion",motion:"Video Diffusion",gpu:"TPU",est:true,
cats:{C01:72,C02:58,C03:58,C04:18,C05:4,C06:42,C07:22,C08:18,C09:48,C10:6}},
{model:"DIAMOND v1.0",org:"EPFL",date:"2024-05",wm:312,grade:"D",p1:103,p2:138,p3:71,fps:null,lat:null,track:"A",link:"https://arxiv.org/abs/2405.12399",brain:"DDPM Latent",motion:"Diffusion",gpu:"A100",est:true,
cats:{C01:55,C02:48,C03:48,C04:20,C05:5,C06:35,C07:30,C08:25,C09:40,C10:6}},
{model:"Oasis AI",org:"Decart / Etched",date:"2024-10",wm:285,grade:"D",p1:98,p2:98,p3:89,fps:20,lat:null,track:"A",link:"https://oasis.us/",brain:"Diffusion Transformer",motion:"Interactive Video",gpu:"Sohu chip",est:true,
cats:{C01:50,C02:48,C03:42,C04:12,C05:4,C06:28,C07:18,C08:35,C09:48,C10:4}},
// ── NOT YET EVALUATED — NAME REGISTERED ──────────────────
{model:"DeepMind Genie 3",org:"Google DeepMind",date:"2025-08",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:24,lat:null,track:"-",link:"https://deepmind.google/discover/blog/genie-3/",brain:"Foundation WM",motion:"3D Video Gen",gpu:"TPU v5",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"Wayve GAIA-2",org:"Wayve",date:"2025-03",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://wayve.ai/science/gaia/",brain:"Proprietary",motion:"Multi-cam Video",gpu:"H100 cluster",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"Hyundai AI Robotics WM",org:"Hyundai Motor Group",date:"2026-01",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.hyundai.com/worldwide/en/newsroom/detail/hyundai-motor-group-announces-ai-robotics-strategy-to-lead-human-centered-robotics-era-at-ces-2026-0000001100",brain:"NVIDIA Omniverse",motion:"Humanoid",gpu:"AI Factory",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"Odyssey-2",org:"Odyssey",date:"2025-12",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:20,lat:40,track:"-",link:"https://odyssey.systems/",brain:"Causal Video Model",motion:"Interactive Video",gpu:"Proprietary",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
// Model name repaired from a mis-encoded "π0" (was rendered as garbage characters).
{model:"Physical Intelligence π0",org:"Physical Intelligence",date:"2024-10",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.physicalintelligence.company/blog/pi0",brain:"VLA Flow Model",motion:"Robot Dexterity",gpu:"Proprietary",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"LG CLOiD VLA",org:"LG Electronics",date:"2025-12",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.lg.com/sg/about-lg/press-and-media/lg-acquires-majority-stake-in-bear-robotics-to-bolster-robotic-capabilities/",brain:"VLM+VLA",motion:"Household Robot",gpu:"Proprietary",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"Runway GWM-1",org:"Runway",date:"2025-10",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://runwayml.com/",brain:"Autoregressive",motion:"Real-time Video",gpu:"Proprietary",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
// ── NEWLY ADDED ───────────────────────────────────────────
{model:"Tesla FSD v13 (E2E)",org:"Tesla",date:"2025-07",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.tesla.com/fsd",brain:"E2E Neural Net (Temporal-Voxel)",motion:"Camera→Control (Driving)",gpu:"Dojo / HW4",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"Figure Helix-02",org:"Figure AI",date:"2025-12",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.figure.ai/news/helix-02",brain:"VLA (pixels→full-body)",motion:"Humanoid Full-Body",gpu:"Proprietary",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"TRI Diffusion Policy",org:"Toyota Research Institute",date:"2024-06",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://diffusion-policy.cs.columbia.edu/",brain:"DDPM / Score Matching",motion:"Robot Dexterity",gpu:"Research cluster",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"HuggingFace LeRobot",org:"Hugging Face",date:"2024-09",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://huggingface.co/lerobot",brain:"Open (ACT / Diffusion)",motion:"Multi-robot Open",gpu:"Open",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"Covariant RFM-1",org:"Covariant",date:"2024-03",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://covariant.ai/rfm/",brain:"8B Multimodal Transformer",motion:"Warehouse Manipulation",gpu:"Proprietary",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}},
{model:"Skild Brain",org:"Skild AI",date:"2024-08",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.skild.ai/",brain:"Omni-body Foundation Model",motion:"Any Robot / Any Task",gpu:"Proprietary",est:true,
cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}
];
/*
 * Category definitions shown in the category table (consumed by renderCatTable).
 * Fields per entry:
 *   id     - category code (C01..C10)
 *   name   - display name
 *   pillar - owning pillar key (P1/P2/P3); also used to look up a colour in PC
 *   max    - value shown in the table's last column
 *   score  - not read by renderCatTable
 *   first  - novelty marker: '✦✦' and '✦' each render a badge, '' renders a placeholder
 *   type   - 'existing' or 'new'; selects the type badge and the group header
 *   ref    - reference text for the table (empty string renders a placeholder)
 *   desc   - one-line description shown under the name
 * Note: entries are ordered by id, not by type (C09 is 'existing' but sits
 * between 'new' entries).
 */
var CAT_DATA = [
{id:"C01",name:"Environmental Awareness",pillar:"P1",max:100,score:65,first:"",type:"existing",ref:"Occupancy Grid eval.",desc:"Identifies walls, obstacles and terrain in all directions"},
{id:"C02",name:"Entity Recognition",pillar:"P1",max:100,score:75,first:"",type:"existing",ref:"BABEL action recog.",desc:"Classifies NPC as beast/human and maps to threat level"},
{id:"C03",name:"Prediction-Based Reasoning",pillar:"P2",max:100,score:85,first:"✦",type:"new",ref:"",desc:"Predicts 4-directional danger and selects optimal action"},
{id:"C04",name:"Threat-Type Differentiation",pillar:"P2",max:100,score:90,first:"✦",type:"new",ref:"",desc:"Beast: full sprint. Human: cautious dodge"},
{id:"C05",name:"Autonomous Emotion Escalation",pillar:"P2",max:100,score:85,first:"✦✦",type:"new",ref:"",desc:"Emotion escalates autonomously as threat persists"},
{id:"C06",name:"Contextual Memory Utilization",pillar:"P2",max:100,score:60,first:"✦",type:"new",ref:"",desc:"Past decisions incorporated into current judgment"},
{id:"C07",name:"Post-Threat Adaptive Recovery",pillar:"P2",max:100,score:70,first:"✦",type:"new",ref:"",desc:"Gradually de-escalates after threat disappears"},
{id:"C08",name:"Motion-Emotion Expression",pillar:"P3",max:100,score:80,first:"✦",type:"new",ref:"",desc:"Emotional state expressed richly through motion"},
{id:"C09",name:"Real-Time Cognitive Performance",pillar:"P3",max:100,score:85,first:"",type:"existing",ref:"FVD / latency",desc:"Inference latency and FPS under cognitive load"},
{id:"C10",name:"Body-Swap Extensibility",pillar:"P3",max:100,score:35,first:"✦✦",type:"new",ref:"",desc:"Same brain drives different body types without retraining"}
];
/* Pillar key -> colour. The values match the --p1/--p2/--p3 custom properties
   declared in the page's :root CSS. */
var PC = {P1:"#7b8fd4",P2:"#e8593c",P3:"#d4a044"};
/* ── Utilities ── */
/**
 * Builds the HTML for a score cell: the numeric value followed by a
 * proportional fill bar.
 *
 * @param {number} v      Score to display (printed as-is).
 * @param {number} max    Maximum attainable score; the bar is v/max wide.
 * @param {string} color  CSS colour used for both the number and the bar fill.
 * @returns {string} HTML fragment using the sc / sn / sb / sf classes.
 */
function scoreBar(v,max,color){
  var pct=Math.round(v/max*100);
  /* A zero or missing max gives NaN/Infinity and an out-of-range score gives a
     width outside 0-100%; normalise so the fill never overflows its track. */
  if(!isFinite(pct)) pct=0;
  pct=Math.max(0,Math.min(100,pct));
  return '<div class="sc"><div class="sn" style="color:'+color+'">'+v+'</div><div class="sb"><div class="sf" style="width:'+pct+'%;background:'+color+'"></div></div></div>';
}
/* Shorthand for document.getElementById: returns whatever that lookup returns for the given id. */
function $(id){
  var node=document.getElementById(id);
  return node;
}
/* ── λ Œλ” ── */
/**
 * Renders the leaderboard rows into #LB_BODY from LB_DATA.
 *
 * One <tr> is produced per entry, in LB_DATA order; the first row gets the
 * "hl" class. Fields that are null (wm, p1, p2, p3, fps, lat) render a
 * placeholder instead of a score bar or number. Does nothing when #LB_BODY
 * is not in the document.
 */
function renderLB(){
  var tb=$('LB_BODY'); if(!tb) return;

  /* Placeholder for a pillar column that has no score. */
  var NO_SCORE='<span style="color:var(--text-muted);font-size:9px">β€”</span>';

  /* Pillar cell: a score bar when a value exists, the placeholder otherwise.
     Without the null check scoreBar would print the literal text "null". */
  function pillarCell(v,max,color){
    return v!=null?scoreBar(v,max,color):NO_SCORE;
  }

  /* NOTE(review): the pillar maxima used below (200/500/300) differ from the
     divisors initCharts uses for the same fields (250/450/300) - confirm which
     set is intended. */
  tb.innerHTML=LB_DATA.map(function(r,i){
    return '<tr class="'+(i===0?'hl':'')+'">'
      /* Model cell: rank badge, (linked) model name, est./verified marker, org and date. */
      +'<td class="c-model"><div class="mc"><div class="mn">'
      +'<span style="display:inline-flex;align-items:center;justify-content:center;width:20px;height:20px;border-radius:6px;background:linear-gradient(135deg,#e8593c,#d4a044);color:#fff;font-size:9px;font-weight:800;flex-shrink:0">'+(i+1)+'</span>'
      /* rel="noopener noreferrer": links open in a new tab and must not get a window.opener handle. */
      +(r.link?'<a href="'+r.link+'" target="_blank" rel="noopener noreferrer" style="color:inherit;text-decoration:none;">'+r.model+'</a>':r.model)
      +(r.est?'<span style="font-size:8px;background:#444;color:#aaa;padding:1px 5px;border-radius:4px;margin-left:5px">est.</span>':'<span style="font-size:9px;color:#4caf50;margin-left:5px">βœ“</span>')
      +'</div><div class="mp">'+r.org+' Β· '+r.date
      +(r.link?' <a href="'+r.link+'" target="_blank" rel="noopener noreferrer" style="color:var(--ac);font-size:9px;font-weight:700;text-decoration:none;margin-left:4px">β†—</a>':'')
      +'</div></div></td>'
      /* Total score (the "Not evaluated" <div> is now properly closed). */
      +'<td>'+(r.wm!=null?scoreBar(r.wm,1000,'#e8593c'):'<div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);padding:4px 8px">Not evaluated</div>')+'</td>'
      /* Grade: '?' marks an entry without a grade. */
      +'<td>'+(r.grade!='?'?'<span class="gr gr-'+r.grade+'">'+r.grade+'</span>':'<span style="font-size:9px;color:var(--text-muted);font-family:var(--mono)">β€”</span>')+'</td>'
      +'<td class="p1-col">'+pillarCell(r.p1,200,PC.P1)+'</td>'
      +'<td class="p2-col">'+pillarCell(r.p2,500,PC.P2)+'</td>'
      +'<td class="p3-col">'+pillarCell(r.p3,300,PC.P3)+'</td>'
      +'<td><span style="font-family:var(--mono);font-size:11px;font-weight:700">'+(r.fps!=null?r.fps:'β€”')+'</span></td>'
      +'<td><span style="font-family:var(--mono);font-size:11px;font-weight:700">'+(r.lat!=null?r.lat.toLocaleString():'β€”')+'</span></td>'
      +'<td><span class="tb tb-'+r.track+'">'+r.track+' βœ“</span></td>'
      +'<td><span style="font-size:9px;color:var(--text-sec)">'+r.brain+'</span></td>'
      +'<td><span style="font-size:9px;color:var(--text-sec)">'+r.motion+'</span></td>'
      +'<td><span style="font-size:9px;color:var(--text-muted)">'+r.gpu+'</span></td>'
      +'</tr>';
  }).join('');
}
/**
 * Renders the category table into #CAT_TABLE from CAT_DATA.
 *
 * Rows are grouped by their `type` so each group header is emitted exactly
 * once: CAT_DATA is ordered by id, not by type, so emitting a header on every
 * type change would repeat headers. Groups appear in order of first occurrence
 * and rows keep their CAT_DATA order within a group. The category count shown
 * in each header is derived from the data rather than hard-coded.
 * Does nothing when #CAT_TABLE is not in the document.
 */
function renderCatTable(){
  var tb=$('CAT_TABLE'); if(!tb) return;

  /* Partition CAT_DATA by type, remembering the order types first appear in. */
  var typeOrder=[];
  var byType={};
  CAT_DATA.forEach(function(c){
    if(!Object.prototype.hasOwnProperty.call(byType,c.type)){
      byType[c.type]=[];
      typeOrder.push(c.type);
    }
    byType[c.type].push(c);
  });

  /* Full-width header row for one group; any type other than 'existing' uses the "newly defined" header. */
  function groupHeader(type,count){
    if(type==='existing'){
      return '<tr><td colspan="7" style="padding:8px 12px 4px;font-size:8.5px;font-family:var(--mono);font-weight:800;color:#64748b;background:var(--surface-alt);letter-spacing:.8px;text-transform:uppercase;">Existing Benchmark Domains (analogous to FID Β· FVD Β· HumanML3D Β· BABEL) β€” '+count+(count===1?' category':' categories')+'</td></tr>';
    }
    return '<tr><td colspan="7" style="padding:8px 12px 4px;font-size:8.5px;font-family:var(--mono);font-weight:800;color:var(--ac);background:rgba(232,89,60,.04);letter-spacing:.8px;text-transform:uppercase;">⚑ VIDRAFT Newly Defined β€” '+count+' Novel '+(count===1?'Category':'Categories')+' (no prior research)</td></tr>';
  }

  /* One table row for a single category. */
  function catRow(c){
    var typeBadge=c.type==='existing'
      ?'<span style="font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span>'
      :'<span style="font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">NEW</span>';
    var firstBadge=c.first==='✦✦'
      ?'<span style="font-size:9px;font-weight:700;color:#d97706">✦✦ no prior research</span>'
      :c.first==='✦'
        ?'<span style="font-size:9px;font-weight:600;color:var(--ac)">✦ newly defined</span>'
        :'<span style="font-size:8.5px;color:var(--text-muted)">β€”</span>';
    return '<tr>'
      +'<td class="c-model" style="font-family:var(--mono);font-weight:800;color:'+PC[c.pillar]+'">'+c.id+'</td>'
      +'<td style="text-align:left"><span style="font-weight:700">'+c.name+'</span><br><span style="font-size:8px;color:var(--text-muted)">'+c.desc+'</span></td>'
      +'<td style="text-align:left"><span style="font-size:9px;font-family:var(--mono);color:'+PC[c.pillar]+'">'+c.pillar+'</span></td>'
      +'<td style="text-align:center">'+typeBadge+'</td>'
      +'<td style="text-align:left;font-size:8.5px;color:var(--text-muted);font-family:var(--mono)">'+(c.ref||'β€”')+'</td>'
      +'<td style="text-align:center">'+firstBadge+'</td>'
      +'<td style="text-align:center"><span style="font-family:var(--mono);font-weight:700">'+c.max+'</span></td>'
      +'</tr>';
  }

  var rows='';
  typeOrder.forEach(function(type){
    var group=byType[type];
    rows+=groupHeader(type,group.length);
    group.forEach(function(c){ rows+=catRow(c); });
  });
  tb.innerHTML=rows;
}
/* ── Tab switching ── */
/**
 * Activates one tab. Clears the "on" class from every .tpane and from every
 * .tab inside #WM_TAB_BAR, then sets it on the element whose id is `id` and
 * on the tab-bar element whose data-tab attribute equals `id` (each only when
 * it exists).
 */
function showTab(id){
  function switchOff(selector){
    var nodes=document.querySelectorAll(selector);
    nodes.forEach(function(node){
      node.classList.remove('on');
    });
  }
  switchOff('.tpane');
  switchOff('#WM_TAB_BAR .tab');

  var targetPane=$(id);
  if(targetPane){
    targetPane.classList.add('on');
  }
  var targetTab=document.querySelector('#WM_TAB_BAR [data-tab="'+id+'"]');
  if(targetTab){
    targetTab.classList.add('on');
  }
}
/**
 * Creates the five Chart.js charts (#cBar, #cRadar, #cCats, #cCognition,
 * #cScatter) from the LB_DATA entries that have a WM score (wm != null).
 *
 * Grid and tick colours are chosen once, from whether <body> has the "dark"
 * class at the time of the call. Entries flagged `est` are drawn in a lighter
 * style (dashed/thinner radar line, smaller scatter point) and verified
 * entries get a check-mark prefix in the bar chart.
 */
function initCharts(){
  var isDark=document.body.classList.contains('dark');
  var gridC=isDark?'rgba(255,255,255,.07)':'rgba(0,0,0,.06)';
  var tickC=isDark?'#94a3b8':'#64748b';

  /* Per-model colours, keyed by the exact `model` string. */
  var MC={'PROMETHEUS v1.0':'#e8593c','Meta V-JEPA 2-AC':'#0d9488','Wayve GAIA-3':'#d97706','NC AI WFM v1.0':'#6366f1','NVIDIA Cosmos v1.0':'#76b900','NAVER LABS SWM':'#f43f5e','DeepMind Genie 2':'#4285f4','DreamerV3 XL':'#8b5cf6','DIAMOND v1.0':'#ec4899','OpenAI Sora 2':'#10b981','World Labs Marble':'#a855f7','UniSim':'#06b6d4','Oasis AI':'#f59e0b'};
  /* Fallback for models missing from MC. It must be a 6-digit hex because a
     2-digit alpha suffix is appended below; '#888'+'bb' would be an invalid
     5-digit colour. */
  var FALLBACK_COLOR='#888888';
  function colorOf(r){return MC[r.model]||FALLBACK_COLOR;}
  function tint(r,alphaHex){return colorOf(r)+alphaHex;}
  function mono(size){return {family:'JetBrains Mono',size:size};}

  /* Divisors used to turn pillar scores into percentages / axis limits.
     NOTE(review): renderLB draws the same p1/p2 fields against 200/500 -
     confirm which maxima are intended. */
  var P1_MAX=250,P2_MAX=450,P3_MAX=300;

  var scored=LB_DATA.filter(function(r){return r.wm!=null;});
  var sorted=[].concat(scored).sort(function(a,b){return b.wm-a.wm;});
  /* Chart label: drops the first ' v1.0' and '-AC', keeps the first two words. */
  function shorten(s){return s.replace(' v1.0','').replace('-AC','').split(' ').slice(0,2).join(' ');}

  // 1. Horizontal bar chart: WM Score, highest first
  new Chart(document.getElementById('cBar'),{
    type:'bar',
    data:{
      labels:sorted.map(function(r){return (r.est?'':'βœ“ ')+shorten(r.model);}),
      datasets:[{
        label:'WM Score',
        data:sorted.map(function(r){return r.wm;}),
        backgroundColor:sorted.map(function(r){return tint(r,'bb');}),
        borderColor:sorted.map(function(r){return colorOf(r);}),
        borderWidth:1.5,borderRadius:6,borderSkipped:false
      }]
    },
    options:{
      responsive:false,indexAxis:'y',
      plugins:{
        legend:{display:false},
        tooltip:{callbacks:{label:function(c){var r=sorted[c.dataIndex];return ' WM: '+r.wm+'/1000 Grade: '+r.grade+(r.est?' (est.)':' βœ“ Official');}}}
      },
      scales:{
        x:{min:0,max:1000,grid:{color:gridC},ticks:{color:tickC,font:mono(9)}},
        y:{grid:{display:false},ticks:{color:tickC,font:mono(8.5)}}
      }
    }
  });

  // 2. Radar: pillar percentages of the top 5 entries by WM score
  var top5=sorted.slice(0,5);
  new Chart(document.getElementById('cRadar'),{
    type:'radar',
    data:{
      labels:['πŸ‘ Perception','🧠 Cognition','πŸ”₯ Embodiment'],
      datasets:top5.map(function(r){
        return {
          label:shorten(r.model),
          data:[Math.round(r.p1/P1_MAX*100),Math.round(r.p2/P2_MAX*100),Math.round(r.p3/P3_MAX*100)],
          borderColor:colorOf(r),backgroundColor:tint(r,'1a'),
          borderWidth:r.est?1.5:2.5,pointRadius:r.est?2:4,pointBackgroundColor:colorOf(r),borderDash:r.est?[4,3]:[]
        };
      })
    },
    options:{
      responsive:false,
      plugins:{legend:{labels:{color:tickC,font:mono(8),boxWidth:10,padding:6}}},
      scales:{r:{grid:{color:gridC},angleLines:{color:gridC},ticks:{display:false,backdropColor:'transparent'},pointLabels:{color:tickC,font:mono(10)},suggestedMin:0,suggestedMax:100}}
    }
  });

  // 3. Category breakdown: one dataset per scored model across C01..C10
  var catK=['C01','C02','C03','C04','C05','C06','C07','C08','C09','C10'];
  var catL=['C01\nEnv.Aware','C02\nEntity Recog','C03\nPrediction','C04\nThreat Diff','C05\nEmotion Esc','C06\nMemory','C07\nRecovery','C08\nMotion Expr','C09\nRealtime','C10\nBody-Swap'];
  new Chart(document.getElementById('cCats'),{
    type:'bar',
    data:{
      labels:catL,
      datasets:sorted.map(function(r){
        return {
          label:shorten(r.model),
          data:catK.map(function(k){return r.cats[k];}),
          backgroundColor:tint(r,'99'),borderColor:colorOf(r),borderWidth:1,borderRadius:2
        };
      })
    },
    options:{
      responsive:false,
      plugins:{
        legend:{labels:{color:tickC,font:mono(7.5),boxWidth:8,padding:5}},
        tooltip:{callbacks:{label:function(c){return ' '+sorted[c.datasetIndex].model+': '+c.raw+' / 100';}}}
      },
      scales:{
        y:{min:0,max:100,grid:{color:gridC},ticks:{color:tickC,font:mono(9)}},
        x:{grid:{display:false},ticks:{color:tickC,font:mono(8),maxRotation:0}}
      }
    }
  });

  // 4. Cognition gap: P2 score per model, highest first
  var cogS=[].concat(scored).sort(function(a,b){return b.p2-a.p2;});
  new Chart(document.getElementById('cCognition'),{
    type:'bar',
    data:{
      labels:cogS.map(function(r){return shorten(r.model);}),
      datasets:[{
        label:'P2 Cognition',
        data:cogS.map(function(r){return r.p2;}),
        backgroundColor:cogS.map(function(r){return tint(r,'cc');}),
        borderColor:cogS.map(function(r){return colorOf(r);}),
        borderWidth:1.5,borderRadius:5,borderSkipped:false
      }]
    },
    options:{
      responsive:false,
      plugins:{
        legend:{display:false},
        tooltip:{callbacks:{label:function(c){return ' P2 Cognition: '+cogS[c.dataIndex].p2+' / '+P2_MAX+' pts';}}}
      },
      scales:{
        y:{min:0,max:P2_MAX,grid:{color:gridC},ticks:{color:tickC,font:mono(9)}},
        x:{grid:{display:false},ticks:{color:tickC,font:mono(8),maxRotation:30}}
      }
    }
  });

  // 5. Scatter: perception % (x) against cognition % (y), one point per model
  new Chart(document.getElementById('cScatter'),{
    type:'scatter',
    data:{
      datasets:scored.map(function(r){
        return {
          label:shorten(r.model),
          data:[{x:Math.round(r.p1/P1_MAX*100),y:Math.round(r.p2/P2_MAX*100)}],
          backgroundColor:tint(r,'cc'),borderColor:colorOf(r),
          pointRadius:r.est?7:10,pointStyle:r.est?'circle':'star',borderWidth:1.5
        };
      })
    },
    options:{
      responsive:false,
      plugins:{
        legend:{labels:{color:tickC,font:mono(7.5),boxWidth:8,padding:5}},
        tooltip:{callbacks:{label:function(c){return c.dataset.label+' β€” Perception: '+c.parsed.x+'% Cognition: '+c.parsed.y+'%';}}}
      },
      scales:{
        x:{min:0,max:100,title:{display:true,text:'Perception (%)',color:tickC,font:mono(9)},grid:{color:gridC},ticks:{color:tickC,font:mono(9)}},
        y:{min:0,max:100,title:{display:true,text:'Cognition (%)',color:tickC,font:mono(9)},grid:{color:gridC},ticks:{color:tickC,font:mono(9)}}
      }
    }
  });
}
/* ── Event binding ── */
/**
 * Wires up the dark-mode toggle (#WM_DARK_BTN) and restores the saved
 * dark-mode preference from localStorage key "wm-dark".
 *
 * Storage access is best-effort: localStorage can throw when storage is
 * blocked (for example in a sandboxed or third-party iframe), and an
 * exception here would abort init() before the charts are scheduled. In that
 * case the toggle still works for the current page view; the preference is
 * simply not persisted.
 */
function bindEvents(){
  var DARK_KEY='wm-dark';

  /* Returns the stored preference string, or null when storage is unavailable. */
  function readPref(){
    try{ return localStorage.getItem(DARK_KEY); }
    catch(e){ return null; }
  }
  /* Persists the preference; deliberately ignores a storage failure. */
  function writePref(value){
    try{ localStorage.setItem(DARK_KEY,value); }
    catch(e){ /* storage unavailable - keep the in-page toggle working */ }
  }

  /* Tabs are handled by the wmTab() function, not here. */

  /* Dark-mode toggle */
  var darkBtn=$('WM_DARK_BTN');
  if(darkBtn){
    darkBtn.addEventListener('click',function(){
      document.body.classList.toggle('dark');
      var isDark=document.body.classList.contains('dark');
      writePref(isDark);
      darkBtn.textContent=isDark?'β˜€οΈ Light':'πŸŒ™ Dark';
    });
  }

  /* Restore the saved dark-mode preference */
  if(readPref()==='true'){
    document.body.classList.add('dark');
    if(darkBtn) darkBtn.textContent='β˜€οΈ Light';
  }
}
/* ── Initialization (runs safely after the DOM has been injected) ── */
/**
 * Page entry point: renders both tables and binds events right away, then
 * builds the charts 200 ms later. An exception thrown while building the
 * charts is caught and reported with console.warn.
 */
function init(){
  function drawCharts(){
    try{
      initCharts();
    }catch(err){
      console.warn('WM Bench chart error:',err);
    }
  }

  renderLB();
  renderCatTable();
  bindEvents();
  setTimeout(drawCharts,200);
}
/* Gradio's gr.HTML() injects this markup after DOMContentLoaded has already
   fired, so run init() immediately unless the document is still loading. */
if(document.readyState!=='loading'){
  init();
}else{
  document.addEventListener('DOMContentLoaded', init);
}
})();
</script>
</body>
</html>