Spaces:
Running
Running
Update index.html
Browse files- index.html +53 -91
index.html
CHANGED
|
@@ -264,50 +264,12 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 264 |
</div>
|
| 265 |
|
| 266 |
<div style="margin-top:16px;padding:12px 16px;background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);font-size:9px;color:var(--text-sec);line-height:1.7">
|
| 267 |
-
<b style="color:var(--ac)">Track A</b> = Text-Only ยท
|
| 268 |
-
<b style="color:#6366f1">Track B</b> = Text + Performance ยท
|
| 269 |
-
<b style="color:var(--green)">Track C</b> = Live Demo + Verified ยท
|
| 270 |
</div>
|
| 271 |
</div>
|
| 272 |
|
| 273 |
-
<!-- โโ Charts (leaderboard ํ๋จ) โโ -->
|
| 274 |
-
<!-- ===== TAB: CHARTS ===== -->
|
| 275 |
-
<div>
|
| 276 |
-
<div style="display:grid;grid-template-columns:1fr 1fr;gap:14px;margin-bottom:16px;">
|
| 277 |
-
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
|
| 278 |
-
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">๐ WM Score โ 8 Models</div>
|
| 279 |
-
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">โ = ๊ณต์ ๊ฒ์ฆ ยท est. = ๋
ผ๋ฌธ ๊ธฐ๋ฐ ์ถ์ </p>
|
| 280 |
-
<canvas id="cBar" height="220"></canvas>
|
| 281 |
-
<div style="margin-top:10px;padding:8px 10px;background:var(--ac-bg);border-radius:6px;font-size:8.5px;color:var(--text-sec);line-height:1.7;border-left:2px solid var(--ac)"><b>PROMETHEUS</b> ์ ์ผํ ๊ณต์ Track C ๊ฒ์ฆ (726/1000 ยท B๋ฑ๊ธ). ๋๋จธ์ง 7๊ฐ๋ ์ถ์ ์น์
๋๋ค.</div>
|
| 282 |
-
</div>
|
| 283 |
-
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
|
| 284 |
-
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">๐ธ๏ธ Pillar Radar โ Top 5</div>
|
| 285 |
-
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">Perception ยท Cognition ยท Embodiment ์ ๊ทํ (%, 100์ ๊ธฐ์ค)</p>
|
| 286 |
-
<canvas id="cRadar" height="220"></canvas>
|
| 287 |
-
</div>
|
| 288 |
-
</div>
|
| 289 |
-
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);margin-bottom:16px;">
|
| 290 |
-
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">๐ Category Breakdown โ 8 Models ร 10 Categories</div>
|
| 291 |
-
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">C04 ์ํ์ฐจ๋ณยทC05 ๊ฐ์ ์์ค์ปฌ๋ ์ด์
์์ PROMETHEUS ์๋์ ์ฐ์ ยท V-JEPA 2๋ C03 ์์ธก ๊ฐ์ธ ยท GAIA-3๋ ์ฃผํ ํนํ๋ก C01ยทC03 ๋์</p>
|
| 292 |
-
<div style="overflow-x:auto;"><canvas id="cCats" style="min-width:780px;height:260px;"></canvas></div>
|
| 293 |
-
<div style="margin-top:10px;padding:8px 10px;background:rgba(212,160,68,.07);border-radius:6px;font-size:8.5px;color:var(--text-sec);line-height:1.7;border-left:2px solid var(--p3)"><b>ํต์ฌ ์ฐจ๋ณ์ :</b> C05ยทC10์ ์ ํ ์ฐ๊ตฌ ์ ๋ฌด. DreamerV3๋ C06 ๊ธฐ์ต ๊ฐ์ธ. V-JEPA 2๋ C10 ์ ์ฒด ๊ต์ฒด ๊ฐ์ธ (zero-shot robot).</div>
|
| 294 |
-
</div>
|
| 295 |
-
<div style="display:grid;grid-template-columns:1fr 1fr;gap:14px;">
|
| 296 |
-
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
|
| 297 |
-
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">๐ง Cognition Gap (P2, 450์ )</div>
|
| 298 |
-
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">WM Bench์ ํต์ฌ ์ฐจ๋ณ ๊ธฐ์ค โ PROMETHEUS ์๋์ 1์</p>
|
| 299 |
-
<canvas id="cCognition" height="200"></canvas>
|
| 300 |
-
</div>
|
| 301 |
-
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);">
|
| 302 |
-
<div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">๐ Perception vs Cognition</div>
|
| 303 |
-
<p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">์ฐ์ ๋ โ ์ฐ์๋จ(๋์ ์ธ์ + ๋์ ์ธ์ง)์ด ์ด์์ </p>
|
| 304 |
-
<canvas id="cScatter" height="200"></canvas>
|
| 305 |
-
</div>
|
| 306 |
-
</div>
|
| 307 |
-
</div>
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
<!-- ===== TAB: CATEGORIES ===== -->
|
| 312 |
<div id="cats" class="tpane">
|
| 313 |
|
|
@@ -558,42 +520,42 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 558 |
<div id="structure" class="tpane">
|
| 559 |
<div class="grid2" style="margin-bottom:14px">
|
| 560 |
<div class="card">
|
| 561 |
-
<h3>
|
| 562 |
-
<p>
|
| 563 |
<div style="background:var(--bg2);border-radius:8px;padding:12px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.9">
|
| 564 |
-
<div style="color:var(--text-muted);margin-bottom:4px">INPUT
|
| 565 |
<div style="color:var(--text)">{</div>
|
| 566 |
<div style="color:var(--text);padding-left:12px">"walls": {"left": 3.0, "right": null},</div>
|
| 567 |
<div style="color:var(--text);padding-left:12px">"npc_type": "beast", "npc_distance": 4.5</div>
|
| 568 |
<div style="color:var(--text)">}</div>
|
| 569 |
-
<div style="margin-top:8px;color:var(--text-muted)">OUTPUT
|
| 570 |
<div style="color:var(--p2)">PREDICT: left=danger(wall), right=safe, fwd=danger(beast)</div>
|
| 571 |
<div style="color:var(--p3)">MOTION: a person sprinting right in desperate terror</div>
|
| 572 |
</div>
|
| 573 |
</div>
|
| 574 |
<div class="card">
|
| 575 |
-
<h3>
|
| 576 |
-
<p>
|
| 577 |
<div style="display:flex;flex-direction:column;gap:7px">
|
| 578 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 579 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 580 |
-
<span style="color:var(--text-sec)"><b>
|
| 581 |
</div>
|
| 582 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 583 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 584 |
-
<span style="color:var(--text-sec)"><b>
|
| 585 |
</div>
|
| 586 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 587 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 588 |
-
<span style="color:var(--text-sec)"><b>
|
| 589 |
</div>
|
| 590 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 591 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 592 |
-
<span style="color:var(--text-sec)"><b>3D
|
| 593 |
</div>
|
| 594 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 595 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 596 |
-
<span style="color:var(--text-sec)"><b>
|
| 597 |
</div>
|
| 598 |
</div>
|
| 599 |
</div>
|
|
@@ -602,8 +564,8 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 602 |
<!-- ๊ธฐ์กด vs ์ ๊ท ๋น๊ต ์์ฝ -->
|
| 603 |
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:14px;">
|
| 604 |
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;">
|
| 605 |
-
<div style="font-size:10px;font-weight:800;color:#64748b;margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">๐
|
| 606 |
-
<div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">FID ยท FVD ยท HumanML3D ยท BABEL
|
| 607 |
<div style="display:flex;flex-direction:column;gap:5px;">
|
| 608 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C01</span>ํ๊ฒฝ ์ธ์ โ Occupancy Grid ๊ณ์ด</div>
|
| 609 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C02</span>๊ฐ์ฒด ์ธ์ โ BABEL ๋์ ์ธ์ ๊ณ์ด</div>
|
|
@@ -612,15 +574,15 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 612 |
</div>
|
| 613 |
</div>
|
| 614 |
<div style="background:linear-gradient(135deg,rgba(232,89,60,.05),rgba(212,160,68,.03));border:1px solid rgba(232,89,60,.2);border-radius:var(--radius-sm);padding:14px;">
|
| 615 |
-
<div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">โก VIDRAFT
|
| 616 |
-
<div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">
|
| 617 |
<div style="display:flex;flex-direction:column;gap:5px;">
|
| 618 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C03</span>์์ธก ๊ธฐ๋ฐ ์ถ๋ก <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 619 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C04</span>์ํ ์ ํ๋ณ ์ฐจ๋ณ ๋ฐ์ <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 620 |
-
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C05</span>์์จ ๊ฐ์ ์์ค์ปฌ๋ ์ด์
<span style="color:#d97706;font-size:8px">โฆโฆ
|
| 621 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C06</span>๋งฅ๋ฝ ๊ธฐ์ต ๋ฐ ํ์ฉ <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 622 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C07</span>์ํ ํด์ ํ ์ ์ <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 623 |
-
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C10</span>์ ์ฒด ๊ต์ฒด ํ์ฅ์ฑ <span style="color:#d97706;font-size:8px">โฆโฆ
|
| 624 |
</div>
|
| 625 |
</div>
|
| 626 |
</div>
|
|
@@ -635,8 +597,8 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 635 |
<th style="text-align:left">Pillar</th>
|
| 636 |
<th style="text-align:center;min-width:70px">๊ตฌ๋ถ</th>
|
| 637 |
<th style="text-align:left;min-width:150px">๊ธฐ์กด ์ ์ฌ ์งํ</th>
|
| 638 |
-
<th style="text-align:center;min-width:100px">
|
| 639 |
-
<th style="text-align:center">
|
| 640 |
</tr>
|
| 641 |
</thead>
|
| 642 |
<tbody id="CAT_TABLE"></tbody>
|
|
@@ -649,13 +611,13 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 649 |
<div style="display:flex;gap:12px;flex-wrap:wrap">
|
| 650 |
<div style="flex:1;min-width:200px;padding:12px;background:var(--bg);border-radius:8px;border:1px solid var(--border)">
|
| 651 |
<div style="font-size:10px;font-weight:800;color:#7c3aed;margin-bottom:4px">๐งฌ FINAL Bench</div>
|
| 652 |
-
<div style="font-size:9px;color:var(--text-sec);line-height:1.6">
|
| 653 |
<a href="https://huggingface.co/datasets/FINAL-Bench/Metacognitive" target="_blank" style="font-size:8px;color:#7c3aed;font-family:var(--mono)">โ ๋ฐ๋ก๊ฐ๊ธฐ</a>
|
| 654 |
</div>
|
| 655 |
<div style="flex:1;min-width:200px;padding:12px;background:linear-gradient(135deg,rgba(232,89,60,.06),rgba(212,160,68,.04));border-radius:8px;border:1px solid rgba(232,89,60,.2)">
|
| 656 |
<div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:4px">๐ฅ WM Bench <span style="font-size:7px;background:var(--ac);color:#fff;padding:1px 5px;border-radius:4px">NEW</span></div>
|
| 657 |
-
<div style="font-size:9px;color:var(--text-sec);line-height:1.6">
|
| 658 |
-
<span style="font-size:8px;color:var(--ac);font-family:var(--mono)">โ
|
| 659 |
</div>
|
| 660 |
</div>
|
| 661 |
</div>
|
|
@@ -666,28 +628,28 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 666 |
<div class="grid2" style="margin-bottom:14px">
|
| 667 |
<div class="card">
|
| 668 |
<h3>๐ค Track A โ Text Only</h3>
|
| 669 |
-
<p>
|
| 670 |
<ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px">
|
| 671 |
-
<li>OpenAI-compatible API
|
| 672 |
-
<li><code style="background:var(--bg2);padding:1px 4px;border-radius:3px;font-family:var(--mono)">wm_bench_dataset.json</code>
|
| 673 |
-
<li>PREDICT + MOTION
|
| 674 |
-
<li>
|
| 675 |
</ol>
|
| 676 |
</div>
|
| 677 |
<div class="card">
|
| 678 |
<h3>๐ฏ Track B/C โ Full Evaluation</h3>
|
| 679 |
-
<p>Track A +
|
| 680 |
<ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px">
|
| 681 |
-
<li>Track A
|
| 682 |
-
<li>FPS, Latency, GPU
|
| 683 |
-
<li>Track C:
|
| 684 |
-
<li>
|
| 685 |
</ol>
|
| 686 |
</div>
|
| 687 |
</div>
|
| 688 |
|
| 689 |
<div class="card" style="margin-bottom:14px">
|
| 690 |
-
<h3>
|
| 691 |
<div style="background:var(--bg2);border-radius:8px;padding:14px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.8;overflow-x:auto">
|
| 692 |
<pre style="margin:0">{
|
| 693 |
"benchmark": "WM Bench v1.0",
|
|
@@ -728,24 +690,24 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 728 |
<div id="about" class="tpane">
|
| 729 |
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px">
|
| 730 |
<div class="card">
|
| 731 |
-
<h3>๐ฅ WM Bench
|
| 732 |
-
<p>
|
| 733 |
</div>
|
| 734 |
<div class="card">
|
| 735 |
-
<h3>๐งฌ
|
| 736 |
-
<p>C05
|
| 737 |
</div>
|
| 738 |
<div class="card">
|
| 739 |
<h3>๐ VIDRAFT PROMETHEUS</h3>
|
| 740 |
-
<p>ํ์ฌ ๊ธฐ์ค์ (Baseline).
|
| 741 |
</div>
|
| 742 |
<div class="card">
|
| 743 |
-
<h3>๐
|
| 744 |
<p style="font-family:var(--mono);font-size:9px;line-height:2;color:var(--text-sec)">
|
| 745 |
-
v1.0 (2026.03) โ
|
| 746 |
-
100
|
| 747 |
3 Tracks ยท 10 Categories<br>
|
| 748 |
-
PROMETHEUS
|
| 749 |
</p>
|
| 750 |
</div>
|
| 751 |
<div class="card">
|
|
@@ -760,8 +722,8 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 760 |
</div>
|
| 761 |
</div>
|
| 762 |
<div class="card">
|
| 763 |
-
<h3>โ๏ธ
|
| 764 |
-
<p>๋ฐ์ดํฐ์
: <b>CC-BY-SA-4.0</b><br>์ฑ์ ์ฝ๋: <b>Apache 2.0</b><br>
|
| 765 |
</div>
|
| 766 |
</div>
|
| 767 |
</div>
|
|
@@ -850,7 +812,7 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 850 |
+'<td class="c-model"><div class="mc"><div class="mn">'
|
| 851 |
+'<span style="display:inline-flex;align-items:center;justify-content:center;width:20px;height:20px;border-radius:6px;background:linear-gradient(135deg,#e8593c,#d4a044);color:#fff;font-size:9px;font-weight:800;flex-shrink:0">'+(i+1)+'</span>'
|
| 852 |
+r.model+(r.est?'<span style="font-size:8px;background:#444;color:#aaa;padding:1px 5px;border-radius:4px;margin-left:5px">est.</span>':'<span style="font-size:9px;color:#4caf50;margin-left:5px">โ</span>')+'</div><div class="mp">'+r.org+' ยท '+r.date+'</div></div></td>'
|
| 853 |
-
+'<td>'+(r.wm!=null?scoreBar(r.wm,1000,'#e8593c'):'<div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);padding:4px 8px">
|
| 854 |
+'<td>'+(r.grade!='?'?'<span class="gr gr-'+r.grade+'">'+r.grade+'</span>':'<span style="font-size:9px;color:var(--text-muted);font-family:var(--mono)">โ</span>')+'</td>'
|
| 855 |
+'<td class="p1-col">'+scoreBar(r.p1,200,PC.P1)+'</td>'
|
| 856 |
+'<td class="p2-col">'+scoreBar(r.p2,500,PC.P2)+'</td>'
|
|
@@ -903,7 +865,7 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 903 |
datasets:[{label:'WM Score',data:sorted.map(function(r){return r.wm;}),
|
| 904 |
backgroundColor:sorted.map(function(r){return (MC[r.model]||'#888')+'bb';}),
|
| 905 |
borderColor:sorted.map(function(r){return MC[r.model]||'#888';}),borderWidth:1.5,borderRadius:6,borderSkipped:false}]},
|
| 906 |
-
options:{indexAxis:'y',plugins:{legend:{display:false},tooltip:{callbacks:{label:function(c){var r=sorted[c.dataIndex];return ' WM: '+r.wm+'/1000 '+r.grade+(r.est?' est.':' โ');} }}},
|
| 907 |
scales:{x:{min:0,max:1000,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},y:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8.5}}}}}
|
| 908 |
});
|
| 909 |
|
|
@@ -921,13 +883,13 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 921 |
|
| 922 |
// 3. Category Breakdown (8๋ชจ๋ธ)
|
| 923 |
var catK=['C01','C02','C03','C04','C05','C06','C07','C08','C09','C10'];
|
| 924 |
-
var catL=['C01\
|
| 925 |
new Chart(document.getElementById('cCats'),{type:'bar',
|
| 926 |
data:{labels:catL,datasets:sorted.map(function(r){return {label:shorten(r.model),
|
| 927 |
data:catK.map(function(k){return r.cats[k];}),
|
| 928 |
backgroundColor:(MC[r.model]||'#888')+'99',borderColor:MC[r.model]||'#888',borderWidth:1,borderRadius:2}; })},
|
| 929 |
options:{plugins:{legend:{labels:{color:tickC,font:{family:'JetBrains Mono',size:7.5},boxWidth:8,padding:5}},
|
| 930 |
-
tooltip:{callbacks:{label:function(c){return ' '+sorted[c.datasetIndex].model+': '+c.raw+'/100';}}}},
|
| 931 |
scales:{y:{min:0,max:100,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},x:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8},maxRotation:0}}}}
|
| 932 |
});
|
| 933 |
|
|
@@ -938,7 +900,7 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 938 |
datasets:[{label:'P2 Cognition',data:cogS.map(function(r){return r.p2;}),
|
| 939 |
backgroundColor:cogS.map(function(r){return (MC[r.model]||'#888')+'cc';}),
|
| 940 |
borderColor:cogS.map(function(r){return MC[r.model]||'#888';}),borderWidth:1.5,borderRadius:5,borderSkipped:false}]},
|
| 941 |
-
options:{plugins:{legend:{display:false},tooltip:{callbacks:{label:function(c){return ' Cognition: '+cogS[c.dataIndex].p2+'/450';}}}},
|
| 942 |
scales:{y:{min:0,max:450,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},x:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8},maxRotation:30}}}}
|
| 943 |
});
|
| 944 |
|
|
@@ -949,7 +911,7 @@ body.dark thead{background:var(--surface-alt)!important;}
|
|
| 949 |
backgroundColor:(MC[r.model]||'#888')+'cc',borderColor:MC[r.model]||'#888',
|
| 950 |
pointRadius:r.est?7:10,pointStyle:r.est?'circle':'star',borderWidth:1.5}; })},
|
| 951 |
options:{plugins:{legend:{labels:{color:tickC,font:{family:'JetBrains Mono',size:7.5},boxWidth:8,padding:5}},
|
| 952 |
-
tooltip:{callbacks:{label:function(c){return c.dataset.label+' โ
|
| 953 |
scales:{x:{min:0,max:100,title:{display:true,text:'Perception (%)',color:tickC,font:{family:'JetBrains Mono',size:9}},grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},
|
| 954 |
y:{min:0,max:100,title:{display:true,text:'Cognition (%)',color:tickC,font:{family:'JetBrains Mono',size:9}},grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}}}}
|
| 955 |
});
|
|
|
|
| 264 |
</div>
|
| 265 |
|
| 266 |
<div style="margin-top:16px;padding:12px 16px;background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);font-size:9px;color:var(--text-sec);line-height:1.7">
|
| 267 |
+
<b style="color:var(--ac)">Track A</b> = Text-Only ยท max 750 pts โ
|
| 268 |
+
<b style="color:#6366f1">Track B</b> = Text + Performance ยท max 1000 pts โ
|
| 269 |
+
<b style="color:var(--green)">Track C</b> = Live Demo + Verified ยท max 1000 pts + โ
|
| 270 |
</div>
|
| 271 |
</div>
|
| 272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
<!-- ===== TAB: CATEGORIES ===== -->
|
| 274 |
<div id="cats" class="tpane">
|
| 275 |
|
|
|
|
| 520 |
<div id="structure" class="tpane">
|
| 521 |
<div class="grid2" style="margin-bottom:14px">
|
| 522 |
<div class="card">
|
| 523 |
+
<h3>Input / Output Format</h3>
|
| 524 |
+
<p>All models are evaluated via the same text interface. No 3D environment required.</p>
|
| 525 |
<div style="background:var(--bg2);border-radius:8px;padding:12px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.9">
|
| 526 |
+
<div style="color:var(--text-muted);margin-bottom:4px">INPUT — scene_context JSON</div>
|
| 527 |
<div style="color:var(--text)">{</div>
|
| 528 |
<div style="color:var(--text);padding-left:12px">"walls": {"left": 3.0, "right": null},</div>
|
| 529 |
<div style="color:var(--text);padding-left:12px">"npc_type": "beast", "npc_distance": 4.5</div>
|
| 530 |
<div style="color:var(--text)">}</div>
|
| 531 |
+
<div style="margin-top:8px;color:var(--text-muted)">OUTPUT — 2 lines required</div>
|
| 532 |
<div style="color:var(--p2)">PREDICT: left=danger(wall), right=safe, fwd=danger(beast)</div>
|
| 533 |
<div style="color:var(--p3)">MOTION: a person sprinting right in desperate terror</div>
|
| 534 |
</div>
|
| 535 |
</div>
|
| 536 |
<div class="card">
|
| 537 |
+
<h3>Scoring Principles</h3>
|
| 538 |
+
<p>All scoring is quantitative and deterministic. Zero subjective judgment.</p>
|
| 539 |
<div style="display:flex;flex-direction:column;gap:7px">
|
| 540 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 541 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 542 |
+
<span style="color:var(--text-sec)"><b>Quantitative</b> — keyword parsing + numeric comparison, no human judgment</span>
|
| 543 |
</div>
|
| 544 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 545 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 546 |
+
<span style="color:var(--text-sec)"><b>Deterministic</b> — same input → same score (temperature=0.0)</span>
|
| 547 |
</div>
|
| 548 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 549 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 550 |
+
<span style="color:var(--text-sec)"><b>Third-party reproducible</b> — full scoring code published</span>
|
| 551 |
</div>
|
| 552 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 553 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 554 |
+
<span style="color:var(--text-sec)"><b>No 3D needed</b> — any model can participate via API</span>
|
| 555 |
</div>
|
| 556 |
<div style="display:flex;align-items:flex-start;gap:8px;font-size:9px">
|
| 557 |
<span style="color:var(--green);font-weight:700;flex-shrink:0">โ</span>
|
| 558 |
+
<span style="color:var(--text-sec)"><b>Not self-evaluated</b> — our scoring engine makes the call</span>
|
| 559 |
</div>
|
| 560 |
</div>
|
| 561 |
</div>
|
|
|
|
| 564 |
<!-- Existing vs. new comparison summary -->
|
| 565 |
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:14px;">
|
| 566 |
<div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;">
|
| 567 |
+
<div style="font-size:10px;font-weight:800;color:#64748b;margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">๐ Existing Benchmark Domains ยท 4 categories</div>
|
| 568 |
+
<div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">Covers areas analogous to FID · FVD · HumanML3D · BABEL</div>
|
| 569 |
<div style="display:flex;flex-direction:column;gap:5px;">
|
| 570 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C01</span>ํ๊ฒฝ ์ธ์ โ Occupancy Grid ๊ณ์ด</div>
|
| 571 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C02</span>๊ฐ์ฒด ์ธ์ โ BABEL ๋์ ์ธ์ ๊ณ์ด</div>
|
|
|
|
| 574 |
</div>
|
| 575 |
</div>
|
| 576 |
<div style="background:linear-gradient(135deg,rgba(232,89,60,.05),rgba(212,160,68,.03));border:1px solid rgba(232,89,60,.2);border-radius:var(--radius-sm);padding:14px;">
|
| 577 |
+
<div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">⚡ VIDRAFT New Definitions · 6 categories</div>
|
| 578 |
+
<div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">Capabilities no existing benchmark has ever measured</div>
|
| 579 |
<div style="display:flex;flex-direction:column;gap:5px;">
|
| 580 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C03</span>์์ธก ๊ธฐ๋ฐ ์ถ๋ก <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 581 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C04</span>์ํ ์ ํ๋ณ ์ฐจ๋ณ ๋ฐ์ <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 582 |
+
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C05</span>자율 감정 에스컬레이션 <span style="color:#d97706;font-size:8px">✦✦✦ No prior research</span></div>
|
| 583 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C06</span>๋งฅ๋ฝ ๊ธฐ์ต ๋ฐ ํ์ฉ <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 584 |
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C07</span>์ํ ํด์ ํ ์ ์ <span style="color:var(--ac);font-size:8px">โฆ ์ต์ด</span></div>
|
| 585 |
+
<div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C10</span>신체 교체 확장성 <span style="color:#d97706;font-size:8px">✦✦✦ No prior research</span></div>
|
| 586 |
</div>
|
| 587 |
</div>
|
| 588 |
</div>
|
|
|
|
| 597 |
<th style="text-align:left">Pillar</th>
|
| 598 |
<th style="text-align:center;min-width:70px">๊ตฌ๋ถ</th>
|
| 599 |
<th style="text-align:left;min-width:150px">๊ธฐ์กด ์ ์ฌ ์งํ</th>
|
| 600 |
+
<th style="text-align:center;min-width:100px">World First?</th>
|
| 601 |
+
<th style="text-align:center">Max</th>
|
| 602 |
</tr>
|
| 603 |
</thead>
|
| 604 |
<tbody id="CAT_TABLE"></tbody>
|
|
|
|
| 611 |
<div style="display:flex;gap:12px;flex-wrap:wrap">
|
| 612 |
<div style="flex:1;min-width:200px;padding:12px;background:var(--bg);border-radius:8px;border:1px solid var(--border)">
|
| 613 |
<div style="font-size:10px;font-weight:800;color:#7c3aed;margin-bottom:4px">๐งฌ FINAL Bench</div>
|
| 614 |
+
<div style="font-size:9px;color:var(--text-sec);line-height:1.6">Text AGI measurement · HF Global Dataset Top 5<br>Covered by 4 press outlets (2026.02)</div>
|
| 615 |
<a href="https://huggingface.co/datasets/FINAL-Bench/Metacognitive" target="_blank" style="font-size:8px;color:#7c3aed;font-family:var(--mono)">โ ๋ฐ๋ก๊ฐ๊ธฐ</a>
|
| 616 |
</div>
|
| 617 |
<div style="flex:1;min-width:200px;padding:12px;background:linear-gradient(135deg,rgba(232,89,60,.06),rgba(212,160,68,.04));border-radius:8px;border:1px solid rgba(232,89,60,.2)">
|
| 618 |
<div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:4px">๐ฅ WM Bench <span style="font-size:7px;background:var(--ac);color:#fff;padding:1px 5px;border-radius:4px">NEW</span></div>
|
| 619 |
+
<div style="font-size:9px;color:var(--text-sec);line-height:1.6">Embodied AGI (world models) · World's first<br>Quantitative cognitive evaluation</div>
|
| 620 |
+
<span style="font-size:8px;color:var(--ac);font-family:var(--mono)">โ You are here</span>
|
| 621 |
</div>
|
| 622 |
</div>
|
| 623 |
</div>
|
|
|
|
| 628 |
<div class="grid2" style="margin-bottom:14px">
|
| 629 |
<div class="card">
|
| 630 |
<h3>🤖 Track A — Text Only</h3>
|
| 631 |
+
<p>Simplest entry. LLMs, rule-based systems, any API-compatible model. Max 750 pts.</p>
|
| 632 |
<ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px">
|
| 633 |
+
<li>Prepare an OpenAI-compatible API endpoint</li>
|
| 634 |
+
<li>Run your model on all 100 scenarios in <code style="background:var(--bg2);padding:1px 4px;border-radius:3px;font-family:var(--mono)">wm_bench_dataset.json</code></li>
|
| 635 |
+
<li>Output the 2-line PREDICT + MOTION format</li>
|
| 636 |
+
<li>Submit your result JSON to the HF Discussion board</li>
|
| 637 |
</ol>
|
| 638 |
</div>
|
| 639 |
<div class="card">
|
| 640 |
<h3>🎯 Track B/C — Full Evaluation</h3>
|
| 641 |
+
<p>Track A + performance metrics or live demo. Max 1000 pts.</p>
|
| 642 |
<ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px">
|
| 643 |
+
<li>Complete Track A</li>
|
| 644 |
+
<li>Measure FPS, Latency, and GPU metrics</li>
|
| 645 |
+
<li>Track C: include a working demo URL</li>
|
| 646 |
+
<li>Submit full JSON to HF Discussion board</li>
|
| 647 |
</ol>
|
| 648 |
</div>
|
| 649 |
</div>
|
| 650 |
|
| 651 |
<div class="card" style="margin-bottom:14px">
|
| 652 |
+
<h3>Submission JSON Format</h3>
|
| 653 |
<div style="background:var(--bg2);border-radius:8px;padding:14px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.8;overflow-x:auto">
|
| 654 |
<pre style="margin:0">{
|
| 655 |
"benchmark": "WM Bench v1.0",
|
|
|
|
| 690 |
<div id="about" class="tpane">
|
| 691 |
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px">
|
| 692 |
<div class="card">
|
| 693 |
+
<h3>🔥 What is WM Bench?</h3>
|
| 694 |
+
<p>Existing benchmarks (HumanML3D, BABEL) measure only motion quality (FID). WM Bench is the world's first benchmark to evaluate <b>cognitive capabilities</b> of world models.</p>
|
| 695 |
</div>
|
| 696 |
<div class="card">
|
| 697 |
+
<h3>🧬 First-Ever Measurements</h3>
|
| 698 |
+
<p>C05 Autonomous Emotion Escalation and C10 Body-Swap Extensibility have zero prior research. C03·C04·C06·C07·C08 are also first defined by WM Bench.</p>
|
| 699 |
</div>
|
| 700 |
<div class="card">
|
| 701 |
<h3>๐ VIDRAFT PROMETHEUS</h3>
|
| 702 |
+
<p>ํ์ฌ ๊ธฐ์ค์ (Baseline). Open LLM brain (any LLM pluggable) + FloodDiffusion-VIDRAFT motion. RTX5070 (local/16GB). 47 FPS. WM Score 726/1000 (Grade B).</p>
|
| 703 |
</div>
|
| 704 |
<div class="card">
|
| 705 |
+
<h3>๐ Version History</h3>
|
| 706 |
<p style="font-family:var(--mono);font-size:9px;line-height:2;color:var(--text-sec)">
|
| 707 |
+
v1.0 (2026.03) — Initial release<br>
|
| 708 |
+
100 scenarios · Auto-scored<br>
|
| 709 |
3 Tracks · 10 Categories<br>
|
| 710 |
+
PROMETHEUS baseline registered
|
| 711 |
</p>
|
| 712 |
</div>
|
| 713 |
<div class="card">
|
|
|
|
| 722 |
</div>
|
| 723 |
</div>
|
| 724 |
<div class="card">
|
| 725 |
+
<h3>⚖️ License</h3>
|
| 726 |
+
<p>데이터셋: <b>CC-BY-SA-4.0</b><br>채점 코드: <b>Apache 2.0</b><br>Free to use and cite. Attribution required.</p>
|
| 727 |
</div>
|
| 728 |
</div>
|
| 729 |
</div>
|
|
|
|
| 812 |
+'<td class="c-model"><div class="mc"><div class="mn">'
|
| 813 |
+'<span style="display:inline-flex;align-items:center;justify-content:center;width:20px;height:20px;border-radius:6px;background:linear-gradient(135deg,#e8593c,#d4a044);color:#fff;font-size:9px;font-weight:800;flex-shrink:0">'+(i+1)+'</span>'
|
| 814 |
+r.model+(r.est?'<span style="font-size:8px;background:#444;color:#aaa;padding:1px 5px;border-radius:4px;margin-left:5px">est.</span>':'<span style="font-size:9px;color:#4caf50;margin-left:5px">โ</span>')+'</div><div class="mp">'+r.org+' ยท '+r.date+'</div></div></td>'
|
| 815 |
+
+'<td>'+(r.wm!=null?scoreBar(r.wm,1000,'#e8593c'):'<div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);padding:4px 8px">Not evaluated</div>')+'</td>'
|
| 816 |
+'<td>'+(r.grade!='?'?'<span class="gr gr-'+r.grade+'">'+r.grade+'</span>':'<span style="font-size:9px;color:var(--text-muted);font-family:var(--mono)">โ</span>')+'</td>'
|
| 817 |
+'<td class="p1-col">'+scoreBar(r.p1,200,PC.P1)+'</td>'
|
| 818 |
+'<td class="p2-col">'+scoreBar(r.p2,500,PC.P2)+'</td>'
|
|
|
|
| 865 |
datasets:[{label:'WM Score',data:sorted.map(function(r){return r.wm;}),
|
| 866 |
backgroundColor:sorted.map(function(r){return (MC[r.model]||'#888')+'bb';}),
|
| 867 |
borderColor:sorted.map(function(r){return MC[r.model]||'#888';}),borderWidth:1.5,borderRadius:6,borderSkipped:false}]},
|
| 868 |
+
options:{indexAxis:'y',plugins:{legend:{display:false},tooltip:{callbacks:{label:function(c){var r=sorted[c.dataIndex];return ' WM: '+r.wm+'/1000 Grade: '+r.grade+(r.est?' (est.)':' โ Official');} }}},
|
| 869 |
scales:{x:{min:0,max:1000,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},y:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8.5}}}}}
|
| 870 |
});
|
| 871 |
|
|
|
|
| 883 |
|
| 884 |
// 3. Category Breakdown (8 models)
|
| 885 |
var catK=['C01','C02','C03','C04','C05','C06','C07','C08','C09','C10'];
|
| 886 |
+
var catL=['C01\nEnv.Aware','C02\nEntity Recog','C03\nPrediction','C04\nThreat Diff','C05\nEmotion Esc','C06\nMemory','C07\nRecovery','C08\nMotion Expr','C09\nRealtime','C10\nBody-Swap'];
|
| 887 |
new Chart(document.getElementById('cCats'),{type:'bar',
|
| 888 |
data:{labels:catL,datasets:sorted.map(function(r){return {label:shorten(r.model),
|
| 889 |
data:catK.map(function(k){return r.cats[k];}),
|
| 890 |
backgroundColor:(MC[r.model]||'#888')+'99',borderColor:MC[r.model]||'#888',borderWidth:1,borderRadius:2}; })},
|
| 891 |
options:{plugins:{legend:{labels:{color:tickC,font:{family:'JetBrains Mono',size:7.5},boxWidth:8,padding:5}},
|
| 892 |
+
tooltip:{callbacks:{label:function(c){return ' '+sorted[c.datasetIndex].model+': '+c.raw+' / 100';}}}},
|
| 893 |
scales:{y:{min:0,max:100,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},x:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8},maxRotation:0}}}}
|
| 894 |
});
|
| 895 |
|
|
|
|
| 900 |
datasets:[{label:'P2 Cognition',data:cogS.map(function(r){return r.p2;}),
|
| 901 |
backgroundColor:cogS.map(function(r){return (MC[r.model]||'#888')+'cc';}),
|
| 902 |
borderColor:cogS.map(function(r){return MC[r.model]||'#888';}),borderWidth:1.5,borderRadius:5,borderSkipped:false}]},
|
| 903 |
+
options:{plugins:{legend:{display:false},tooltip:{callbacks:{label:function(c){return ' P2 Cognition: '+cogS[c.dataIndex].p2+' / 450 pts';}}}},
|
| 904 |
scales:{y:{min:0,max:450,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},x:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8},maxRotation:30}}}}
|
| 905 |
});
|
| 906 |
|
|
|
|
| 911 |
backgroundColor:(MC[r.model]||'#888')+'cc',borderColor:MC[r.model]||'#888',
|
| 912 |
pointRadius:r.est?7:10,pointStyle:r.est?'circle':'star',borderWidth:1.5}; })},
|
| 913 |
options:{plugins:{legend:{labels:{color:tickC,font:{family:'JetBrains Mono',size:7.5},boxWidth:8,padding:5}},
|
| 914 |
+
tooltip:{callbacks:{label:function(c){return c.dataset.label+' โ Perception: '+c.parsed.x+'% Cognition: '+c.parsed.y+'%';}}}},
|
| 915 |
scales:{x:{min:0,max:100,title:{display:true,text:'Perception (%)',color:tickC,font:{family:'JetBrains Mono',size:9}},grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},
|
| 916 |
y:{min:0,max:100,title:{display:true,text:'Cognition (%)',color:tickC,font:{family:'JetBrains Mono',size:9}},grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}}}}
|
| 917 |
});
|