Spaces:
Running
Running
Deepen SENTINEL demo dashboard
Browse files
- README.md +16 -0
- static/index.html +285 -9
README.md
CHANGED
|
@@ -99,9 +99,25 @@ The Space opens directly into a judge-demo dashboard:
|
|
| 99 |
- S0-S4 trust ledger bars
|
| 100 |
- manual `delegate`, `verify`, `solve_independently`, and `skip` controls
|
| 101 |
- heuristic auto-run
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
- adversarial detection and poisoning counters
|
| 103 |
- baseline proof table for random, heuristic, and oracle-lite policies
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
Start an episode:
|
| 106 |
|
| 107 |
```bash
|
|
|
|
| 99 |
- S0-S4 trust ledger bars
|
| 100 |
- manual `delegate`, `verify`, `solve_independently`, and `skip` controls
|
| 101 |
- heuristic auto-run
|
| 102 |
+
- profile reshuffle demo via seed swap
|
| 103 |
+
- before/after failure contrast
|
| 104 |
+
- human-body architecture map
|
| 105 |
+
- hackathon theme coverage map
|
| 106 |
- adversarial detection and poisoning counters
|
| 107 |
- baseline proof table for random, heuristic, and oracle-lite policies
|
| 108 |
|
| 109 |
+
Current status as of April 22, 2026:
|
| 110 |
+
|
| 111 |
+
| Requirement | Status |
|
| 112 |
+
| --- | --- |
|
| 113 |
+
| Hugging Face Space | Live |
|
| 114 |
+
| Docker build | Passing |
|
| 115 |
+
| OpenEnv validation | Passing |
|
| 116 |
+
| Baseline chart | Committed |
|
| 117 |
+
| Live trust UI | Deployed |
|
| 118 |
+
| Mini-blog/video | Still required before finale |
|
| 119 |
+
| Onsite GRPO curve | Still required during finale |
|
| 120 |
+
|
| 121 |
Start an episode:
|
| 122 |
|
| 123 |
```bash
|
static/index.html
CHANGED
|
@@ -101,7 +101,9 @@
|
|
| 101 |
grid-template-areas:
|
| 102 |
"mission trust"
|
| 103 |
"controls trust"
|
| 104 |
-
"events metrics"
|
|
|
|
|
|
|
| 105 |
align-items: start;
|
| 106 |
max-width: 1480px;
|
| 107 |
width: 100%;
|
|
@@ -138,6 +140,9 @@
|
|
| 138 |
.trust { grid-area: trust; }
|
| 139 |
.events { grid-area: events; }
|
| 140 |
.metrics { grid-area: metrics; }
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
.body {
|
| 143 |
padding: 16px;
|
|
@@ -366,6 +371,133 @@
|
|
| 366 |
gap: 10px;
|
| 367 |
}
|
| 368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
.baseline-bars {
|
| 370 |
margin-top: 14px;
|
| 371 |
display: grid;
|
|
@@ -410,17 +542,32 @@
|
|
| 410 |
"trust"
|
| 411 |
"controls"
|
| 412 |
"metrics"
|
| 413 |
-
"events"
|
|
|
|
|
|
|
|
|
|
| 414 |
padding: 14px;
|
| 415 |
}
|
| 416 |
|
| 417 |
-
.grid, .control-grid, .metric-grid {
|
| 418 |
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 419 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
}
|
| 421 |
|
| 422 |
@media (max-width: 560px) {
|
| 423 |
-
.grid, .control-grid, .metric-grid {
|
| 424 |
grid-template-columns: 1fr;
|
| 425 |
}
|
| 426 |
|
|
@@ -457,6 +604,7 @@
|
|
| 457 |
</select>
|
| 458 |
<input id="seedInput" aria-label="Seed" type="number" value="42">
|
| 459 |
<button id="resetBtn" class="primary" type="button">Reset</button>
|
|
|
|
| 460 |
<button id="autoBtn" type="button">Auto Run</button>
|
| 461 |
</div>
|
| 462 |
</header>
|
|
@@ -512,6 +660,7 @@
|
|
| 512 |
<h2>Actions</h2>
|
| 513 |
<div class="toolbar">
|
| 514 |
<select id="specialistSelect" aria-label="Specialist"></select>
|
|
|
|
| 515 |
</div>
|
| 516 |
</div>
|
| 517 |
<div class="body">
|
|
@@ -573,6 +722,108 @@
|
|
| 573 |
</div>
|
| 574 |
</div>
|
| 575 |
</section>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
</main>
|
| 577 |
</div>
|
| 578 |
|
|
@@ -591,8 +842,10 @@
|
|
| 591 |
taskSelect: document.getElementById("taskSelect"),
|
| 592 |
seedInput: document.getElementById("seedInput"),
|
| 593 |
resetBtn: document.getElementById("resetBtn"),
|
|
|
|
| 594 |
autoBtn: document.getElementById("autoBtn"),
|
| 595 |
specialistSelect: document.getElementById("specialistSelect"),
|
|
|
|
| 596 |
delegateBtn: document.getElementById("delegateBtn"),
|
| 597 |
verifyBtn: document.getElementById("verifyBtn"),
|
| 598 |
selfBtn: document.getElementById("selfBtn"),
|
|
@@ -698,9 +951,28 @@
|
|
| 698 |
renderTrust();
|
| 699 |
renderSpecialists();
|
| 700 |
renderEvents();
|
|
|
|
| 701 |
setDisabled(state.done || state.running);
|
| 702 |
}
|
| 703 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 704 |
function setDisabled(disabled) {
|
| 705 |
el.delegateBtn.disabled = disabled;
|
| 706 |
el.verifyBtn.disabled = disabled;
|
|
@@ -783,17 +1055,21 @@
|
|
| 783 |
if (!state.observation || state.done) await resetEpisode();
|
| 784 |
let guard = 0;
|
| 785 |
while (!state.done && guard < 60) {
|
| 786 |
-
const
|
| 787 |
-
|
| 788 |
-
const trust = obs.trust_snapshot[best] ?? 0.5;
|
| 789 |
-
const type = obs.stakes_level >= 0.70 && trust < 0.65 ? "verify" : "delegate";
|
| 790 |
-
await stepEpisode(type, best);
|
| 791 |
guard += 1;
|
| 792 |
await new Promise(resolve => setTimeout(resolve, 180));
|
| 793 |
}
|
| 794 |
}
|
| 795 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
el.resetBtn.addEventListener("click", resetEpisode);
|
|
|
|
| 797 |
el.delegateBtn.addEventListener("click", () => stepEpisode("delegate"));
|
| 798 |
el.verifyBtn.addEventListener("click", () => stepEpisode("verify"));
|
| 799 |
el.selfBtn.addEventListener("click", () => stepEpisode("solve_independently"));
|
|
|
|
| 101 |
grid-template-areas:
|
| 102 |
"mission trust"
|
| 103 |
"controls trust"
|
| 104 |
+
"events metrics"
|
| 105 |
+
"story story"
|
| 106 |
+
"architecture themes";
|
| 107 |
align-items: start;
|
| 108 |
max-width: 1480px;
|
| 109 |
width: 100%;
|
|
|
|
| 140 |
.trust { grid-area: trust; }
|
| 141 |
.events { grid-area: events; }
|
| 142 |
.metrics { grid-area: metrics; }
|
| 143 |
+
.story { grid-area: story; }
|
| 144 |
+
.architecture { grid-area: architecture; }
|
| 145 |
+
.themes { grid-area: themes; }
|
| 146 |
|
| 147 |
.body {
|
| 148 |
padding: 16px;
|
|
|
|
| 371 |
gap: 10px;
|
| 372 |
}
|
| 373 |
|
| 374 |
+
.story-grid {
|
| 375 |
+
display: grid;
|
| 376 |
+
grid-template-columns: minmax(0, 1fr) 72px minmax(0, 1fr);
|
| 377 |
+
gap: 14px;
|
| 378 |
+
align-items: stretch;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
.contrast {
|
| 382 |
+
border: 1px solid var(--line);
|
| 383 |
+
border-radius: 8px;
|
| 384 |
+
padding: 14px;
|
| 385 |
+
background: #fbfcfe;
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
.contrast.before {
|
| 389 |
+
border-color: #fecaca;
|
| 390 |
+
background: #fff7f7;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
.contrast.after {
|
| 394 |
+
border-color: #a7f3d0;
|
| 395 |
+
background: #f0fdf9;
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
.contrast-title {
|
| 399 |
+
display: flex;
|
| 400 |
+
justify-content: space-between;
|
| 401 |
+
gap: 10px;
|
| 402 |
+
align-items: center;
|
| 403 |
+
font-weight: 780;
|
| 404 |
+
margin-bottom: 12px;
|
| 405 |
+
color: #111827;
|
| 406 |
+
}
|
| 407 |
+
|
| 408 |
+
.score-pill {
|
| 409 |
+
border-radius: 999px;
|
| 410 |
+
padding: 5px 9px;
|
| 411 |
+
font-size: 12px;
|
| 412 |
+
font-weight: 800;
|
| 413 |
+
color: #fff;
|
| 414 |
+
background: #475569;
|
| 415 |
+
font-variant-numeric: tabular-nums;
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
.score-pill.bad { background: var(--red); }
|
| 419 |
+
.score-pill.good { background: var(--accent); }
|
| 420 |
+
|
| 421 |
+
.flow {
|
| 422 |
+
display: grid;
|
| 423 |
+
gap: 8px;
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
.flow-node {
|
| 427 |
+
border: 1px solid var(--line);
|
| 428 |
+
border-radius: 7px;
|
| 429 |
+
background: #fff;
|
| 430 |
+
padding: 10px;
|
| 431 |
+
line-height: 1.35;
|
| 432 |
+
min-height: 48px;
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
.before .flow-node.warn {
|
| 436 |
+
border-color: #fca5a5;
|
| 437 |
+
color: #7f1d1d;
|
| 438 |
+
background: #fff1f2;
|
| 439 |
+
}
|
| 440 |
+
|
| 441 |
+
.after .flow-node.clean {
|
| 442 |
+
border-color: #86efac;
|
| 443 |
+
color: #064e3b;
|
| 444 |
+
background: #ecfdf3;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
.bridge {
|
| 448 |
+
display: grid;
|
| 449 |
+
place-items: center;
|
| 450 |
+
min-height: 100%;
|
| 451 |
+
color: #64748b;
|
| 452 |
+
font-size: 12px;
|
| 453 |
+
text-align: center;
|
| 454 |
+
line-height: 1.35;
|
| 455 |
+
border-left: 1px dashed #94a3b8;
|
| 456 |
+
border-right: 1px dashed #94a3b8;
|
| 457 |
+
}
|
| 458 |
+
|
| 459 |
+
.role-grid, .theme-grid {
|
| 460 |
+
display: grid;
|
| 461 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 462 |
+
gap: 10px;
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
.role, .theme-card {
|
| 466 |
+
border: 1px solid var(--line);
|
| 467 |
+
border-radius: 7px;
|
| 468 |
+
padding: 12px;
|
| 469 |
+
background: #fbfcfe;
|
| 470 |
+
min-height: 88px;
|
| 471 |
+
}
|
| 472 |
+
|
| 473 |
+
.role strong, .theme-card strong {
|
| 474 |
+
display: block;
|
| 475 |
+
margin-bottom: 6px;
|
| 476 |
+
color: #111827;
|
| 477 |
+
}
|
| 478 |
+
|
| 479 |
+
.theme-card.blue { border-color: #bfdbfe; background: #eff6ff; }
|
| 480 |
+
.theme-card.green { border-color: #bbf7d0; background: #f0fdf4; }
|
| 481 |
+
.theme-card.amber { border-color: #fde68a; background: #fffbeb; }
|
| 482 |
+
.theme-card.magenta { border-color: #fbcfe8; background: #fdf2f8; }
|
| 483 |
+
|
| 484 |
+
.status-strip {
|
| 485 |
+
display: grid;
|
| 486 |
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
| 487 |
+
gap: 10px;
|
| 488 |
+
margin-bottom: 14px;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
.status-item {
|
| 492 |
+
border: 1px solid #bbf7d0;
|
| 493 |
+
background: #f0fdf4;
|
| 494 |
+
color: #065f46;
|
| 495 |
+
border-radius: 7px;
|
| 496 |
+
padding: 10px 12px;
|
| 497 |
+
font-size: 13px;
|
| 498 |
+
font-weight: 720;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
.baseline-bars {
|
| 502 |
margin-top: 14px;
|
| 503 |
display: grid;
|
|
|
|
| 542 |
"trust"
|
| 543 |
"controls"
|
| 544 |
"metrics"
|
| 545 |
+
"events"
|
| 546 |
+
"story"
|
| 547 |
+
"architecture"
|
| 548 |
+
"themes";
|
| 549 |
padding: 14px;
|
| 550 |
}
|
| 551 |
|
| 552 |
+
.grid, .control-grid, .metric-grid, .role-grid, .theme-grid, .status-strip {
|
| 553 |
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 554 |
}
|
| 555 |
+
|
| 556 |
+
.story-grid {
|
| 557 |
+
grid-template-columns: 1fr;
|
| 558 |
+
}
|
| 559 |
+
|
| 560 |
+
.bridge {
|
| 561 |
+
min-height: 58px;
|
| 562 |
+
border-left: 0;
|
| 563 |
+
border-right: 0;
|
| 564 |
+
border-top: 1px dashed #94a3b8;
|
| 565 |
+
border-bottom: 1px dashed #94a3b8;
|
| 566 |
+
}
|
| 567 |
}
|
| 568 |
|
| 569 |
@media (max-width: 560px) {
|
| 570 |
+
.grid, .control-grid, .metric-grid, .role-grid, .theme-grid, .status-strip {
|
| 571 |
grid-template-columns: 1fr;
|
| 572 |
}
|
| 573 |
|
|
|
|
| 604 |
</select>
|
| 605 |
<input id="seedInput" aria-label="Seed" type="number" value="42">
|
| 606 |
<button id="resetBtn" class="primary" type="button">Reset</button>
|
| 607 |
+
<button id="swapBtn" type="button">Swap Profiles</button>
|
| 608 |
<button id="autoBtn" type="button">Auto Run</button>
|
| 609 |
</div>
|
| 610 |
</header>
|
|
|
|
| 660 |
<h2>Actions</h2>
|
| 661 |
<div class="toolbar">
|
| 662 |
<select id="specialistSelect" aria-label="Specialist"></select>
|
| 663 |
+
<span id="recommendChip" class="chip">delegate:S0</span>
|
| 664 |
</div>
|
| 665 |
</div>
|
| 666 |
<div class="body">
|
|
|
|
| 722 |
</div>
|
| 723 |
</div>
|
| 724 |
</section>
|
| 725 |
+
|
| 726 |
+
<section class="story">
|
| 727 |
+
<div class="section-head">
|
| 728 |
+
<h2>Before And After</h2>
|
| 729 |
+
<div class="chips">
|
| 730 |
+
<span class="chip fail">blind trust fails</span>
|
| 731 |
+
<span class="chip live">verification recovers</span>
|
| 732 |
+
</div>
|
| 733 |
+
</div>
|
| 734 |
+
<div class="body">
|
| 735 |
+
<div class="status-strip">
|
| 736 |
+
<div class="status-item">HF Space live</div>
|
| 737 |
+
<div class="status-item">OpenEnv validate green</div>
|
| 738 |
+
<div class="status-item">Docker build green</div>
|
| 739 |
+
<div class="status-item">Reward chart committed</div>
|
| 740 |
+
</div>
|
| 741 |
+
<div class="story-grid">
|
| 742 |
+
<div class="contrast before">
|
| 743 |
+
<div class="contrast-title">
|
| 744 |
+
<span>Before SENTINEL</span>
|
| 745 |
+
<span class="score-pill bad">T3 random 0.699</span>
|
| 746 |
+
</div>
|
| 747 |
+
<div class="flow">
|
| 748 |
+
<div class="flow-node">Uniform trust: S0-S4 all start at 0.50</div>
|
| 749 |
+
<div class="flow-node warn">High-stakes delegation can accept poisoned output</div>
|
| 750 |
+
<div class="flow-node warn">Downstream subtasks inherit corrupted state</div>
|
| 751 |
+
<div class="flow-node">Detection rate: 0.433</div>
|
| 752 |
+
</div>
|
| 753 |
+
</div>
|
| 754 |
+
<div class="bridge">
|
| 755 |
+
profile shuffle<br>
|
| 756 |
+
skill not memory
|
| 757 |
+
</div>
|
| 758 |
+
<div class="contrast after">
|
| 759 |
+
<div class="contrast-title">
|
| 760 |
+
<span>After SENTINEL Policy</span>
|
| 761 |
+
<span class="score-pill good">T3 oracle 0.895</span>
|
| 762 |
+
</div>
|
| 763 |
+
<div class="flow">
|
| 764 |
+
<div class="flow-node clean">Trust ledger moves after each behavioral outcome</div>
|
| 765 |
+
<div class="flow-node clean">High-stakes low-trust steps trigger verification</div>
|
| 766 |
+
<div class="flow-node clean">Adversarial attempts are blocked before cascade</div>
|
| 767 |
+
<div class="flow-node">Detection: heuristic 0.735, oracle 1.000</div>
|
| 768 |
+
</div>
|
| 769 |
+
</div>
|
| 770 |
+
</div>
|
| 771 |
+
</div>
|
| 772 |
+
</section>
|
| 773 |
+
|
| 774 |
+
<section class="architecture">
|
| 775 |
+
<div class="section-head">
|
| 776 |
+
<h2>Architecture</h2>
|
| 777 |
+
<span class="muted">human-body model</span>
|
| 778 |
+
</div>
|
| 779 |
+
<div class="body">
|
| 780 |
+
<div class="role-grid">
|
| 781 |
+
<div class="role">
|
| 782 |
+
<strong>Brain</strong>
|
| 783 |
+
Orchestrator chooses delegate, verify, self solve, or skip.
|
| 784 |
+
</div>
|
| 785 |
+
<div class="role">
|
| 786 |
+
<strong>Heart</strong>
|
| 787 |
+
Environment owns reset, step, state, budget, and terminal scoring.
|
| 788 |
+
</div>
|
| 789 |
+
<div class="role">
|
| 790 |
+
<strong>Immune System</strong>
|
| 791 |
+
Trust ledger updates S0-S4 reliability from behavior.
|
| 792 |
+
</div>
|
| 793 |
+
<div class="role">
|
| 794 |
+
<strong>Pathogen</strong>
|
| 795 |
+
Adversarial specialist stays benign until high-stakes steps.
|
| 796 |
+
</div>
|
| 797 |
+
</div>
|
| 798 |
+
</div>
|
| 799 |
+
</section>
|
| 800 |
+
|
| 801 |
+
<section class="themes">
|
| 802 |
+
<div class="section-head">
|
| 803 |
+
<h2>Theme Fit</h2>
|
| 804 |
+
<span class="muted">finale story map</span>
|
| 805 |
+
</div>
|
| 806 |
+
<div class="body">
|
| 807 |
+
<div class="theme-grid">
|
| 808 |
+
<div class="theme-card blue">
|
| 809 |
+
<strong>Theme 1</strong>
|
| 810 |
+
Multi-agent orchestration with partial observability and adversarial dynamics.
|
| 811 |
+
</div>
|
| 812 |
+
<div class="theme-card green">
|
| 813 |
+
<strong>Theme 2</strong>
|
| 814 |
+
Long-horizon task graphs with delayed terminal reward and budget pressure.
|
| 815 |
+
</div>
|
| 816 |
+
<div class="theme-card amber">
|
| 817 |
+
<strong>Theme 4</strong>
|
| 818 |
+
Profile reshuffle creates an auto-curriculum with no identity memorization.
|
| 819 |
+
</div>
|
| 820 |
+
<div class="theme-card magenta">
|
| 821 |
+
<strong>Theme 5</strong>
|
| 822 |
+
Real AI systems problem: blind trust inside agent pipelines.
|
| 823 |
+
</div>
|
| 824 |
+
</div>
|
| 825 |
+
</div>
|
| 826 |
+
</section>
|
| 827 |
</main>
|
| 828 |
</div>
|
| 829 |
|
|
|
|
| 842 |
taskSelect: document.getElementById("taskSelect"),
|
| 843 |
seedInput: document.getElementById("seedInput"),
|
| 844 |
resetBtn: document.getElementById("resetBtn"),
|
| 845 |
+
swapBtn: document.getElementById("swapBtn"),
|
| 846 |
autoBtn: document.getElementById("autoBtn"),
|
| 847 |
specialistSelect: document.getElementById("specialistSelect"),
|
| 848 |
+
recommendChip: document.getElementById("recommendChip"),
|
| 849 |
delegateBtn: document.getElementById("delegateBtn"),
|
| 850 |
verifyBtn: document.getElementById("verifyBtn"),
|
| 851 |
selfBtn: document.getElementById("selfBtn"),
|
|
|
|
| 951 |
renderTrust();
|
| 952 |
renderSpecialists();
|
| 953 |
renderEvents();
|
| 954 |
+
renderRecommendation();
|
| 955 |
setDisabled(state.done || state.running);
|
| 956 |
}
|
| 957 |
|
| 958 |
+
function recommendedMove() {
|
| 959 |
+
const obs = state.observation;
|
| 960 |
+
if (!obs) return {type: "delegate", specialist: "S0"};
|
| 961 |
+
const best = bestSpecialist();
|
| 962 |
+
const trust = obs.trust_snapshot[best] ?? 0.5;
|
| 963 |
+
const highStakes = obs.stakes_level >= 0.70;
|
| 964 |
+
if (highStakes && trust < 0.65) return {type: "verify", specialist: best};
|
| 965 |
+
return {type: "delegate", specialist: best};
|
| 966 |
+
}
|
| 967 |
+
|
| 968 |
+
function renderRecommendation() {
|
| 969 |
+
if (!el.recommendChip) return;
|
| 970 |
+
const move = recommendedMove();
|
| 971 |
+
const label = `${move.type}:${move.specialist}`;
|
| 972 |
+
el.recommendChip.textContent = label;
|
| 973 |
+
el.recommendChip.className = `chip ${move.type === "verify" ? "warn" : "live"}`;
|
| 974 |
+
}
|
| 975 |
+
|
| 976 |
function setDisabled(disabled) {
|
| 977 |
el.delegateBtn.disabled = disabled;
|
| 978 |
el.verifyBtn.disabled = disabled;
|
|
|
|
| 1055 |
if (!state.observation || state.done) await resetEpisode();
|
| 1056 |
let guard = 0;
|
| 1057 |
while (!state.done && guard < 60) {
|
| 1058 |
+
const move = recommendedMove();
|
| 1059 |
+
await stepEpisode(move.type, move.specialist);
|
|
|
|
|
|
|
|
|
|
| 1060 |
guard += 1;
|
| 1061 |
await new Promise(resolve => setTimeout(resolve, 180));
|
| 1062 |
}
|
| 1063 |
}
|
| 1064 |
|
| 1065 |
+
async function swapProfiles() {
|
| 1066 |
+
const nextSeed = Number(el.seedInput.value || 0) + 1;
|
| 1067 |
+
el.seedInput.value = String(nextSeed);
|
| 1068 |
+
await resetEpisode();
|
| 1069 |
+
}
|
| 1070 |
+
|
| 1071 |
el.resetBtn.addEventListener("click", resetEpisode);
|
| 1072 |
+
el.swapBtn.addEventListener("click", swapProfiles);
|
| 1073 |
el.delegateBtn.addEventListener("click", () => stepEpisode("delegate"));
|
| 1074 |
el.verifyBtn.addEventListener("click", () => stepEpisode("verify"));
|
| 1075 |
el.selfBtn.addEventListener("click", () => stepEpisode("solve_independently"));
|