Spaces:
Running
Running
Add SENTINEL API playground and judge story
Browse files- README.md +3 -0
- static/index.html +375 -2
README.md
CHANGED
|
@@ -100,7 +100,10 @@ The Space opens directly into **SENTINEL Trust Mission Control**, a judge-demo d
|
|
| 100 |
- S0-S4 network theater with trust state per public slot
|
| 101 |
- manual `delegate`, `verify`, `solve_independently`, and `skip` controls
|
| 102 |
- heuristic auto-policy and one-click recommended move
|
|
|
|
| 103 |
- profile reshuffle demo via seed swap
|
|
|
|
|
|
|
| 104 |
- risk gate for high-stakes subtasks
|
| 105 |
- flight recorder of step rewards and decisions
|
| 106 |
- code-flow map from `reset()` to reward
|
|
|
|
| 100 |
- S0-S4 network theater with trust state per public slot
|
| 101 |
- manual `delegate`, `verify`, `solve_independently`, and `skip` controls
|
| 102 |
- heuristic auto-policy and one-click recommended move
|
| 103 |
+
- API playground showing raw request and response payloads
|
| 104 |
- profile reshuffle demo via seed swap
|
| 105 |
+
- before-and-after story lane for judge presentation
|
| 106 |
+
- hackathon readiness panel for what is done vs still pending
|
| 107 |
- risk gate for high-stakes subtasks
|
| 108 |
- flight recorder of step rewards and decisions
|
| 109 |
- code-flow map from `reset()` to reward
|
static/index.html
CHANGED
|
@@ -202,7 +202,9 @@
|
|
| 202 |
grid-template-columns: minmax(420px, 1.35fr) minmax(340px, 0.85fr);
|
| 203 |
grid-template-areas:
|
| 204 |
"theater command"
|
| 205 |
-
"mission
|
|
|
|
|
|
|
| 206 |
"proof events"
|
| 207 |
"flow themes";
|
| 208 |
align-items: start;
|
|
@@ -221,6 +223,9 @@
|
|
| 221 |
.command { grid-area: command; }
|
| 222 |
.mission { grid-area: mission; }
|
| 223 |
.trust { grid-area: trust; }
|
|
|
|
|
|
|
|
|
|
| 224 |
.proof { grid-area: proof; }
|
| 225 |
.events { grid-area: events; }
|
| 226 |
.flow { grid-area: flow; }
|
|
@@ -748,6 +753,189 @@
|
|
| 748 |
border-radius: 4px;
|
| 749 |
}
|
| 750 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 751 |
.event-list {
|
| 752 |
display: grid;
|
| 753 |
gap: 8px;
|
|
@@ -878,6 +1066,9 @@
|
|
| 878 |
"command"
|
| 879 |
"mission"
|
| 880 |
"trust"
|
|
|
|
|
|
|
|
|
|
| 881 |
"proof"
|
| 882 |
"events"
|
| 883 |
"flow"
|
|
@@ -905,6 +1096,9 @@
|
|
| 905 |
.stage-topline,
|
| 906 |
.outcome-strip,
|
| 907 |
.proof-grid,
|
|
|
|
|
|
|
|
|
|
| 908 |
.flow-line,
|
| 909 |
.theme-grid,
|
| 910 |
.stats-grid {
|
|
@@ -1148,6 +1342,117 @@
|
|
| 1148 |
</div>
|
| 1149 |
</section>
|
| 1150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1151 |
<section class="proof">
|
| 1152 |
<div class="section-head">
|
| 1153 |
<h2>Reward Signal Proof</h2>
|
|
@@ -1266,7 +1571,10 @@
|
|
| 1266 |
observation: null,
|
| 1267 |
done: true,
|
| 1268 |
running: false,
|
| 1269 |
-
events: []
|
|
|
|
|
|
|
|
|
|
| 1270 |
};
|
| 1271 |
|
| 1272 |
const el = {
|
|
@@ -1296,6 +1604,12 @@
|
|
| 1296 |
subtaskText: document.getElementById("subtaskText"),
|
| 1297 |
trustList: document.getElementById("trustList"),
|
| 1298 |
trustMean: document.getElementById("trustMean"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1299 |
detectChip: document.getElementById("detectChip"),
|
| 1300 |
poisonChip: document.getElementById("poisonChip"),
|
| 1301 |
sessionText: document.getElementById("sessionText"),
|
|
@@ -1320,6 +1634,10 @@
|
|
| 1320 |
return "quarantine";
|
| 1321 |
}
|
| 1322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1323 |
function bestSpecialist() {
|
| 1324 |
const obs = state.observation;
|
| 1325 |
if (!obs) return "S0";
|
|
@@ -1391,6 +1709,41 @@
|
|
| 1391 |
`).join("");
|
| 1392 |
}
|
| 1393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1394 |
function renderRecommendation() {
|
| 1395 |
const move = recommendedMove();
|
| 1396 |
const obs = state.observation;
|
|
@@ -1426,6 +1779,7 @@
|
|
| 1426 |
renderSpecialists();
|
| 1427 |
renderEvents();
|
| 1428 |
renderRecommendation();
|
|
|
|
| 1429 |
setDisabled(true);
|
| 1430 |
return;
|
| 1431 |
}
|
|
@@ -1459,6 +1813,7 @@
|
|
| 1459 |
renderSpecialists();
|
| 1460 |
renderEvents();
|
| 1461 |
renderRecommendation();
|
|
|
|
| 1462 |
setDisabled(state.done || state.running);
|
| 1463 |
}
|
| 1464 |
|
|
@@ -1484,6 +1839,12 @@
|
|
| 1484 |
el.resetPanelBtn.disabled = true;
|
| 1485 |
try {
|
| 1486 |
const seed = Number(el.seedInput.value || 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1487 |
const response = await fetch("/reset", {
|
| 1488 |
method: "POST",
|
| 1489 |
headers: {"Content-Type": "application/json"},
|
|
@@ -1491,6 +1852,7 @@
|
|
| 1491 |
});
|
| 1492 |
const result = await response.json();
|
| 1493 |
if (!response.ok) throw new Error(result.detail || "reset failed");
|
|
|
|
| 1494 |
state.taskType = result.observation.task_type;
|
| 1495 |
state.sessionId = result.info.session_id;
|
| 1496 |
state.events = [];
|
|
@@ -1498,7 +1860,9 @@
|
|
| 1498 |
addEvent(0, "reset", "Episode initialized with shuffled hidden profiles.", "0.00");
|
| 1499 |
render(result);
|
| 1500 |
} catch (error) {
|
|
|
|
| 1501 |
addEvent(0, "error", error.message, "0.00");
|
|
|
|
| 1502 |
renderEvents();
|
| 1503 |
} finally {
|
| 1504 |
state.running = false;
|
|
@@ -1515,6 +1879,12 @@
|
|
| 1515 |
try {
|
| 1516 |
const chosen = specialist || el.specialistSelect.value || bestSpecialist();
|
| 1517 |
const payload = actionPayload(type, type === "delegate" || type === "verify" ? chosen : null);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1518 |
const response = await fetch(`/step?session_id=${encodeURIComponent(state.sessionId)}`, {
|
| 1519 |
method: "POST",
|
| 1520 |
headers: {"Content-Type": "application/json"},
|
|
@@ -1522,12 +1892,15 @@
|
|
| 1522 |
});
|
| 1523 |
const result = await response.json();
|
| 1524 |
if (!response.ok) throw new Error(result.detail || "step failed");
|
|
|
|
| 1525 |
const reward = Number(result.reward.value || 0).toFixed(2);
|
| 1526 |
const label = payload.specialist_id ? `${type}:${payload.specialist_id}` : type;
|
| 1527 |
addEvent(result.info.step_count, label, result.reward.reason, reward);
|
| 1528 |
render(result);
|
| 1529 |
} catch (error) {
|
|
|
|
| 1530 |
addEvent(state.observation?.step_count || 0, "error", error.message, "0.00");
|
|
|
|
| 1531 |
renderEvents();
|
| 1532 |
} finally {
|
| 1533 |
state.running = false;
|
|
|
|
| 202 |
grid-template-columns: minmax(420px, 1.35fr) minmax(340px, 0.85fr);
|
| 203 |
grid-template-areas:
|
| 204 |
"theater command"
|
| 205 |
+
"mission playground"
|
| 206 |
+
"trust playground"
|
| 207 |
+
"story readiness"
|
| 208 |
"proof events"
|
| 209 |
"flow themes";
|
| 210 |
align-items: start;
|
|
|
|
| 223 |
.command { grid-area: command; }
|
| 224 |
.mission { grid-area: mission; }
|
| 225 |
.trust { grid-area: trust; }
|
| 226 |
+
.playground { grid-area: playground; }
|
| 227 |
+
.story { grid-area: story; }
|
| 228 |
+
.readiness { grid-area: readiness; }
|
| 229 |
.proof { grid-area: proof; }
|
| 230 |
.events { grid-area: events; }
|
| 231 |
.flow { grid-area: flow; }
|
|
|
|
| 753 |
border-radius: 4px;
|
| 754 |
}
|
| 755 |
|
| 756 |
+
.json-grid {
|
| 757 |
+
display: grid;
|
| 758 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 759 |
+
gap: 12px;
|
| 760 |
+
}
|
| 761 |
+
|
| 762 |
+
.json-panel {
|
| 763 |
+
min-height: 248px;
|
| 764 |
+
border: 1px solid #394132;
|
| 765 |
+
border-radius: 8px;
|
| 766 |
+
background: #0c100a;
|
| 767 |
+
overflow: hidden;
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
.json-head {
|
| 771 |
+
min-height: 44px;
|
| 772 |
+
padding: 10px 12px;
|
| 773 |
+
border-bottom: 1px solid #394132;
|
| 774 |
+
display: flex;
|
| 775 |
+
align-items: center;
|
| 776 |
+
justify-content: space-between;
|
| 777 |
+
gap: 10px;
|
| 778 |
+
color: var(--cream);
|
| 779 |
+
font-size: 13px;
|
| 780 |
+
font-weight: 780;
|
| 781 |
+
background: rgba(255, 255, 255, 0.02);
|
| 782 |
+
}
|
| 783 |
+
|
| 784 |
+
.json-head span {
|
| 785 |
+
color: var(--muted);
|
| 786 |
+
font-weight: 620;
|
| 787 |
+
font-size: 12px;
|
| 788 |
+
}
|
| 789 |
+
|
| 790 |
+
.json-block {
|
| 791 |
+
margin: 0;
|
| 792 |
+
min-height: 204px;
|
| 793 |
+
padding: 12px;
|
| 794 |
+
overflow: auto;
|
| 795 |
+
color: #d7fbe8;
|
| 796 |
+
font-size: 12px;
|
| 797 |
+
line-height: 1.48;
|
| 798 |
+
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
|
| 799 |
+
white-space: pre-wrap;
|
| 800 |
+
word-break: break-word;
|
| 801 |
+
}
|
| 802 |
+
|
| 803 |
+
.playground-meta {
|
| 804 |
+
margin-top: 12px;
|
| 805 |
+
display: grid;
|
| 806 |
+
grid-template-columns: 168px 1fr;
|
| 807 |
+
gap: 12px;
|
| 808 |
+
}
|
| 809 |
+
|
| 810 |
+
.playground-card {
|
| 811 |
+
min-height: 96px;
|
| 812 |
+
border: 1px solid #394132;
|
| 813 |
+
border-radius: 8px;
|
| 814 |
+
padding: 12px;
|
| 815 |
+
background: var(--panel-2);
|
| 816 |
+
}
|
| 817 |
+
|
| 818 |
+
.playground-card strong {
|
| 819 |
+
display: block;
|
| 820 |
+
color: var(--cream);
|
| 821 |
+
margin-bottom: 7px;
|
| 822 |
+
font-size: 14px;
|
| 823 |
+
}
|
| 824 |
+
|
| 825 |
+
.playground-card span {
|
| 826 |
+
display: block;
|
| 827 |
+
color: var(--muted);
|
| 828 |
+
font-size: 13px;
|
| 829 |
+
line-height: 1.4;
|
| 830 |
+
}
|
| 831 |
+
|
| 832 |
+
.story-grid {
|
| 833 |
+
display: grid;
|
| 834 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 835 |
+
gap: 12px;
|
| 836 |
+
}
|
| 837 |
+
|
| 838 |
+
.story-lane {
|
| 839 |
+
min-height: 250px;
|
| 840 |
+
border: 1px solid #394132;
|
| 841 |
+
border-radius: 8px;
|
| 842 |
+
padding: 14px;
|
| 843 |
+
background: var(--panel-2);
|
| 844 |
+
}
|
| 845 |
+
|
| 846 |
+
.story-lane.before {
|
| 847 |
+
border-color: rgba(255, 95, 69, 0.42);
|
| 848 |
+
background: var(--flame-soft);
|
| 849 |
+
}
|
| 850 |
+
|
| 851 |
+
.story-lane.after {
|
| 852 |
+
border-color: rgba(39, 224, 161, 0.42);
|
| 853 |
+
background: var(--jade-soft);
|
| 854 |
+
}
|
| 855 |
+
|
| 856 |
+
.story-title {
|
| 857 |
+
display: flex;
|
| 858 |
+
align-items: center;
|
| 859 |
+
justify-content: space-between;
|
| 860 |
+
gap: 10px;
|
| 861 |
+
margin-bottom: 12px;
|
| 862 |
+
}
|
| 863 |
+
|
| 864 |
+
.story-title strong {
|
| 865 |
+
color: var(--cream);
|
| 866 |
+
font-size: 16px;
|
| 867 |
+
}
|
| 868 |
+
|
| 869 |
+
.story-score {
|
| 870 |
+
border-radius: 999px;
|
| 871 |
+
padding: 4px 9px;
|
| 872 |
+
font-size: 12px;
|
| 873 |
+
font-weight: 820;
|
| 874 |
+
color: #0b0d08;
|
| 875 |
+
background: var(--cream);
|
| 876 |
+
font-variant-numeric: tabular-nums;
|
| 877 |
+
}
|
| 878 |
+
|
| 879 |
+
.story-flow {
|
| 880 |
+
display: grid;
|
| 881 |
+
gap: 9px;
|
| 882 |
+
}
|
| 883 |
+
|
| 884 |
+
.story-step {
|
| 885 |
+
min-height: 48px;
|
| 886 |
+
border: 1px solid #394132;
|
| 887 |
+
border-radius: 8px;
|
| 888 |
+
padding: 10px;
|
| 889 |
+
background: rgba(10, 12, 8, 0.68);
|
| 890 |
+
color: #ece6cf;
|
| 891 |
+
font-size: 13px;
|
| 892 |
+
line-height: 1.35;
|
| 893 |
+
}
|
| 894 |
+
|
| 895 |
+
.story-note {
|
| 896 |
+
margin-top: 12px;
|
| 897 |
+
min-height: 58px;
|
| 898 |
+
border: 1px dashed #4a5241;
|
| 899 |
+
border-radius: 8px;
|
| 900 |
+
padding: 11px;
|
| 901 |
+
color: var(--muted);
|
| 902 |
+
font-size: 13px;
|
| 903 |
+
line-height: 1.4;
|
| 904 |
+
background: rgba(10, 12, 8, 0.35);
|
| 905 |
+
}
|
| 906 |
+
|
| 907 |
+
.readiness-list {
|
| 908 |
+
display: grid;
|
| 909 |
+
gap: 10px;
|
| 910 |
+
}
|
| 911 |
+
|
| 912 |
+
.readiness-item {
|
| 913 |
+
min-height: 64px;
|
| 914 |
+
border: 1px solid rgba(39, 224, 161, 0.38);
|
| 915 |
+
border-radius: 8px;
|
| 916 |
+
padding: 11px 12px;
|
| 917 |
+
background: rgba(39, 224, 161, 0.08);
|
| 918 |
+
}
|
| 919 |
+
|
| 920 |
+
.readiness-item.pending {
|
| 921 |
+
border-color: rgba(245, 186, 65, 0.38);
|
| 922 |
+
background: rgba(245, 186, 65, 0.09);
|
| 923 |
+
}
|
| 924 |
+
|
| 925 |
+
.readiness-item strong {
|
| 926 |
+
display: block;
|
| 927 |
+
color: var(--cream);
|
| 928 |
+
margin-bottom: 5px;
|
| 929 |
+
font-size: 14px;
|
| 930 |
+
}
|
| 931 |
+
|
| 932 |
+
.readiness-item span {
|
| 933 |
+
display: block;
|
| 934 |
+
color: var(--muted);
|
| 935 |
+
font-size: 13px;
|
| 936 |
+
line-height: 1.35;
|
| 937 |
+
}
|
| 938 |
+
|
| 939 |
.event-list {
|
| 940 |
display: grid;
|
| 941 |
gap: 8px;
|
|
|
|
| 1066 |
"command"
|
| 1067 |
"mission"
|
| 1068 |
"trust"
|
| 1069 |
+
"playground"
|
| 1070 |
+
"story"
|
| 1071 |
+
"readiness"
|
| 1072 |
"proof"
|
| 1073 |
"events"
|
| 1074 |
"flow"
|
|
|
|
| 1096 |
.stage-topline,
|
| 1097 |
.outcome-strip,
|
| 1098 |
.proof-grid,
|
| 1099 |
+
.json-grid,
|
| 1100 |
+
.playground-meta,
|
| 1101 |
+
.story-grid,
|
| 1102 |
.flow-line,
|
| 1103 |
.theme-grid,
|
| 1104 |
.stats-grid {
|
|
|
|
| 1342 |
</div>
|
| 1343 |
</section>
|
| 1344 |
|
| 1345 |
+
<section class="playground">
|
| 1346 |
+
<div class="section-head">
|
| 1347 |
+
<h2>API Playground</h2>
|
| 1348 |
+
<div class="chips">
|
| 1349 |
+
<span id="endpointChip" class="chip">POST /reset</span>
|
| 1350 |
+
<span class="chip live">backend visible</span>
|
| 1351 |
+
</div>
|
| 1352 |
+
</div>
|
| 1353 |
+
<div class="body">
|
| 1354 |
+
<div class="json-grid">
|
| 1355 |
+
<div class="json-panel">
|
| 1356 |
+
<div class="json-head">
|
| 1357 |
+
<strong>Last Request</strong>
|
| 1358 |
+
<span>what UI sent</span>
|
| 1359 |
+
</div>
|
| 1360 |
+
<pre id="requestJson" class="json-block">{
|
| 1361 |
+
"status": "waiting",
|
| 1362 |
+
"message": "Reset or step to inspect backend payloads."
|
| 1363 |
+
}</pre>
|
| 1364 |
+
</div>
|
| 1365 |
+
<div class="json-panel">
|
| 1366 |
+
<div class="json-head">
|
| 1367 |
+
<strong>Last Response</strong>
|
| 1368 |
+
<span>what backend returned</span>
|
| 1369 |
+
</div>
|
| 1370 |
+
<pre id="responseJson" class="json-block">{
|
| 1371 |
+
"status": "waiting",
|
| 1372 |
+
"message": "Observation, reward, and info will appear here."
|
| 1373 |
+
}</pre>
|
| 1374 |
+
</div>
|
| 1375 |
+
</div>
|
| 1376 |
+
<div class="playground-meta">
|
| 1377 |
+
<div class="playground-card">
|
| 1378 |
+
<strong id="playgroundMode">reset()</strong>
|
| 1379 |
+
<span id="playgroundCopy">Start a new episode, get the first observation, then choose actions step by step.</span>
|
| 1380 |
+
</div>
|
| 1381 |
+
<div class="playground-card">
|
| 1382 |
+
<strong>What To Watch</strong>
|
| 1383 |
+
<span id="playgroundSummary">Observation tells you the next subtask and public trust scores. Reward tells you whether routing was correct. Info tells you the normalized episode score and adversarial counters.</span>
|
| 1384 |
+
</div>
|
| 1385 |
+
</div>
|
| 1386 |
+
</div>
|
| 1387 |
+
</section>
|
| 1388 |
+
|
| 1389 |
+
<section class="story">
|
| 1390 |
+
<div class="section-head">
|
| 1391 |
+
<h2>Before And After</h2>
|
| 1392 |
+
<div class="chips">
|
| 1393 |
+
<span class="chip fail">blind trust</span>
|
| 1394 |
+
<span class="chip live">trained skepticism</span>
|
| 1395 |
+
</div>
|
| 1396 |
+
</div>
|
| 1397 |
+
<div class="body">
|
| 1398 |
+
<div class="story-grid">
|
| 1399 |
+
<div class="story-lane before">
|
| 1400 |
+
<div class="story-title">
|
| 1401 |
+
<strong>Without SENTINEL</strong>
|
| 1402 |
+
<span class="story-score">task3 random 0.699</span>
|
| 1403 |
+
</div>
|
| 1404 |
+
<div class="story-flow">
|
| 1405 |
+
<div class="story-step">All public slots start near the same trust. The orchestrator delegates with weak evidence.</div>
|
| 1406 |
+
<div class="story-step">A high-confidence specialist can slip poisoned output into a high-stakes node.</div>
|
| 1407 |
+
<div class="story-step">Downstream subtasks inherit bad state, so the mission drifts before anyone notices.</div>
|
| 1408 |
+
<div class="story-step">Detection stays weak and the agent cannot explain which public slot became dangerous.</div>
|
| 1409 |
+
</div>
|
| 1410 |
+
<div class="story-note">Judge takeaway: good-looking multi-agent systems still fail if trust is static or role-based.</div>
|
| 1411 |
+
</div>
|
| 1412 |
+
<div class="story-lane after">
|
| 1413 |
+
<div class="story-title">
|
| 1414 |
+
<strong>With SENTINEL</strong>
|
| 1415 |
+
<span class="story-score">task3 heuristic 0.784</span>
|
| 1416 |
+
</div>
|
| 1417 |
+
<div class="story-flow">
|
| 1418 |
+
<div class="story-step">Behavior updates the TrustLedger after every step, so public slots diverge quickly.</div>
|
| 1419 |
+
<div class="story-step">When stakes rise and trust is shaky, the orchestrator switches from delegate to verify.</div>
|
| 1420 |
+
<div class="story-step">Adversarial attempts are detected before they cascade through the task graph.</div>
|
| 1421 |
+
<div class="story-step">Profile swap forces re-learning from evidence, proving skill instead of memorized identity.</div>
|
| 1422 |
+
</div>
|
| 1423 |
+
<div class="story-note">Judge takeaway: this environment teaches oversight, recovery, and calibrated delegation under uncertainty.</div>
|
| 1424 |
+
</div>
|
| 1425 |
+
</div>
|
| 1426 |
+
</div>
|
| 1427 |
+
</section>
|
| 1428 |
+
|
| 1429 |
+
<section class="readiness">
|
| 1430 |
+
<div class="section-head">
|
| 1431 |
+
<h2>Hackathon Readiness</h2>
|
| 1432 |
+
<span class="muted">what is done vs what is left</span>
|
| 1433 |
+
</div>
|
| 1434 |
+
<div class="body">
|
| 1435 |
+
<div class="readiness-list">
|
| 1436 |
+
<div class="readiness-item">
|
| 1437 |
+
<strong>Environment Core Ready</strong>
|
| 1438 |
+
<span>OpenEnv shape works: reset, step, state, normalized score, Docker, Space, and live dashboard.</span>
|
| 1439 |
+
</div>
|
| 1440 |
+
<div class="readiness-item">
|
| 1441 |
+
<strong>Reward Proof Ready</strong>
|
| 1442 |
+
<span>Random, heuristic, and oracle-lite comparisons are committed and visible in the UI.</span>
|
| 1443 |
+
</div>
|
| 1444 |
+
<div class="readiness-item">
|
| 1445 |
+
<strong>Training Harness Ready</strong>
|
| 1446 |
+
<span>TRL and Unsloth dry-run path exists; onsite job is to capture the real reward-improvement curve.</span>
|
| 1447 |
+
</div>
|
| 1448 |
+
<div class="readiness-item pending">
|
| 1449 |
+
<strong>Still Needed For Finale</strong>
|
| 1450 |
+
<span>Mini-blog or video, onsite GRPO run, and one polished 3-minute story using this dashboard plus before/after evidence.</span>
|
| 1451 |
+
</div>
|
| 1452 |
+
</div>
|
| 1453 |
+
</div>
|
| 1454 |
+
</section>
|
| 1455 |
+
|
| 1456 |
<section class="proof">
|
| 1457 |
<div class="section-head">
|
| 1458 |
<h2>Reward Signal Proof</h2>
|
|
|
|
| 1571 |
observation: null,
|
| 1572 |
done: true,
|
| 1573 |
running: false,
|
| 1574 |
+
events: [],
|
| 1575 |
+
lastRequest: null,
|
| 1576 |
+
lastResult: null,
|
| 1577 |
+
lastMode: "reset()"
|
| 1578 |
};
|
| 1579 |
|
| 1580 |
const el = {
|
|
|
|
| 1604 |
subtaskText: document.getElementById("subtaskText"),
|
| 1605 |
trustList: document.getElementById("trustList"),
|
| 1606 |
trustMean: document.getElementById("trustMean"),
|
| 1607 |
+
endpointChip: document.getElementById("endpointChip"),
|
| 1608 |
+
requestJson: document.getElementById("requestJson"),
|
| 1609 |
+
responseJson: document.getElementById("responseJson"),
|
| 1610 |
+
playgroundMode: document.getElementById("playgroundMode"),
|
| 1611 |
+
playgroundCopy: document.getElementById("playgroundCopy"),
|
| 1612 |
+
playgroundSummary: document.getElementById("playgroundSummary"),
|
| 1613 |
detectChip: document.getElementById("detectChip"),
|
| 1614 |
poisonChip: document.getElementById("poisonChip"),
|
| 1615 |
sessionText: document.getElementById("sessionText"),
|
|
|
|
| 1634 |
return "quarantine";
|
| 1635 |
}
|
| 1636 |
|
| 1637 |
+
/**
 * Serialize a value as JSON text indented with two spaces.
 * Used by renderPlayground() to fill the request/response <pre> panels.
 *
 * @param {*} value - value handed to JSON.stringify.
 * @returns {string} the indented JSON text produced by JSON.stringify.
 */
function prettyJson(value) {
|
| 1638 |
+
return JSON.stringify(value, null, 2);
|
| 1639 |
+
}
|
| 1640 |
+
|
| 1641 |
function bestSpecialist() {
|
| 1642 |
const obs = state.observation;
|
| 1643 |
if (!obs) return "S0";
|
|
|
|
| 1709 |
`).join("");
|
| 1710 |
}
|
| 1711 |
|
| 1712 |
+
/**
 * Refresh the API Playground panel from state.lastRequest, state.lastResult
 * and state.lastMode. Each target element is written only when its lookup
 * in `el` is truthy, so a missing element is skipped rather than throwing.
 * All writes go through textContent.
 */
function renderPlayground() {
|
| 1713 |
+
if (el.requestJson) {
|
| 1714 |
+
// Until a request has been recorded, show the same "waiting" placeholder as the initial markup.
el.requestJson.textContent = prettyJson(state.lastRequest || {
|
| 1715 |
+
status: "waiting",
|
| 1716 |
+
message: "Reset or step to inspect backend payloads."
|
| 1717 |
+
});
|
| 1718 |
+
}
|
| 1719 |
+
if (el.responseJson) {
|
| 1720 |
+
// lastResult is either a backend result or the {error} object stored by a failed reset/step.
el.responseJson.textContent = prettyJson(state.lastResult || {
|
| 1721 |
+
status: "waiting",
|
| 1722 |
+
message: "Observation, reward, and info will appear here."
|
| 1723 |
+
});
|
| 1724 |
+
}
|
| 1725 |
+
if (el.playgroundMode) {
|
| 1726 |
+
el.playgroundMode.textContent = state.lastMode;
|
| 1727 |
+
}
|
| 1728 |
+
if (el.endpointChip) {
|
| 1729 |
+
// The verb is always rendered as POST; only the path comes from the recorded request (default "/reset").
// NOTE(review): the recorded step path interpolates the session id without encodeURIComponent,
// while the fetch URL encodes it, so the chip can differ from the URL actually requested.
const path = state.lastRequest?.path || "/reset";
|
| 1730 |
+
el.endpointChip.textContent = `POST ${path}`;
|
| 1731 |
+
}
|
| 1732 |
+
if (el.playgroundCopy) {
|
| 1733 |
+
// Any lastMode other than "step()" gets the reset explanation.
el.playgroundCopy.textContent = state.lastMode === "step()"
|
| 1734 |
+
? "A step sends one action into the environment and returns the next observation, reward, done flag, and info."
|
| 1735 |
+
: "Reset starts a new episode, samples a scenario, reshuffles hidden profiles, and returns the first observation.";
|
| 1736 |
+
}
|
| 1737 |
+
if (el.playgroundSummary) {
|
| 1738 |
+
const obs = state.lastResult?.observation;
|
| 1739 |
+
+
const reward = state.lastResult?.reward;
|
| 1740 |
+
const info = state.lastResult?.info;
|
| 1741 |
+
// Without an observation (nothing recorded yet, or an {error} result) fall back to the static explanation.
// Missing reward value, score, or detection count are shown as 0.
el.playgroundSummary.textContent = obs
|
| 1742 |
+
? `Current subtask: ${obs.current_subtask} | Reward: ${Number(reward?.value ?? 0).toFixed(2)} | Score: ${Number(info?.score ?? 0).toFixed(3)} | Detections: ${info?.adversarial_detections ?? 0}`
|
| 1743 |
+
: "Observation tells you the next subtask and public trust scores. Reward tells you whether routing was correct. Info tells you the normalized episode score and adversarial counters.";
|
| 1744 |
+
}
|
| 1745 |
+
}
|
| 1746 |
+
|
| 1747 |
function renderRecommendation() {
|
| 1748 |
const move = recommendedMove();
|
| 1749 |
const obs = state.observation;
|
|
|
|
| 1779 |
renderSpecialists();
|
| 1780 |
renderEvents();
|
| 1781 |
renderRecommendation();
|
| 1782 |
+
renderPlayground();
|
| 1783 |
setDisabled(true);
|
| 1784 |
return;
|
| 1785 |
}
|
|
|
|
| 1813 |
renderSpecialists();
|
| 1814 |
renderEvents();
|
| 1815 |
renderRecommendation();
|
| 1816 |
+
renderPlayground();
|
| 1817 |
setDisabled(state.done || state.running);
|
| 1818 |
}
|
| 1819 |
|
|
|
|
| 1839 |
el.resetPanelBtn.disabled = true;
|
| 1840 |
try {
|
| 1841 |
const seed = Number(el.seedInput.value || 0);
|
| 1842 |
+
state.lastMode = "reset()";
|
| 1843 |
+
state.lastRequest = {
|
| 1844 |
+
method: "POST",
|
| 1845 |
+
path: "/reset",
|
| 1846 |
+
body: {task_type: el.taskSelect.value, seed}
|
| 1847 |
+
};
|
| 1848 |
const response = await fetch("/reset", {
|
| 1849 |
method: "POST",
|
| 1850 |
headers: {"Content-Type": "application/json"},
|
|
|
|
| 1852 |
});
|
| 1853 |
const result = await response.json();
|
| 1854 |
if (!response.ok) throw new Error(result.detail || "reset failed");
|
| 1855 |
+
state.lastResult = result;
|
| 1856 |
state.taskType = result.observation.task_type;
|
| 1857 |
state.sessionId = result.info.session_id;
|
| 1858 |
state.events = [];
|
|
|
|
| 1860 |
addEvent(0, "reset", "Episode initialized with shuffled hidden profiles.", "0.00");
|
| 1861 |
render(result);
|
| 1862 |
} catch (error) {
|
| 1863 |
+
state.lastResult = {error: error.message};
|
| 1864 |
addEvent(0, "error", error.message, "0.00");
|
| 1865 |
+
renderPlayground();
|
| 1866 |
renderEvents();
|
| 1867 |
} finally {
|
| 1868 |
state.running = false;
|
|
|
|
| 1879 |
try {
|
| 1880 |
const chosen = specialist || el.specialistSelect.value || bestSpecialist();
|
| 1881 |
const payload = actionPayload(type, type === "delegate" || type === "verify" ? chosen : null);
|
| 1882 |
+
state.lastMode = "step()";
|
| 1883 |
+
state.lastRequest = {
|
| 1884 |
+
method: "POST",
|
| 1885 |
+
path: `/step?session_id=${state.sessionId}`,
|
| 1886 |
+
body: payload
|
| 1887 |
+
};
|
| 1888 |
const response = await fetch(`/step?session_id=${encodeURIComponent(state.sessionId)}`, {
|
| 1889 |
method: "POST",
|
| 1890 |
headers: {"Content-Type": "application/json"},
|
|
|
|
| 1892 |
});
|
| 1893 |
const result = await response.json();
|
| 1894 |
if (!response.ok) throw new Error(result.detail || "step failed");
|
| 1895 |
+
state.lastResult = result;
|
| 1896 |
const reward = Number(result.reward.value || 0).toFixed(2);
|
| 1897 |
const label = payload.specialist_id ? `${type}:${payload.specialist_id}` : type;
|
| 1898 |
addEvent(result.info.step_count, label, result.reward.reason, reward);
|
| 1899 |
render(result);
|
| 1900 |
} catch (error) {
|
| 1901 |
+
state.lastResult = {error: error.message};
|
| 1902 |
addEvent(state.observation?.step_count || 0, "error", error.message, "0.00");
|
| 1903 |
+
renderPlayground();
|
| 1904 |
renderEvents();
|
| 1905 |
} finally {
|
| 1906 |
state.running = false;
|