XcodeAddy committed on
Commit
bddc179
·
1 Parent(s): 1c10148

Rebuild SENTINEL trust mission control UI

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -0
  2. README.md +8 -6
  3. app.py +9 -0
  4. static/index.html +1014 -522
Dockerfile CHANGED
@@ -22,6 +22,7 @@ COPY README.md .
22
  COPY pyproject.toml .
23
  COPY server ./server
24
  COPY static ./static
 
25
 
26
  # Create outputs directory for baseline scores
27
  RUN mkdir -p outputs
 
22
  COPY pyproject.toml .
23
  COPY server ./server
24
  COPY static ./static
25
+ COPY outputs ./outputs
26
 
27
  # Create outputs directory for baseline scores
28
  RUN mkdir -p outputs
README.md CHANGED
@@ -90,21 +90,23 @@ curl http://localhost:7860/schema
90
 
91
  The root route `/` serves the live SENTINEL dashboard on Hugging Face Spaces.
92
  Use `/api` for the JSON route index.
 
93
 
94
  ## Live Dashboard
95
 
96
- The Space opens directly into a judge-demo dashboard:
97
 
98
  - live task progress and score
99
- - S0-S4 trust ledger bars
100
  - manual `delegate`, `verify`, `solve_independently`, and `skip` controls
101
- - heuristic auto-run
102
  - profile reshuffle demo via seed swap
103
- - before/after failure contrast
104
- - human-body architecture map
 
105
  - hackathon theme coverage map
106
  - adversarial detection and poisoning counters
107
- - baseline proof table for random, heuristic, and oracle-lite policies
108
 
109
  Current status as of April 22, 2026:
110
 
 
90
 
91
  The root route `/` serves the live SENTINEL dashboard on Hugging Face Spaces.
92
  Use `/api` for the JSON route index.
93
+ Use `/assets/baseline_comparison.png` for the committed baseline chart used in the dashboard.
94
 
95
  ## Live Dashboard
96
 
97
+ The Space opens directly into **SENTINEL Trust Mission Control**, a judge-demo dashboard:
98
 
99
  - live task progress and score
100
+ - S0-S4 network theater with trust state per public slot
101
  - manual `delegate`, `verify`, `solve_independently`, and `skip` controls
102
+ - heuristic auto-policy and one-click recommended move
103
  - profile reshuffle demo via seed swap
104
+ - risk gate for high-stakes subtasks
105
+ - flight recorder of step rewards and decisions
106
+ - code-flow map from `reset()` to reward
107
  - hackathon theme coverage map
108
  - adversarial detection and poisoning counters
109
+ - baseline proof table and chart for random, heuristic, and oracle-lite policies
110
 
111
  Current status as of April 22, 2026:
112
 
app.py CHANGED
@@ -27,6 +27,7 @@ app = FastAPI(
27
  # One env instance per session_id
28
  _sessions: dict[str, SentinelEnv] = {}
29
  _STATIC_DIR = Path(__file__).resolve().parent / "static"
 
30
 
31
  def _get_env(session_id: str) -> SentinelEnv:
32
  if session_id not in _sessions:
@@ -79,6 +80,14 @@ def root():
79
  )
80
 
81
 
 
 
 
 
 
 
 
 
82
  @app.get("/api")
83
  def api_root():
84
  return {
 
27
  # One env instance per session_id
28
  _sessions: dict[str, SentinelEnv] = {}
29
  _STATIC_DIR = Path(__file__).resolve().parent / "static"
30
+ _OUTPUTS_DIR = Path(__file__).resolve().parent / "outputs"
31
 
32
  def _get_env(session_id: str) -> SentinelEnv:
33
  if session_id not in _sessions:
 
80
  )
81
 
82
 
83
+ @app.get("/assets/baseline_comparison.png")
84
+ def baseline_comparison_chart():
85
+ chart_path = _OUTPUTS_DIR / "baseline_comparison.png"
86
+ if not chart_path.exists():
87
+ raise HTTPException(status_code=404, detail="Baseline comparison chart not found.")
88
+ return FileResponse(chart_path, media_type="image/png")
89
+
90
+
91
  @app.get("/api")
92
  def api_root():
93
  return {
static/index.html CHANGED
@@ -3,52 +3,142 @@
3
  <head>
4
  <meta charset="utf-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1">
6
- <title>SENTINEL</title>
7
  <style>
8
  :root {
9
- --bg: #f7f8fb;
10
- --panel: #ffffff;
11
- --ink: #172033;
12
- --muted: #667085;
13
- --line: #d9dee8;
14
- --accent: #0f766e;
15
- --blue: #2563eb;
16
- --red: #dc2626;
17
- --amber: #b45309;
18
- --green: #059669;
19
- --shadow: 0 10px 30px rgba(16, 24, 40, 0.08);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
21
 
22
- * { box-sizing: border-box; }
23
-
24
  body {
25
  margin: 0;
26
  min-height: 100vh;
27
  color: var(--ink);
28
- background: var(--bg);
 
 
 
 
29
  font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
30
  letter-spacing: 0;
31
  }
32
 
33
- button, select, input {
 
 
34
  font: inherit;
35
  }
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  .shell {
38
  min-height: 100vh;
39
- display: grid;
40
- grid-template-rows: auto 1fr;
41
  }
42
 
43
  header {
44
- display: flex;
 
 
 
 
 
45
  align-items: center;
46
- justify-content: space-between;
47
- gap: 16px;
48
- padding: 18px 24px;
49
- background: #111827;
50
- color: #f8fafc;
51
- border-bottom: 1px solid #0b1220;
52
  }
53
 
54
  .brand {
@@ -59,528 +149,819 @@
59
  }
60
 
61
  .mark {
62
- width: 34px;
63
- height: 34px;
 
 
64
  display: grid;
65
  place-items: center;
66
- border: 1px solid rgba(255,255,255,0.22);
67
- background: #0f766e;
68
- border-radius: 7px;
69
- font-weight: 800;
 
 
 
 
 
 
 
 
 
70
  }
71
 
72
  h1 {
73
- margin: 0;
74
  font-size: 20px;
75
- line-height: 1.1;
76
- font-weight: 780;
77
  }
78
 
79
  .subhead {
80
- margin-top: 3px;
81
- color: #cbd5e1;
82
  font-size: 13px;
83
- white-space: nowrap;
84
  overflow: hidden;
85
  text-overflow: ellipsis;
 
86
  }
87
 
88
  .header-actions {
89
  display: flex;
90
  align-items: center;
91
- gap: 10px;
92
- flex-wrap: wrap;
93
  justify-content: flex-end;
 
 
94
  }
95
 
96
- main {
97
- padding: 20px;
 
 
98
  display: grid;
99
- gap: 16px;
100
- grid-template-columns: minmax(320px, 1.05fr) minmax(320px, 1fr);
101
  grid-template-areas:
 
102
  "mission trust"
103
- "controls trust"
104
- "events metrics"
105
- "story story"
106
- "architecture themes";
107
  align-items: start;
108
- max-width: 1480px;
109
- width: 100%;
110
- margin: 0 auto;
111
  }
112
 
113
  section {
114
- background: var(--panel);
 
115
  border: 1px solid var(--line);
116
  border-radius: 8px;
117
  box-shadow: var(--shadow);
118
- min-width: 0;
119
  }
120
 
 
 
 
 
 
 
 
 
 
121
  .section-head {
122
- padding: 14px 16px 10px;
123
- border-bottom: 1px solid var(--line);
124
  display: flex;
125
  align-items: center;
126
  justify-content: space-between;
127
  gap: 12px;
 
 
 
128
  }
129
 
130
  h2 {
131
- margin: 0;
132
- font-size: 14px;
133
  text-transform: uppercase;
134
- color: #475467;
135
  letter-spacing: 0;
 
 
136
  }
137
 
138
- .mission { grid-area: mission; }
139
- .controls { grid-area: controls; }
140
- .trust { grid-area: trust; }
141
- .events { grid-area: events; }
142
- .metrics { grid-area: metrics; }
143
- .story { grid-area: story; }
144
- .architecture { grid-area: architecture; }
145
- .themes { grid-area: themes; }
146
-
147
  .body {
148
- padding: 16px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  }
150
 
151
- .grid {
 
 
 
 
152
  display: grid;
153
- grid-template-columns: repeat(4, minmax(0, 1fr));
154
  gap: 10px;
 
155
  }
156
 
157
- .stat {
158
- border: 1px solid var(--line);
159
- border-radius: 7px;
160
- padding: 12px;
161
- background: #fbfcfe;
162
- min-height: 76px;
163
  }
164
 
165
- .label {
 
166
  color: var(--muted);
167
- font-size: 12px;
168
  line-height: 1.2;
169
- margin-bottom: 8px;
170
  }
171
 
172
- .value {
173
- font-size: 22px;
174
- font-weight: 760;
175
- line-height: 1.05;
 
176
  overflow-wrap: anywhere;
177
  }
178
 
179
- .subtask {
180
- margin-top: 14px;
181
- padding: 14px;
182
- border: 1px solid var(--line);
183
- border-radius: 7px;
184
- background: #f8fafc;
185
- line-height: 1.45;
186
- color: #1f2937;
187
- min-height: 82px;
 
 
 
 
 
 
 
188
  }
189
 
190
- .toolbar {
191
- display: flex;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  gap: 10px;
193
- flex-wrap: wrap;
194
- align-items: center;
195
  }
196
 
197
- select, input {
198
- height: 38px;
199
- border: 1px solid #cbd5e1;
200
- background: #fff;
201
- border-radius: 7px;
202
- padding: 0 10px;
203
- color: #111827;
 
204
  }
205
 
206
- input {
207
- width: 84px;
 
 
 
 
 
 
208
  }
209
 
210
- button {
211
- height: 38px;
212
- border: 1px solid #cbd5e1;
213
- background: #fff;
214
- color: #111827;
215
- border-radius: 7px;
216
- padding: 0 12px;
217
- cursor: pointer;
218
- display: inline-flex;
 
 
 
 
 
 
219
  align-items: center;
 
220
  gap: 8px;
221
- white-space: nowrap;
 
 
222
  }
223
 
224
- button:hover {
225
- border-color: #94a3b8;
226
- background: #f8fafc;
 
227
  }
228
 
229
- button.primary {
230
- background: var(--accent);
231
- border-color: var(--accent);
232
- color: #fff;
 
233
  }
234
 
235
- button.danger {
236
- border-color: #fca5a5;
237
- color: #991b1b;
238
- background: #fff5f5;
 
 
 
239
  }
240
 
241
- button:disabled {
242
- cursor: not-allowed;
243
- opacity: 0.55;
 
 
 
 
244
  }
245
 
246
- .control-grid {
 
 
 
 
247
  display: grid;
248
- grid-template-columns: repeat(4, minmax(0, 1fr));
249
  gap: 10px;
 
250
  }
251
 
252
- .control-grid button {
253
- justify-content: center;
254
- width: 100%;
 
 
 
255
  }
256
 
257
- .trust-list {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  display: grid;
 
259
  gap: 10px;
260
  }
261
 
262
- .specialist {
 
 
 
 
 
 
263
  display: grid;
264
- grid-template-columns: 56px 1fr 54px;
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  align-items: center;
266
- gap: 12px;
267
- padding: 12px;
268
- border: 1px solid var(--line);
269
- border-radius: 7px;
270
- background: #fbfcfe;
271
  }
272
 
273
- .sid {
274
- font-weight: 760;
275
- color: #111827;
276
  }
277
 
278
- .bar {
279
- height: 12px;
280
- border-radius: 999px;
281
- background: #e5e7eb;
282
- overflow: hidden;
283
- position: relative;
284
  }
285
 
286
- .fill {
287
- height: 100%;
288
- width: 50%;
289
- background: var(--blue);
290
- border-radius: inherit;
291
- transition: width 220ms ease, background 220ms ease;
 
 
 
292
  }
293
 
294
- .score {
295
- text-align: right;
296
- font-variant-numeric: tabular-nums;
297
- color: #334155;
298
- font-weight: 700;
299
  }
300
 
301
- .chips {
302
- display: flex;
303
- flex-wrap: wrap;
304
- gap: 8px;
 
 
305
  }
306
 
307
- .chip {
308
- border-radius: 999px;
309
- padding: 5px 9px;
310
- background: #eef2ff;
311
- color: #3730a3;
312
  font-size: 12px;
313
- font-weight: 700;
 
314
  }
315
 
316
- .chip.live { background: #ecfdf3; color: #027a48; }
317
- .chip.warn { background: #fff7ed; color: #9a3412; }
318
- .chip.fail { background: #fef2f2; color: #b91c1c; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
 
320
  .progress {
321
- height: 10px;
322
- background: #e5e7eb;
323
  border-radius: 999px;
324
  overflow: hidden;
325
- margin-top: 12px;
326
  }
327
 
328
  .progress span {
329
  display: block;
330
  height: 100%;
331
  width: 0%;
332
- background: var(--accent);
333
  border-radius: inherit;
334
  transition: width 220ms ease;
335
  }
336
 
337
- .event-list {
338
  display: grid;
339
  gap: 8px;
340
- max-height: 360px;
341
- overflow: auto;
342
- padding-right: 4px;
343
  }
344
 
345
- .event {
346
- display: grid;
347
- grid-template-columns: 54px 1fr 64px;
348
- gap: 10px;
349
- align-items: start;
350
- padding: 10px;
351
- border: 1px solid var(--line);
352
- border-radius: 7px;
353
- background: #fbfcfe;
354
- font-size: 13px;
355
  }
356
 
357
- .event strong {
358
- color: #111827;
 
 
 
 
 
359
  }
360
 
361
- .event .reward {
362
- text-align: right;
363
- font-variant-numeric: tabular-nums;
364
- font-weight: 760;
365
- color: var(--accent);
366
  }
367
 
368
- .metric-grid {
369
  display: grid;
370
- grid-template-columns: repeat(3, minmax(0, 1fr));
371
- gap: 10px;
 
 
 
 
 
 
372
  }
373
 
374
- .story-grid {
375
- display: grid;
376
- grid-template-columns: minmax(0, 1fr) 72px minmax(0, 1fr);
377
- gap: 14px;
378
- align-items: stretch;
379
  }
380
 
381
- .contrast {
382
- border: 1px solid var(--line);
383
- border-radius: 8px;
384
- padding: 14px;
385
- background: #fbfcfe;
 
 
 
 
 
 
 
386
  }
387
 
388
- .contrast.before {
389
- border-color: #fecaca;
390
- background: #fff7f7;
 
 
391
  }
392
 
393
- .contrast.after {
394
- border-color: #a7f3d0;
395
- background: #f0fdf9;
 
 
396
  }
397
 
398
- .contrast-title {
399
- display: flex;
400
- justify-content: space-between;
401
  gap: 10px;
 
 
 
 
 
402
  align-items: center;
403
- font-weight: 780;
404
- margin-bottom: 12px;
405
- color: #111827;
 
406
  }
407
 
408
- .score-pill {
 
409
  border-radius: 999px;
410
- padding: 5px 9px;
411
- font-size: 12px;
412
- font-weight: 800;
413
- color: #fff;
414
- background: #475569;
415
- font-variant-numeric: tabular-nums;
 
 
 
 
 
 
 
 
 
 
 
 
416
  }
417
 
418
- .score-pill.bad { background: var(--red); }
419
- .score-pill.good { background: var(--accent); }
 
 
 
 
 
420
 
421
- .flow {
422
  display: grid;
423
  gap: 8px;
 
 
 
424
  }
425
 
426
- .flow-node {
427
- border: 1px solid var(--line);
428
- border-radius: 7px;
429
- background: #fff;
430
- padding: 10px;
431
- line-height: 1.35;
432
- min-height: 48px;
 
 
 
 
433
  }
434
 
435
- .before .flow-node.warn {
436
- border-color: #fca5a5;
437
- color: #7f1d1d;
438
- background: #fff1f2;
439
  }
440
 
441
- .after .flow-node.clean {
442
- border-color: #86efac;
443
- color: #064e3b;
444
- background: #ecfdf3;
 
445
  }
446
 
447
- .bridge {
448
  display: grid;
449
- place-items: center;
450
- min-height: 100%;
451
- color: #64748b;
452
- font-size: 12px;
453
- text-align: center;
454
- line-height: 1.35;
455
- border-left: 1px dashed #94a3b8;
456
- border-right: 1px dashed #94a3b8;
457
  }
458
 
459
- .role-grid, .theme-grid {
460
- display: grid;
461
- grid-template-columns: repeat(2, minmax(0, 1fr));
462
- gap: 10px;
 
 
 
463
  }
464
 
465
- .role, .theme-card {
466
- border: 1px solid var(--line);
467
- border-radius: 7px;
468
- padding: 12px;
469
- background: #fbfcfe;
470
- min-height: 88px;
 
 
 
 
 
 
471
  }
472
 
473
- .role strong, .theme-card strong {
474
  display: block;
475
- margin-bottom: 6px;
476
- color: #111827;
477
  }
478
 
479
- .theme-card.blue { border-color: #bfdbfe; background: #eff6ff; }
480
- .theme-card.green { border-color: #bbf7d0; background: #f0fdf4; }
481
- .theme-card.amber { border-color: #fde68a; background: #fffbeb; }
482
- .theme-card.magenta { border-color: #fbcfe8; background: #fdf2f8; }
 
 
483
 
484
- .status-strip {
485
  display: grid;
486
  grid-template-columns: repeat(4, minmax(0, 1fr));
487
  gap: 10px;
488
- margin-bottom: 14px;
489
  }
490
 
491
- .status-item {
492
- border: 1px solid #bbf7d0;
493
- background: #f0fdf4;
494
- color: #065f46;
495
- border-radius: 7px;
496
- padding: 10px 12px;
497
- font-size: 13px;
498
- font-weight: 720;
499
  }
500
 
501
- .baseline-bars {
502
- margin-top: 14px;
503
- display: grid;
504
- gap: 10px;
 
505
  }
506
 
507
- .baseline-row {
508
- display: grid;
509
- grid-template-columns: 118px 1fr 48px;
510
- align-items: center;
511
- gap: 10px;
512
  font-size: 13px;
513
  }
514
 
515
- .mini-bar {
516
- height: 10px;
517
- background: #e5e7eb;
518
- border-radius: 999px;
519
- overflow: hidden;
520
  }
521
 
522
- .mini-bar span {
523
- display: block;
524
- height: 100%;
525
- border-radius: inherit;
526
  }
527
 
528
- .muted {
529
- color: var(--muted);
 
530
  }
531
 
532
- @media (max-width: 980px) {
533
- header {
534
- align-items: flex-start;
535
- flex-direction: column;
536
- }
537
 
538
- main {
 
539
  grid-template-columns: 1fr;
540
  grid-template-areas:
 
 
541
  "mission"
542
  "trust"
543
- "controls"
544
- "metrics"
545
  "events"
546
- "story"
547
- "architecture"
548
  "themes";
549
- padding: 14px;
550
  }
551
 
552
- .grid, .control-grid, .metric-grid, .role-grid, .theme-grid, .status-strip {
553
- grid-template-columns: repeat(2, minmax(0, 1fr));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  }
555
 
556
- .story-grid {
 
 
 
 
 
557
  grid-template-columns: 1fr;
558
  }
559
 
560
- .bridge {
561
- min-height: 58px;
562
- border-left: 0;
563
- border-right: 0;
564
- border-top: 1px dashed #94a3b8;
565
- border-bottom: 1px dashed #94a3b8;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
  }
567
  }
568
 
569
  @media (max-width: 560px) {
570
- .grid, .control-grid, .metric-grid, .role-grid, .theme-grid, .status-strip {
 
 
 
 
 
 
 
 
 
571
  grid-template-columns: 1fr;
572
  }
573
 
574
- .specialist {
575
- grid-template-columns: 44px 1fr 48px;
 
 
 
 
576
  }
577
 
578
- .event {
579
- grid-template-columns: 44px 1fr;
580
  }
581
 
582
- .event .reward {
583
- grid-column: 2;
584
  text-align: left;
585
  }
586
  }
@@ -592,8 +973,8 @@
592
  <div class="brand">
593
  <div class="mark">S</div>
594
  <div>
595
- <h1>SENTINEL</h1>
596
- <div class="subhead">Multi-agent trust calibration environment</div>
597
  </div>
598
  </div>
599
  <div class="header-actions">
@@ -603,33 +984,144 @@
603
  <option value="task3" selected>Task 3 - Hard</option>
604
  </select>
605
  <input id="seedInput" aria-label="Seed" type="number" value="42">
606
- <button id="resetBtn" class="primary" type="button">Reset</button>
607
- <button id="swapBtn" type="button">Swap Profiles</button>
608
- <button id="autoBtn" type="button">Auto Run</button>
609
  </div>
610
  </header>
611
 
612
- <main>
613
- <section class="mission">
614
  <div class="section-head">
615
- <h2>Mission</h2>
616
  <div class="chips">
617
  <span id="statusChip" class="chip live">READY</span>
618
  <span id="scenarioChip" class="chip">SCENARIO</span>
619
  </div>
620
  </div>
621
  <div class="body">
622
- <div class="grid">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
  <div class="stat">
624
  <div class="label">Score</div>
625
  <div id="scoreValue" class="value">0.000</div>
626
  </div>
627
  <div class="stat">
628
- <div class="label">Step</div>
629
  <div id="stepValue" class="value">0/45</div>
630
  </div>
631
  <div class="stat">
632
- <div class="label">Complete</div>
633
  <div id="completeValue" class="value">0/20</div>
634
  </div>
635
  <div class="stat">
@@ -638,161 +1130,100 @@
638
  </div>
639
  </div>
640
  <div class="progress"><span id="progressFill"></span></div>
 
 
 
 
641
  <div id="subtaskText" class="subtask">Reset an episode to begin.</div>
642
  </div>
643
  </section>
644
 
645
  <section class="trust">
646
  <div class="section-head">
647
- <h2>Trust Ledger</h2>
648
- <div class="chips">
649
- <span id="detectChip" class="chip">0 detected</span>
650
- <span id="poisonChip" class="chip warn">0 poison</span>
651
- </div>
652
  </div>
653
  <div class="body">
654
  <div id="trustList" class="trust-list"></div>
655
  </div>
656
  </section>
657
 
658
- <section class="controls">
659
  <div class="section-head">
660
- <h2>Actions</h2>
661
- <div class="toolbar">
662
- <select id="specialistSelect" aria-label="Specialist"></select>
663
- <span id="recommendChip" class="chip">delegate:S0</span>
664
- </div>
665
  </div>
666
  <div class="body">
667
- <div class="control-grid">
668
- <button id="delegateBtn" class="primary" type="button">Delegate</button>
669
- <button id="verifyBtn" type="button">Verify</button>
670
- <button id="selfBtn" type="button">Self Solve</button>
671
- <button id="skipBtn" class="danger" type="button">Skip</button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
  </div>
673
  </div>
674
  </section>
675
 
676
  <section class="events">
677
  <div class="section-head">
678
- <h2>Episode Events</h2>
679
- <span id="sessionText" class="muted">No session</span>
680
  </div>
681
  <div class="body">
682
  <div id="eventList" class="event-list"></div>
683
  </div>
684
  </section>
685
 
686
- <section class="metrics">
687
- <div class="section-head">
688
- <h2>Baseline Proof</h2>
689
- <span class="muted">20 episodes per task</span>
690
- </div>
691
- <div class="body">
692
- <div class="metric-grid">
693
- <div class="stat">
694
- <div class="label">Random</div>
695
- <div class="value">0.714</div>
696
- </div>
697
- <div class="stat">
698
- <div class="label">Heuristic</div>
699
- <div class="value">0.816</div>
700
- </div>
701
- <div class="stat">
702
- <div class="label">Oracle-lite</div>
703
- <div class="value">0.872</div>
704
- </div>
705
- </div>
706
- <div class="baseline-bars">
707
- <div class="baseline-row">
708
- <span>Random</span>
709
- <div class="mini-bar"><span style="width:71.4%;background:#ef4444"></span></div>
710
- <strong>0.714</strong>
711
- </div>
712
- <div class="baseline-row">
713
- <span>Heuristic</span>
714
- <div class="mini-bar"><span style="width:81.6%;background:#2563eb"></span></div>
715
- <strong>0.816</strong>
716
- </div>
717
- <div class="baseline-row">
718
- <span>Oracle-lite</span>
719
- <div class="mini-bar"><span style="width:87.2%;background:#059669"></span></div>
720
- <strong>0.872</strong>
721
- </div>
722
- </div>
723
- </div>
724
- </section>
725
-
726
- <section class="story">
727
  <div class="section-head">
728
- <h2>Before And After</h2>
729
- <div class="chips">
730
- <span class="chip fail">blind trust fails</span>
731
- <span class="chip live">verification recovers</span>
732
- </div>
733
  </div>
734
  <div class="body">
735
- <div class="status-strip">
736
- <div class="status-item">HF Space live</div>
737
- <div class="status-item">OpenEnv validate green</div>
738
- <div class="status-item">Docker build green</div>
739
- <div class="status-item">Reward chart committed</div>
740
- </div>
741
- <div class="story-grid">
742
- <div class="contrast before">
743
- <div class="contrast-title">
744
- <span>Before SENTINEL</span>
745
- <span class="score-pill bad">T3 random 0.699</span>
746
- </div>
747
- <div class="flow">
748
- <div class="flow-node">Uniform trust: S0-S4 all start at 0.50</div>
749
- <div class="flow-node warn">High-stakes delegation can accept poisoned output</div>
750
- <div class="flow-node warn">Downstream subtasks inherit corrupted state</div>
751
- <div class="flow-node">Detection rate: 0.433</div>
752
- </div>
753
- </div>
754
- <div class="bridge">
755
- profile shuffle<br>
756
- skill not memory
757
  </div>
758
- <div class="contrast after">
759
- <div class="contrast-title">
760
- <span>After SENTINEL Policy</span>
761
- <span class="score-pill good">T3 oracle 0.895</span>
762
- </div>
763
- <div class="flow">
764
- <div class="flow-node clean">Trust ledger moves after each behavioral outcome</div>
765
- <div class="flow-node clean">High-stakes low-trust steps trigger verification</div>
766
- <div class="flow-node clean">Adversarial attempts are blocked before cascade</div>
767
- <div class="flow-node">Detection: heuristic 0.735, oracle 1.000</div>
768
- </div>
769
  </div>
770
- </div>
771
- </div>
772
- </section>
773
-
774
- <section class="architecture">
775
- <div class="section-head">
776
- <h2>Architecture</h2>
777
- <span class="muted">human-body model</span>
778
- </div>
779
- <div class="body">
780
- <div class="role-grid">
781
- <div class="role">
782
- <strong>Brain</strong>
783
- Orchestrator chooses delegate, verify, self solve, or skip.
784
  </div>
785
- <div class="role">
786
- <strong>Heart</strong>
787
- Environment owns reset, step, state, budget, and terminal scoring.
788
  </div>
789
- <div class="role">
790
- <strong>Immune System</strong>
791
- Trust ledger updates S0-S4 reliability from behavior.
792
  </div>
793
- <div class="role">
794
- <strong>Pathogen</strong>
795
- Adversarial specialist stays benign until high-stakes steps.
796
  </div>
797
  </div>
798
  </div>
@@ -800,26 +1231,26 @@
800
 
801
  <section class="themes">
802
  <div class="section-head">
803
- <h2>Theme Fit</h2>
804
- <span class="muted">finale story map</span>
805
  </div>
806
  <div class="body">
807
  <div class="theme-grid">
808
  <div class="theme-card blue">
809
  <strong>Theme 1</strong>
810
- Multi-agent orchestration with partial observability and adversarial dynamics.
811
  </div>
812
  <div class="theme-card green">
813
  <strong>Theme 2</strong>
814
- Long-horizon task graphs with delayed terminal reward and budget pressure.
815
  </div>
816
  <div class="theme-card amber">
817
  <strong>Theme 4</strong>
818
- Profile reshuffle creates an auto-curriculum with no identity memorization.
819
  </div>
820
  <div class="theme-card magenta">
821
- <strong>Theme 5</strong>
822
- Real AI systems problem: blind trust inside agent pipelines.
823
  </div>
824
  </div>
825
  </div>
@@ -842,10 +1273,14 @@
842
  taskSelect: document.getElementById("taskSelect"),
843
  seedInput: document.getElementById("seedInput"),
844
  resetBtn: document.getElementById("resetBtn"),
 
845
  swapBtn: document.getElementById("swapBtn"),
 
846
  autoBtn: document.getElementById("autoBtn"),
847
  specialistSelect: document.getElementById("specialistSelect"),
848
  recommendChip: document.getElementById("recommendChip"),
 
 
849
  delegateBtn: document.getElementById("delegateBtn"),
850
  verifyBtn: document.getElementById("verifyBtn"),
851
  selfBtn: document.getElementById("selfBtn"),
@@ -857,19 +1292,32 @@
857
  completeValue: document.getElementById("completeValue"),
858
  stakesValue: document.getElementById("stakesValue"),
859
  progressFill: document.getElementById("progressFill"),
 
860
  subtaskText: document.getElementById("subtaskText"),
861
  trustList: document.getElementById("trustList"),
 
862
  detectChip: document.getElementById("detectChip"),
863
  poisonChip: document.getElementById("poisonChip"),
864
  sessionText: document.getElementById("sessionText"),
865
- eventList: document.getElementById("eventList")
 
 
 
 
866
  };
867
 
868
  function trustColor(value) {
869
- if (value >= 0.72) return "#059669";
870
- if (value >= 0.48) return "#2563eb";
871
- if (value >= 0.30) return "#b45309";
872
- return "#dc2626";
 
 
 
 
 
 
 
873
  }
874
 
875
  function bestSpecialist() {
@@ -880,19 +1328,46 @@
880
  .sort((a, b) => (obs.trust_snapshot[b] ?? 0.5) - (obs.trust_snapshot[a] ?? 0.5))[0];
881
  }
882
 
 
 
 
 
 
 
 
 
 
 
883
  function renderTrust() {
884
  const trust = state.observation?.trust_snapshot || Object.fromEntries(ids.map(id => [id, 0.5]));
 
 
 
 
885
  el.trustList.innerHTML = ids.map(id => {
886
  const value = Number(trust[id] ?? 0.5);
887
  const pct = Math.round(value * 100);
 
888
  return `
889
  <div class="specialist">
890
  <div class="sid">${id}</div>
891
- <div class="bar"><div class="fill" style="width:${pct}%;background:${trustColor(value)}"></div></div>
892
  <div class="score">${value.toFixed(2)}</div>
893
  </div>
894
  `;
895
  }).join("");
 
 
 
 
 
 
 
 
 
 
 
 
896
  }
897
 
898
  function renderSpecialists() {
@@ -907,7 +1382,7 @@
907
  el.eventList.innerHTML = `<div class="muted">No events yet.</div>`;
908
  return;
909
  }
910
- el.eventList.innerHTML = state.events.slice(-16).reverse().map(item => `
911
  <div class="event">
912
  <strong>#${item.step}</strong>
913
  <div>${item.action}<br><span class="muted">${item.summary}</span></div>
@@ -916,6 +1391,30 @@
916
  `).join("");
917
  }
918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
919
  function render(result) {
920
  if (result) {
921
  state.observation = result.observation;
@@ -926,6 +1425,8 @@
926
  renderTrust();
927
  renderSpecialists();
928
  renderEvents();
 
 
929
  return;
930
  }
931
 
@@ -933,6 +1434,9 @@
933
  const completed = obs.subtasks_total - obs.subtasks_remaining;
934
  const progress = obs.subtasks_total ? (completed / obs.subtasks_total) * 100 : 0;
935
  const status = state.done ? "DONE" : obs.episode_status.toUpperCase();
 
 
 
936
 
937
  el.statusChip.textContent = status;
938
  el.statusChip.className = `chip ${state.done ? "live" : "live"}`;
@@ -942,11 +1446,14 @@
942
  el.completeValue.textContent = `${completed}/${obs.subtasks_total}`;
943
  el.stakesValue.textContent = Number(obs.stakes_level).toFixed(2);
944
  el.progressFill.style.width = `${Math.max(0, Math.min(100, progress))}%`;
945
- el.subtaskText.textContent = state.done ? "Episode complete." : obs.current_subtask;
946
- el.sessionText.textContent = state.sessionId ? state.sessionId.slice(0, 8) : "No session";
947
- el.detectChip.textContent = `${info.adversarial_detections ?? 0} detected`;
948
- el.poisonChip.textContent = `${info.adversarial_poisonings ?? 0} poison`;
949
- el.poisonChip.className = `chip ${(info.adversarial_poisonings ?? 0) > 0 ? "fail" : "warn"}`;
 
 
 
950
 
951
  renderTrust();
952
  renderSpecialists();
@@ -955,34 +1462,26 @@
955
  setDisabled(state.done || state.running);
956
  }
957
 
958
- function recommendedMove() {
959
- const obs = state.observation;
960
- if (!obs) return {type: "delegate", specialist: "S0"};
961
- const best = bestSpecialist();
962
- const trust = obs.trust_snapshot[best] ?? 0.5;
963
- const highStakes = obs.stakes_level >= 0.70;
964
- if (highStakes && trust < 0.65) return {type: "verify", specialist: best};
965
- return {type: "delegate", specialist: best};
966
- }
967
-
968
- function renderRecommendation() {
969
- if (!el.recommendChip) return;
970
- const move = recommendedMove();
971
- const label = `${move.type}:${move.specialist}`;
972
- el.recommendChip.textContent = label;
973
- el.recommendChip.className = `chip ${move.type === "verify" ? "warn" : "live"}`;
974
  }
975
 
976
- function setDisabled(disabled) {
977
- el.delegateBtn.disabled = disabled;
978
- el.verifyBtn.disabled = disabled;
979
- el.selfBtn.disabled = disabled;
980
- el.skipBtn.disabled = disabled;
 
 
 
 
 
981
  }
982
 
983
  async function resetEpisode() {
984
  state.running = true;
985
  el.resetBtn.disabled = true;
 
986
  try {
987
  const seed = Number(el.seedInput.value || 0);
988
  const response = await fetch("/reset", {
@@ -996,7 +1495,7 @@
996
  state.sessionId = result.info.session_id;
997
  state.events = [];
998
  state.done = false;
999
- addEvent(0, "reset", "Episode initialized.", "0.00");
1000
  render(result);
1001
  } catch (error) {
1002
  addEvent(0, "error", error.message, "0.00");
@@ -1004,26 +1503,11 @@
1004
  } finally {
1005
  state.running = false;
1006
  el.resetBtn.disabled = false;
 
1007
  setDisabled(state.done);
1008
  }
1009
  }
1010
 
1011
- function addEvent(step, action, summary, reward) {
1012
- state.events.push({step, action, summary, reward});
1013
- }
1014
-
1015
- function actionPayload(type, specialist) {
1016
- const obs = state.observation;
1017
- return {
1018
- session_id: state.sessionId,
1019
- task_type: obs.task_type,
1020
- action_type: type,
1021
- specialist_id: specialist,
1022
- subtask_response: type === "solve_independently" ? "SELF_SOLVED" : null,
1023
- reasoning: `ui-${type}${specialist ? "-" + specialist : ""}`
1024
- };
1025
- }
1026
-
1027
  async function stepEpisode(type, specialist = null) {
1028
  if (!state.sessionId || state.done || state.running) return;
1029
  state.running = true;
@@ -1054,14 +1538,19 @@
1054
  async function autoRun() {
1055
  if (!state.observation || state.done) await resetEpisode();
1056
  let guard = 0;
1057
- while (!state.done && guard < 60) {
1058
  const move = recommendedMove();
1059
  await stepEpisode(move.type, move.specialist);
1060
  guard += 1;
1061
- await new Promise(resolve => setTimeout(resolve, 180));
1062
  }
1063
  }
1064
 
 
 
 
 
 
1065
  async function swapProfiles() {
1066
  const nextSeed = Number(el.seedInput.value || 0) + 1;
1067
  el.seedInput.value = String(nextSeed);
@@ -1069,12 +1558,15 @@
1069
  }
1070
 
1071
  el.resetBtn.addEventListener("click", resetEpisode);
 
1072
  el.swapBtn.addEventListener("click", swapProfiles);
 
1073
  el.delegateBtn.addEventListener("click", () => stepEpisode("delegate"));
1074
  el.verifyBtn.addEventListener("click", () => stepEpisode("verify"));
1075
  el.selfBtn.addEventListener("click", () => stepEpisode("solve_independently"));
1076
  el.skipBtn.addEventListener("click", () => stepEpisode("skip"));
1077
  el.autoBtn.addEventListener("click", autoRun);
 
1078
 
1079
  render();
1080
  resetEpisode();
 
3
  <head>
4
  <meta charset="utf-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>SENTINEL Trust Mission Control</title>
7
  <style>
8
  :root {
9
+ --bg: #060706;
10
+ --panel: #10130f;
11
+ --panel-2: #151911;
12
+ --panel-3: #1b2017;
13
+ --ink: #f6f1df;
14
+ --muted: #aaa48e;
15
+ --faint: #6e705f;
16
+ --line: #343a2d;
17
+ --jade: #27e0a1;
18
+ --jade-soft: rgba(39, 224, 161, 0.14);
19
+ --amber: #f5ba41;
20
+ --amber-soft: rgba(245, 186, 65, 0.14);
21
+ --flame: #ff5f45;
22
+ --flame-soft: rgba(255, 95, 69, 0.14);
23
+ --blue: #73a7ff;
24
+ --blue-soft: rgba(115, 167, 255, 0.14);
25
+ --magenta: #e879f9;
26
+ --magenta-soft: rgba(232, 121, 249, 0.12);
27
+ --cream: #fff6d8;
28
+ --shadow: 0 22px 70px rgba(0, 0, 0, 0.38);
29
+ }
30
+
31
+ * {
32
+ box-sizing: border-box;
33
+ }
34
+
35
+ html {
36
+ background: var(--bg);
37
  }
38
 
 
 
39
  body {
40
  margin: 0;
41
  min-height: 100vh;
42
  color: var(--ink);
43
+ background:
44
+ linear-gradient(rgba(255, 255, 255, 0.025) 1px, transparent 1px),
45
+ linear-gradient(90deg, rgba(255, 255, 255, 0.022) 1px, transparent 1px),
46
+ linear-gradient(145deg, #060706 0%, #11130d 44%, #0b0e0a 100%);
47
+ background-size: 32px 32px, 32px 32px, auto;
48
  font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
49
  letter-spacing: 0;
50
  }
51
 
52
+ button,
53
+ select,
54
+ input {
55
  font: inherit;
56
  }
57
 
58
+ button {
59
+ min-height: 40px;
60
+ border: 1px solid #4a5241;
61
+ background: #171b13;
62
+ color: var(--ink);
63
+ border-radius: 8px;
64
+ padding: 0 13px;
65
+ cursor: pointer;
66
+ display: inline-flex;
67
+ align-items: center;
68
+ justify-content: center;
69
+ gap: 8px;
70
+ white-space: nowrap;
71
+ transition: transform 160ms ease, border-color 160ms ease, background 160ms ease;
72
+ }
73
+
74
+ button:hover {
75
+ border-color: #80906f;
76
+ background: #202719;
77
+ transform: translateY(-1px);
78
+ }
79
+
80
+ button:disabled {
81
+ cursor: not-allowed;
82
+ opacity: 0.52;
83
+ transform: none;
84
+ }
85
+
86
+ button.primary {
87
+ border-color: rgba(39, 224, 161, 0.72);
88
+ background: linear-gradient(180deg, #1e6b54, #134736);
89
+ color: #f7fffb;
90
+ }
91
+
92
+ button.warn {
93
+ border-color: rgba(245, 186, 65, 0.72);
94
+ background: linear-gradient(180deg, #73551d, #403012);
95
+ color: #fff4d0;
96
+ }
97
+
98
+ button.danger {
99
+ border-color: rgba(255, 95, 69, 0.72);
100
+ background: #2a1510;
101
+ color: #ffd8d1;
102
+ }
103
+
104
+ select,
105
+ input {
106
+ min-height: 40px;
107
+ border: 1px solid #4a5241;
108
+ background: #0f120d;
109
+ color: var(--ink);
110
+ border-radius: 8px;
111
+ padding: 0 11px;
112
+ outline: none;
113
+ }
114
+
115
+ select:focus,
116
+ input:focus,
117
+ button:focus {
118
+ border-color: var(--jade);
119
+ box-shadow: 0 0 0 3px rgba(39, 224, 161, 0.16);
120
+ }
121
+
122
+ input {
123
+ width: 90px;
124
+ }
125
+
126
  .shell {
127
  min-height: 100vh;
 
 
128
  }
129
 
130
  header {
131
+ position: sticky;
132
+ top: 0;
133
+ z-index: 20;
134
+ display: grid;
135
+ grid-template-columns: minmax(280px, 1fr) auto;
136
+ gap: 18px;
137
  align-items: center;
138
+ padding: 16px 22px;
139
+ background: rgba(7, 8, 6, 0.94);
140
+ border-bottom: 1px solid #2e3529;
141
+ backdrop-filter: blur(14px);
 
 
142
  }
143
 
144
  .brand {
 
149
  }
150
 
151
  .mark {
152
+ width: 42px;
153
+ height: 42px;
154
+ border: 1px solid rgba(39, 224, 161, 0.64);
155
+ border-radius: 8px;
156
  display: grid;
157
  place-items: center;
158
+ background:
159
+ linear-gradient(135deg, rgba(39, 224, 161, 0.28), rgba(245, 186, 65, 0.1)),
160
+ #10150f;
161
+ color: #dfffee;
162
+ font-weight: 860;
163
+ box-shadow: inset 0 0 22px rgba(39, 224, 161, 0.16);
164
+ }
165
+
166
+ h1,
167
+ h2,
168
+ h3,
169
+ p {
170
+ margin: 0;
171
  }
172
 
173
  h1 {
 
174
  font-size: 20px;
175
+ line-height: 1.05;
176
+ font-weight: 840;
177
  }
178
 
179
  .subhead {
180
+ margin-top: 5px;
181
+ color: var(--muted);
182
  font-size: 13px;
 
183
  overflow: hidden;
184
  text-overflow: ellipsis;
185
+ white-space: nowrap;
186
  }
187
 
188
  .header-actions {
189
  display: flex;
190
  align-items: center;
 
 
191
  justify-content: flex-end;
192
+ gap: 9px;
193
+ flex-wrap: wrap;
194
  }
195
 
196
+ .console {
197
+ width: min(1540px, 100%);
198
+ margin: 0 auto;
199
+ padding: 18px;
200
  display: grid;
201
+ gap: 14px;
202
+ grid-template-columns: minmax(420px, 1.35fr) minmax(340px, 0.85fr);
203
  grid-template-areas:
204
+ "theater command"
205
  "mission trust"
206
+ "proof events"
207
+ "flow themes";
 
 
208
  align-items: start;
 
 
 
209
  }
210
 
211
  section {
212
+ min-width: 0;
213
+ background: linear-gradient(180deg, rgba(255, 255, 255, 0.035), transparent 38%), var(--panel);
214
  border: 1px solid var(--line);
215
  border-radius: 8px;
216
  box-shadow: var(--shadow);
217
+ overflow: hidden;
218
  }
219
 
220
+ .theater { grid-area: theater; }
221
+ .command { grid-area: command; }
222
+ .mission { grid-area: mission; }
223
+ .trust { grid-area: trust; }
224
+ .proof { grid-area: proof; }
225
+ .events { grid-area: events; }
226
+ .flow { grid-area: flow; }
227
+ .themes { grid-area: themes; }
228
+
229
  .section-head {
230
+ min-height: 54px;
 
231
  display: flex;
232
  align-items: center;
233
  justify-content: space-between;
234
  gap: 12px;
235
+ padding: 13px 15px;
236
+ border-bottom: 1px solid var(--line);
237
+ background: rgba(0, 0, 0, 0.14);
238
  }
239
 
240
  h2 {
241
+ font-size: 12px;
 
242
  text-transform: uppercase;
 
243
  letter-spacing: 0;
244
+ color: #d6d0b7;
245
+ font-weight: 820;
246
  }
247
 
 
 
 
 
 
 
 
 
 
248
  .body {
249
+ padding: 15px;
250
+ }
251
+
252
+ .chips {
253
+ display: flex;
254
+ flex-wrap: wrap;
255
+ gap: 8px;
256
+ align-items: center;
257
+ justify-content: flex-end;
258
+ }
259
+
260
+ .chip {
261
+ display: inline-flex;
262
+ align-items: center;
263
+ justify-content: center;
264
+ min-height: 27px;
265
+ border: 1px solid #46503d;
266
+ border-radius: 999px;
267
+ padding: 0 9px;
268
+ font-size: 12px;
269
+ font-weight: 760;
270
+ color: #d8d1ba;
271
+ background: #161a13;
272
+ font-variant-numeric: tabular-nums;
273
+ }
274
+
275
+ .chip.live {
276
+ border-color: rgba(39, 224, 161, 0.45);
277
+ color: #a7f9d8;
278
+ background: var(--jade-soft);
279
+ }
280
+
281
+ .chip.warn {
282
+ border-color: rgba(245, 186, 65, 0.46);
283
+ color: #ffe0a3;
284
+ background: var(--amber-soft);
285
+ }
286
+
287
+ .chip.fail {
288
+ border-color: rgba(255, 95, 69, 0.52);
289
+ color: #ffc3b8;
290
+ background: var(--flame-soft);
291
+ }
292
+
293
+ .muted {
294
+ color: var(--muted);
295
+ }
296
+
297
+ .stage {
298
+ position: relative;
299
+ min-height: 464px;
300
+ border: 1px solid #333c2b;
301
+ border-radius: 8px;
302
+ background:
303
+ linear-gradient(90deg, transparent 0, transparent calc(50% - 1px), rgba(245, 186, 65, 0.28) 50%, transparent calc(50% + 1px), transparent 100%),
304
+ linear-gradient(rgba(39, 224, 161, 0.055) 1px, transparent 1px),
305
+ linear-gradient(90deg, rgba(39, 224, 161, 0.04) 1px, transparent 1px),
306
+ #080a08;
307
+ background-size: auto, 26px 26px, 26px 26px, auto;
308
+ overflow: hidden;
309
+ }
310
+
311
+ .stage::after {
312
+ content: "";
313
+ position: absolute;
314
+ inset: 0;
315
+ pointer-events: none;
316
+ border: 1px solid rgba(255, 255, 255, 0.045);
317
+ border-radius: inherit;
318
  }
319
 
320
+ .stage-topline {
321
+ position: absolute;
322
+ top: 13px;
323
+ left: 14px;
324
+ right: 14px;
325
  display: grid;
326
+ grid-template-columns: repeat(3, minmax(0, 1fr));
327
  gap: 10px;
328
+ z-index: 2;
329
  }
330
 
331
+ .stage-label {
332
+ min-height: 46px;
333
+ border: 1px solid #343c2e;
334
+ border-radius: 8px;
335
+ padding: 8px 10px;
336
+ background: rgba(13, 16, 11, 0.86);
337
  }
338
 
339
+ .stage-label span {
340
+ display: block;
341
  color: var(--muted);
342
+ font-size: 11px;
343
  line-height: 1.2;
 
344
  }
345
 
346
+ .stage-label strong {
347
+ display: block;
348
+ margin-top: 4px;
349
+ font-size: 15px;
350
+ color: var(--cream);
351
  overflow-wrap: anywhere;
352
  }
353
 
354
+ .brain-card {
355
+ position: absolute;
356
+ top: 95px;
357
+ left: 50%;
358
+ transform: translateX(-50%);
359
+ width: min(360px, calc(100% - 32px));
360
+ min-height: 96px;
361
+ border: 1px solid rgba(39, 224, 161, 0.56);
362
+ border-radius: 8px;
363
+ padding: 15px;
364
+ background:
365
+ linear-gradient(180deg, rgba(39, 224, 161, 0.18), rgba(39, 224, 161, 0.035)),
366
+ #0f1711;
367
+ box-shadow: 0 0 0 1px rgba(39, 224, 161, 0.06), 0 18px 42px rgba(0, 0, 0, 0.42);
368
+ text-align: center;
369
+ z-index: 3;
370
  }
371
 
372
+ .node-title {
373
+ font-size: 18px;
374
+ font-weight: 820;
375
+ color: #ecfff5;
376
+ }
377
+
378
+ .node-copy {
379
+ margin-top: 6px;
380
+ color: #b8d7c5;
381
+ font-size: 13px;
382
+ line-height: 1.35;
383
+ }
384
+
385
+ .signal-line {
386
+ position: absolute;
387
+ left: 50%;
388
+ top: 190px;
389
+ bottom: 178px;
390
+ width: 1px;
391
+ background: linear-gradient(180deg, rgba(39, 224, 161, 0.7), rgba(245, 186, 65, 0.6), rgba(255, 95, 69, 0.55));
392
+ opacity: 0.9;
393
+ z-index: 1;
394
+ }
395
+
396
+ .specialist-grid {
397
+ position: absolute;
398
+ left: 15px;
399
+ right: 15px;
400
+ bottom: 96px;
401
+ display: grid;
402
+ grid-template-columns: repeat(5, minmax(92px, 1fr));
403
  gap: 10px;
404
+ z-index: 3;
 
405
  }
406
 
407
+ .node {
408
+ min-height: 116px;
409
+ border: 1px solid #3a4433;
410
+ border-radius: 8px;
411
+ padding: 10px;
412
+ background: rgba(16, 19, 15, 0.94);
413
+ position: relative;
414
+ overflow: hidden;
415
  }
416
 
417
+ .node::before {
418
+ content: "";
419
+ position: absolute;
420
+ left: 0;
421
+ right: 0;
422
+ top: 0;
423
+ height: 3px;
424
+ background: var(--node-color, var(--blue));
425
  }
426
 
427
+ .node.active {
428
+ border-color: var(--jade);
429
+ box-shadow: 0 0 0 2px rgba(39, 224, 161, 0.13);
430
+ }
431
+
432
+ .node.watch {
433
+ border-color: rgba(245, 186, 65, 0.72);
434
+ }
435
+
436
+ .node.quarantine {
437
+ border-color: rgba(255, 95, 69, 0.74);
438
+ }
439
+
440
+ .node-id {
441
+ display: flex;
442
  align-items: center;
443
+ justify-content: space-between;
444
  gap: 8px;
445
+ color: #f9f3d9;
446
+ font-weight: 820;
447
+ font-size: 16px;
448
  }
449
 
450
+ .node-trust {
451
+ margin-top: 14px;
452
+ display: grid;
453
+ gap: 8px;
454
  }
455
 
456
+ .trust-rail {
457
+ height: 10px;
458
+ border-radius: 999px;
459
+ background: #2a3024;
460
+ overflow: hidden;
461
  }
462
 
463
+ .trust-rail span {
464
+ display: block;
465
+ height: 100%;
466
+ width: calc(var(--trust, 0.5) * 100%);
467
+ border-radius: inherit;
468
+ background: var(--node-color, var(--blue));
469
+ transition: width 220ms ease, background 220ms ease;
470
  }
471
 
472
+ .node-meta {
473
+ display: flex;
474
+ justify-content: space-between;
475
+ gap: 8px;
476
+ color: var(--muted);
477
+ font-size: 12px;
478
+ font-variant-numeric: tabular-nums;
479
  }
480
 
481
+ .outcome-strip {
482
+ position: absolute;
483
+ left: 15px;
484
+ right: 15px;
485
+ bottom: 14px;
486
  display: grid;
487
+ grid-template-columns: repeat(3, minmax(0, 1fr));
488
  gap: 10px;
489
+ z-index: 3;
490
  }
491
 
492
+ .outcome {
493
+ min-height: 62px;
494
+ border: 1px solid #373f31;
495
+ border-radius: 8px;
496
+ padding: 10px;
497
+ background: rgba(11, 13, 9, 0.91);
498
  }
499
 
500
+ .outcome span {
501
+ color: var(--muted);
502
+ font-size: 11px;
503
+ display: block;
504
+ }
505
+
506
+ .outcome strong {
507
+ display: block;
508
+ margin-top: 5px;
509
+ font-size: 15px;
510
+ color: var(--cream);
511
+ overflow-wrap: anywhere;
512
+ }
513
+
514
+ .command-grid {
515
+ display: grid;
516
+ gap: 12px;
517
+ }
518
+
519
+ .field-row {
520
  display: grid;
521
+ grid-template-columns: 1fr 96px;
522
  gap: 10px;
523
  }
524
 
525
+ .button-row {
526
+ display: grid;
527
+ grid-template-columns: repeat(2, minmax(0, 1fr));
528
+ gap: 10px;
529
+ }
530
+
531
+ .action-row {
532
  display: grid;
533
+ grid-template-columns: repeat(4, minmax(0, 1fr));
534
+ gap: 9px;
535
+ }
536
+
537
+ .decision-card {
538
+ border: 1px solid #394132;
539
+ border-radius: 8px;
540
+ padding: 13px;
541
+ background: var(--panel-2);
542
+ }
543
+
544
+ .decision-title {
545
+ display: flex;
546
+ justify-content: space-between;
547
  align-items: center;
548
+ gap: 10px;
549
+ margin-bottom: 10px;
 
 
 
550
  }
551
 
552
+ .decision-title strong {
553
+ color: var(--cream);
 
554
  }
555
 
556
+ .recommendation {
557
+ display: grid;
558
+ grid-template-columns: 1fr auto;
559
+ gap: 10px;
560
+ align-items: center;
561
+ margin-top: 10px;
562
  }
563
 
564
+ .recommendation-output {
565
+ min-height: 44px;
566
+ border: 1px solid rgba(39, 224, 161, 0.32);
567
+ border-radius: 8px;
568
+ padding: 9px 10px;
569
+ background: rgba(39, 224, 161, 0.08);
570
+ color: #c9ffe8;
571
+ font-weight: 820;
572
+ overflow-wrap: anywhere;
573
  }
574
 
575
+ .stats-grid {
576
+ display: grid;
577
+ grid-template-columns: repeat(4, minmax(0, 1fr));
578
+ gap: 10px;
 
579
  }
580
 
581
+ .stat {
582
+ min-height: 82px;
583
+ border: 1px solid #394132;
584
+ border-radius: 8px;
585
+ padding: 11px;
586
+ background: var(--panel-2);
587
  }
588
 
589
+ .label {
590
+ color: var(--muted);
 
 
 
591
  font-size: 12px;
592
+ line-height: 1.2;
593
+ margin-bottom: 8px;
594
  }
595
 
596
+ .value {
597
+ font-size: 24px;
598
+ line-height: 1;
599
+ color: var(--cream);
600
+ font-weight: 840;
601
+ font-variant-numeric: tabular-nums;
602
+ overflow-wrap: anywhere;
603
+ }
604
+
605
+ .subtask {
606
+ margin-top: 12px;
607
+ min-height: 84px;
608
+ border: 1px solid #3a4433;
609
+ border-radius: 8px;
610
+ padding: 13px;
611
+ background: #0d100b;
612
+ color: #e8e1c9;
613
+ line-height: 1.45;
614
+ }
615
 
616
  .progress {
617
+ height: 12px;
618
+ margin-top: 12px;
619
  border-radius: 999px;
620
  overflow: hidden;
621
+ background: #2a3024;
622
  }
623
 
624
  .progress span {
625
  display: block;
626
  height: 100%;
627
  width: 0%;
628
+ background: linear-gradient(90deg, var(--flame), var(--amber), var(--jade));
629
  border-radius: inherit;
630
  transition: width 220ms ease;
631
  }
632
 
633
+ .risk-meter {
634
  display: grid;
635
  gap: 8px;
636
+ margin-top: 12px;
 
 
637
  }
638
 
639
+ .risk-track {
640
+ height: 12px;
641
+ border-radius: 999px;
642
+ background: #2a3024;
643
+ overflow: hidden;
 
 
 
 
 
644
  }
645
 
646
+ .risk-track span {
647
+ display: block;
648
+ height: 100%;
649
+ width: 0%;
650
+ background: linear-gradient(90deg, var(--jade), var(--amber), var(--flame));
651
+ border-radius: inherit;
652
+ transition: width 220ms ease;
653
  }
654
 
655
+ .trust-list {
656
+ display: grid;
657
+ gap: 9px;
 
 
658
  }
659
 
660
+ .specialist {
661
  display: grid;
662
+ grid-template-columns: 50px 1fr 58px;
663
+ gap: 11px;
664
+ align-items: center;
665
+ min-height: 52px;
666
+ border: 1px solid #394132;
667
+ border-radius: 8px;
668
+ padding: 10px;
669
+ background: var(--panel-2);
670
  }
671
 
672
+ .sid {
673
+ color: var(--cream);
674
+ font-weight: 820;
 
 
675
  }
676
 
677
+ .bar {
678
+ height: 12px;
679
+ border-radius: 999px;
680
+ background: #2a3024;
681
+ overflow: hidden;
682
+ }
683
+
684
+ .fill {
685
+ height: 100%;
686
+ width: 50%;
687
+ border-radius: inherit;
688
+ transition: width 220ms ease, background 220ms ease;
689
  }
690
 
691
+ .score {
692
+ text-align: right;
693
+ color: #e8e1c9;
694
+ font-weight: 820;
695
+ font-variant-numeric: tabular-nums;
696
  }
697
 
698
+ .proof-grid {
699
+ display: grid;
700
+ grid-template-columns: minmax(280px, 0.95fr) minmax(300px, 1.05fr);
701
+ gap: 13px;
702
+ align-items: stretch;
703
  }
704
 
705
+ .baseline-table {
706
+ display: grid;
 
707
  gap: 10px;
708
+ }
709
+
710
+ .baseline-row {
711
+ display: grid;
712
+ grid-template-columns: 98px 1fr 56px;
713
  align-items: center;
714
+ gap: 10px;
715
+ min-height: 39px;
716
+ color: #e9e2ca;
717
+ font-size: 13px;
718
  }
719
 
720
+ .mini-bar {
721
+ height: 12px;
722
  border-radius: 999px;
723
+ background: #2a3024;
724
+ overflow: hidden;
725
+ }
726
+
727
+ .mini-bar span {
728
+ display: block;
729
+ height: 100%;
730
+ border-radius: inherit;
731
+ }
732
+
733
+ .chart-frame {
734
+ min-height: 232px;
735
+ border: 1px solid #394132;
736
+ border-radius: 8px;
737
+ background: #f7f4ec;
738
+ padding: 8px;
739
+ display: grid;
740
+ place-items: center;
741
  }
742
 
743
+ .chart-frame img {
744
+ display: block;
745
+ width: 100%;
746
+ max-height: 360px;
747
+ object-fit: contain;
748
+ border-radius: 4px;
749
+ }
750
 
751
+ .event-list {
752
  display: grid;
753
  gap: 8px;
754
+ max-height: 432px;
755
+ overflow: auto;
756
+ padding-right: 2px;
757
  }
758
 
759
+ .event {
760
+ display: grid;
761
+ grid-template-columns: 48px 1fr 58px;
762
+ gap: 10px;
763
+ align-items: start;
764
+ min-height: 52px;
765
+ border: 1px solid #394132;
766
+ border-radius: 8px;
767
+ padding: 9px;
768
+ background: var(--panel-2);
769
+ font-size: 13px;
770
  }
771
 
772
+ .event strong {
773
+ color: var(--cream);
 
 
774
  }
775
 
776
+ .event .reward {
777
+ text-align: right;
778
+ color: var(--jade);
779
+ font-weight: 840;
780
+ font-variant-numeric: tabular-nums;
781
  }
782
 
783
+ .flow-line {
784
  display: grid;
785
+ grid-template-columns: repeat(6, minmax(0, 1fr));
786
+ gap: 9px;
 
 
 
 
 
 
787
  }
788
 
789
+ .flow-step {
790
+ position: relative;
791
+ min-height: 112px;
792
+ border: 1px solid #394132;
793
+ border-radius: 8px;
794
+ padding: 12px;
795
+ background: var(--panel-2);
796
  }
797
 
798
+ .flow-step::before {
799
+ content: attr(data-num);
800
+ display: grid;
801
+ place-items: center;
802
+ width: 28px;
803
+ height: 28px;
804
+ border-radius: 50%;
805
+ border: 1px solid rgba(39, 224, 161, 0.45);
806
+ color: #b7ffdf;
807
+ background: #101a13;
808
+ font-weight: 820;
809
+ margin-bottom: 10px;
810
  }
811
 
812
+ .flow-step strong {
813
  display: block;
814
+ color: var(--cream);
815
+ margin-bottom: 7px;
816
  }
817
 
818
+ .flow-step span {
819
+ display: block;
820
+ color: var(--muted);
821
+ font-size: 12px;
822
+ line-height: 1.35;
823
+ }
824
 
825
+ .theme-grid {
826
  display: grid;
827
  grid-template-columns: repeat(4, minmax(0, 1fr));
828
  gap: 10px;
 
829
  }
830
 
831
+ .theme-card {
832
+ min-height: 132px;
833
+ border: 1px solid #394132;
834
+ border-radius: 8px;
835
+ padding: 12px;
836
+ background: var(--panel-2);
 
 
837
  }
838
 
839
+ .theme-card strong {
840
+ display: block;
841
+ color: var(--cream);
842
+ margin-bottom: 8px;
843
+ font-size: 15px;
844
  }
845
 
846
+ .theme-card span {
847
+ display: block;
848
+ color: var(--muted);
849
+ line-height: 1.38;
 
850
  font-size: 13px;
851
  }
852
 
853
+ .theme-card.blue {
854
+ border-color: rgba(115, 167, 255, 0.4);
855
+ background: var(--blue-soft);
 
 
856
  }
857
 
858
+ .theme-card.green {
859
+ border-color: rgba(39, 224, 161, 0.4);
860
+ background: var(--jade-soft);
 
861
  }
862
 
863
+ .theme-card.amber {
864
+ border-color: rgba(245, 186, 65, 0.4);
865
+ background: var(--amber-soft);
866
  }
867
 
868
+ .theme-card.magenta {
869
+ border-color: rgba(232, 121, 249, 0.4);
870
+ background: var(--magenta-soft);
871
+ }
 
872
 
873
+ @media (max-width: 1180px) {
874
+ .console {
875
  grid-template-columns: 1fr;
876
  grid-template-areas:
877
+ "theater"
878
+ "command"
879
  "mission"
880
  "trust"
881
+ "proof"
 
882
  "events"
883
+ "flow"
 
884
  "themes";
 
885
  }
886
 
887
+ header {
888
+ grid-template-columns: 1fr;
889
+ }
890
+
891
+ .header-actions {
892
+ justify-content: flex-start;
893
+ }
894
+ }
895
+
896
+ @media (max-width: 860px) {
897
+ .console {
898
+ padding: 12px;
899
+ }
900
+
901
+ .stage {
902
+ min-height: 620px;
903
  }
904
 
905
+ .stage-topline,
906
+ .outcome-strip,
907
+ .proof-grid,
908
+ .flow-line,
909
+ .theme-grid,
910
+ .stats-grid {
911
  grid-template-columns: 1fr;
912
  }
913
 
914
+ .brain-card {
915
+ top: 190px;
916
+ }
917
+
918
+ .signal-line {
919
+ display: none;
920
+ }
921
+
922
+ .specialist-grid {
923
+ top: 315px;
924
+ bottom: auto;
925
+ grid-template-columns: repeat(2, minmax(0, 1fr));
926
+ }
927
+
928
+ .outcome-strip {
929
+ bottom: 12px;
930
+ }
931
+
932
+ .action-row {
933
+ grid-template-columns: repeat(2, minmax(0, 1fr));
934
  }
935
  }
936
 
937
  @media (max-width: 560px) {
938
+ header {
939
+ padding: 13px;
940
+ }
941
+
942
+ .field-row,
943
+ .button-row,
944
+ .recommendation,
945
+ .specialist,
946
+ .event,
947
+ .baseline-row {
948
  grid-template-columns: 1fr;
949
  }
950
 
951
+ input {
952
+ width: 100%;
953
+ }
954
+
955
+ .specialist-grid {
956
+ grid-template-columns: 1fr;
957
  }
958
 
959
+ .stage {
960
+ min-height: 980px;
961
  }
962
 
963
+ .event .reward,
964
+ .score {
965
  text-align: left;
966
  }
967
  }
 
973
  <div class="brand">
974
  <div class="mark">S</div>
975
  <div>
976
+ <h1>SENTINEL Trust Mission Control</h1>
977
+ <div class="subhead">OpenEnv RL environment for adversarial multi-agent trust calibration</div>
978
  </div>
979
  </div>
980
  <div class="header-actions">
 
984
  <option value="task3" selected>Task 3 - Hard</option>
985
  </select>
986
  <input id="seedInput" aria-label="Seed" type="number" value="42">
987
+ <button id="resetBtn" class="primary" type="button">Reset Episode</button>
988
+ <button id="swapBtn" class="warn" type="button">Swap Profiles</button>
989
+ <button id="autoBtn" type="button">Auto Policy</button>
990
  </div>
991
  </header>
992
 
993
+ <main class="console">
994
+ <section class="theater">
995
  <div class="section-head">
996
+ <h2>Live Trust Theater</h2>
997
  <div class="chips">
998
  <span id="statusChip" class="chip live">READY</span>
999
  <span id="scenarioChip" class="chip">SCENARIO</span>
1000
  </div>
1001
  </div>
1002
  <div class="body">
1003
+ <div class="stage">
1004
+ <div class="stage-topline">
1005
+ <div class="stage-label">
1006
+ <span>Profile rule</span>
1007
+ <strong>reshuffle on reset</strong>
1008
+ </div>
1009
+ <div class="stage-label">
1010
+ <span>Observation rule</span>
1011
+ <strong>behavior only</strong>
1012
+ </div>
1013
+ <div class="stage-label">
1014
+ <span>Failure mode</span>
1015
+ <strong>high-stakes poison</strong>
1016
+ </div>
1017
+ </div>
1018
+
1019
+ <div class="brain-card">
1020
+ <div class="node-title">Orchestrator</div>
1021
+ <div id="leadMove" class="node-copy">Reset starts a fresh trust game.</div>
1022
+ </div>
1023
+ <div class="signal-line"></div>
1024
+
1025
+ <div class="specialist-grid" id="networkGrid">
1026
+ <div id="node-S0" class="node" style="--trust:0.5;--node-color:#73a7ff">
1027
+ <div class="node-id"><span>S0</span><span>0.50</span></div>
1028
+ <div class="node-trust"><div class="trust-rail"><span></span></div></div>
1029
+ <div class="node-meta"><span>public slot</span><span>watch</span></div>
1030
+ </div>
1031
+ <div id="node-S1" class="node" style="--trust:0.5;--node-color:#73a7ff">
1032
+ <div class="node-id"><span>S1</span><span>0.50</span></div>
1033
+ <div class="node-trust"><div class="trust-rail"><span></span></div></div>
1034
+ <div class="node-meta"><span>public slot</span><span>watch</span></div>
1035
+ </div>
1036
+ <div id="node-S2" class="node" style="--trust:0.5;--node-color:#73a7ff">
1037
+ <div class="node-id"><span>S2</span><span>0.50</span></div>
1038
+ <div class="node-trust"><div class="trust-rail"><span></span></div></div>
1039
+ <div class="node-meta"><span>public slot</span><span>watch</span></div>
1040
+ </div>
1041
+ <div id="node-S3" class="node" style="--trust:0.5;--node-color:#73a7ff">
1042
+ <div class="node-id"><span>S3</span><span>0.50</span></div>
1043
+ <div class="node-trust"><div class="trust-rail"><span></span></div></div>
1044
+ <div class="node-meta"><span>public slot</span><span>watch</span></div>
1045
+ </div>
1046
+ <div id="node-S4" class="node" style="--trust:0.5;--node-color:#73a7ff">
1047
+ <div class="node-id"><span>S4</span><span>0.50</span></div>
1048
+ <div class="node-trust"><div class="trust-rail"><span></span></div></div>
1049
+ <div class="node-meta"><span>public slot</span><span>watch</span></div>
1050
+ </div>
1051
+ </div>
1052
+
1053
+ <div class="outcome-strip">
1054
+ <div class="outcome">
1055
+ <span>Recommended move</span>
1056
+ <strong id="stageMove">delegate:S0</strong>
1057
+ </div>
1058
+ <div class="outcome">
1059
+ <span>Adversarial signals</span>
1060
+ <strong id="stageSignals">0 detected / 0 poison</strong>
1061
+ </div>
1062
+ <div class="outcome">
1063
+ <span>Trust objective</span>
1064
+ <strong>skill, not identity</strong>
1065
+ </div>
1066
+ </div>
1067
+ </div>
1068
+ </div>
1069
+ </section>
1070
+
1071
+ <section class="command">
1072
+ <div class="section-head">
1073
+ <h2>Command Deck</h2>
1074
+ <span id="sessionText" class="muted">No session</span>
1075
+ </div>
1076
+ <div class="body">
1077
+ <div class="command-grid">
1078
+ <div class="decision-card">
1079
+ <div class="decision-title">
1080
+ <strong>Route Decision</strong>
1081
+ <span id="recommendChip" class="chip live">delegate:S0</span>
1082
+ </div>
1083
+ <select id="specialistSelect" aria-label="Specialist"></select>
1084
+ <div class="recommendation">
1085
+ <div id="recommendText" class="recommendation-output">Waiting for episode state.</div>
1086
+ <button id="applyRecommendBtn" class="primary" type="button">Apply</button>
1087
+ </div>
1088
+ </div>
1089
+
1090
+ <div class="action-row">
1091
+ <button id="delegateBtn" class="primary" type="button">Delegate</button>
1092
+ <button id="verifyBtn" class="warn" type="button">Verify</button>
1093
+ <button id="selfBtn" type="button">Self Solve</button>
1094
+ <button id="skipBtn" class="danger" type="button">Skip</button>
1095
+ </div>
1096
+
1097
+ <div class="button-row">
1098
+ <button id="resetPanelBtn" type="button">New Seed Run</button>
1099
+ <button id="swapPanelBtn" class="warn" type="button">Profile Swap</button>
1100
+ </div>
1101
+ </div>
1102
+ </div>
1103
+ </section>
1104
+
1105
+ <section class="mission">
1106
+ <div class="section-head">
1107
+ <h2>Mission State</h2>
1108
+ <div class="chips">
1109
+ <span id="detectChip" class="chip live">0 detected</span>
1110
+ <span id="poisonChip" class="chip warn">0 poison</span>
1111
+ </div>
1112
+ </div>
1113
+ <div class="body">
1114
+ <div class="stats-grid">
1115
  <div class="stat">
1116
  <div class="label">Score</div>
1117
  <div id="scoreValue" class="value">0.000</div>
1118
  </div>
1119
  <div class="stat">
1120
+ <div class="label">Step Budget</div>
1121
  <div id="stepValue" class="value">0/45</div>
1122
  </div>
1123
  <div class="stat">
1124
+ <div class="label">Subtasks Done</div>
1125
  <div id="completeValue" class="value">0/20</div>
1126
  </div>
1127
  <div class="stat">
 
1130
  </div>
1131
  </div>
1132
  <div class="progress"><span id="progressFill"></span></div>
1133
+ <div class="risk-meter">
1134
+ <div class="label">Risk gate</div>
1135
+ <div class="risk-track"><span id="riskFill"></span></div>
1136
+ </div>
1137
  <div id="subtaskText" class="subtask">Reset an episode to begin.</div>
1138
  </div>
1139
  </section>
1140
 
1141
  <section class="trust">
1142
  <div class="section-head">
1143
+ <h2>Bayesian Trust Ledger</h2>
1144
+ <span id="trustMean" class="muted">mean 0.50</span>
 
 
 
1145
  </div>
1146
  <div class="body">
1147
  <div id="trustList" class="trust-list"></div>
1148
  </div>
1149
  </section>
1150
 
1151
+ <section class="proof">
1152
  <div class="section-head">
1153
+ <h2>Reward Signal Proof</h2>
1154
+ <span class="muted">random to heuristic to oracle-lite</span>
 
 
 
1155
  </div>
1156
  <div class="body">
1157
+ <div class="proof-grid">
1158
+ <div class="baseline-table">
1159
+ <div class="baseline-row">
1160
+ <span>Random</span>
1161
+ <div class="mini-bar"><span style="width:71.4%;background:#ff5f45"></span></div>
1162
+ <strong>0.714</strong>
1163
+ </div>
1164
+ <div class="baseline-row">
1165
+ <span>Heuristic</span>
1166
+ <div class="mini-bar"><span style="width:81.6%;background:#73a7ff"></span></div>
1167
+ <strong>0.816</strong>
1168
+ </div>
1169
+ <div class="baseline-row">
1170
+ <span>Oracle-lite</span>
1171
+ <div class="mini-bar"><span style="width:87.2%;background:#27e0a1"></span></div>
1172
+ <strong>0.872</strong>
1173
+ </div>
1174
+ <div class="baseline-row">
1175
+ <span>T3 detect</span>
1176
+ <div class="mini-bar"><span style="width:73.5%;background:#f5ba41"></span></div>
1177
+ <strong>0.735</strong>
1178
+ </div>
1179
+ </div>
1180
+ <div class="chart-frame">
1181
+ <img src="/assets/baseline_comparison.png" alt="SENTINEL baseline comparison chart">
1182
+ </div>
1183
  </div>
1184
  </div>
1185
  </section>
1186
 
1187
  <section class="events">
1188
  <div class="section-head">
1189
+ <h2>Flight Recorder</h2>
1190
+ <span id="rewardText" class="muted">last reward 0.00</span>
1191
  </div>
1192
  <div class="body">
1193
  <div id="eventList" class="event-list"></div>
1194
  </div>
1195
  </section>
1196
 
1197
+ <section class="flow">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1198
  <div class="section-head">
1199
+ <h2>Code Flow</h2>
1200
+ <span class="muted">reset, step, state</span>
 
 
 
1201
  </div>
1202
  <div class="body">
1203
+ <div class="flow-line">
1204
+ <div class="flow-step" data-num="1">
1205
+ <strong>Reset</strong>
1206
+ <span>environment.py samples scenario, resets graph, ledger, and specialist profile.</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1207
  </div>
1208
+ <div class="flow-step" data-num="2">
1209
+ <strong>Observe</strong>
1210
+ <span>agent sees subtask, stakes, trust snapshot, step budget, and public slots.</span>
 
 
 
 
 
 
 
 
1211
  </div>
1212
+ <div class="flow-step" data-num="3">
1213
+ <strong>Act</strong>
1214
+ <span>delegate, verify, self solve, or skip through the OpenEnv step API.</span>
 
 
 
 
 
 
 
 
 
 
 
1215
  </div>
1216
+ <div class="flow-step" data-num="4">
1217
+ <strong>Specialist</strong>
1218
+ <span>scripted FSM returns outcome, confidence, cost, and adversarial flag.</span>
1219
  </div>
1220
+ <div class="flow-step" data-num="5">
1221
+ <strong>Ledger</strong>
1222
+ <span>trust_ledger.py updates public slot reliability from observed behavior.</span>
1223
  </div>
1224
+ <div class="flow-step" data-num="6">
1225
+ <strong>Reward</strong>
1226
+ <span>graders.py scores completion, detection, calibration, and efficiency.</span>
1227
  </div>
1228
  </div>
1229
  </div>
 
1231
 
1232
  <section class="themes">
1233
  <div class="section-head">
1234
+ <h2>Hackathon Fit</h2>
1235
+ <span class="muted">judge story map</span>
1236
  </div>
1237
  <div class="body">
1238
  <div class="theme-grid">
1239
  <div class="theme-card blue">
1240
  <strong>Theme 1</strong>
1241
+ <span>Orchestrator manages five partially observable actors under adversarial pressure.</span>
1242
  </div>
1243
  <div class="theme-card green">
1244
  <strong>Theme 2</strong>
1245
+ <span>Long-horizon task graph with budget pressure, retries, and delayed terminal reward.</span>
1246
  </div>
1247
  <div class="theme-card amber">
1248
  <strong>Theme 4</strong>
1249
+ <span>Profile shuffle creates an adaptive curriculum and blocks identity memorization.</span>
1250
  </div>
1251
  <div class="theme-card magenta">
1252
+ <strong>Wild Card</strong>
1253
+ <span>Turns blind agent-to-agent trust into a trainable safety and oversight skill.</span>
1254
  </div>
1255
  </div>
1256
  </div>
 
1273
  taskSelect: document.getElementById("taskSelect"),
1274
  seedInput: document.getElementById("seedInput"),
1275
  resetBtn: document.getElementById("resetBtn"),
1276
+ resetPanelBtn: document.getElementById("resetPanelBtn"),
1277
  swapBtn: document.getElementById("swapBtn"),
1278
+ swapPanelBtn: document.getElementById("swapPanelBtn"),
1279
  autoBtn: document.getElementById("autoBtn"),
1280
  specialistSelect: document.getElementById("specialistSelect"),
1281
  recommendChip: document.getElementById("recommendChip"),
1282
+ recommendText: document.getElementById("recommendText"),
1283
+ applyRecommendBtn: document.getElementById("applyRecommendBtn"),
1284
  delegateBtn: document.getElementById("delegateBtn"),
1285
  verifyBtn: document.getElementById("verifyBtn"),
1286
  selfBtn: document.getElementById("selfBtn"),
 
1292
  completeValue: document.getElementById("completeValue"),
1293
  stakesValue: document.getElementById("stakesValue"),
1294
  progressFill: document.getElementById("progressFill"),
1295
+ riskFill: document.getElementById("riskFill"),
1296
  subtaskText: document.getElementById("subtaskText"),
1297
  trustList: document.getElementById("trustList"),
1298
+ trustMean: document.getElementById("trustMean"),
1299
  detectChip: document.getElementById("detectChip"),
1300
  poisonChip: document.getElementById("poisonChip"),
1301
  sessionText: document.getElementById("sessionText"),
1302
+ eventList: document.getElementById("eventList"),
1303
+ leadMove: document.getElementById("leadMove"),
1304
+ stageMove: document.getElementById("stageMove"),
1305
+ stageSignals: document.getElementById("stageSignals"),
1306
+ rewardText: document.getElementById("rewardText")
1307
  };
1308
 
1309
  function trustColor(value) {
1310
+ if (value >= 0.72) return "#27e0a1";
1311
+ if (value >= 0.48) return "#73a7ff";
1312
+ if (value >= 0.30) return "#f5ba41";
1313
+ return "#ff5f45";
1314
+ }
1315
+
1316
+ function trustStatus(value) {
1317
+ if (value >= 0.72) return "trusted";
1318
+ if (value >= 0.48) return "watch";
1319
+ if (value >= 0.30) return "risk";
1320
+ return "quarantine";
1321
  }
1322
 
1323
  function bestSpecialist() {
 
1328
  .sort((a, b) => (obs.trust_snapshot[b] ?? 0.5) - (obs.trust_snapshot[a] ?? 0.5))[0];
1329
  }
1330
 
1331
+ function recommendedMove() {
1332
+ const obs = state.observation;
1333
+ if (!obs) return {type: "delegate", specialist: "S0", trust: 0.5};
1334
+ const best = bestSpecialist();
1335
+ const trust = obs.trust_snapshot[best] ?? 0.5;
1336
+ const highStakes = obs.stakes_level >= 0.70;
1337
+ if (highStakes && trust < 0.65) return {type: "verify", specialist: best, trust};
1338
+ return {type: "delegate", specialist: best, trust};
1339
+ }
1340
+
1341
  function renderTrust() {
1342
  const trust = state.observation?.trust_snapshot || Object.fromEntries(ids.map(id => [id, 0.5]));
1343
+ const values = ids.map(id => Number(trust[id] ?? 0.5));
1344
+ const mean = values.reduce((sum, value) => sum + value, 0) / values.length;
1345
+ el.trustMean.textContent = `mean ${mean.toFixed(2)}`;
1346
+
1347
  el.trustList.innerHTML = ids.map(id => {
1348
  const value = Number(trust[id] ?? 0.5);
1349
  const pct = Math.round(value * 100);
1350
+ const color = trustColor(value);
1351
  return `
1352
  <div class="specialist">
1353
  <div class="sid">${id}</div>
1354
+ <div class="bar"><div class="fill" style="width:${pct}%;background:${color}"></div></div>
1355
  <div class="score">${value.toFixed(2)}</div>
1356
  </div>
1357
  `;
1358
  }).join("");
1359
+
1360
+ ids.forEach(id => {
1361
+ const node = document.getElementById(`node-${id}`);
1362
+ const value = Number(trust[id] ?? 0.5);
1363
+ const status = trustStatus(value);
1364
+ const active = id === bestSpecialist();
1365
+ node.style.setProperty("--trust", value);
1366
+ node.style.setProperty("--node-color", trustColor(value));
1367
+ node.className = `node ${active ? "active" : ""} ${status === "risk" ? "watch" : ""} ${status === "quarantine" ? "quarantine" : ""}`;
1368
+ node.querySelector(".node-id").innerHTML = `<span>${id}</span><span>${value.toFixed(2)}</span>`;
1369
+ node.querySelector(".node-meta").innerHTML = `<span>public slot</span><span>${status}</span>`;
1370
+ });
1371
  }
1372
 
1373
  function renderSpecialists() {
 
1382
  el.eventList.innerHTML = `<div class="muted">No events yet.</div>`;
1383
  return;
1384
  }
1385
+ el.eventList.innerHTML = state.events.slice(-18).reverse().map(item => `
1386
  <div class="event">
1387
  <strong>#${item.step}</strong>
1388
  <div>${item.action}<br><span class="muted">${item.summary}</span></div>
 
1391
  `).join("");
1392
  }
1393
 
1394
+ function renderRecommendation() {
1395
+ const move = recommendedMove();
1396
+ const obs = state.observation;
1397
+ const label = `${move.type}:${move.specialist}`;
1398
+ const highStakes = Number(obs?.stakes_level ?? 0) >= 0.70;
1399
+ el.recommendChip.textContent = label;
1400
+ el.recommendChip.className = `chip ${move.type === "verify" ? "warn" : "live"}`;
1401
+ el.stageMove.textContent = label;
1402
+ el.leadMove.textContent = obs
1403
+ ? `${move.type.toUpperCase()} ${move.specialist} at trust ${move.trust.toFixed(2)}`
1404
+ : "Reset starts a fresh trust game.";
1405
+ el.recommendText.textContent = highStakes
1406
+ ? `High-stakes gate active. ${move.type === "verify" ? "Verify before accepting output." : "Best specialist is trusted enough to delegate."}`
1407
+ : `Route to ${move.specialist}; keep budget for later high-stakes checks.`;
1408
+ }
1409
+
1410
+ function setDisabled(disabled) {
1411
+ el.delegateBtn.disabled = disabled;
1412
+ el.verifyBtn.disabled = disabled;
1413
+ el.selfBtn.disabled = disabled;
1414
+ el.skipBtn.disabled = disabled;
1415
+ el.applyRecommendBtn.disabled = disabled;
1416
+ }
1417
+
1418
  function render(result) {
1419
  if (result) {
1420
  state.observation = result.observation;
 
1425
  renderTrust();
1426
  renderSpecialists();
1427
  renderEvents();
1428
+ renderRecommendation();
1429
+ setDisabled(true);
1430
  return;
1431
  }
1432
 
 
1434
  const completed = obs.subtasks_total - obs.subtasks_remaining;
1435
  const progress = obs.subtasks_total ? (completed / obs.subtasks_total) * 100 : 0;
1436
  const status = state.done ? "DONE" : obs.episode_status.toUpperCase();
1437
+ const detections = info.adversarial_detections ?? 0;
1438
+ const poisonings = info.adversarial_poisonings ?? 0;
1439
+ const lastReward = Number(result?.reward?.value ?? obs.last_reward ?? 0);
1440
 
1441
  el.statusChip.textContent = status;
1442
  el.statusChip.className = `chip ${state.done ? "live" : "live"}`;
 
1446
  el.completeValue.textContent = `${completed}/${obs.subtasks_total}`;
1447
  el.stakesValue.textContent = Number(obs.stakes_level).toFixed(2);
1448
  el.progressFill.style.width = `${Math.max(0, Math.min(100, progress))}%`;
1449
+ el.riskFill.style.width = `${Math.round(Number(obs.stakes_level || 0) * 100)}%`;
1450
+ el.subtaskText.textContent = state.done ? "Episode complete. Swap profiles for the generalization demo." : obs.current_subtask;
1451
+ el.sessionText.textContent = state.sessionId ? `session ${state.sessionId.slice(0, 8)}` : "No session";
1452
+ el.detectChip.textContent = `${detections} detected`;
1453
+ el.poisonChip.textContent = `${poisonings} poison`;
1454
+ el.poisonChip.className = `chip ${poisonings > 0 ? "fail" : "warn"}`;
1455
+ el.stageSignals.textContent = `${detections} detected / ${poisonings} poison`;
1456
+ el.rewardText.textContent = `last reward ${lastReward.toFixed(2)}`;
1457
 
1458
  renderTrust();
1459
  renderSpecialists();
 
1462
  setDisabled(state.done || state.running);
1463
  }
1464
 
1465
+ function addEvent(step, action, summary, reward) {
1466
+ state.events.push({step, action, summary, reward});
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1467
  }
1468
 
1469
+ function actionPayload(type, specialist) {
1470
+ const obs = state.observation;
1471
+ return {
1472
+ session_id: state.sessionId,
1473
+ task_type: obs.task_type,
1474
+ action_type: type,
1475
+ specialist_id: specialist,
1476
+ subtask_response: type === "solve_independently" ? "SELF_SOLVED" : null,
1477
+ reasoning: `ui-${type}${specialist ? "-" + specialist : ""}`
1478
+ };
1479
  }
1480
 
1481
  async function resetEpisode() {
1482
  state.running = true;
1483
  el.resetBtn.disabled = true;
1484
+ el.resetPanelBtn.disabled = true;
1485
  try {
1486
  const seed = Number(el.seedInput.value || 0);
1487
  const response = await fetch("/reset", {
 
1495
  state.sessionId = result.info.session_id;
1496
  state.events = [];
1497
  state.done = false;
1498
+ addEvent(0, "reset", "Episode initialized with shuffled hidden profiles.", "0.00");
1499
  render(result);
1500
  } catch (error) {
1501
  addEvent(0, "error", error.message, "0.00");
 
1503
  } finally {
1504
  state.running = false;
1505
  el.resetBtn.disabled = false;
1506
+ el.resetPanelBtn.disabled = false;
1507
  setDisabled(state.done);
1508
  }
1509
  }
1510
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1511
  async function stepEpisode(type, specialist = null) {
1512
  if (!state.sessionId || state.done || state.running) return;
1513
  state.running = true;
 
1538
  async function autoRun() {
1539
  if (!state.observation || state.done) await resetEpisode();
1540
  let guard = 0;
1541
+ while (!state.done && guard < 70) {
1542
  const move = recommendedMove();
1543
  await stepEpisode(move.type, move.specialist);
1544
  guard += 1;
1545
+ await new Promise(resolve => setTimeout(resolve, 150));
1546
  }
1547
  }
1548
 
1549
+ async function applyRecommendation() {
1550
+ const move = recommendedMove();
1551
+ await stepEpisode(move.type, move.specialist);
1552
+ }
1553
+
1554
  async function swapProfiles() {
1555
  const nextSeed = Number(el.seedInput.value || 0) + 1;
1556
  el.seedInput.value = String(nextSeed);
 
1558
  }
1559
 
1560
  el.resetBtn.addEventListener("click", resetEpisode);
1561
+ el.resetPanelBtn.addEventListener("click", resetEpisode);
1562
  el.swapBtn.addEventListener("click", swapProfiles);
1563
+ el.swapPanelBtn.addEventListener("click", swapProfiles);
1564
  el.delegateBtn.addEventListener("click", () => stepEpisode("delegate"));
1565
  el.verifyBtn.addEventListener("click", () => stepEpisode("verify"));
1566
  el.selfBtn.addEventListener("click", () => stepEpisode("solve_independently"));
1567
  el.skipBtn.addEventListener("click", () => stepEpisode("skip"));
1568
  el.autoBtn.addEventListener("click", autoRun);
1569
+ el.applyRecommendBtn.addEventListener("click", applyRecommendation);
1570
 
1571
  render();
1572
  resetEpisode();