chane35 committed on
Commit
b4476f2
·
verified ·
1 Parent(s): 9a612b1

PERMANENCE training: 4-stage SFT -> gate -> GRPO -> eval pipeline

Browse files
Files changed (1) hide show
  1. server/app.py +93 -6
server/app.py CHANGED
@@ -214,6 +214,31 @@ section{margin-bottom:64px}
214
  .demo-btn .sub{color:var(--fg-2);font-size:11px;font-family:'JetBrains Mono',ui-monospace,monospace}
215
  .result-pane{background:#030612;border:1px solid var(--border);border-radius:12px;padding:18px;margin-top:18px;min-height:100px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12px;color:#b4fc7c;white-space:pre-wrap;overflow-x:auto;line-height:1.55}
216
  .result-pane.idle{color:var(--fg-2);font-style:italic;font-family:inherit;font-size:13px}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  .chip-row{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:24px}
218
  .chip{padding:5px 12px;border-radius:999px;font-size:12px;font-weight:500;background:rgba(34,211,238,0.08);color:#7dd3fc;border:1px solid rgba(34,211,238,0.2)}
219
  .code-block{background:#020510;border:1px solid var(--border);border-radius:12px;padding:20px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12.5px;line-height:1.7;color:#a5b4fc;overflow-x:auto}
@@ -226,6 +251,7 @@ section{margin-bottom:64px}
226
  footer{text-align:center;color:var(--fg-2);font-size:13px;padding:40px 0 20px;border-top:1px solid var(--border)}
227
  footer a{color:var(--fg-1);text-decoration:none}footer a:hover{color:var(--fg-0)}
228
  @media (max-width:680px){.container{padding:24px 20px 60px}.demo-grid{grid-template-columns:1fr}.hero h1{font-size:2rem}}
 
229
  </style>
230
  </head><body>
231
  <div class="container">
@@ -313,7 +339,10 @@ footer a{color:var(--fg-1);text-decoration:none}footer a:hover{color:var(--fg-0)
313
  </div>
314
  <textarea id="sc" style="width:100%;min-height:120px;font-family:'JetBrains Mono',ui-monospace,monospace;background:#030612;color:#c4b5fd;border:1px solid var(--border);border-radius:12px;padding:16px;font-size:13px;line-height:1.5;resize:vertical" placeholder="e.g. The release-notes commit has a typo. I want to git commit --amend and push..."></textarea>
315
  <div style="margin-top:12px"><button class="btn btn-primary" onclick="runScenario()">▶ Run scenario</button></div>
316
- <div id="result" class="result-pane idle">results will appear here.</div>
 
 
 
317
  </section>
318
  <section>
319
  <div class="section-head">
@@ -365,14 +394,72 @@ async function traj(task, prepared){
365
  async function runScenario(){
366
  const text = document.getElementById('sc').value.trim();
367
  if(!text) return;
368
- const pane = document.getElementById('result');
369
- pane.classList.remove('idle');
370
- pane.textContent = '▸ running…';
371
  try {
372
  const r = await fetch('/api/scenario',{method:'POST',headers:{'content-type':'application/json'},body:JSON.stringify({scenario:text})});
373
  const j = await r.json();
374
- pane.textContent = JSON.stringify(j, null, 2);
375
- } catch(e){ pane.textContent = 'error: ' + e.message; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  </script>
378
  </body></html>
 
214
  .demo-btn .sub{color:var(--fg-2);font-size:11px;font-family:'JetBrains Mono',ui-monospace,monospace}
215
  .result-pane{background:#030612;border:1px solid var(--border);border-radius:12px;padding:18px;margin-top:18px;min-height:100px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12px;color:#b4fc7c;white-space:pre-wrap;overflow-x:auto;line-height:1.55}
216
  .result-pane.idle{color:var(--fg-2);font-style:italic;font-family:inherit;font-size:13px}
217
+ .sbx-card{background:var(--card-bg);border:1px solid var(--border);border-radius:14px;padding:22px;margin-top:18px}
218
+ .sbx-hero{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:14px;padding-bottom:18px;border-bottom:1px solid var(--border);margin-bottom:16px}
219
+ .sbx-hero-item{display:flex;flex-direction:column;gap:4px}
220
+ .sbx-hero-label,.sbx-section-head{color:var(--fg-2);font-size:11px;font-weight:700;letter-spacing:0.14em;text-transform:uppercase}
221
+ .sbx-hero-value{font-size:16px;font-weight:600;color:var(--fg-0);line-height:1.25}
222
+ .sbx-hero-value.mono,.sbx-conf,.sbx-state-key,.sbx-state-value,.sbx-locked-row{font-family:'JetBrains Mono',ui-monospace,monospace}
223
+ .sbx-conf{font-size:12px;color:var(--fg-2)}
224
+ .sbx-reward.positive{background:linear-gradient(180deg,#a3e635,#65a30d);-webkit-background-clip:text;-webkit-text-fill-color:transparent}
225
+ .sbx-reward.zero{color:var(--fg-0)}
226
+ .sbx-reward.negative{color:var(--r5)}
227
+ .sbx-section{margin-top:16px}
228
+ .sbx-reasoning{font-size:14px;color:var(--fg-1);font-style:italic;line-height:1.55;padding:12px;background:rgba(5,7,18,0.45);border-radius:8px;border-left:3px solid var(--accent-2)}
229
+ .sbx-state-grid{display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:8px}
230
+ .sbx-state-row{display:flex;justify-content:space-between;align-items:center;padding:8px 12px;background:rgba(5,7,18,0.45);border-radius:8px;gap:10px}
231
+ .sbx-state-key{font-size:12px;color:var(--fg-1)}
232
+ .sbx-state-value{font-size:11px;font-weight:700;letter-spacing:0.08em;padding:2px 8px;border-radius:5px}
233
+ .sbx-state-value.ok{color:var(--r1);background:rgba(52,211,153,0.1)}
234
+ .sbx-state-value.bad{color:var(--r5);background:rgba(248,113,113,0.1)}
235
+ .sbx-state-value.neutral{color:var(--fg-0);background:rgba(148,163,184,0.1)}
236
+ .sbx-locked-empty{color:var(--fg-2);font-size:13px;font-style:italic}
237
+ .sbx-locked-list{display:flex;flex-direction:column;gap:6px}
238
+ .sbx-locked-row{padding:10px 12px;background:rgba(248,113,113,0.05);border:1px solid rgba(248,113,113,0.25);border-radius:8px;color:var(--r5);font-size:12px}
239
+ .sbx-toggle{margin-top:14px;background:transparent;border:none;padding:0;color:var(--fg-2);font-size:12px;cursor:pointer;text-decoration:none}
240
+ .sbx-toggle:hover{text-decoration:underline;color:var(--fg-1)}
241
+ .sbx-raw{display:none;background:#030612;border:1px solid var(--border);border-radius:12px;padding:18px;margin-top:12px;min-height:100px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12px;color:#b4fc7c;white-space:pre-wrap;overflow-x:auto;line-height:1.55}
242
  .chip-row{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:24px}
243
  .chip{padding:5px 12px;border-radius:999px;font-size:12px;font-weight:500;background:rgba(34,211,238,0.08);color:#7dd3fc;border:1px solid rgba(34,211,238,0.2)}
244
  .code-block{background:#020510;border:1px solid var(--border);border-radius:12px;padding:20px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12.5px;line-height:1.7;color:#a5b4fc;overflow-x:auto}
 
251
  footer{text-align:center;color:var(--fg-2);font-size:13px;padding:40px 0 20px;border-top:1px solid var(--border)}
252
  footer a{color:var(--fg-1);text-decoration:none}footer a:hover{color:var(--fg-0)}
253
  @media (max-width:680px){.container{padding:24px 20px 60px}.demo-grid{grid-template-columns:1fr}.hero h1{font-size:2rem}}
254
+ @media (max-width:560px){.sbx-state-grid{grid-template-columns:1fr}}
255
  </style>
256
  </head><body>
257
  <div class="container">
 
339
  </div>
340
  <textarea id="sc" style="width:100%;min-height:120px;font-family:'JetBrains Mono',ui-monospace,monospace;background:#030612;color:#c4b5fd;border:1px solid var(--border);border-radius:12px;padding:16px;font-size:13px;line-height:1.5;resize:vertical" placeholder="e.g. The release-notes commit has a typo. I want to git commit --amend and push..."></textarea>
341
  <div style="margin-top:12px"><button class="btn btn-primary" onclick="runScenario()">▶ Run scenario</button></div>
342
+ <div id="result" class="sbx-card">
343
+ <div class="result-pane idle" style="margin-top:0">results will appear here.</div>
344
+ <pre id="resultRaw" class="sbx-raw"></pre>
345
+ </div>
346
  </section>
347
  <section>
348
  <div class="section-head">
 
394
  async function runScenario(){
395
  const text = document.getElementById('sc').value.trim();
396
  if(!text) return;
397
+ const pane = document.getElementById('result');
398
+ const raw = document.getElementById('resultRaw');
399
+ pane.innerHTML = '<div class="result-pane idle" style="margin-top:0">▸ running…</div><pre id="resultRaw" class="sbx-raw"></pre>';
400
  try {
401
  const r = await fetch('/api/scenario',{method:'POST',headers:{'content-type':'application/json'},body:JSON.stringify({scenario:text})});
402
  const j = await r.json();
403
+ const canonical = typeof j.canonical_action === 'string' ? j.canonical_action : '';
404
+ const actionMatch = canonical.match(/<action\s+id="([^"]+)"([^\/]*?)\/>/i);
405
+ const revMatch = canonical.match(/<reversibility\s+level="R(\d)"\s*confidence="([^"]+)"/i);
406
+ const thinkingMatch = canonical.match(/<thinking>([\s\S]*?)<\/thinking>/i);
407
+
408
+ const matchedTask = j.matched_task || '—';
409
+ const actionId = actionMatch ? actionMatch[1] : '—';
410
+ const predictedNum = revMatch ? revMatch[1] : '';
411
+ const predictedLevel = predictedNum ? ('R' + predictedNum) : '—';
412
+ const predictedConf = revMatch ? revMatch[2] : '—';
413
+ const thinking = thinkingMatch ? thinkingMatch[1].trim() : 'No reasoning emitted.';
414
+ const reward = (typeof j.reward === 'number') ? j.reward : null;
415
+ const rewardText = (reward === null) ? '—' : ((reward >= 0 ? '+' : '') + reward.toFixed(3));
416
+ const rewardClass = reward === null ? 'zero' : (reward >= 0.5 ? 'positive' : (reward < 0 ? 'negative' : 'zero'));
417
+ const terminated = Boolean(j.terminated);
418
+
419
+ const finalState = j.final_state_summary || {};
420
+ const lockedActions = finalState.locked_actions && typeof finalState.locked_actions === 'object' ? finalState.locked_actions : {};
421
+ const critical = finalState.critical_options && typeof finalState.critical_options === 'object' ? finalState.critical_options : {};
422
+ const step = (finalState.step === 0 || finalState.step) ? String(finalState.step) : '—';
423
+
424
+ const criticalRows = Object.keys(critical).length ? Object.entries(critical).map(([k,v]) => {
425
+ const isBool = typeof v === 'boolean';
426
+ const label = isBool ? (v ? 'AVAILABLE' : 'LOCKED') : String(v);
427
+ const klass = isBool ? (v ? 'ok' : 'bad') : 'neutral';
428
+ return '<div class="sbx-state-row"><span class="sbx-state-key">' + k + '</span><span class="sbx-state-value ' + klass + '">' + label + '</span></div>';
429
+ }).join('') : '<div class="sbx-state-row"><span class="sbx-state-key">none</span><span class="sbx-state-value neutral">—</span></div>';
430
+
431
+ const lockedKeys = Object.keys(lockedActions);
432
+ const lockedHtml = lockedKeys.length
433
+ ? '<div class="sbx-locked-list">' + lockedKeys.map((k) => '<div class="sbx-locked-row">' + k + '</div>').join('') + '</div>'
434
+ : '<div class="sbx-locked-empty">none (all downstream options preserved)</div>';
435
+
436
+ pane.innerHTML =
437
+ '<div class="sbx-hero">'
438
+ + '<div class="sbx-hero-item"><span class="sbx-hero-label">matched task</span><span class="sbx-hero-value mono">' + matchedTask + '</span></div>'
439
+ + '<div class="sbx-hero-item"><span class="sbx-hero-label">scripted action</span><span class="sbx-hero-value mono">' + actionId + '</span></div>'
440
+ + '<div class="sbx-hero-item"><span class="sbx-hero-label">predicted R</span><span class="rlevel r' + (predictedNum || '1') + '">' + predictedLevel + '</span><span class="sbx-conf">conf ' + predictedConf + '</span></div>'
441
+ + '<div class="sbx-hero-item"><span class="sbx-hero-label">reward</span><span class="sbx-hero-value sbx-reward ' + rewardClass + '">' + rewardText + '</span></div>'
442
+ + '<div class="sbx-hero-item"><span class="sbx-hero-label">outcome</span><span class="sbx-hero-value">' + (terminated ? 'task completed' : 'task ongoing') + '</span><span class="sbx-conf">step ' + step + '</span></div>'
443
+ + '</div>'
444
+ + '<div class="sbx-section"><div class="sbx-section-head">reasoning</div><div class="sbx-reasoning">' + thinking + '</div></div>'
445
+ + '<div class="sbx-section"><div class="sbx-section-head">critical state</div><div class="sbx-state-grid">' + criticalRows + '</div></div>'
446
+ + '<div class="sbx-section"><div class="sbx-section-head">locked actions</div><div class="sbx-locked">' + lockedHtml + '</div></div>'
447
+ + '<button class="sbx-toggle" onclick="toggleRaw()">show raw JSON ▾</button>'
448
+ + '<pre id="resultRaw" class="sbx-raw" style="display:none"></pre>';
449
+
450
+ const rawPane = document.getElementById('resultRaw');
451
+ if(rawPane){ rawPane.textContent = JSON.stringify(j, null, 2); }
452
+ } catch(e){
453
+ pane.innerHTML = '<div class="result-pane idle" style="margin-top:0">error: ' + e.message + '</div><pre id="resultRaw" class="sbx-raw"></pre>';
454
+ }
455
+ }
456
/**
 * Show or hide the raw-JSON pane (#resultRaw) and flip the label of the
 * toggle button inside #result to match.
 *
 * Does nothing when either element is missing, for example while the pane is
 * showing the idle, running or error state, which have no toggle button.
 */
function toggleRaw(){
  const raw = document.getElementById('resultRaw');
  const btn = document.querySelector('#result .sbx-toggle');
  if(!raw || !btn) return;
  // The .sbx-raw class hides the pane by default, so an empty inline display
  // means "hidden"; only an explicit inline 'block' counts as visible.
  const showing = raw.style.display === 'block';
  raw.style.display = showing ? 'none' : 'block';
  btn.textContent = showing ? 'show raw JSON ▾' : 'hide raw JSON ▴';
}
464
  </script>
465
  </body></html>