Spaces:
Sleeping
Sleeping
PERMANENCE training: 4-stage SFT -> gate -> GRPO -> eval pipeline
Browse files: server/app.py (+93 -6)
server/app.py
CHANGED
|
@@ -214,6 +214,31 @@ section{margin-bottom:64px}
|
|
| 214 |
.demo-btn .sub{color:var(--fg-2);font-size:11px;font-family:'JetBrains Mono',ui-monospace,monospace}
|
| 215 |
.result-pane{background:#030612;border:1px solid var(--border);border-radius:12px;padding:18px;margin-top:18px;min-height:100px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12px;color:#b4fc7c;white-space:pre-wrap;overflow-x:auto;line-height:1.55}
|
| 216 |
.result-pane.idle{color:var(--fg-2);font-style:italic;font-family:inherit;font-size:13px}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
.chip-row{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:24px}
|
| 218 |
.chip{padding:5px 12px;border-radius:999px;font-size:12px;font-weight:500;background:rgba(34,211,238,0.08);color:#7dd3fc;border:1px solid rgba(34,211,238,0.2)}
|
| 219 |
.code-block{background:#020510;border:1px solid var(--border);border-radius:12px;padding:20px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12.5px;line-height:1.7;color:#a5b4fc;overflow-x:auto}
|
|
@@ -226,6 +251,7 @@ section{margin-bottom:64px}
|
|
| 226 |
footer{text-align:center;color:var(--fg-2);font-size:13px;padding:40px 0 20px;border-top:1px solid var(--border)}
|
| 227 |
footer a{color:var(--fg-1);text-decoration:none}footer a:hover{color:var(--fg-0)}
|
| 228 |
@media (max-width:680px){.container{padding:24px 20px 60px}.demo-grid{grid-template-columns:1fr}.hero h1{font-size:2rem}}
|
|
|
|
| 229 |
</style>
|
| 230 |
</head><body>
|
| 231 |
<div class="container">
|
|
@@ -313,7 +339,10 @@ footer a{color:var(--fg-1);text-decoration:none}footer a:hover{color:var(--fg-0)
|
|
| 313 |
</div>
|
| 314 |
<textarea id="sc" style="width:100%;min-height:120px;font-family:'JetBrains Mono',ui-monospace,monospace;background:#030612;color:#c4b5fd;border:1px solid var(--border);border-radius:12px;padding:16px;font-size:13px;line-height:1.5;resize:vertical" placeholder="e.g. The release-notes commit has a typo. I want to git commit --amend and push..."></textarea>
|
| 315 |
<div style="margin-top:12px"><button class="btn btn-primary" onclick="runScenario()">▶ Run scenario</button></div>
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
| 317 |
</section>
|
| 318 |
<section>
|
| 319 |
<div class="section-head">
|
|
@@ -365,14 +394,72 @@ async function traj(task, prepared){
|
|
| 365 |
async function runScenario(){
|
| 366 |
const text = document.getElementById('sc').value.trim();
|
| 367 |
if(!text) return;
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
try {
|
| 372 |
const r = await fetch('/api/scenario',{method:'POST',headers:{'content-type':'application/json'},body:JSON.stringify({scenario:text})});
|
| 373 |
const j = await r.json();
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
}
|
| 377 |
</script>
|
| 378 |
</body></html>
|
|
|
|
| 214 |
.demo-btn .sub{color:var(--fg-2);font-size:11px;font-family:'JetBrains Mono',ui-monospace,monospace}
|
| 215 |
.result-pane{background:#030612;border:1px solid var(--border);border-radius:12px;padding:18px;margin-top:18px;min-height:100px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12px;color:#b4fc7c;white-space:pre-wrap;overflow-x:auto;line-height:1.55}
|
| 216 |
.result-pane.idle{color:var(--fg-2);font-style:italic;font-family:inherit;font-size:13px}
|
| 217 |
+
.sbx-card{background:var(--card-bg);border:1px solid var(--border);border-radius:14px;padding:22px;margin-top:18px}
|
| 218 |
+
.sbx-hero{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:14px;padding-bottom:18px;border-bottom:1px solid var(--border);margin-bottom:16px}
|
| 219 |
+
.sbx-hero-item{display:flex;flex-direction:column;gap:4px}
|
| 220 |
+
.sbx-hero-label,.sbx-section-head{color:var(--fg-2);font-size:11px;font-weight:700;letter-spacing:0.14em;text-transform:uppercase}
|
| 221 |
+
.sbx-hero-value{font-size:16px;font-weight:600;color:var(--fg-0);line-height:1.25}
|
| 222 |
+
.sbx-hero-value.mono,.sbx-conf,.sbx-state-key,.sbx-state-value,.sbx-locked-row{font-family:'JetBrains Mono',ui-monospace,monospace}
|
| 223 |
+
.sbx-conf{font-size:12px;color:var(--fg-2)}
|
| 224 |
+
.sbx-reward.positive{background:linear-gradient(180deg,#a3e635,#65a30d);-webkit-background-clip:text;-webkit-text-fill-color:transparent}
|
| 225 |
+
.sbx-reward.zero{color:var(--fg-0)}
|
| 226 |
+
.sbx-reward.negative{color:var(--r5)}
|
| 227 |
+
.sbx-section{margin-top:16px}
|
| 228 |
+
.sbx-reasoning{font-size:14px;color:var(--fg-1);font-style:italic;line-height:1.55;padding:12px;background:rgba(5,7,18,0.45);border-radius:8px;border-left:3px solid var(--accent-2)}
|
| 229 |
+
.sbx-state-grid{display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:8px}
|
| 230 |
+
.sbx-state-row{display:flex;justify-content:space-between;align-items:center;padding:8px 12px;background:rgba(5,7,18,0.45);border-radius:8px;gap:10px}
|
| 231 |
+
.sbx-state-key{font-size:12px;color:var(--fg-1)}
|
| 232 |
+
.sbx-state-value{font-size:11px;font-weight:700;letter-spacing:0.08em;padding:2px 8px;border-radius:5px}
|
| 233 |
+
.sbx-state-value.ok{color:var(--r1);background:rgba(52,211,153,0.1)}
|
| 234 |
+
.sbx-state-value.bad{color:var(--r5);background:rgba(248,113,113,0.1)}
|
| 235 |
+
.sbx-state-value.neutral{color:var(--fg-0);background:rgba(148,163,184,0.1)}
|
| 236 |
+
.sbx-locked-empty{color:var(--fg-2);font-size:13px;font-style:italic}
|
| 237 |
+
.sbx-locked-list{display:flex;flex-direction:column;gap:6px}
|
| 238 |
+
.sbx-locked-row{padding:10px 12px;background:rgba(248,113,113,0.05);border:1px solid rgba(248,113,113,0.25);border-radius:8px;color:var(--r5);font-size:12px}
|
| 239 |
+
.sbx-toggle{margin-top:14px;background:transparent;border:none;padding:0;color:var(--fg-2);font-size:12px;cursor:pointer;text-decoration:none}
|
| 240 |
+
.sbx-toggle:hover{text-decoration:underline;color:var(--fg-1)}
|
| 241 |
+
.sbx-raw{display:none;background:#030612;border:1px solid var(--border);border-radius:12px;padding:18px;margin-top:12px;min-height:100px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12px;color:#b4fc7c;white-space:pre-wrap;overflow-x:auto;line-height:1.55}
|
| 242 |
.chip-row{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:24px}
|
| 243 |
.chip{padding:5px 12px;border-radius:999px;font-size:12px;font-weight:500;background:rgba(34,211,238,0.08);color:#7dd3fc;border:1px solid rgba(34,211,238,0.2)}
|
| 244 |
.code-block{background:#020510;border:1px solid var(--border);border-radius:12px;padding:20px;font-family:'JetBrains Mono',ui-monospace,monospace;font-size:12.5px;line-height:1.7;color:#a5b4fc;overflow-x:auto}
|
|
|
|
| 251 |
footer{text-align:center;color:var(--fg-2);font-size:13px;padding:40px 0 20px;border-top:1px solid var(--border)}
|
| 252 |
footer a{color:var(--fg-1);text-decoration:none}footer a:hover{color:var(--fg-0)}
|
| 253 |
@media (max-width:680px){.container{padding:24px 20px 60px}.demo-grid{grid-template-columns:1fr}.hero h1{font-size:2rem}}
|
| 254 |
+
@media (max-width:560px){.sbx-state-grid{grid-template-columns:1fr}}
|
| 255 |
</style>
|
| 256 |
</head><body>
|
| 257 |
<div class="container">
|
|
|
|
| 339 |
</div>
|
| 340 |
<textarea id="sc" style="width:100%;min-height:120px;font-family:'JetBrains Mono',ui-monospace,monospace;background:#030612;color:#c4b5fd;border:1px solid var(--border);border-radius:12px;padding:16px;font-size:13px;line-height:1.5;resize:vertical" placeholder="e.g. The release-notes commit has a typo. I want to git commit --amend and push..."></textarea>
|
| 341 |
<div style="margin-top:12px"><button class="btn btn-primary" onclick="runScenario()">▶ Run scenario</button></div>
|
| 342 |
+
<div id="result" class="sbx-card">
|
| 343 |
+
<div class="result-pane idle" style="margin-top:0">results will appear here.</div>
|
| 344 |
+
<pre id="resultRaw" class="sbx-raw"></pre>
|
| 345 |
+
</div>
|
| 346 |
</section>
|
| 347 |
<section>
|
| 348 |
<div class="section-head">
|
|
|
|
| 394 |
/**
 * Submit the scenario typed into the #sc textarea to POST /api/scenario and
 * render the JSON reply as a summary card inside #result.
 *
 * The card shows: matched task, scripted action id, predicted reversibility
 * level and confidence (both parsed out of the `canonical_action` markup),
 * reward, outcome/step, the model's <thinking> text, the critical-option
 * state grid, the locked-action list, and a collapsible raw-JSON dump.
 *
 * Does nothing when the textarea is empty or whitespace-only. Any failure
 * (network error, non-JSON body, unexpected payload) is shown in the pane
 * as "error: <message>" instead of being thrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function runScenario(){
  // Everything taken from the response (or from an Error message) is text,
  // not markup. Escape it before concatenating into innerHTML so a payload
  // such as "<img onerror=...>" is displayed literally instead of executed.
  const HTML_ENTITIES = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  const text = document.getElementById('sc').value.trim();
  if(!text) return;

  const pane = document.getElementById('result');
  pane.innerHTML = '<div class="result-pane idle" style="margin-top:0">▸ running…</div><pre id="resultRaw" class="sbx-raw"></pre>';

  try {
    const r = await fetch('/api/scenario',{method:'POST',headers:{'content-type':'application/json'},body:JSON.stringify({scenario:text})});
    const j = await r.json();

    // canonical_action is an XML-like string; pull the pieces we display out of it.
    const canonical = typeof j.canonical_action === 'string' ? j.canonical_action : '';
    const actionMatch = canonical.match(/<action\s+id="([^"]+)"([^\/]*?)\/>/i);
    const revMatch = canonical.match(/<reversibility\s+level="R(\d)"\s*confidence="([^"]+)"/i);
    const thinkingMatch = canonical.match(/<thinking>([\s\S]*?)<\/thinking>/i);

    const matchedTask = j.matched_task || '—';
    const actionId = actionMatch ? actionMatch[1] : '—';
    const predictedNum = revMatch ? revMatch[1] : '';
    const predictedLevel = predictedNum ? ('R' + predictedNum) : '—';
    const predictedConf = revMatch ? revMatch[2] : '—';
    const thinking = thinkingMatch ? thinkingMatch[1].trim() : 'No reasoning emitted.';

    // A missing or non-numeric reward is rendered as a neutral dash.
    const reward = (typeof j.reward === 'number') ? j.reward : null;
    const rewardText = (reward === null) ? '—' : ((reward >= 0 ? '+' : '') + reward.toFixed(3));
    const rewardClass = reward === null ? 'zero' : (reward >= 0.5 ? 'positive' : (reward < 0 ? 'negative' : 'zero'));
    const terminated = Boolean(j.terminated);

    const finalState = j.final_state_summary || {};
    const lockedActions = finalState.locked_actions && typeof finalState.locked_actions === 'object' ? finalState.locked_actions : {};
    const critical = finalState.critical_options && typeof finalState.critical_options === 'object' ? finalState.critical_options : {};
    // Keep a legitimate step 0; only a missing step becomes a dash.
    const step = (finalState.step === 0 || finalState.step) ? String(finalState.step) : '—';

    // Boolean options become AVAILABLE/LOCKED badges; anything else is shown verbatim.
    const criticalRows = Object.keys(critical).length ? Object.entries(critical).map(([k,v]) => {
      const isBool = typeof v === 'boolean';
      const label = isBool ? (v ? 'AVAILABLE' : 'LOCKED') : String(v);
      const klass = isBool ? (v ? 'ok' : 'bad') : 'neutral';
      return '<div class="sbx-state-row"><span class="sbx-state-key">' + esc(k) + '</span><span class="sbx-state-value ' + klass + '">' + esc(label) + '</span></div>';
    }).join('') : '<div class="sbx-state-row"><span class="sbx-state-key">none</span><span class="sbx-state-value neutral">—</span></div>';

    const lockedKeys = Object.keys(lockedActions);
    const lockedHtml = lockedKeys.length
      ? '<div class="sbx-locked-list">' + lockedKeys.map((k) => '<div class="sbx-locked-row">' + esc(k) + '</div>').join('') + '</div>'
      : '<div class="sbx-locked-empty">none (all downstream options preserved)</div>';

    // predictedNum is a single digit captured by \d, so it is safe inside the class attribute.
    pane.innerHTML =
      '<div class="sbx-hero">'
      + '<div class="sbx-hero-item"><span class="sbx-hero-label">matched task</span><span class="sbx-hero-value mono">' + esc(matchedTask) + '</span></div>'
      + '<div class="sbx-hero-item"><span class="sbx-hero-label">scripted action</span><span class="sbx-hero-value mono">' + esc(actionId) + '</span></div>'
      + '<div class="sbx-hero-item"><span class="sbx-hero-label">predicted R</span><span class="rlevel r' + (predictedNum || '1') + '">' + predictedLevel + '</span><span class="sbx-conf">conf ' + esc(predictedConf) + '</span></div>'
      + '<div class="sbx-hero-item"><span class="sbx-hero-label">reward</span><span class="sbx-hero-value sbx-reward ' + rewardClass + '">' + rewardText + '</span></div>'
      + '<div class="sbx-hero-item"><span class="sbx-hero-label">outcome</span><span class="sbx-hero-value">' + (terminated ? 'task completed' : 'task ongoing') + '</span><span class="sbx-conf">step ' + esc(step) + '</span></div>'
      + '</div>'
      + '<div class="sbx-section"><div class="sbx-section-head">reasoning</div><div class="sbx-reasoning">' + esc(thinking) + '</div></div>'
      + '<div class="sbx-section"><div class="sbx-section-head">critical state</div><div class="sbx-state-grid">' + criticalRows + '</div></div>'
      + '<div class="sbx-section"><div class="sbx-section-head">locked actions</div><div class="sbx-locked">' + lockedHtml + '</div></div>'
      + '<button class="sbx-toggle" onclick="toggleRaw()">show raw JSON ▾</button>'
      + '<pre id="resultRaw" class="sbx-raw" style="display:none"></pre>';

    // The <pre> was just recreated by the innerHTML assignment, so look it up
    // again; textContent keeps the JSON dump inert.
    const rawPane = document.getElementById('resultRaw');
    if(rawPane){ rawPane.textContent = JSON.stringify(j, null, 2); }
  } catch(e){
    pane.innerHTML = '<div class="result-pane idle" style="margin-top:0">error: ' + esc(e.message) + '</div><pre id="resultRaw" class="sbx-raw"></pre>';
  }
}
|
| 456 |
+
/**
 * Show or hide the raw-JSON <pre id="resultRaw"> in the #result card and
 * update the toggle button's label to match.
 *
 * Returns without doing anything when either the <pre> or the
 * `#result .sbx-toggle` button is not present in the document.
 */
function toggleRaw(){
  const raw = document.getElementById('resultRaw');
  const btn = document.querySelector('#result .sbx-toggle');
  if(!raw || !btn) return;
  // The .sbx-raw stylesheet rule hides the element by default, so a <pre>
  // with no inline display value is hidden. Only the inline 'block' that this
  // function sets means "currently visible".
  const showing = raw.style.display === 'block';
  raw.style.display = showing ? 'none' : 'block';
  btn.textContent = showing ? 'show raw JSON ▾' : 'hide raw JSON ▴';
}
|
| 464 |
</script>
|
| 465 |
</body></html>
|