discovery-env / dashboard.py
echoboi
.
bfc20f2
#!/usr/bin/env python3
"""Unified live dashboard — monitors all 3 discovery agent projects.
Watches:
base_client_v1/run_logs/ — Claude baseline agents
alpha_evolve_gpt/results/ — GPT standalone evolution runs
alpha_evolve/results/ — Docker-based evolutionary Claude agents
Usage:
python dashboard.py
python dashboard.py --port 8789
"""
import json
import os
import subprocess
import time
from pathlib import Path
from http.server import HTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs
import webbrowser
PORT = 8789
ROOT = Path(__file__).parent

# LOGS_BASE_DIR env var overrides all log paths (used on HF Space deployment).
# When set, each LOG_DIRS key maps to <LOGS_BASE_DIR>/<key>/.
_LOGS_BASE = os.environ.get("LOGS_BASE_DIR", "")

# Location of every log source inside a local checkout, expressed as path
# parts relative to ROOT. The keys double as the sub-directory names used
# under LOGS_BASE_DIR.
_LOCAL_LAYOUT = {
    "base_client_v1": ("base_client_v1", "run_logs"),
    "base_client_v2": ("base_client_v2", "run_logs"),
    # Logs produced before hint-leakage was fixed — shown with LEAKAGE warning
    "leakage_v1": ("logs_potential_leakage_in_prompt_hints", "base_client_v1"),
    "leakage_v2": ("logs_potential_leakage_in_prompt_hints", "base_client_v2"),
    "alpha_evolve_gpt": ("alpha_evolve_gpt", "results"),
    "alpha_evolve": ("alpha_evolve", "results"),
}

if _LOGS_BASE:
    _B = Path(_LOGS_BASE)
    LOG_DIRS = {key: _B / key for key in _LOCAL_LAYOUT}
else:
    LOG_DIRS = {
        key: ROOT.joinpath(*parts) for key, parts in _LOCAL_LAYOUT.items()
    }

# Create every configured log directory (and missing parents) at import time;
# directories that already exist are left alone.
for _log_dir in LOG_DIRS.values():
    _log_dir.mkdir(parents=True, exist_ok=True)
# ──────────────────────────────────────────────────────────────────────────────
# HTML / JS frontend
# ──────────────────────────────────────────────────────────────────────────────
DASHBOARD_HTML = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Discovery Agent Monitor</title>
<style>
* { margin:0; padding:0; box-sizing:border-box; }
body { background:#0a0a0f; color:#e0e0e0; font-family:'Consolas','Monaco',monospace; padding:16px; }
h1 { color:#7ecfff; font-size:20px; margin-bottom:12px; border-bottom:1px solid #222; padding-bottom:8px; }
/* ── Project tabs ── */
.proj-bar { display:flex; gap:8px; margin-bottom:12px; }
.proj-btn {
padding:8px 22px; border-radius:6px; border:2px solid transparent;
font-family:inherit; font-size:13px; cursor:pointer; background:#12121a; color:#888;
transition:all .15s;
}
.proj-btn:hover { color:#ccc; }
.proj-btn.active { font-weight:bold; }
.proj-btn[data-project="base_client"] { border-color:#4a9eff; }
.proj-btn[data-project="alpha_evolve"] { border-color:#6fdb6f; }
.proj-btn[data-project="base_client"].active { background:#0d1a2e; color:#4a9eff; }
.proj-btn[data-project="alpha_evolve"].active { background:#0d1f0d; color:#6fdb6f; }
/* LEAKAGE warning badge — logs from before hint-removal fix */
.mc-leakage { font-size:9px; padding:2px 6px; border-radius:3px; font-weight:bold;
display:inline-block; margin-top:3px; letter-spacing:.8px;
background:#2e0a0a; color:#ff5555; border:1px solid #ff555566; }
.model-card.leakage { border-color:#ff555533; }
.model-card.leakage:hover { border-color:#ff5555; }
.proj-count { margin-left:6px; font-size:11px; opacity:.7; }
/* ── Cloud/local filter bar ── */
.filter-bar { display:flex; gap:6px; margin-bottom:8px; align-items:center; }
.filter-lbl { color:#555; font-size:10px; text-transform:uppercase; letter-spacing:1px; margin-right:2px; }
.filter-btn {
padding:3px 14px; border-radius:12px; border:1px solid #2a2a3a;
font-family:inherit; font-size:11px; cursor:pointer; background:#0d0d15; color:#666;
transition:all .15s;
}
.filter-btn:hover { color:#aaa; border-color:#444; }
.filter-btn.active { background:#1a1a28; color:#7ecfff; border-color:#4a9eff; font-weight:bold; }
.filter-btn[data-filter="cloud"].active { color:#6fdb6f; border-color:#6fdb6f; background:#0d1f0d; }
.filter-btn[data-filter="local"].active { color:#f0ad4e; border-color:#f0ad4e; background:#1f170a; }
/* ── Problem tabs (level 2) ── */
.problem-bar { display:flex; gap:6px; margin-bottom:10px; flex-wrap:wrap; align-items:center; }
.problem-lbl { color:#555; font-size:10px; text-transform:uppercase; letter-spacing:1px; margin-right:4px; }
.problem-btn {
padding:5px 16px; border-radius:6px; border:1px solid #2a2a3a;
font-family:inherit; font-size:12px; font-weight:bold; cursor:pointer; background:#0d0d15; color:#666;
transition:all .15s;
}
.problem-btn:hover { color:#aaa; border-color:#555; }
.problem-btn.active { background:#1a1a28; color:#7ecfff; border-color:#4a9eff; }
.problem-btn .pb-count { font-size:10px; color:#555; margin-left:4px; font-weight:normal; }
/* ── Model card grid (level 3) ── */
.model-grid { display:flex; gap:8px; flex-wrap:wrap; margin-bottom:16px; min-height:80px; }
.model-card {
background:#12121a; border:1px solid #2a2a3a; border-radius:8px;
padding:10px 14px; cursor:pointer; min-width:130px; max-width:180px;
transition:all .15s; position:relative;
}
.model-card:hover { border-color:#555; }
.model-card.active { border-color:#7ecfff; background:#141424; box-shadow:0 0 12px rgba(126,207,255,.1); }
.mc-rank { position:absolute; top:6px; right:8px; font-size:9px; color:#444; font-weight:bold; }
.mc-rank.gold { color:#f0ad4e; }
.mc-name { font-size:11px; color:#aaa; font-weight:bold; margin-bottom:6px;
white-space:nowrap; overflow:hidden; text-overflow:ellipsis; max-width:150px; }
.mc-acc { font-size:22px; font-weight:bold; color:#6fdb6f; line-height:1; }
.mc-acc.warn { color:#f0ad4e; }
.mc-acc.fail { color:#e94560; }
.mc-acc.none { color:#444; font-size:16px; }
.mc-sub { font-size:9px; color:#555; margin-top:2px; }
.mc-status { font-size:10px; color:#666; margin-top:6px; display:flex; align-items:center; gap:4px; flex-wrap:wrap; }
.mc-dot { display:inline-block; width:6px; height:6px; border-radius:50%; background:#888; flex-shrink:0; }
.mc-dot.live { background:#6fdb6f; animation:pulse 1.5s infinite; }
.mc-flag { font-size:9px; color:#f0ad4e; background:rgba(240,173,78,.12);
border:1px solid rgba(240,173,78,.3); border-radius:3px; padding:0 4px; }
.no-models { color:#555; font-size:13px; padding:24px; }
/* ── Legacy tab bar (hidden, kept for internal bookkeeping) ── */
.tab-bar { display:none; }
.tab-btn { display:none; }
.tab-dot { display:inline-block; width:6px; height:6px; border-radius:50%; margin-right:5px; background:#888; }
.tab-dot.live { background:#6fdb6f; animation:pulse 1.5s infinite; }
.tab-score { margin-left:6px; font-size:10px; color:#6fdb6f; }
.no-agents { color:#555; font-size:14px; padding:40px; text-align:center; }
/* ── Stat boxes ── */
.top-bar { display:flex; gap:12px; margin-bottom:16px; flex-wrap:wrap; }
.stat-box { background:#12121a; border:1px solid #2a2a3a; border-radius:8px; padding:10px 16px; min-width:100px; }
.stat-label { color:#888; font-size:10px; text-transform:uppercase; letter-spacing:1px; }
.stat-value { color:#7ecfff; font-size:24px; font-weight:bold; margin-top:2px; }
.stat-value.score { color:#6fdb6f; }
.stat-value.warn { color:#f0ad4e; }
/* ── Grid / heatmap ── */
.main-area { display:flex; gap:14px; flex-wrap:wrap; }
.panel { background:#12121a; border:1px solid #2a2a3a; border-radius:8px; padding:14px; }
.panel h2 { color:#aaa; font-size:12px; margin-bottom:10px; text-transform:uppercase; letter-spacing:1px; }
.grid-container { display:inline-block; line-height:0; }
.grid-row { display:flex; }
.grid-cell { width:13px; height:13px; border:0.5px solid rgba(255,255,255,0.05); }
/* ── Timelines ── */
.timeline-panel { flex:1 1 260px; max-height:560px; overflow-y:auto; }
.tl-entry {
padding:5px 7px; margin-bottom:3px; border-radius:4px; font-size:11px;
border-left:3px solid #333; word-break:break-all;
}
.tl-entry.step { border-left-color:#7ecfff; background:rgba(126,207,255,.04); }
.tl-entry.submit { border-left-color:#6fdb6f; background:rgba(111,219,111,.08); }
.tl-entry.finalresult { border-left-color:#ffed00; background:rgba(255,237,0,.06); }
.tl-entry.randomstate { border-left-color:#5bc0de; background:rgba(91,192,222,.05); }
.tl-entry.sessionstart{ border-left-color:#888; background:rgba(100,100,100,.04); }
.tl-entry .tl-time { color:#555; margin-right:6px; }
.tl-entry .tl-act { color:#7ecfff; font-weight:bold; margin-right:5px; }
.tl-entry.submit .tl-act { color:#6fdb6f; }
.tl-entry.finalresult .tl-act{ color:#ffed00; }
.cmd-entry {
padding:4px 7px; margin-bottom:2px; border-radius:4px; font-size:11px;
border-left:3px solid #444; word-break:break-all;
}
.cmd-entry.Bash { border-left-color:#e94560; background:rgba(233,69,96,.06); }
.cmd-entry.Write { border-left-color:#f0ad4e; background:rgba(240,173,78,.06); }
.cmd-entry.Read { border-left-color:#666; background:rgba(100,100,100,.04); }
.cmd-entry.Edit { border-left-color:#4ecdc4; background:rgba(78,205,196,.06); }
.cmd-entry .cmd-tool { font-weight:bold; margin-right:5px; }
.cmd-entry.Bash .cmd-tool { color:#e94560; }
.cmd-entry.Write .cmd-tool { color:#f0ad4e; }
.cmd-entry.Edit .cmd-tool { color:#4ecdc4; }
.cmd-entry .cmd-time { color:#555; margin-right:4px; font-size:10px; }
.cmd-entry .cmd-detail { color:#999; }
/* ── Final result banner ── */
.final-banner {
display:none; border-radius:10px; padding:18px 24px; margin-bottom:14px;
animation:fadeIn .5s ease-in;
}
.final-banner.visible { display:block; }
.final-banner.ok { background:linear-gradient(135deg,#0a1a0a,#12221a); border:2px solid #6fdb6f; }
.final-banner.fail { background:linear-gradient(135deg,#1a0a0a,#221218); border:2px solid #e94560; }
.final-banner.warn { background:linear-gradient(135deg,#1a160a,#221e0a); border:2px solid #f0ad4e; }
.final-banner h2 { font-size:15px; margin-bottom:10px; text-transform:uppercase; letter-spacing:2px; }
.final-banner.ok h2 { color:#6fdb6f; }
.final-banner.fail h2 { color:#e94560; }
.final-banner.warn h2 { color:#f0ad4e; }
.incomplete-flag { display:inline-block; padding:2px 8px; border-radius:4px; font-size:10px;
background:rgba(240,173,78,.15); border:1px solid #f0ad4e; color:#f0ad4e;
margin-left:10px; vertical-align:middle; text-transform:none; letter-spacing:0; }
.final-metrics { display:flex; gap:28px; flex-wrap:wrap; }
.final-metric .fm-label { color:#888; font-size:10px; text-transform:uppercase; letter-spacing:1px; }
.final-metric .fm-value { font-size:28px; font-weight:bold; margin-top:2px; }
.final-metric .fm-sub { color:#666; font-size:10px; margin-top:2px; }
/* ── Evolve-specific view (alpha_evolve_gpt) ── */
#evolve-view { display:none; }
.gen-table { width:100%; border-collapse:collapse; font-size:12px; }
.gen-table th { color:#888; text-align:left; padding:5px 10px; border-bottom:1px solid #222; font-size:10px; text-transform:uppercase; letter-spacing:1px; }
.gen-table td { padding:5px 10px; border-bottom:1px solid #1a1a22; }
.gen-table tr:last-child td { border-bottom:none; }
.acc-bar { height:8px; background:#6fdb6f; border-radius:4px; display:inline-block; }
.variant-row { border-left:3px solid #333; padding:4px 8px; margin-bottom:2px; border-radius:3px; font-size:11px; word-break:break-all; }
.variant-row.best { border-left-color:#6fdb6f; background:rgba(111,219,111,.06); }
.variant-row.ok { border-left-color:#4a9eff; background:rgba(74,158,255,.04); }
.variant-row.error { border-left-color:#e94560; background:rgba(233,69,96,.05); }
.variant-score { color:#6fdb6f; font-weight:bold; margin-right:6px; }
.variant-strategy { color:#aaa; }
.gen-header { color:#f0ad4e; font-weight:bold; margin:8px 0 4px; font-size:12px; }
.code-view { background:#0d0d15; border:1px solid #2a2a3a; border-radius:6px; padding:12px; font-size:11px; color:#b0d0b0; max-height:300px; overflow-y:auto; white-space:pre-wrap; margin-top:8px; }
/* ── Phase tracker ── */
.phase-row { display:flex; gap:10px; margin-bottom:14px; flex-wrap:wrap; align-items:center; }
.phase-chip {
padding:5px 14px; border-radius:20px; font-size:11px; font-weight:bold;
border:1px solid #333; background:#12121a; color:#555; letter-spacing:.5px;
transition:all .3s;
}
.phase-chip.done { border-color:#4a9eff; color:#4a9eff; background:rgba(74,158,255,.08); }
.phase-chip.active { border-color:#f0ad4e; color:#f0ad4e; background:rgba(240,173,78,.12); animation:pulse 1.5s infinite; }
.phase-chip.forced { border-color:#e94560; color:#e94560; background:rgba(233,69,96,.08); }
.phase-chip.solved { border-color:#6fdb6f; color:#6fdb6f; background:rgba(111,219,111,.14); }
.phase-score { font-size:11px; color:#6fdb6f; margin-left:6px; }
.phase-history { max-height:260px; overflow-y:auto; }
.ph-entry {
padding:5px 10px; margin-bottom:3px; border-radius:4px; font-size:11px;
border-left:3px solid #333; display:flex; gap:8px; align-items:baseline;
}
.ph-entry.started { border-left-color:#4a9eff; background:rgba(74,158,255,.04); }
.ph-entry.completed { border-left-color:#6fdb6f; background:rgba(111,219,111,.04); }
.ph-entry.forced { border-left-color:#e94560; background:rgba(233,69,96,.06); }
.ph-entry.skipped { border-left-color:#555; background:rgba(100,100,100,.03); }
.ph-time { color:#555; font-size:10px; min-width:38px; }
.ph-cycle { color:#888; min-width:30px; }
.ph-phase { font-weight:bold; color:#7ecfff; min-width:90px; }
.ph-status { color:#aaa; min-width:60px; }
.ph-sum { color:#666; flex:1; }
/* ── Text doc panels ── */
.doc-panel { flex:1 1 340px; max-height:320px; overflow-y:auto; }
.doc-content {
font-size:10.5px; line-height:1.5; color:#bbb; white-space:pre-wrap; word-break:break-word;
background:#0d0d15; border:1px solid #1a1a28; border-radius:5px; padding:10px;
max-height:260px; overflow-y:auto;
}
.doc-empty { color:#444; font-size:12px; padding:20px; text-align:center; }
.mem-tabs { display:flex; gap:6px; margin-bottom:8px; }
.mem-tab { padding:4px 12px; border-radius:4px; font-size:11px; cursor:pointer;
background:#0d0d15; border:1px solid #222; color:#666; }
.mem-tab.active { border-color:#4a9eff; color:#4a9eff; background:rgba(74,158,255,.07); }
/* ── Status / misc ── */
.status-dot { display:inline-block; width:7px; height:7px; border-radius:50%; margin-right:5px; }
.status-dot.live { background:#6fdb6f; animation:pulse 1.5s infinite; }
.status-dot.done { background:#888; }
#status { font-size:12px; color:#666; margin-bottom:10px; }
@keyframes pulse { 0%,100%{opacity:1} 50%{opacity:.3} }
@keyframes fadeIn { from{opacity:0;transform:translateY(-8px)} to{opacity:1;transform:translateY(0)} }
</style>
</head>
<body>
<h1>Discovery Agent Monitor</h1>
<div id="status"><span class="status-dot live" id="dot"></span>Connecting…</div>
<!-- Project selector (level 1) -->
<div class="proj-bar">
<button class="proj-btn active" data-project="base_client"
onclick="switchProject('base_client')">base_client<span class="proj-count" id="cnt-base_client"></span></button>
<button class="proj-btn" data-project="alpha_evolve"
onclick="switchProject('alpha_evolve')">alpha_evolve<span class="proj-count" id="cnt-alpha_evolve"></span></button>
</div>
<!-- Cloud / local filter bar -->
<div class="filter-bar" id="filter-bar">
<span class="filter-lbl">Show:</span>
<button class="filter-btn active" data-filter="all" onclick="setFilter('all')">all</button>
<button class="filter-btn" data-filter="cloud" onclick="setFilter('cloud')">☁ cloud</button>
<button class="filter-btn" data-filter="local" onclick="setFilter('local')">⚑ local</button>
</div>
<!-- Problem tabs (level 2) -->
<div class="problem-bar" id="problem-bar"><span class="problem-lbl">Problem:</span></div>
<!-- Model card grid (level 3) — sorted by accuracy, click to select -->
<div class="model-grid" id="model-grid"></div>
<!-- Hidden legacy tab-bar (bookkeeping only) -->
<div class="tab-bar" id="tab-bar"></div>
<!-- ── ENV view (base_client / alpha_evolve) ── -->
<div id="env-view">
<div class="final-banner" id="final-banner"><h2></h2><div class="final-metrics" id="final-metrics"></div></div>
<div class="top-bar">
<div class="stat-box"><div class="stat-label">Problem</div><div class="stat-value" id="problem-id">--</div></div>
<div class="stat-box"><div class="stat-label">Model</div><div class="stat-value" id="model-id" style="font-size:18px">--</div></div>
<div class="stat-box"><div class="stat-label">Queries</div><div class="stat-value" id="query-count">0</div></div>
<div class="stat-box"><div class="stat-label">Elapsed</div><div class="stat-value" id="elapsed">0s</div></div>
<div class="stat-box"><div class="stat-label">Cmds</div><div class="stat-value" id="cmd-count">0</div></div>
<div class="stat-box"><div class="stat-label">Last Score</div><div class="stat-value score" id="last-score">--</div></div>
<div class="stat-box"><div class="stat-label">Claude Tokens</div><div class="stat-value" id="claude-tokens" style="font-size:18px">--</div><div style="font-size:9px;color:#666;margin-top:2px" id="claude-tokens-detail"></div></div>
</div>
<!-- Phase status chips -->
<div class="phase-row" id="phase-chips">
<span style="color:#666;font-size:11px;margin-right:4px">PHASES:</span>
<span class="phase-chip" id="chip-BOOT">BOOT</span>
<span style="color:#333">&#8594;</span>
<span class="phase-chip" id="chip-OBSERVE">OBSERVE</span>
<span style="color:#333">&#8594;</span>
<span class="phase-chip" id="chip-HYPOTHESIZE">HYPOTHESIZE</span>
<span style="color:#333">&#8594;</span>
<span class="phase-chip" id="chip-BATCH_TEST">BATCH TEST</span>
<span style="color:#333">&#8594;</span>
<span class="phase-chip" id="chip-ANALYZE">ANALYZE</span>
<span style="color:#333">&#8594;</span>
<span class="phase-chip" id="chip-SOLVED">SOLVED?</span>
<span id="cycle-badge" style="margin-left:12px;color:#888;font-size:11px"></span>
</div>
<!-- Phase history + Plan + Memories row -->
<div class="main-area" style="margin-bottom:14px">
<div class="panel phase-history" style="flex:1 1 240px">
<h2>Phase History</h2>
<div id="phase-history"></div>
</div>
<div class="panel doc-panel" style="flex:1 1 300px">
<h2>Current Plan</h2>
<div id="plan-content" class="doc-content"><span class="doc-empty">No PLAN.md yet</span></div>
</div>
<div class="panel doc-panel" style="flex:1 1 300px">
<h2>Memories</h2>
<div class="mem-tabs">
<button class="mem-tab active" onclick="switchMem('I')">memoryI (observations)</button>
<button class="mem-tab" onclick="switchMem('II')">memoryII (rules tested)</button>
</div>
<div id="mem-I-content" class="doc-content"><span class="doc-empty">No memoryI yet</span></div>
<div id="mem-II-content" class="doc-content" style="display:none"><span class="doc-empty">No memoryII yet</span></div>
</div>
</div>
<div class="main-area">
<div class="panel"><h2>Grid State</h2><div id="grid-container" class="grid-container"></div></div>
<div class="panel"><h2>Change Heatmap</h2><div id="heatmap-container" class="grid-container"></div></div>
<div class="panel timeline-panel"><h2>Env Actions</h2><div id="env-timeline"></div></div>
<div class="panel timeline-panel"><h2>Agent Commands</h2><div id="cmd-timeline"></div></div>
</div>
</div>
<!-- ── EVOLVE view (alpha_evolve_gpt) ── -->
<div id="evolve-view">
<div class="final-banner" id="evolve-final-banner"><h2></h2><div class="final-metrics" id="evolve-final-metrics"></div></div>
<div class="top-bar">
<div class="stat-box"><div class="stat-label">Problem</div><div class="stat-value" id="ev-problem">--</div></div>
<div class="stat-box"><div class="stat-label">Generation</div><div class="stat-value" id="ev-gen">0</div></div>
<div class="stat-box"><div class="stat-label">Best Acc</div><div class="stat-value score" id="ev-best-acc">--</div></div>
<div class="stat-box"><div class="stat-label">Best Total</div><div class="stat-value score" id="ev-best-total">--</div></div>
<div class="stat-box"><div class="stat-label">Variants</div><div class="stat-value" id="ev-variants">0</div></div>
<div class="stat-box"><div class="stat-label">Elapsed</div><div class="stat-value" id="ev-elapsed">0s</div></div>
</div>
<div class="main-area">
<div class="panel" style="flex:1 1 320px; max-height:600px; overflow-y:auto;">
<h2>Generation Log</h2>
<div id="evolve-timeline"></div>
</div>
<div class="panel" style="flex:1 1 360px;">
<h2>Best Code</h2>
<div class="code-view" id="best-code">No code yet…</div>
</div>
</div>
</div>
<script>
// ── Color helpers ──────────────────────────────────────────────────────────
// Hand-picked palettes keyed by the number of distinct cell values (maxV + 1).
// getColor() also stores generated palettes here, so a palette for a given
// size is built once instead of once per rendered cell.
const COLOR_MAPS = {
  2: ['#1a1a2e','#6fdb6f'],
  3: ['#1a1a2e','#e94560','#5bc0de'],
  5: ['#1a1a2e','#16213e','#0f3460','#e94560','#ffed00'],
};

/**
 * Return the CSS colour for cell value `v` on a grid whose largest value is
 * `maxV` (a falsy `maxV` is treated as 1).
 *
 * Values above the palette range are clamped to the last colour; anything
 * that does not index a palette entry (negative, fractional, non-numeric)
 * falls back to '#333'.
 */
function getColor(v, maxV) {
  const n = (maxV || 1) + 1;
  let palette = COLOR_MAPS[n];
  if (!palette) {
    // No hand-picked palette for this size: generate one and remember it.
    palette = genColors(n);
    COLOR_MAPS[n] = palette;
  }
  return palette[Math.min(v, palette.length - 1)] || '#333';
}
/**
 * Build a palette of `n` HSL colour strings. Entry i uses hue
 * floor(i/n * 300) and lightness 20 + (i/n) * 50 at a fixed 60% saturation.
 */
function genColors(n) {
  const slots = Array.from({ length: n });
  return slots.map((_, i) => {
    const hue = (i / n * 300) | 0;
    const lightness = 20 + (i / n) * 50;
    return `hsl(${hue},60%,${lightness}%)`;
  });
}
/**
 * Map a change count `v` to a heatmap colour, scaled against `maxH`.
 * A falsy count gets the fixed background '#111118'; otherwise the ratio
 * v / max(maxH, 1), capped at 1, drives red up and green/blue down.
 */
function heatColor(v, maxH) {
  if (!v) return '#111118';
  const t = Math.min(v / Math.max(maxH, 1), 1);
  const cool = 1 - t;
  const red = Math.round(255 * t);
  const green = Math.round(80 * cool);
  const blue = Math.round(255 * cool * .5);
  return `rgb(${red},${green},${blue})`;
}
/**
 * Replace the contents of `el` with one `.grid-row` div per row of `grid`,
 * each holding one `.grid-cell` div per value. `colorFn(value)` supplies the
 * cell background; the raw value is also exposed as the cell tooltip.
 */
function renderGrid(el, grid, colorFn) {
  const makeCell = (value) => {
    const cell = document.createElement('div');
    cell.className = 'grid-cell';
    cell.style.background = colorFn(value);
    cell.title = value;
    return cell;
  };

  el.innerHTML = '';
  for (const cells of grid) {
    const rowEl = document.createElement('div');
    rowEl.className = 'grid-row';
    for (const value of cells) rowEl.appendChild(makeCell(value));
    el.appendChild(rowEl);
  }
}
// ── Cloud vs local classification ──────────────────────────────────────────
// Substrings that mark an agent id as a cloud-hosted model.
const CLOUD_PATTERNS = ['sonnet','haiku','opus','minimax','MiniMax','claude','gpt-4','gpt-3'];

/**
 * Return true when `id` contains none of CLOUD_PATTERNS (compared
 * case-insensitively), i.e. the model is treated as local.
 */
function isLocalModel(id) {
  const needle = id.toLowerCase();
  return !CLOUD_PATTERNS.some(pattern => needle.includes(pattern.toLowerCase()));
}
// ── Parse agent ID into {problem, model} ──────────────────────────────────
// IDs prefixed: "v1::", "v2::", "lk1::", "lk2::" (lk = leakage/contaminated)
/**
 * Split an agent id into `{problem, model}`.
 * An optional "v1::" / "v2::" / "lk1::" / "lk2::" prefix is dropped first.
 * The remainder must look like "G<digits>_<model>"; anything else is filed
 * under problem 'OTHER' with the whole remainder as the model.
 */
function parseAgentId(id) {
  const prefixed = id.match(/^(v[12]|lk[12])::(.*)/);
  const raw = prefixed ? prefixed[2] : id;
  const parts = raw.match(/^(G\d+)_(.+)$/);
  return parts
    ? { problem: parts[1], model: parts[2] }
    : { problem: 'OTHER', model: raw };
}
// ── Normalize accuracy (some logs store as 0-100, we want 0-1) ────────────
/**
 * Normalise an accuracy to the 0-1 range.
 * Falsy input yields 0; values above 1.5 are taken to be percentages and
 * divided by 100; everything else is returned unchanged.
 */
function normAcc(v) {
  if (v) {
    return v > 1.5 ? v / 100 : v;
  }
  return 0;
}
// ── State ──────────────────────────────────────────────────────────────────
// Per-project UI state, keyed by the project name used on the project tabs.
//   type          - 'env' or 'mixed' (a mixed project has both runs and agents)
//   agents / runs - records keyed by agent / run id
//   active        - id of the currently selected agent or run, if any
//   knownIds      - ids that already have a (hidden) legacy tab button
//   filter        - 'all' | 'cloud' | 'local'
//   activeProblem - currently selected problem tab, if any
const projects = {
  base_client: {
    type: 'env',
    agents: {},
    active: null,
    knownIds: new Set(),
    filter: 'all',
    activeProblem: null,
  },
  alpha_evolve: {
    type: 'mixed',
    runs: {},
    agents: {},
    active: null,
    knownIds: new Set(),
    filter: 'all',
    activeProblem: null,
  },
};

// Name of the project whose tab is currently selected.
let activeProject = 'base_client';
// Map prefixed merged ID → source API project
/**
 * Return the source API project for a merged agent id, based on its
 * "v1::" / "v2::" / "lk1::" / "lk2::" prefix. Ids without such a prefix
 * belong to the currently active project.
 */
function agentSourceProject(id) {
  const SOURCE_BY_PREFIX = {
    v1: 'base_client_v1',
    v2: 'base_client_v2',
    lk1: 'leakage_v1',
    lk2: 'leakage_v2',
  };
  const hit = id.match(/^(v[12]|lk[12])::/);
  return (hit && SOURCE_BY_PREFIX[hit[1]]) || activeProject;
}
// ── Get best accuracy for a run/agent (0-1 range) ─────────────────────────
/**
 * Best accuracy (0-1) recorded for a run/agent record.
 * Without a final result this is `item.bestAccuracy` (0 when unset); with
 * one, it is the larger of that and the normalised
 * `finalResult.result.functional_accuracy`. A missing record scores 0.
 */
function getBestAcc(item) {
  if (!item) return 0;
  if (!item.finalResult) return item.bestAccuracy || 0;
  const finalFields = item.finalResult.result || {};
  const finalAcc = normAcc(finalFields.functional_accuracy || 0);
  return Math.max(finalAcc, item.bestAccuracy || 0);
}
// ── Problem helpers ────────────────────────────────────────────────────────
/**
 * Sorted list of the distinct problem names found among the active
 * project's ids (runs for an 'evolve' project, agents otherwise).
 */
function getProblems() {
  const proj = projects[activeProject];
  const store = proj.type === 'evolve' ? proj.runs : proj.agents;
  const names = Object.keys(store).map(id => parseAgentId(id).problem);
  return Array.from(new Set(names)).sort();
}
/**
 * Ids in the active project's store (runs for an 'evolve' project, agents
 * otherwise) whose parsed problem name equals `problem`.
 */
function getAgentsForProblem(problem) {
  const proj = projects[activeProject];
  const store = proj.type === 'evolve' ? proj.runs : proj.agents;
  const matches = [];
  for (const id of Object.keys(store)) {
    if (parseAgentId(id).problem === problem) matches.push(id);
  }
  return matches;
}
// ── Rebuild problem tab bar ────────────────────────────────────────────────
/**
 * Re-render the problem tab bar for the active project: one button per
 * problem, labelled with the problem name and its agent count. If the
 * stored active problem is unset or no longer present, the first problem
 * becomes active.
 */
function rebuildProblemBar() {
  const bar = document.getElementById('problem-bar');
  const problems = getProblems();
  bar.innerHTML = '<span class="problem-lbl">Problem:</span>';
  if (problems.length === 0) return;

  const proj = projects[activeProject];
  const stillValid = proj.activeProblem && problems.includes(proj.activeProblem);
  if (!stillValid) proj.activeProblem = problems[0];

  problems.forEach(problem => {
    const count = getAgentsForProblem(problem).length;
    const button = document.createElement('button');
    button.className = problem === proj.activeProblem ? 'problem-btn active' : 'problem-btn';
    button.innerHTML = problem + `<span class="pb-count">${count}</span>`;
    button.onclick = () => switchProblem(problem);
    bar.appendChild(button);
  });
}
/**
 * Make `prob` the active problem of the current project, then refresh the
 * problem bar and model grid and auto-select a model for it.
 *
 * The bar is re-rendered from `activeProblem` rather than matched by button
 * text: a button's textContent is the problem name followed by its count
 * digits, so a prefix test on it would also highlight e.g. "G10" (or "G1"
 * with a count starting in 2) when "G1"/"G12" is selected.
 */
function switchProblem(prob) {
  const p = projects[activeProject];
  p.activeProblem = prob;
  rebuildProblemBar();
  rebuildModelGrid();
  selectTopModel();
}
// ── Rebuild model card grid ────────────────────────────────────────────────
/**
 * Re-render the model card grid for the active project's active problem.
 *
 * Cards are filtered by the project's cloud/local filter, sorted by best
 * accuracy (highest first) and ranked in that order. Each card shows the
 * model name, best accuracy, run status, an INCOMPLETE flag for runs that
 * stopped without a final result, and a LEAKAGE badge for flagged records.
 * Clicking a card selects that agent via switchTab().
 */
function rebuildModelGrid() {
  const p = projects[activeProject];
  const grid = document.getElementById('model-grid');
  grid.innerHTML = '';
  if (!p.activeProblem) {
    grid.innerHTML = '<div class="no-models">No problems found</div>';
    return;
  }

  const f = p.filter || 'all';
  let agents = getAgentsForProblem(p.activeProblem);
  if (f !== 'all') {
    agents = agents.filter(id => f === 'local' ? isLocalModel(id) : !isLocalModel(id));
  }
  if (!agents.length) {
    grid.innerHTML = '<div class="no-models">No agents match current filter</div>';
    return;
  }

  const store = p.type === 'evolve' ? p.runs : p.agents;
  // Sort by best accuracy descending
  agents.sort((a, b) => getBestAcc(store[b]) - getBestAcc(store[a]));

  agents.forEach((id, idx) => {
    const item = store[id];
    if (!item) return;
    const { model } = parseAgentId(id);
    const acc = getBestAcc(item);
    const hasFinal = !!item.finalResult;
    const isIncomplete = item.done && !hasFinal;
    const isActive = p.active === id;
    const isLeakage = !!item.leakage;
    const accPct = acc > 0 ? (acc * 100).toFixed(1) + '%' : '--';
    const accClass = acc === 0 ? ' none' : acc >= 0.8 ? '' : acc >= 0.4 ? ' warn' : ' fail';
    const dotCls = item.done ? 'done' : 'live';
    const statusTxt = item.done ? (hasFinal ? 'complete' : 'stopped early') : 'running';
    const subTxt = hasFinal ? 'final result' : (acc > 0 ? 'best submit' : 'no submits yet');
    const rankCls = idx === 0 ? ' gold' : '';
    const badgeHtml = isLeakage ? `<span class="mc-leakage">⚠ LEAKAGE</span>` : '';

    const card = document.createElement('div');
    card.className = 'model-card' + (isActive ? ' active' : '') + (isLeakage ? ' leakage' : '');
    card.title = isLeakage ? id + ' — run before hint-leakage fix (results may be biased)' : id;
    card.onclick = () => switchTab(id);
    card.innerHTML = `
      <span class="mc-rank${rankCls}">#${idx + 1}</span>
      <div class="mc-name"></div>
      ${badgeHtml}
      <div class="mc-acc${accClass}">${accPct}</div>
      <div class="mc-sub">${subTxt}</div>
      <div class="mc-status">
        <span class="mc-dot ${dotCls}"></span>${statusTxt}
        ${isIncomplete ? '<span class="mc-flag">INCOMPLETE</span>' : ''}
      </div>`;
    // The model name comes from the agent id, so it is inserted as text
    // rather than markup: a name containing '<' or '&' must not be parsed
    // as HTML.
    card.querySelector('.mc-name').textContent = model;
    grid.appendChild(card);
  });
}
// ── Auto-select top performing model in current problem ───────────────────
/**
 * Select the highest-accuracy agent of the active problem (after applying
 * the cloud/local filter), but only when nothing is selected yet or the
 * current selection belongs to a different problem.
 */
function selectTopModel() {
  const proj = projects[activeProject];
  if (!proj.activeProblem) return;

  const mode = proj.filter || 'all';
  let candidates = getAgentsForProblem(proj.activeProblem);
  if (mode !== 'all') {
    const wantLocal = mode === 'local';
    candidates = candidates.filter(id => isLocalModel(id) === wantLocal);
  }
  if (candidates.length === 0) return;

  const store = proj.type === 'evolve' ? proj.runs : proj.agents;
  candidates.sort((a, b) => getBestAcc(store[b]) - getBestAcc(store[a]));

  // Keep the current selection if it already belongs to this problem.
  const selectedProblem = proj.active ? parseAgentId(proj.active).problem : null;
  const selectionIsHere = proj.active && selectedProblem === proj.activeProblem;
  if (!selectionIsHere) switchTab(candidates[0]);
}
/**
 * Create a blank env-agent record for `id`.
 * `sourceProject` defaults to the active project; `leakage` is coerced to a
 * boolean. All counters start at zero, the grid defaults to 20x20 with a
 * maximum cell value of 1, and the entry lists start empty.
 */
function newEnvAgent(id, sourceProject, leakage) {
  return {
    id,
    sourceProject: sourceProject || activeProject,
    leakage: Boolean(leakage),
    envOffset: 0,
    cmdOffset: 0,
    cmdTotal: 0,
    maxVal: 1,
    gridRows: 20,
    gridCols: 20,
    heatmap: null,
    latestState: null,
    sessionStart: null,
    problemId: '?',
    modelId: '?',
    queryCount: 0,
    elapsed: '0s',
    lastScore: null,
    bestAccuracy: 0,
    finalResult: null,
    done: false,
    incomplete: false,
    envEntries: [],
    cmdEntries: [],
    claudeIn: 0,
    claudeOut: 0,
  };
}
/**
 * Create a blank evolve-run record for `id`: zeroed counters and scores,
 * unknown problem ('?'), no best code, and an empty event list.
 */
function newEvolveRun(id) {
  return {
    id,
    offset: 0,
    problem: '?',
    gen: 0,
    bestAcc: 0,
    bestTotal: 0,
    variants: 0,
    elapsed: '0s',
    done: false,
    bestCode: '',
    events: [],
    solvedGen: null,
    bestAccuracy: 0,
  };
}
// ── Project / tab switching ────────────────────────────────────────────────
/**
 * Store `f` ('all' | 'cloud' | 'local') as the active project's filter,
 * highlight the matching filter button and re-render the model grid.
 */
function setFilter(f) {
  projects[activeProject].filter = f;
  for (const button of document.querySelectorAll('.filter-btn')) {
    button.classList.toggle('active', button.dataset.filter === f);
  }
  rebuildModelGrid();
}
/**
 * Switch the dashboard to project `proj`.
 * Updates the project and filter button highlights, shows the evolve view
 * (for an 'evolve' project, or a 'mixed' project whose active id is a run)
 * or the env view otherwise, rebuilds the problem bar and model grid, and
 * then re-renders the active item or auto-selects one if none is active.
 */
function switchProject(proj) {
  activeProject = proj;
  for (const button of document.querySelectorAll('.proj-btn')) {
    button.classList.toggle('active', button.dataset.project === proj);
  }

  const state = projects[proj];
  const showEvolve = state.type === 'evolve'
    || (state.type === 'mixed' && state.active && !!state.runs[state.active]);
  document.getElementById('env-view').style.display = showEvolve ? 'none' : 'block';
  document.getElementById('evolve-view').style.display = showEvolve ? 'block' : 'none';

  const filter = state.filter || 'all';
  for (const button of document.querySelectorAll('.filter-btn')) {
    button.classList.toggle('active', button.dataset.filter === filter);
  }

  rebuildProblemBar();
  rebuildModelGrid();

  if (!state.active) {
    selectTopModel();
    return;
  }
  if (showEvolve) {
    renderEvolveRun(state.active);
  } else {
    renderEnvAgent(state.active);
  }
}
/**
 * Rebuild the hidden legacy tab bar from scratch for the active project,
 * registering every run and/or agent id (both for a 'mixed' project), then
 * refresh the problem bar and model grid and auto-select a model if none is
 * active.
 */
function rebuildTabBar() {
  const proj = projects[activeProject];
  document.getElementById('tab-bar').innerHTML = '';
  proj.knownIds = new Set();

  let stores;
  if (proj.type === 'mixed') stores = [proj.runs, proj.agents];
  else if (proj.type === 'evolve') stores = [proj.runs];
  else stores = [proj.agents];

  stores.forEach(store => {
    Object.keys(store).forEach(id => _addTabInternal(id));
  });

  rebuildProblemBar();
  rebuildModelGrid();
  if (!proj.active) selectTopModel();
}
/**
 * Register `id` with the active project and append its (hidden) legacy tab
 * button, unless the id is already known. Clicking the button selects the
 * id via switchTab().
 */
function _addTabInternal(id) {
  const { knownIds } = projects[activeProject];
  if (knownIds.has(id)) return;
  knownIds.add(id);

  const button = document.createElement('button');
  button.className = 'tab-btn';
  button.dataset.agent = id;
  button.onclick = () => switchTab(id);
  document.getElementById('tab-bar').appendChild(button);
}
/**
 * Ensure `id` is registered with the active project.
 * For an already-known id only the model grid is refreshed (to pick up new
 * scores); for a new id the problem bar is rebuilt too, and a model is
 * auto-selected when nothing is active yet.
 */
function addTab(id) {
  const proj = projects[activeProject];
  const alreadyKnown = proj.knownIds.has(id);
  _addTabInternal(id);

  if (alreadyKnown) {
    rebuildModelGrid(); // refresh scores
    return;
  }
  rebuildProblemBar();
  rebuildModelGrid();
  if (!proj.active) selectTopModel();
}
/**
 * Refresh the live/done indicator for an agent. The indicator lives on the
 * model cards, so this re-renders the whole model grid; `id` is not used.
 */
function updateTabDot(id) {
  rebuildModelGrid();
}
/**
 * Select agent/run `id` in the active project and render it.
 *
 * The evolve view is used for an 'evolve' project, or for a 'mixed' project
 * when `id` is one of its runs; otherwise the env view is used. This is the
 * same rule switchProject() applies, so a run selected inside a mixed
 * project is rendered by renderEvolveRun() with the matching view visible
 * instead of being handed to renderEnvAgent(), which has no record for it.
 */
function switchTab(id) {
  const p = projects[activeProject];
  p.active = id;
  rebuildModelGrid(); // update active card highlight

  const useEvolve = p.type === 'evolve' || (p.type === 'mixed' && !!p.runs[id]);
  document.getElementById('env-view').style.display = useEvolve ? 'none' : 'block';
  document.getElementById('evolve-view').style.display = useEvolve ? 'block' : 'none';

  if (useEvolve) renderEvolveRun(id);
  else renderEnvAgent(id);
}
// ── ENV rendering ──────────────────────────────────────────────────────────
/**
 * Fold one environment-log `entry` into the agent record `ag` (mutated in
 * place).
 *
 * - 'session_start' resets problem id, value range, grid size, heatmap and
 *   the session start timestamp.
 * - Any entry may update the query count, the elapsed label and the latest
 *   grid state; a 'step' with a previous state bumps the heatmap for every
 *   cell that changed.
 * - 'submit' records the last score and raises the best accuracy;
 *   'final_result' stores the entry, marks the agent done and raises the
 *   best accuracy.
 * The entry is then appended to `ag.envEntries`, which is capped at 300.
 */
function processEnvEntry(ag, entry) {
  const action = entry.action || '';

  if (action === 'session_start') {
    ag.problemId = entry.problem_id || '?';
    const shape = entry.shape_info || {};
    // `values` is read as "<low>-<high>"; only the part after the dash is kept.
    const [, upper] = (shape.values || '0-1').split('-');
    ag.maxVal = parseInt(upper || '1');
    ag.gridRows = shape.rows || 20;
    ag.gridCols = shape.cols || 20;
    ag.heatmap = Array.from({ length: ag.gridRows }, () => new Array(ag.gridCols).fill(0));
    // Back-date the start by entry.t when present (presumably seconds since
    // the session began — confirm against the log writer).
    const nowSec = Date.now() / 1000;
    ag.sessionStart = entry.t !== undefined ? nowSec - entry.t : nowSec;
  }

  if (entry.query_num !== undefined) ag.queryCount = entry.query_num;
  if (entry.t !== undefined) ag.elapsed = entry.t.toFixed(0) + 's';

  if (entry.state) {
    const previous = ag.latestState;
    ag.latestState = entry.state;
    if (action === 'step' && ag.heatmap && previous) {
      const rowLimit = Math.min(entry.state.length, ag.gridRows);
      for (let r = 0; r < rowLimit; r++) {
        const colLimit = Math.min(entry.state[r].length, ag.gridCols);
        for (let c = 0; c < colLimit; c++) {
          // A cell missing from the previous state counts as changed.
          const before = previous[r]?.[c] ?? -1;
          if (entry.state[r][c] !== before) ag.heatmap[r][c]++;
        }
      }
    }
  }

  if (entry.result) {
    if (action === 'submit') {
      ag.lastScore = entry.result.total || 0;
      // Track best accuracy seen from any submit
      const submitAcc = normAcc(entry.result.functional_accuracy || 0);
      if (submitAcc > ag.bestAccuracy) ag.bestAccuracy = submitAcc;
    }
    if (action === 'final_result') {
      ag.finalResult = entry;
      ag.done = true;
      const finalAcc = normAcc(entry.result.functional_accuracy || 0);
      if (finalAcc > ag.bestAccuracy) ag.bestAccuracy = finalAcc;
    }
  }

  ag.envEntries.push(entry);
  if (ag.envEntries.length > 300) ag.envEntries.shift();
}
// Paint the env-agent detail view (stat boxes, state grid, heatmap, both
// timelines, result banner, status dot) for agent `id` of the active project.
// Does nothing when the id is not a known agent.
function renderEnvAgent(id) {
  const agent = projects[activeProject].agents[id];
  if (!agent) return;
  const byId = elId => document.getElementById(elId);

  // The model label is the text after the last underscore of the id.
  const idParts = id.split('_');
  agent.modelId = idParts.length > 1 ? idParts[idParts.length - 1] : '?';

  byId('problem-id').textContent = agent.problemId;
  byId('model-id').textContent = agent.modelId;
  byId('query-count').textContent = agent.queryCount;
  byId('elapsed').textContent = agent.elapsed;
  byId('cmd-count').textContent = agent.cmdTotal;

  const scoreEl = byId('last-score');
  if (agent.lastScore === null) {
    scoreEl.textContent = '--';
    scoreEl.className = 'stat-value score';
  } else {
    scoreEl.textContent = agent.lastScore.toFixed(3);
    scoreEl.className = 'stat-value ' + (agent.lastScore >= 0.8 ? 'score' : 'warn');
  }

  // Token usage: totals above 9999 are abbreviated to one-decimal thousands.
  const tokenSum = agent.claudeIn + agent.claudeOut;
  let tokenText = '--';
  let tokenDetail = '';
  if (tokenSum > 0) {
    tokenText = tokenSum > 9999 ? (tokenSum / 1000).toFixed(1) + 'k' : tokenSum;
    tokenDetail = `in:${agent.claudeIn} out:${agent.claudeOut}`;
  }
  byId('claude-tokens').textContent = tokenText;
  byId('claude-tokens-detail').textContent = tokenDetail;

  if (agent.latestState) {
    renderGrid(byId('grid-container'), agent.latestState, v => getColor(v, agent.maxVal));
  }
  if (agent.heatmap) {
    const hottest = Math.max(...agent.heatmap.flat());
    renderGrid(byId('heatmap-container'), agent.heatmap, v => heatColor(v, hottest));
  }

  // Timelines are rebuilt newest-first, capped at 200 env / 300 command rows.
  const envTl = byId('env-timeline');
  envTl.innerHTML = '';
  agent.envEntries.slice(-200).reverse().forEach(rec => {
    envTl.appendChild(makeEnvEntry(rec));
  });
  const cmdTl = byId('cmd-timeline');
  cmdTl.innerHTML = '';
  agent.cmdEntries.slice(-300).reverse().forEach(rec => {
    cmdTl.appendChild(makeCmdEntry(rec, agent.sessionStart));
  });

  const banner = byId('final-banner');
  if (agent.finalResult) showFinalBanner(agent.finalResult, agent.problemId, agent);
  else if (agent.incomplete) showIncompleteBanner(agent);
  else banner.className = 'final-banner';

  byId('dot').className = 'status-dot ' + (agent.done ? 'done' : 'live');
}
// Build one row of the env timeline for log record `e`.
// The CSS class is the action name with its first underscore removed.
// The action and the detail text originate from log files, so both are passed
// through escHtml before being placed into innerHTML.
function makeEnvEntry(e) {
  const div = document.createElement('div');
  const act = e.action || '?';
  div.className = 'tl-entry ' + act.replace('_', '');
  let detail = '';
  if (act === 'step') {
    detail = `n=${e.n} | ${e.cells_changed} changed`;
  } else if (act === 'submit') {
    const r = e.result || {};
    detail = `acc=${(r.functional_accuracy || 0).toFixed(3)} total=${(r.total || 0).toFixed(3)}`;
  } else if (act === 'session_start') {
    detail = `problem=${e.problem_id}`;
  } else if (act === 'random_state') {
    detail = `seed=${e.seed}`;
  } else if (act === 'final_result') {
    const r = e.result || {};
    detail = `acc=${(normAcc(r.functional_accuracy || 0) * 100).toFixed(1)}% src=${e.source || '?'}`;
  }
  div.innerHTML = `<span class="tl-time">${(e.t || 0).toFixed(1)}s</span><span class="tl-act">${escHtml(act)}</span><span>${escHtml(detail)}</span>`;
  return div;
}
// Build one row of the command timeline for command-log record `e`.
// `sessionStart` (epoch seconds) turns the record's absolute `ts` into an
// elapsed offset; the time cell stays empty when either value is missing.
// Tool name and detail come from log files and are escaped with the shared
// escHtml helper (the tool name was previously inserted unescaped).
function makeCmdEntry(e, sessionStart) {
  const div = document.createElement('div');
  const tool = e.tool || '?';
  div.className = 'cmd-entry ' + tool;
  const elapsed = (sessionStart && e.ts) ? (e.ts - sessionStart).toFixed(1) + 's' : '';
  // Truncate first, then escape, so an entity is never cut in half.
  const detail = escHtml(String(e.detail || '').substring(0, 180));
  div.innerHTML = `<span class="cmd-time">${elapsed}</span><span class="cmd-tool">${escHtml(String(tool))}</span><span class="cmd-detail">${detail}</span>`;
  return div;
}
// Show the final-result banner for a finished env agent.
//   entry     - the 'final_result' log record (metrics are read from entry.result)
//   problemId - label shown in the heading
//   ag        - agent state; ag.incomplete switches the banner to the warning style
// Banner colour: warn when incomplete, fail below 50% accuracy, ok otherwise.
// Log-derived text is escaped before it reaches innerHTML, and the heading /
// delta label use a real em dash and Greek delta instead of mis-encoded bytes.
function showFinalBanner(entry, problemId, ag) {
  const banner = document.getElementById('final-banner');
  const metrics = document.getElementById('final-metrics');
  const r = entry.result || {};
  const acc = normAcc(r.functional_accuracy || 0);
  const pars = r.parsimony_bonus || 0;
  const eff = r.efficiency_bonus || 0;
  const total = r.total || 0;
  const incomplete = ag && ag.incomplete;
  const cls = incomplete ? 'warn' : acc < 0.5 ? 'fail' : 'ok';
  banner.className = `final-banner visible ${cls}`;
  const incFlag = incomplete ? '<span class="incomplete-flag">INCOMPLETE RUN</span>' : '';
  banner.querySelector('h2').innerHTML = `Final Result — ${escHtml(String(problemId))}${incFlag}`;
  // Value colour class: none at/above hi, ' warn' at/above lo, ' low' otherwise.
  function vc(v, lo, hi) { return v >= hi ? '' : v >= lo ? ' warn' : ' low'; }
  // Optional sub-fields fall back to '?' and are escaped like all log text.
  const sub = v => escHtml(String(v ?? '?'));
  metrics.innerHTML = `
<div class="final-metric"><div class="fm-label">Accuracy</div><div class="fm-value${vc(acc,.5,.8)}">${(acc*100).toFixed(1)}%</div><div class="fm-sub">${sub(r.correct_states)}/${sub(r.total_states)} states</div></div>
<div class="final-metric"><div class="fm-label">Total Score</div><div class="fm-value${vc(total,.5,.8)}">${total.toFixed(4)}</div><div class="fm-sub">acc+pars+eff</div></div>
<div class="final-metric"><div class="fm-label">Parsimony</div><div class="fm-value" style="font-size:20px;color:#4ecdc4">+${pars.toFixed(4)}</div><div class="fm-sub">ΔDL:${sub(r.delta_dl)}</div></div>
<div class="final-metric"><div class="fm-label">Efficiency</div><div class="fm-value" style="font-size:20px;color:#5bc0de">+${eff.toFixed(4)}</div><div class="fm-sub">${sub(r.queries_used)} queries</div></div>`;
}
// Show the warning banner for an agent whose run ended without a final result.
// Displays the best accuracy seen so far, the last submit score and the query
// count taken from the agent state `ag`. The problem id is log-derived and is
// escaped; the heading uses a real em dash instead of mis-encoded bytes.
function showIncompleteBanner(ag) {
  const banner = document.getElementById('final-banner');
  const metrics = document.getElementById('final-metrics');
  const bestAcc = ag.bestAccuracy || 0;
  banner.className = 'final-banner visible warn';
  banner.querySelector('h2').innerHTML = `Run Stopped Early — ${escHtml(String(ag.problemId))} <span class="incomplete-flag">INCOMPLETE</span>`;
  const bestText = bestAcc > 0 ? (bestAcc * 100).toFixed(1) + '%' : '--';
  const scoreText = ag.lastScore !== null ? ag.lastScore.toFixed(4) : '--';
  metrics.innerHTML = `
<div class="final-metric"><div class="fm-label">Best Accuracy</div><div class="fm-value warn">${bestText}</div><div class="fm-sub">from submits</div></div>
<div class="final-metric"><div class="fm-label">Last Score</div><div class="fm-value" style="font-size:20px">${scoreText}</div><div class="fm-sub">total</div></div>
<div class="final-metric"><div class="fm-label">Queries</div><div class="fm-value" style="font-size:20px">${escHtml(String(ag.queryCount))}</div><div class="fm-sub">before stop</div></div>`;
}
// ── EVOLVE rendering (alpha_evolve_gpt) ───────────────────────────────────
// Fold one parsed evolve-log record into the run state `run`.
// Handles the record types start, gen_start, variant, gen_end and done,
// tracks best accuracy / total / code, and appends the record to the bounded
// (500 entries) event history.
function processEvolveEntry(run, entry) {
  const kind = entry.type || '';

  if (kind === 'start') run.problem = entry.problem || '?';
  else if (kind === 'gen_start') run.gen = entry.gen || 0;

  // Any record carrying a relative timestamp refreshes the elapsed label;
  // a 'done' record may overwrite it below with its own `elapsed` field.
  if (entry.t !== undefined) run.elapsed = entry.t.toFixed(0) + 's';

  switch (kind) {
    case 'variant': {
      run.variants++;
      if (entry.acc > run.bestAcc) run.bestAcc = entry.acc;
      const variantTotal = entry.total || 0;
      if (variantTotal > run.bestTotal) {
        run.bestTotal = variantTotal;
        if (entry.code) run.bestCode = entry.code;
      }
      break;
    }
    case 'gen_end': {
      if (entry.best_acc > run.bestAcc) run.bestAcc = entry.best_acc;
      const genTotal = entry.best_total || 0;
      if (genTotal > run.bestTotal) run.bestTotal = genTotal;
      break;
    }
    case 'done':
      run.done = true;
      run.bestAcc = entry.best_acc || run.bestAcc;
      run.bestTotal = entry.best_total || run.bestTotal;
      run.solvedGen = entry.solved_gen || null;
      if (entry.best_code) run.bestCode = entry.best_code;
      if (entry.elapsed) run.elapsed = entry.elapsed.toFixed(0) + 's';
      break;
  }

  run.bestAccuracy = run.bestAcc; // kept in sync for model cards
  run.events.push(entry);
  if (run.events.length > 500) run.events.shift();
}
// Paint the evolution-run detail view (stat boxes, event timeline, best code,
// final banner) for run `id`.
// The run is looked up in the active project's runs store first, because that
// is where discoverAll files evolve runs and where pollActive reads them; the
// original `projects.alpha_evolve_gpt` store is kept as a fallback.
// Log-derived text is escaped before it reaches innerHTML, and user-visible
// symbols (em dash, play arrow, check mark) replace mis-encoded byte sequences.
function renderEvolveRun(id) {
  const active = projects[activeProject];
  const legacy = projects.alpha_evolve_gpt;
  const run = (active && active.runs && active.runs[id]) || (legacy && legacy.runs && legacy.runs[id]);
  if (!run) return;

  document.getElementById('ev-problem').textContent = run.problem;
  document.getElementById('ev-gen').textContent = run.gen;
  document.getElementById('ev-best-acc').textContent = run.bestAcc ? (run.bestAcc * 100).toFixed(1) + '%' : '--';
  document.getElementById('ev-best-total').textContent = run.bestTotal ? run.bestTotal.toFixed(4) : '--';
  document.getElementById('ev-variants').textContent = run.variants;
  document.getElementById('ev-elapsed').textContent = run.elapsed;
  document.getElementById('dot').className = 'status-dot ' + (run.done ? 'done' : 'live');

  // Timeline: newest first, at most the last 300 events.
  const tl = document.getElementById('evolve-timeline');
  tl.innerHTML = '';
  for (let i = run.events.length - 1; i >= Math.max(0, run.events.length - 300); i--) {
    const e = run.events[i];
    const t = e.type || '';
    if (t === 'gen_end' || t === 'gen_start') {
      const hdr = document.createElement('div');
      hdr.className = 'gen-header';
      if (t === 'gen_end') {
        hdr.innerHTML = `Gen ${escHtml(String(e.gen))} — best acc <b>${((e.best_acc || 0) * 100).toFixed(1)}%</b> total <b>${(e.best_total || 0).toFixed(4)}</b> (${escHtml(String(e.n_programs || 0))} programs)`;
      } else {
        hdr.textContent = `▶ Generation ${e.gen} starting…`;
      }
      tl.appendChild(hdr);
    } else if (t === 'variant') {
      const d = document.createElement('div');
      const cls = e.acc >= 1.0 ? 'best' : e.status === 'ok' ? 'ok' : 'error';
      d.className = 'variant-row ' + cls;
      const strategy = escHtml(String(e.strategy || '').substring(0, 100));
      d.innerHTML = `<span class="variant-score">${(e.acc * 100).toFixed(1)}%</span><span class="variant-strategy">${strategy}</span> <span style="color:#555;font-size:10px">len=${escHtml(String(e.code_len || '?'))} t=${(e.t || 0).toFixed(1)}s</span>`;
      tl.appendChild(d);
    } else if (t === 'done') {
      const d = document.createElement('div');
      d.className = 'gen-header';
      d.style.color = '#6fdb6f';
      // Plain text only, so textContent is sufficient (and needs no escaping).
      d.textContent = run.solvedGen
        ? `✓ Solved at generation ${run.solvedGen}! elapsed=${run.elapsed}`
        : `Done — best acc ${(run.bestAcc * 100).toFixed(1)}%`;
      tl.appendChild(d);
    } else if (t === 'start') {
      const d = document.createElement('div');
      d.className = 'gen-header';
      d.style.color = '#888';
      d.textContent = `Evolution started — problem ${e.problem}`;
      tl.appendChild(d);
    }
  }

  // Best code
  const codeEl = document.getElementById('best-code');
  codeEl.textContent = run.bestCode || 'No code yet…';

  // Final evolve banner
  const banner = document.getElementById('evolve-final-banner');
  const metrics = document.getElementById('evolve-final-metrics');
  if (run.done) {
    banner.className = 'final-banner visible ' + (run.bestAcc >= 0.8 ? 'ok' : 'fail');
    banner.querySelector('h2').textContent = run.solvedGen
      ? `✓ Solved gen ${run.solvedGen} — ${run.problem}`
      : `Evolution complete — ${run.problem}`;
    metrics.innerHTML = `
<div class="final-metric"><div class="fm-label">Best Accuracy</div><div class="fm-value" style="color:#6fdb6f">${(run.bestAcc*100).toFixed(1)}%</div></div>
<div class="final-metric"><div class="fm-label">Best Total</div><div class="fm-value" style="color:#6fdb6f">${run.bestTotal.toFixed(4)}</div></div>
<div class="final-metric"><div class="fm-label">Generations</div><div class="fm-value">${escHtml(String(run.gen))}</div></div>
<div class="final-metric"><div class="fm-label">Variants</div><div class="fm-value">${escHtml(String(run.variants))}</div></div>
<div class="final-metric"><div class="fm-label">Elapsed</div><div class="fm-value" style="font-size:18px">${escHtml(String(run.elapsed))}</div></div>`;
  } else {
    banner.className = 'final-banner';
  }
}
// ── Memory tab switch ─────────────────────────────────────────────────────
// Show memory pane `tab` ('I' or 'II'), hide the other one, and move the
// 'active' class to the matching tab button (first button = I, any other = II).
function switchMem(tab) {
  for (const key of ['I', 'II']) {
    const pane = document.getElementById(`mem-${key}-content`);
    pane.style.display = key === tab ? '' : 'none';
  }
  document.querySelectorAll('.mem-tab').forEach((btn, idx) => {
    const btnKey = idx === 0 ? 'I' : 'II';
    btn.classList.toggle('active', btnKey === tab);
  });
}
// ── Phase rendering ───────────────────────────────────────────────────────
// Phase names that have a chip element with id 'chip-<NAME>'.
const PHASE_ORDER = ['BOOT','OBSERVE','HYPOTHESIZE','BATCH_TEST','ANALYZE','SOLVED','DONE'];

// Render the phase history list (newest first), recolour the phase chips and
// update the cycle badge from `phases`, an array of phase events with the
// fields t, cycle, phase, status and summary.
// All event text comes from log files and is escaped before it is placed into
// innerHTML, matching how renderDoc treats document content.
function renderPhases(phases) {
  const hist = document.getElementById('phase-history');
  hist.innerHTML = '';
  // Reset all chips
  PHASE_ORDER.forEach(p => {
    const chip = document.getElementById('chip-' + p);
    if (chip) chip.className = 'phase-chip';
  });
  const esc = v => escHtml(String(v));
  let latestCycle = 0;
  for (const e of phases) {
    latestCycle = Math.max(latestCycle, e.cycle || 0);
    // Phase history entry
    const div = document.createElement('div');
    div.className = 'ph-entry ' + (e.status || '');
    const mins = Math.floor((e.t || 0) / 60), secs = Math.floor((e.t || 0) % 60);
    // Truncate the summary before escaping so an entity is never cut in half.
    const summary = escHtml(String(e.summary || '').substring(0, 80));
    div.innerHTML = `
<span class="ph-time">${mins>0?mins+'m':''}${secs}s</span>
<span class="ph-cycle">C${esc(e.cycle)}</span>
<span class="ph-phase">${esc(e.phase)}</span>
<span class="ph-status">${esc(e.status)}</span>
<span class="ph-sum">${summary}</span>`;
    hist.insertBefore(div, hist.firstChild);
    // Update chip: later events for the same phase override earlier ones.
    const chip = document.getElementById('chip-' + e.phase);
    if (chip) {
      if (e.phase === 'SOLVED' || e.phase === 'DONE') {
        chip.className = 'phase-chip solved';
      } else if (e.status === 'started') {
        chip.className = 'phase-chip active';
      } else if (e.status === 'forced') {
        chip.className = 'phase-chip forced';
      } else if (e.status === 'completed') {
        chip.className = 'phase-chip done';
      }
    }
  }
  const badge = document.getElementById('cycle-badge');
  if (badge) badge.textContent = latestCycle > 0 ? `cycle ${latestCycle}` : '';
}
// Escape the three HTML-significant characters & < > in `s`.
// Falsy input (null, undefined, '') yields the empty string.
function escHtml(s) {
  const entityFor = {'&': '&amp;', '<': '&lt;', '>': '&gt;'};
  const text = s || '';
  return text.replace(/[&<>]/g, ch => entityFor[ch]);
}
// Render a synced text document into element `elId`.
// Empty or whitespace-only content shows a placeholder. Otherwise each line is
// HTML-escaped and lightly coloured by its markdown-like prefix; the element's
// scroll position is preserved across the update.
function renderDoc(elId, content) {
  const target = document.getElementById(elId);
  if (!target) return;
  if (!content || !content.trim()) {
    target.innerHTML = '<span class="doc-empty">Not written yet</span>';
    return;
  }
  // [predicate, wrapping tag, colour] -- the first matching rule wins.
  const rules = [
    [l => l.startsWith('## '), 'b', '#7ecfff'],
    [l => l.startsWith('### '), 'b', '#aaa'],
    [l => l.startsWith('| '), 'span', '#888'],
    [l => /^\d+\. /.test(l), 'span', '#c0d0ff'],
    [l => l.startsWith('- '), 'span', '#b0d0b0'],
  ];
  const decorate = line => {
    const safe = escHtml(line);
    const hit = rules.find(rule => rule[0](line));
    if (!hit) return safe;
    const tag = hit[1], colour = hit[2];
    return `<${tag} style="color:${colour}">${safe}</${tag}>`;
  };
  const rendered = content.split('\n').map(line => decorate(line)).join('\n');
  // Preserve scroll position across updates
  const keptScroll = target.scrollTop;
  target.innerHTML = rendered;
  target.scrollTop = keptScroll;
}
// Self-rescheduling poller for the selected env agent's phase log, PLAN
// document and the two memory documents.
// It idles (retry in 2 s) while the active project has no env-type selection:
// the project must be of type 'env', or 'mixed' with a selected id that is not
// in its runs store. After a poll attempt it reschedules itself in 2.5 s.
async function pollPhasesAndDocs() {
const p = projects[activeProject];
const isEnvActive = p && (p.type==='env' || (p.type==='mixed' && p.active && !p.runs[p.active]));
if (!isEnvActive) { setTimeout(pollPhasesAndDocs, 2000); return; }
const id = p.active; if (!id) { setTimeout(pollPhasesAndDocs, 2000); return; }
const ag = p.agents[id];
// The server is addressed by the agent's source project (falling back to the
// active project) and by the id without the 'v1::' / 'v2::' / 'lk1::' / 'lk2::'
// prefix that discoverAll adds on the client side.
const srcProj = (ag && ag.sourceProject) || activeProject;
const rawId = id.replace(/^(?:v[12]|lk[12])::/, '');
try {
// The three documents are fetched concurrently.
const [prResp, planResp, memResp] = await Promise.all([
fetch(`/api/phases?project=${srcProj}&agent=${encodeURIComponent(rawId)}`),
fetch(`/api/plan?project=${srcProj}&agent=${encodeURIComponent(rawId)}`),
fetch(`/api/memories?project=${srcProj}&agent=${encodeURIComponent(rawId)}`),
]);
if (prResp.ok) {
const d = await prResp.json();
renderPhases(d);
// Extract best accuracy from phase summaries (catches stopped-early runs with no submit in env log)
if (ag) {
for (const e of d) {
if (e.phase === 'BATCH_TEST' && e.status === 'completed' && e.summary) {
// The summary carries a percentage, e.g. 'all_time_best=87.5%'; it is stored as a 0..1 fraction.
const m = e.summary.match(/all_time_best=([\d.]+)%/);
if (m) { const acc = parseFloat(m[1]) / 100; if (acc > ag.bestAccuracy) ag.bestAccuracy = acc; }
}
}
if (ag.incomplete) showIncompleteBanner(ag);
rebuildModelGrid(); // refresh cards after accuracy backfill from phases
}
}
if (planResp.ok){ const d = await planResp.json(); renderDoc('plan-content', d.content); }
if (memResp.ok) {
const d = await memResp.json();
renderDoc('mem-I-content', d.memoryI);
renderDoc('mem-II-content', d.memoryII);
}
// Network and parse errors are deliberately ignored; the next tick retries.
} catch(e) {}
setTimeout(pollPhasesAndDocs, 2500);
}
// ── Polling ────────────────────────────────────────────────────────────────
// Self-rescheduling discovery loop (every 2 s).
// Fetches /api/state (server project -> list of ids), files each id into the
// matching client-side project store, adds tabs for the active project,
// updates the per-project count badges and the global "N agent(s) running" line.
// The next run is scheduled in `finally`, so a non-OK response or an exception
// no longer stops discovery for the rest of the session (the early `return`
// on !resp.ok previously skipped the rescheduling call).
async function discoverAll() {
  try {
    const resp = await fetch('/api/state');
    if (resp.ok) {
      const data = await resp.json();
      for (const [proj, ids] of Object.entries(data)) {
        // Several server-side projects are merged into one client-side project;
        // a per-source prefix keeps ids from different sources distinct.
        const isLeakage = proj === 'leakage_v1' || proj === 'leakage_v2';
        const isBase = proj === 'base_client_v1' || proj === 'base_client_v2' || isLeakage;
        const isEvolveDir = proj === 'alpha_evolve_gpt';
        const mergedProj = isBase ? 'base_client' : isEvolveDir ? 'alpha_evolve' : proj;
        const prefix = proj === 'base_client_v1' ? 'v1' : proj === 'base_client_v2' ? 'v2'
          : proj === 'leakage_v1' ? 'lk1' : proj === 'leakage_v2' ? 'lk2' : null;
        const p = projects[mergedProj];
        if (!p) continue;
        const store = (p.type === 'evolve' || isEvolveDir) ? p.runs : p.agents;
        for (const rawId of ids) {
          const id = prefix ? `${prefix}::${rawId}` : rawId;
          if (!store[id]) store[id] = isEvolveDir ? newEvolveRun(id) : newEnvAgent(id, proj, isLeakage);
          if (mergedProj === activeProject) addTab(id);
        }
        // Update count badge
        if (isBase) {
          const cnt = document.getElementById('cnt-base_client');
          if (cnt) {
            const t = Object.keys(projects.base_client.agents).length;
            cnt.textContent = t ? `(${t})` : '';
          }
        } else {
          const cnt2 = document.getElementById('cnt-' + mergedProj);
          if (cnt2) {
            const mp = projects[mergedProj];
            const t = mp.type === 'mixed'
              ? Object.keys(mp.runs).length + Object.keys(mp.agents).length
              : ids.length;
            cnt2.textContent = t ? `(${t})` : '';
          }
        }
      }
      // Everything not yet marked done counts as running.
      const live = Object.values(projects).flatMap(p => {
        const stores = p.type === 'mixed' ? [p.runs, p.agents] : [p.type === 'evolve' ? p.runs : p.agents];
        return stores.flatMap(s => Object.values(s).filter(x => !x.done));
      });
      document.getElementById('status').innerHTML =
        `<span class="status-dot ${live.length?'live':'done'}" id="dot"></span>${live.length} agent(s) running`;
    }
  } catch (e) {
    // Best effort: network and parse errors are ignored, the next tick retries.
  } finally {
    setTimeout(discoverAll, 2000);
  }
}
// Self-rescheduling poller (every 700 ms) for the currently selected item of
// the active project. Fetches only the log lines after the stored offset,
// folds them into the item's state and re-renders its detail view. Nothing is
// fetched when there is no selection or the selected item is already done.
async function pollActive() {
const p = projects[activeProject];
const id = p.active; if (!id) { setTimeout(pollActive,700); return; }
// An item is an evolution run when the project is 'evolve', or is 'mixed' and lists the id in its runs store.
const isEvolveItem = p.type==='evolve' || (p.type==='mixed' && !!p.runs[id]);
if (isEvolveItem) {
const run = p.runs[id]; if (!run || run.done) { setTimeout(pollActive,700); return; }
try {
const r = await fetch(`/api/evolve-log?run=${encodeURIComponent(id)}&offset=${run.offset}`);
if (r.ok) {
const d = await r.json();
// Lines that fail to parse are skipped, but still count towards the offset below.
for (const line of (d.lines||[])) { try { processEvolveEntry(run, JSON.parse(line)); } catch(e){} }
run.offset += (d.lines||[]).length;
if (d.done) run.done = true;
}
} catch(e){}
renderEvolveRun(id);
} else {
const ag = p.agents[id]; if (!ag || ag.done) { setTimeout(pollActive,700); return; }
// Server-side project and id: the client-only 'v1::' / 'v2::' / 'lk1::' / 'lk2::' prefix is stripped.
const srcProj = ag.sourceProject || (p.type==='mixed' ? 'alpha_evolve' : activeProject);
const rawId = id.replace(/^(?:v[12]|lk[12])::/, '');
// 1) environment log
try {
const r = await fetch(`/api/log?project=${srcProj}&agent=${encodeURIComponent(rawId)}&offset=${ag.envOffset}`);
if (r.ok) {
const d = await r.json();
for (const line of (d.lines||[])) { try { processEnvEntry(ag, JSON.parse(line)); } catch(e){} }
ag.envOffset += (d.lines||[]).length;
// A log reported as done without a final result marks the run as incomplete.
if (d.done) { ag.done = true; if (!ag.finalResult) ag.incomplete = true; }
}
} catch(e){}
// 2) command log; the in-memory list is trimmed to the newest 400 once it exceeds 500
try {
const r = await fetch(`/api/cmd-log?project=${srcProj}&agent=${encodeURIComponent(rawId)}&offset=${ag.cmdOffset}`);
if (r.ok) {
const d = await r.json();
for (const line of (d.lines||[])) { try { ag.cmdEntries.push(JSON.parse(line)); ag.cmdTotal++; } catch(e){} }
ag.cmdOffset += (d.lines||[]).length;
if (ag.cmdEntries.length>500) ag.cmdEntries=ag.cmdEntries.slice(-400);
}
} catch(e){}
// 3) token usage totals (replaced, not accumulated, on every poll)
try {
const r = await fetch(`/api/claude-usage?project=${srcProj}&agent=${encodeURIComponent(rawId)}`);
if (r.ok) { const d=await r.json(); ag.claudeIn=d.input_tokens||0; ag.claudeOut=d.output_tokens||0; }
} catch(e){}
renderEnvAgent(id);
}
updateTabDot(id);
setTimeout(pollActive, 700);
}
// ── Poll all projects in background (not just active) ─────────────────────
// Self-rescheduling background poller (every 3 s) for everything that is not
// the selected item: the other agents of the active project (so their cards
// show data without a click) and all unfinished items of the other projects.
// Env agents are always addressed by their source project and by the id
// without the client-side 'v1::' / 'v2::' / 'lk1::' / 'lk2::' prefix. The
// branch for non-active projects used to send the merged project name and the
// prefixed id, which the server cannot resolve.
async function pollBackground() {
  const toServerId = id => id.replace(/^(?:v[12]|lk[12])::/, '');

  // Pull unread env-log lines for one agent into its state (best effort).
  const pullEnv = async (item, srcProj, id) => {
    try {
      const r = await fetch(`/api/log?project=${srcProj}&agent=${encodeURIComponent(toServerId(id))}&offset=${item.envOffset}`);
      if (!r.ok) return;
      const d = await r.json();
      const lines = d.lines || [];
      for (const line of lines) { try { processEnvEntry(item, JSON.parse(line)); } catch (e) {} }
      item.envOffset += lines.length;
      if (d.done) { item.done = true; if (!item.finalResult) item.incomplete = true; }
    } catch (e) {}
  };

  // Pull unread evolve-log lines for one run into its state (best effort).
  const pullEvolve = async (item, id) => {
    try {
      const r = await fetch(`/api/evolve-log?run=${encodeURIComponent(id)}&offset=${item.offset}`);
      if (!r.ok) return;
      const d = await r.json();
      const lines = d.lines || [];
      for (const line of lines) { try { processEvolveEntry(item, JSON.parse(line)); } catch (e) {} }
      item.offset += lines.length;
      if (d.done) item.done = true;
    } catch (e) {}
  };

  try {
    // Poll non-active agents within the active project.
    const ap = projects[activeProject];
    if (ap && ap.type !== 'evolve') {
      for (const [id, item] of Object.entries(ap.agents)) {
        if (id === ap.active || item.done) continue;
        await pullEnv(item, item.sourceProject || activeProject, id);
      }
      rebuildModelGrid();
    }
    // Poll every unfinished item of the other projects.
    for (const [proj, p] of Object.entries(projects)) {
      if (proj === activeProject) continue;
      const bgStores = p.type === 'mixed'
        ? [[p.runs, true], [p.agents, false]]
        : [[p.type === 'evolve' ? p.runs : p.agents, p.type === 'evolve']];
      for (const [store, isEv] of bgStores) {
        for (const [id, item] of Object.entries(store)) {
          if (item.done) continue;
          if (isEv) await pullEvolve(item, id);
          else await pullEnv(item, item.sourceProject || proj, id);
        }
      }
    }
  } finally {
    // Keep the loop alive even if a render helper throws.
    setTimeout(pollBackground, 3000);
  }
}
// Start the four independent polling loops: discovery runs immediately, the
// others begin after short, different delays (600 ms, 2000 ms, 1200 ms).
// Each loop reschedules itself with setTimeout.
discoverAll();
setTimeout(pollActive, 600);
setTimeout(pollBackground, 2000);
setTimeout(pollPhasesAndDocs, 1200);
</script>
</body>
</html>"""
# ──────────────────────────────────────────────────────────────────────────────
# Backend helpers
# ──────────────────────────────────────────────────────────────────────────────
def discover_agents(project: str) -> list[str]:
    """List the agent/run IDs found under a project's log directory.

    Unknown projects and missing directories yield an empty list.  For
    ``alpha_evolve_gpt`` an ID is any sub-directory holding
    ``evolve_log.jsonl``.  For every other project an ID is a non-hidden
    sub-directory holding at least one ``*.jsonl`` file; a sub-directory
    without one is searched one level deeper and its matches are reported as
    ``"<run>/<agent>"``.  If nothing is found but the project root itself
    holds ``*.jsonl`` files, the single ID ``"default"`` is returned.
    """
    base = LOG_DIRS.get(project)
    if not base or not base.exists():
        return []

    if project == "alpha_evolve_gpt":
        # Each subdirectory that contains evolve_log.jsonl is a run
        return [
            entry.name
            for entry in sorted(base.iterdir())
            if entry.is_dir() and (entry / "evolve_log.jsonl").exists()
        ]

    def holds_jsonl(folder) -> bool:
        return bool(list(folder.glob("*.jsonl")))

    # base_client and alpha_evolve: same env-log format
    found = []
    for entry in sorted(base.iterdir()):
        if not entry.is_dir() or entry.name.startswith("."):
            continue
        if holds_jsonl(entry):
            found.append(entry.name)
            continue
        # Nested: run_dir/agent_dir (alpha_evolve pattern)
        found.extend(
            f"{entry.name}/{child.name}"
            for child in sorted(entry.iterdir())
            if child.is_dir() and holds_jsonl(child)
        )

    # Fallback: .jsonl directly in root (unlikely but safe)
    if not found and holds_jsonl(base):
        found.append("default")
    return found
def resolve_env_log(project: str, agent_id: str) -> str | None:
"""Find the main env JSONL log for an env-type agent."""
d = LOG_DIRS[project]
if agent_id == "default":
agent_dir = d
else:
agent_dir = d / agent_id
pointer = agent_dir / "LATEST.txt"
if pointer.exists():
fname = Path(pointer.read_text().strip()).name
candidate = agent_dir / fname
if candidate.exists():
return str(candidate)
skip = {"agent_commands.jsonl", "gpt_usage.jsonl"}
logs = sorted(agent_dir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True)
for log in logs:
if log.name not in skip:
return str(log)
return None
def resolve_cmd_log(project: str, agent_id: str) -> str | None:
d = LOG_DIRS[project]
agent_dir = d if agent_id == "default" else d / agent_id
p = agent_dir / "agent_commands.jsonl"
return str(p) if p.exists() else None
def resolve_evolve_log(run_id: str) -> str | None:
p = LOG_DIRS["alpha_evolve_gpt"] / run_id / "evolve_log.jsonl"
return str(p) if p.exists() else None
# Most recent `docker ps` result and the time.time() value of the last refresh attempt.
_live_containers_cache: set[str] = set()
_live_containers_ts: float = 0.0


def get_live_containers() -> set[str]:
    """Return the names of the currently running Docker containers.

    The result of ``docker ps`` is reused for 3 seconds.  When the command
    fails for any reason the previous result is kept, and the failed attempt
    still counts as a refresh so the command is not retried on every call.
    """
    global _live_containers_cache, _live_containers_ts
    now = time.time()
    cache_age = now - _live_containers_ts
    if cache_age >= 3.0:
        command = ["docker", "ps", "--format", "{{.Names}}"]
        try:
            raw = subprocess.check_output(command, stderr=subprocess.DEVNULL, timeout=5)
            listing = raw.decode().strip()
            _live_containers_cache = set(listing.splitlines()) if listing else set()
        except Exception:
            # Deliberately best-effort (docker missing, non-zero exit, timeout):
            # keep serving the stale cache.
            pass
        _live_containers_ts = now
    return _live_containers_cache
def is_container_running(agent_id: str) -> bool:
    """Tell whether the container ``agent-<agent_id>`` is currently running.

    Slashes in *agent_id* are replaced by dashes to form the container name,
    which is then looked up in the set from ``get_live_containers()``.
    """
    expected_name = "agent-" + agent_id.replace("/", "-")
    running = get_live_containers()
    return expected_name in running
def read_log_lines(fpath: str | None, offset: int, agent_id: str = "") -> tuple[list[str], bool]:
if not fpath or not os.path.exists(fpath):
return [], False
with open(fpath, "r", encoding="utf-8", errors="replace") as f:
all_lines = f.readlines()
lines = all_lines[offset:]
# Done if log says so, OR if the Docker container is no longer running
log_done = bool(all_lines) and ('"final_result"' in all_lines[-1] or '"done"' in all_lines[-1])
container_gone = bool(agent_id) and not is_container_running(agent_id)
done = log_done or container_gone
return [l.strip() for l in lines if l.strip()], done
def read_phases(project: str, agent_id: str) -> list:
    """Load an agent's ``phases.jsonl`` as a list of parsed events.

    Blank lines and lines that are not valid JSON are skipped.  A missing
    file gives an empty list; a read error gives whatever was parsed before
    the error occurred.
    """
    root = LOG_DIRS[project]
    holder = root if agent_id == "default" else root / agent_id
    phases_file = holder / "phases.jsonl"
    if not phases_file.exists():
        return []

    events = []
    try:
        with open(phases_file, encoding="utf-8", errors="replace") as handle:
            for raw in handle:
                text = raw.strip()
                if not text:
                    continue
                try:
                    events.append(json.loads(text))
                except json.JSONDecodeError:
                    continue
    except OSError:
        pass
    return events
def read_doc(project: str, agent_id: str, filename: str) -> str:
    """Return the text of a synced document (PLAN.md, memoryI/II) from the
    agent's log directory, or an empty string if it is missing or unreadable."""
    root = LOG_DIRS[project]
    holder = root if agent_id == "default" else root / agent_id
    doc_path = holder / filename
    text = ""
    if doc_path.exists():
        try:
            text = doc_path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            text = ""
    return text
def parse_claude_usage(project: str, agent_id: str) -> dict:
    """Sum the token usage recorded in an agent's ``claude_logs/*.jsonl``.

    Only records with ``"type": "assistant"`` are counted.  Input tokens
    include the cache-creation and cache-read counters.  Unknown projects,
    missing directories, unreadable files and malformed records (invalid
    JSON, non-object lines, a missing or null ``message``/``usage``,
    non-numeric counters) contribute nothing instead of raising.

    Returns:
        ``{"input_tokens": int, "output_tokens": int}``
    """
    totals = {"input_tokens": 0, "output_tokens": 0}
    base = LOG_DIRS.get(project)
    if base is None:
        return totals
    agent_dir = base if agent_id == "default" else base / agent_id
    claude_dir = agent_dir / "claude_logs"
    if not claude_dir.is_dir():
        return totals

    def count(value) -> int:
        # bool is an int subclass; a stray true/false must not count as 1/0.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return 0
        return value

    input_keys = ("input_tokens", "cache_creation_input_tokens", "cache_read_input_tokens")
    for fpath in claude_dir.glob("*.jsonl"):
        try:
            with open(fpath, encoding="utf-8", errors="replace") as f:
                for line in f:
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(entry, dict) or entry.get("type") != "assistant":
                        continue
                    message = entry.get("message")
                    usage = message.get("usage") if isinstance(message, dict) else None
                    if not isinstance(usage, dict):
                        continue
                    totals["input_tokens"] += sum(count(usage.get(key)) for key in input_keys)
                    totals["output_tokens"] += count(usage.get("output_tokens"))
        except OSError:
            pass  # unreadable file: keep what has been counted so far
    return totals
# ──────────────────────────────────────────────────────────────────────────────
# HTTP handler
# ──────────────────────────────────────────────────────────────────────────────
class DashboardHandler(SimpleHTTPRequestHandler):
    """Serve the dashboard page and its read-only JSON API.

    Routes (all GET):
        ``/``, ``/index.html``  the embedded dashboard HTML
        ``/api/state``          ``{project: [agent/run ids]}``
        ``/api/log``            env log lines after ``offset`` plus ``done``
        ``/api/cmd-log``        command log lines after ``offset``
        ``/api/evolve-log``     evolve log lines after ``offset`` plus ``done``
        ``/api/claude-usage``   summed token usage
        ``/api/phases``         parsed phase events
        ``/api/plan``           PLAN.md content
        ``/api/memories``       the two memory documents

    Query parameters are untrusted: an unknown ``project``, an ``agent`` or
    ``run`` id that could escape the log directory, or a malformed ``offset``
    produces the same empty payload as a missing parameter instead of an
    unhandled exception.
    """

    @staticmethod
    def _param(qs, name):
        """Return the first value of query parameter *name*, or None."""
        values = qs.get(name)
        return values[0] if values else None

    @staticmethod
    def _offset(qs) -> int:
        """Return the ``offset`` parameter as a non-negative int (0 if absent or invalid)."""
        try:
            return max(int(qs.get("offset", ["0"])[0]), 0)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _safe_id(value):
        """Return *value* if it is a plain relative id, else None.

        Ids may contain ``/`` (nested ``run/agent`` directories) but no
        absolute paths, empty components, ``.``/``..`` or NUL bytes.
        """
        if not value or "\x00" in value:
            return None
        parts = value.replace("\\", "/").split("/")
        if any(part in ("", ".", "..") for part in parts):
            return None
        return value

    def _target(self, qs):
        """Return ``(project, agent)`` from the query, or ``(None, None)`` if either is missing or invalid."""
        project = self._param(qs, "project")
        agent = self._safe_id(self._param(qs, "agent"))
        if project not in LOG_DIRS or agent is None:
            return None, None
        return project, agent

    def do_GET(self):
        """Dispatch a GET request to the page or one of the JSON endpoints."""
        parsed = urlparse(self.path)
        qs = parse_qs(parsed.query)
        route = parsed.path

        if route in ("/", "/index.html"):
            self._html(DASHBOARD_HTML)
        elif route == "/api/state":
            self._json({proj: discover_agents(proj) for proj in LOG_DIRS})
        elif route == "/api/log":
            project, agent = self._target(qs)
            if project:
                fpath = resolve_env_log(project, agent)
                lines, done = read_log_lines(fpath, self._offset(qs), agent_id=agent)
                self._json({"lines": lines, "done": done})
            else:
                self._json({"lines": [], "done": False})
        elif route == "/api/cmd-log":
            project, agent = self._target(qs)
            if project:
                fpath = resolve_cmd_log(project, agent)
                lines, _ = read_log_lines(fpath, self._offset(qs))
                self._json({"lines": lines})
            else:
                self._json({"lines": []})
        elif route == "/api/evolve-log":
            run = self._safe_id(self._param(qs, "run"))
            if run:
                fpath = resolve_evolve_log(run)
                lines, done = read_log_lines(fpath, self._offset(qs), agent_id=run)
                self._json({"lines": lines, "done": done})
            else:
                self._json({"lines": [], "done": False})
        elif route == "/api/claude-usage":
            project, agent = self._target(qs)
            if project:
                self._json(parse_claude_usage(project, agent))
            else:
                self._json({"input_tokens": 0, "output_tokens": 0})
        elif route == "/api/phases":
            project, agent = self._target(qs)
            self._json(read_phases(project, agent) if project else [])
        elif route == "/api/plan":
            project, agent = self._target(qs)
            content = read_doc(project, agent, "PLAN.md") if project else ""
            self._json({"content": content})
        elif route == "/api/memories":
            project, agent = self._target(qs)
            if project:
                self._json({
                    "memoryI": read_doc(project, agent, "memoryI_small_tests.md"),
                    "memoryII": read_doc(project, agent, "memoryII_rules_tested.md"),
                })
            else:
                self._json({"memoryI": "", "memoryII": ""})
        else:
            self.send_error(404)

    def _html(self, html: str):
        """Send *html* as a 200 response encoded as UTF-8."""
        body = html.encode()
        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _json(self, obj):
        """Send *obj* as a 200 JSON response readable from any origin."""
        body = json.dumps(obj).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, *_):
        """Silence the per-request access log."""
        pass
# ──────────────────────────────────────────────────────────────────────────────
# Entry point
# ──────────────────────────────────────────────────────────────────────────────
def main():
    """Parse the command line, start the dashboard server and block until Ctrl-C.

    Options:
        --port        TCP port to listen on (defaults to the module-level PORT,
                      which is updated to the chosen value).
        --no-browser  Do not open the dashboard in a web browser.

    The server listens on 127.0.0.1 only.  Its socket is closed on every exit
    path, not only after a keyboard interrupt.
    """
    global PORT
    import argparse

    parser = argparse.ArgumentParser(description="Unified discovery agent dashboard")
    parser.add_argument("--port", type=int, default=PORT)
    parser.add_argument("--no-browser", action="store_true")
    args = parser.parse_args()
    PORT = args.port

    url = f"http://127.0.0.1:{PORT}"
    print(f"Dashboard: {url}")
    print("Watching:")
    for name, path in LOG_DIRS.items():
        print(f" {name:20s} {path}")

    server = HTTPServer(("127.0.0.1", PORT), DashboardHandler)
    try:
        # The socket is already listening here, so the browser can connect
        # even though serve_forever() has not started yet.
        if not args.no_browser:
            webbrowser.open(url)
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nDashboard stopped.")
    finally:
        server.server_close()


if __name__ == "__main__":
    main()