Spaces:
Running
Running
#!/usr/bin/env python3
"""Unified live dashboard — monitors all 3 discovery agent projects.

Watches:
    base_client_v1/run_logs/   — Claude baseline agents
    alpha_evolve_gpt/results/  — GPT standalone evolution runs
    alpha_evolve/results/      — Docker-based evolutionary Claude agents

Usage:
    python dashboard.py
    python dashboard.py --port 8789
"""
| import json | |
| import os | |
| import subprocess | |
| import time | |
| from pathlib import Path | |
| from http.server import HTTPServer, SimpleHTTPRequestHandler | |
| from urllib.parse import urlparse, parse_qs | |
| import webbrowser | |
PORT = 8789
ROOT = Path(__file__).parent

# Setting the LOGS_BASE_DIR environment variable relocates every log
# directory (used on the HF Space deployment): each LOG_DIRS key then maps
# to <LOGS_BASE_DIR>/<key>/.
_LOGS_BASE = os.environ.get("LOGS_BASE_DIR", "")
if not _LOGS_BASE:
    # Default layout: directories live next to this script.
    LOG_DIRS = {
        "base_client_v1": ROOT / "base_client_v1" / "run_logs",
        "base_client_v2": ROOT / "base_client_v2" / "run_logs",
        # Logs produced before hint-leakage was fixed — shown with LEAKAGE warning
        "leakage_v1": ROOT / "logs_potential_leakage_in_prompt_hints" / "base_client_v1",
        "leakage_v2": ROOT / "logs_potential_leakage_in_prompt_hints" / "base_client_v2",
        "alpha_evolve_gpt": ROOT / "alpha_evolve_gpt" / "results",
        "alpha_evolve": ROOT / "alpha_evolve" / "results",
    }
else:
    _B = Path(_LOGS_BASE)
    LOG_DIRS = {
        key: _B / key
        for key in (
            "base_client_v1",
            "base_client_v2",
            "leakage_v1",
            "leakage_v2",
            "alpha_evolve_gpt",
            "alpha_evolve",
        )
    }

# Create any watched directory that does not exist yet.
for d in LOG_DIRS.values():
    d.mkdir(parents=True, exist_ok=True)
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # HTML / JS frontend | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| DASHBOARD_HTML = r"""<!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <title>Discovery Agent Monitor</title> | |
| <style> | |
| * { margin:0; padding:0; box-sizing:border-box; } | |
| body { background:#0a0a0f; color:#e0e0e0; font-family:'Consolas','Monaco',monospace; padding:16px; } | |
| h1 { color:#7ecfff; font-size:20px; margin-bottom:12px; border-bottom:1px solid #222; padding-bottom:8px; } | |
| /* ββ Project tabs ββ */ | |
| .proj-bar { display:flex; gap:8px; margin-bottom:12px; } | |
| .proj-btn { | |
| padding:8px 22px; border-radius:6px; border:2px solid transparent; | |
| font-family:inherit; font-size:13px; cursor:pointer; background:#12121a; color:#888; | |
| transition:all .15s; | |
| } | |
| .proj-btn:hover { color:#ccc; } | |
| .proj-btn.active { font-weight:bold; } | |
| .proj-btn[data-project="base_client"] { border-color:#4a9eff; } | |
| .proj-btn[data-project="alpha_evolve"] { border-color:#6fdb6f; } | |
| .proj-btn[data-project="base_client"].active { background:#0d1a2e; color:#4a9eff; } | |
| .proj-btn[data-project="alpha_evolve"].active { background:#0d1f0d; color:#6fdb6f; } | |
| /* LEAKAGE warning badge β logs from before hint-removal fix */ | |
| .mc-leakage { font-size:9px; padding:2px 6px; border-radius:3px; font-weight:bold; | |
| display:inline-block; margin-top:3px; letter-spacing:.8px; | |
| background:#2e0a0a; color:#ff5555; border:1px solid #ff555566; } | |
| .model-card.leakage { border-color:#ff555533; } | |
| .model-card.leakage:hover { border-color:#ff5555; } | |
| .proj-count { margin-left:6px; font-size:11px; opacity:.7; } | |
| /* ββ Cloud/local filter bar ββ */ | |
| .filter-bar { display:flex; gap:6px; margin-bottom:8px; align-items:center; } | |
| .filter-lbl { color:#555; font-size:10px; text-transform:uppercase; letter-spacing:1px; margin-right:2px; } | |
| .filter-btn { | |
| padding:3px 14px; border-radius:12px; border:1px solid #2a2a3a; | |
| font-family:inherit; font-size:11px; cursor:pointer; background:#0d0d15; color:#666; | |
| transition:all .15s; | |
| } | |
| .filter-btn:hover { color:#aaa; border-color:#444; } | |
| .filter-btn.active { background:#1a1a28; color:#7ecfff; border-color:#4a9eff; font-weight:bold; } | |
| .filter-btn[data-filter="cloud"].active { color:#6fdb6f; border-color:#6fdb6f; background:#0d1f0d; } | |
| .filter-btn[data-filter="local"].active { color:#f0ad4e; border-color:#f0ad4e; background:#1f170a; } | |
| /* ββ Problem tabs (level 2) ββ */ | |
| .problem-bar { display:flex; gap:6px; margin-bottom:10px; flex-wrap:wrap; align-items:center; } | |
| .problem-lbl { color:#555; font-size:10px; text-transform:uppercase; letter-spacing:1px; margin-right:4px; } | |
| .problem-btn { | |
| padding:5px 16px; border-radius:6px; border:1px solid #2a2a3a; | |
| font-family:inherit; font-size:12px; font-weight:bold; cursor:pointer; background:#0d0d15; color:#666; | |
| transition:all .15s; | |
| } | |
| .problem-btn:hover { color:#aaa; border-color:#555; } | |
| .problem-btn.active { background:#1a1a28; color:#7ecfff; border-color:#4a9eff; } | |
| .problem-btn .pb-count { font-size:10px; color:#555; margin-left:4px; font-weight:normal; } | |
| /* ββ Model card grid (level 3) ββ */ | |
| .model-grid { display:flex; gap:8px; flex-wrap:wrap; margin-bottom:16px; min-height:80px; } | |
| .model-card { | |
| background:#12121a; border:1px solid #2a2a3a; border-radius:8px; | |
| padding:10px 14px; cursor:pointer; min-width:130px; max-width:180px; | |
| transition:all .15s; position:relative; | |
| } | |
| .model-card:hover { border-color:#555; } | |
| .model-card.active { border-color:#7ecfff; background:#141424; box-shadow:0 0 12px rgba(126,207,255,.1); } | |
| .mc-rank { position:absolute; top:6px; right:8px; font-size:9px; color:#444; font-weight:bold; } | |
| .mc-rank.gold { color:#f0ad4e; } | |
| .mc-name { font-size:11px; color:#aaa; font-weight:bold; margin-bottom:6px; | |
| white-space:nowrap; overflow:hidden; text-overflow:ellipsis; max-width:150px; } | |
| .mc-acc { font-size:22px; font-weight:bold; color:#6fdb6f; line-height:1; } | |
| .mc-acc.warn { color:#f0ad4e; } | |
| .mc-acc.fail { color:#e94560; } | |
| .mc-acc.none { color:#444; font-size:16px; } | |
| .mc-sub { font-size:9px; color:#555; margin-top:2px; } | |
| .mc-status { font-size:10px; color:#666; margin-top:6px; display:flex; align-items:center; gap:4px; flex-wrap:wrap; } | |
| .mc-dot { display:inline-block; width:6px; height:6px; border-radius:50%; background:#888; flex-shrink:0; } | |
| .mc-dot.live { background:#6fdb6f; animation:pulse 1.5s infinite; } | |
| .mc-flag { font-size:9px; color:#f0ad4e; background:rgba(240,173,78,.12); | |
| border:1px solid rgba(240,173,78,.3); border-radius:3px; padding:0 4px; } | |
| .no-models { color:#555; font-size:13px; padding:24px; } | |
| /* ββ Legacy tab bar (hidden, kept for internal bookkeeping) ββ */ | |
| .tab-bar { display:none; } | |
| .tab-btn { display:none; } | |
| .tab-dot { display:inline-block; width:6px; height:6px; border-radius:50%; margin-right:5px; background:#888; } | |
| .tab-dot.live { background:#6fdb6f; animation:pulse 1.5s infinite; } | |
| .tab-score { margin-left:6px; font-size:10px; color:#6fdb6f; } | |
| .no-agents { color:#555; font-size:14px; padding:40px; text-align:center; } | |
| /* ββ Stat boxes ββ */ | |
| .top-bar { display:flex; gap:12px; margin-bottom:16px; flex-wrap:wrap; } | |
| .stat-box { background:#12121a; border:1px solid #2a2a3a; border-radius:8px; padding:10px 16px; min-width:100px; } | |
| .stat-label { color:#888; font-size:10px; text-transform:uppercase; letter-spacing:1px; } | |
| .stat-value { color:#7ecfff; font-size:24px; font-weight:bold; margin-top:2px; } | |
| .stat-value.score { color:#6fdb6f; } | |
| .stat-value.warn { color:#f0ad4e; } | |
| /* ββ Grid / heatmap ββ */ | |
| .main-area { display:flex; gap:14px; flex-wrap:wrap; } | |
| .panel { background:#12121a; border:1px solid #2a2a3a; border-radius:8px; padding:14px; } | |
| .panel h2 { color:#aaa; font-size:12px; margin-bottom:10px; text-transform:uppercase; letter-spacing:1px; } | |
| .grid-container { display:inline-block; line-height:0; } | |
| .grid-row { display:flex; } | |
| .grid-cell { width:13px; height:13px; border:0.5px solid rgba(255,255,255,0.05); } | |
| /* ββ Timelines ββ */ | |
| .timeline-panel { flex:1 1 260px; max-height:560px; overflow-y:auto; } | |
| .tl-entry { | |
| padding:5px 7px; margin-bottom:3px; border-radius:4px; font-size:11px; | |
| border-left:3px solid #333; word-break:break-all; | |
| } | |
| .tl-entry.step { border-left-color:#7ecfff; background:rgba(126,207,255,.04); } | |
| .tl-entry.submit { border-left-color:#6fdb6f; background:rgba(111,219,111,.08); } | |
| .tl-entry.finalresult { border-left-color:#ffed00; background:rgba(255,237,0,.06); } | |
| .tl-entry.randomstate { border-left-color:#5bc0de; background:rgba(91,192,222,.05); } | |
| .tl-entry.sessionstart{ border-left-color:#888; background:rgba(100,100,100,.04); } | |
| .tl-entry .tl-time { color:#555; margin-right:6px; } | |
| .tl-entry .tl-act { color:#7ecfff; font-weight:bold; margin-right:5px; } | |
| .tl-entry.submit .tl-act { color:#6fdb6f; } | |
| .tl-entry.finalresult .tl-act{ color:#ffed00; } | |
| .cmd-entry { | |
| padding:4px 7px; margin-bottom:2px; border-radius:4px; font-size:11px; | |
| border-left:3px solid #444; word-break:break-all; | |
| } | |
| .cmd-entry.Bash { border-left-color:#e94560; background:rgba(233,69,96,.06); } | |
| .cmd-entry.Write { border-left-color:#f0ad4e; background:rgba(240,173,78,.06); } | |
| .cmd-entry.Read { border-left-color:#666; background:rgba(100,100,100,.04); } | |
| .cmd-entry.Edit { border-left-color:#4ecdc4; background:rgba(78,205,196,.06); } | |
| .cmd-entry .cmd-tool { font-weight:bold; margin-right:5px; } | |
| .cmd-entry.Bash .cmd-tool { color:#e94560; } | |
| .cmd-entry.Write .cmd-tool { color:#f0ad4e; } | |
| .cmd-entry.Edit .cmd-tool { color:#4ecdc4; } | |
| .cmd-entry .cmd-time { color:#555; margin-right:4px; font-size:10px; } | |
| .cmd-entry .cmd-detail { color:#999; } | |
| /* ββ Final result banner ββ */ | |
| .final-banner { | |
| display:none; border-radius:10px; padding:18px 24px; margin-bottom:14px; | |
| animation:fadeIn .5s ease-in; | |
| } | |
| .final-banner.visible { display:block; } | |
| .final-banner.ok { background:linear-gradient(135deg,#0a1a0a,#12221a); border:2px solid #6fdb6f; } | |
| .final-banner.fail { background:linear-gradient(135deg,#1a0a0a,#221218); border:2px solid #e94560; } | |
| .final-banner.warn { background:linear-gradient(135deg,#1a160a,#221e0a); border:2px solid #f0ad4e; } | |
| .final-banner h2 { font-size:15px; margin-bottom:10px; text-transform:uppercase; letter-spacing:2px; } | |
| .final-banner.ok h2 { color:#6fdb6f; } | |
| .final-banner.fail h2 { color:#e94560; } | |
| .final-banner.warn h2 { color:#f0ad4e; } | |
| .incomplete-flag { display:inline-block; padding:2px 8px; border-radius:4px; font-size:10px; | |
| background:rgba(240,173,78,.15); border:1px solid #f0ad4e; color:#f0ad4e; | |
| margin-left:10px; vertical-align:middle; text-transform:none; letter-spacing:0; } | |
| .final-metrics { display:flex; gap:28px; flex-wrap:wrap; } | |
| .final-metric .fm-label { color:#888; font-size:10px; text-transform:uppercase; letter-spacing:1px; } | |
| .final-metric .fm-value { font-size:28px; font-weight:bold; margin-top:2px; } | |
| .final-metric .fm-sub { color:#666; font-size:10px; margin-top:2px; } | |
| /* ββ Evolve-specific view (alpha_evolve_gpt) ββ */ | |
| #evolve-view { display:none; } | |
| .gen-table { width:100%; border-collapse:collapse; font-size:12px; } | |
| .gen-table th { color:#888; text-align:left; padding:5px 10px; border-bottom:1px solid #222; font-size:10px; text-transform:uppercase; letter-spacing:1px; } | |
| .gen-table td { padding:5px 10px; border-bottom:1px solid #1a1a22; } | |
| .gen-table tr:last-child td { border-bottom:none; } | |
| .acc-bar { height:8px; background:#6fdb6f; border-radius:4px; display:inline-block; } | |
| .variant-row { border-left:3px solid #333; padding:4px 8px; margin-bottom:2px; border-radius:3px; font-size:11px; word-break:break-all; } | |
| .variant-row.best { border-left-color:#6fdb6f; background:rgba(111,219,111,.06); } | |
| .variant-row.ok { border-left-color:#4a9eff; background:rgba(74,158,255,.04); } | |
| .variant-row.error { border-left-color:#e94560; background:rgba(233,69,96,.05); } | |
| .variant-score { color:#6fdb6f; font-weight:bold; margin-right:6px; } | |
| .variant-strategy { color:#aaa; } | |
| .gen-header { color:#f0ad4e; font-weight:bold; margin:8px 0 4px; font-size:12px; } | |
| .code-view { background:#0d0d15; border:1px solid #2a2a3a; border-radius:6px; padding:12px; font-size:11px; color:#b0d0b0; max-height:300px; overflow-y:auto; white-space:pre-wrap; margin-top:8px; } | |
| /* ββ Phase tracker ββ */ | |
| .phase-row { display:flex; gap:10px; margin-bottom:14px; flex-wrap:wrap; align-items:center; } | |
| .phase-chip { | |
| padding:5px 14px; border-radius:20px; font-size:11px; font-weight:bold; | |
| border:1px solid #333; background:#12121a; color:#555; letter-spacing:.5px; | |
| transition:all .3s; | |
| } | |
| .phase-chip.done { border-color:#4a9eff; color:#4a9eff; background:rgba(74,158,255,.08); } | |
| .phase-chip.active { border-color:#f0ad4e; color:#f0ad4e; background:rgba(240,173,78,.12); animation:pulse 1.5s infinite; } | |
| .phase-chip.forced { border-color:#e94560; color:#e94560; background:rgba(233,69,96,.08); } | |
| .phase-chip.solved { border-color:#6fdb6f; color:#6fdb6f; background:rgba(111,219,111,.14); } | |
| .phase-score { font-size:11px; color:#6fdb6f; margin-left:6px; } | |
| .phase-history { max-height:260px; overflow-y:auto; } | |
| .ph-entry { | |
| padding:5px 10px; margin-bottom:3px; border-radius:4px; font-size:11px; | |
| border-left:3px solid #333; display:flex; gap:8px; align-items:baseline; | |
| } | |
| .ph-entry.started { border-left-color:#4a9eff; background:rgba(74,158,255,.04); } | |
| .ph-entry.completed { border-left-color:#6fdb6f; background:rgba(111,219,111,.04); } | |
| .ph-entry.forced { border-left-color:#e94560; background:rgba(233,69,96,.06); } | |
| .ph-entry.skipped { border-left-color:#555; background:rgba(100,100,100,.03); } | |
| .ph-time { color:#555; font-size:10px; min-width:38px; } | |
| .ph-cycle { color:#888; min-width:30px; } | |
| .ph-phase { font-weight:bold; color:#7ecfff; min-width:90px; } | |
| .ph-status { color:#aaa; min-width:60px; } | |
| .ph-sum { color:#666; flex:1; } | |
| /* ββ Text doc panels ββ */ | |
| .doc-panel { flex:1 1 340px; max-height:320px; overflow-y:auto; } | |
| .doc-content { | |
| font-size:10.5px; line-height:1.5; color:#bbb; white-space:pre-wrap; word-break:break-word; | |
| background:#0d0d15; border:1px solid #1a1a28; border-radius:5px; padding:10px; | |
| max-height:260px; overflow-y:auto; | |
| } | |
| .doc-empty { color:#444; font-size:12px; padding:20px; text-align:center; } | |
| .mem-tabs { display:flex; gap:6px; margin-bottom:8px; } | |
| .mem-tab { padding:4px 12px; border-radius:4px; font-size:11px; cursor:pointer; | |
| background:#0d0d15; border:1px solid #222; color:#666; } | |
| .mem-tab.active { border-color:#4a9eff; color:#4a9eff; background:rgba(74,158,255,.07); } | |
| /* ββ Status / misc ββ */ | |
| .status-dot { display:inline-block; width:7px; height:7px; border-radius:50%; margin-right:5px; } | |
| .status-dot.live { background:#6fdb6f; animation:pulse 1.5s infinite; } | |
| .status-dot.done { background:#888; } | |
| #status { font-size:12px; color:#666; margin-bottom:10px; } | |
| @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:.3} } | |
| @keyframes fadeIn { from{opacity:0;transform:translateY(-8px)} to{opacity:1;transform:translateY(0)} } | |
| </style> | |
| </head> | |
| <body> | |
| <h1>Discovery Agent Monitor</h1> | |
<div id="status"><span class="status-dot live" id="dot"></span>Connecting…</div>
| <!-- Project selector (level 1) --> | |
| <div class="proj-bar"> | |
| <button class="proj-btn active" data-project="base_client" | |
| onclick="switchProject('base_client')">base_client<span class="proj-count" id="cnt-base_client"></span></button> | |
| <button class="proj-btn" data-project="alpha_evolve" | |
| onclick="switchProject('alpha_evolve')">alpha_evolve<span class="proj-count" id="cnt-alpha_evolve"></span></button> | |
| </div> | |
| <!-- Cloud / local filter bar --> | |
| <div class="filter-bar" id="filter-bar"> | |
| <span class="filter-lbl">Show:</span> | |
| <button class="filter-btn active" data-filter="all" onclick="setFilter('all')">all</button> | |
<button class="filter-btn" data-filter="cloud" onclick="setFilter('cloud')">☁ cloud</button>
<button class="filter-btn" data-filter="local" onclick="setFilter('local')">⚡ local</button>
| </div> | |
| <!-- Problem tabs (level 2) --> | |
| <div class="problem-bar" id="problem-bar"><span class="problem-lbl">Problem:</span></div> | |
| <!-- Model card grid (level 3) β sorted by accuracy, click to select --> | |
| <div class="model-grid" id="model-grid"></div> | |
| <!-- Hidden legacy tab-bar (bookkeeping only) --> | |
| <div class="tab-bar" id="tab-bar"></div> | |
| <!-- ββ ENV view (base_client / alpha_evolve) ββ --> | |
| <div id="env-view"> | |
| <div class="final-banner" id="final-banner"><h2></h2><div class="final-metrics" id="final-metrics"></div></div> | |
| <div class="top-bar"> | |
| <div class="stat-box"><div class="stat-label">Problem</div><div class="stat-value" id="problem-id">--</div></div> | |
| <div class="stat-box"><div class="stat-label">Model</div><div class="stat-value" id="model-id" style="font-size:18px">--</div></div> | |
| <div class="stat-box"><div class="stat-label">Queries</div><div class="stat-value" id="query-count">0</div></div> | |
| <div class="stat-box"><div class="stat-label">Elapsed</div><div class="stat-value" id="elapsed">0s</div></div> | |
| <div class="stat-box"><div class="stat-label">Cmds</div><div class="stat-value" id="cmd-count">0</div></div> | |
| <div class="stat-box"><div class="stat-label">Last Score</div><div class="stat-value score" id="last-score">--</div></div> | |
| <div class="stat-box"><div class="stat-label">Claude Tokens</div><div class="stat-value" id="claude-tokens" style="font-size:18px">--</div><div style="font-size:9px;color:#666;margin-top:2px" id="claude-tokens-detail"></div></div> | |
| </div> | |
| <!-- Phase status chips --> | |
| <div class="phase-row" id="phase-chips"> | |
| <span style="color:#666;font-size:11px;margin-right:4px">PHASES:</span> | |
| <span class="phase-chip" id="chip-BOOT">BOOT</span> | |
| <span style="color:#333">→</span> | |
| <span class="phase-chip" id="chip-OBSERVE">OBSERVE</span> | |
| <span style="color:#333">→</span> | |
| <span class="phase-chip" id="chip-HYPOTHESIZE">HYPOTHESIZE</span> | |
| <span style="color:#333">→</span> | |
| <span class="phase-chip" id="chip-BATCH_TEST">BATCH TEST</span> | |
| <span style="color:#333">→</span> | |
| <span class="phase-chip" id="chip-ANALYZE">ANALYZE</span> | |
| <span style="color:#333">→</span> | |
| <span class="phase-chip" id="chip-SOLVED">SOLVED?</span> | |
| <span id="cycle-badge" style="margin-left:12px;color:#888;font-size:11px"></span> | |
| </div> | |
| <!-- Phase history + Plan + Memories row --> | |
| <div class="main-area" style="margin-bottom:14px"> | |
| <div class="panel phase-history" style="flex:1 1 240px"> | |
| <h2>Phase History</h2> | |
| <div id="phase-history"></div> | |
| </div> | |
| <div class="panel doc-panel" style="flex:1 1 300px"> | |
| <h2>Current Plan</h2> | |
| <div id="plan-content" class="doc-content"><span class="doc-empty">No PLAN.md yet</span></div> | |
| </div> | |
| <div class="panel doc-panel" style="flex:1 1 300px"> | |
| <h2>Memories</h2> | |
| <div class="mem-tabs"> | |
| <button class="mem-tab active" onclick="switchMem('I')">memoryI (observations)</button> | |
| <button class="mem-tab" onclick="switchMem('II')">memoryII (rules tested)</button> | |
| </div> | |
| <div id="mem-I-content" class="doc-content"><span class="doc-empty">No memoryI yet</span></div> | |
| <div id="mem-II-content" class="doc-content" style="display:none"><span class="doc-empty">No memoryII yet</span></div> | |
| </div> | |
| </div> | |
| <div class="main-area"> | |
| <div class="panel"><h2>Grid State</h2><div id="grid-container" class="grid-container"></div></div> | |
| <div class="panel"><h2>Change Heatmap</h2><div id="heatmap-container" class="grid-container"></div></div> | |
| <div class="panel timeline-panel"><h2>Env Actions</h2><div id="env-timeline"></div></div> | |
| <div class="panel timeline-panel"><h2>Agent Commands</h2><div id="cmd-timeline"></div></div> | |
| </div> | |
| </div> | |
| <!-- ββ EVOLVE view (alpha_evolve_gpt) ββ --> | |
| <div id="evolve-view"> | |
| <div class="final-banner" id="evolve-final-banner"><h2></h2><div class="final-metrics" id="evolve-final-metrics"></div></div> | |
| <div class="top-bar"> | |
| <div class="stat-box"><div class="stat-label">Problem</div><div class="stat-value" id="ev-problem">--</div></div> | |
| <div class="stat-box"><div class="stat-label">Generation</div><div class="stat-value" id="ev-gen">0</div></div> | |
| <div class="stat-box"><div class="stat-label">Best Acc</div><div class="stat-value score" id="ev-best-acc">--</div></div> | |
| <div class="stat-box"><div class="stat-label">Best Total</div><div class="stat-value score" id="ev-best-total">--</div></div> | |
| <div class="stat-box"><div class="stat-label">Variants</div><div class="stat-value" id="ev-variants">0</div></div> | |
| <div class="stat-box"><div class="stat-label">Elapsed</div><div class="stat-value" id="ev-elapsed">0s</div></div> | |
| </div> | |
| <div class="main-area"> | |
| <div class="panel" style="flex:1 1 320px; max-height:600px; overflow-y:auto;"> | |
| <h2>Generation Log</h2> | |
| <div id="evolve-timeline"></div> | |
| </div> | |
| <div class="panel" style="flex:1 1 360px;"> | |
| <h2>Best Code</h2> | |
<div class="code-view" id="best-code">No code yet…</div>
| </div> | |
| </div> | |
| </div> | |
| <script> | |
// ---- Color helpers ----
// Hand-picked palettes, keyed by the number of distinct cell values (maxV + 1).
const COLOR_MAPS = {
  2: ['#1a1a2e','#6fdb6f'],
  3: ['#1a1a2e','#e94560','#5bc0de'],
  5: ['#1a1a2e','#16213e','#0f3460','#e94560','#ffed00'],
};

/**
 * Return the CSS colour for grid cell value `v`.
 *
 * @param {number} v     Cell value; used as an index into the palette.
 * @param {number} maxV  Largest cell value (falsy is treated as 1).
 * @returns {string} A palette entry, clamped to the last colour for large
 *   values, or '#333' when the index yields nothing (e.g. negative `v`).
 */
function getColor(v, maxV) {
  const levels = (maxV || 1) + 1;
  // Fall back to a generated palette when no fixed one exists for this size.
  const palette = COLOR_MAPS[levels] || genColors(levels);
  const idx = Math.min(v, palette.length - 1);
  return palette[idx] || '#333';
}
/**
 * Generate `n` HSL colours: hue sweeps 0..300 degrees and lightness rises
 * from 20% toward 70% as the index grows.
 *
 * @param {number} n  Number of colours to produce.
 * @returns {string[]} CSS `hsl(...)` strings.
 */
function genColors(n) {
  return Array.from({ length: n }, (_, i) => {
    const frac = i / n;
    const hue = (frac * 300) | 0;        // truncate to an integer hue
    const lightness = 20 + frac * 50;
    return `hsl(${hue},60%,${lightness}%)`;
  });
}
/**
 * Colour for a change-heatmap cell.
 *
 * @param {number} v     Heat value of the cell; falsy means "never changed".
 * @param {number} maxH  Largest heat value (values below 1 are treated as 1).
 * @returns {string} '#111118' for a cold cell, otherwise an `rgb(...)` string
 *   whose red channel grows and green/blue channels shrink with v / maxH.
 */
function heatColor(v, maxH) {
  if (!v) return '#111118';
  const t = Math.min(v / Math.max(maxH, 1), 1);
  const cool = 1 - t;
  const red = Math.round(255 * t);
  const green = Math.round(80 * cool);
  const blue = Math.round(255 * cool * .5);
  return `rgb(${red},${green},${blue})`;
}
/**
 * Render a 2-D array of cell values into `el` as rows of coloured divs.
 *
 * @param {HTMLElement} el        Container; its previous content is discarded.
 * @param {Array<Array<*>>} grid  Rows of cell values. A missing or non-array
 *   grid just leaves the container empty instead of throwing.
 * @param {function(*): string} colorFn  Maps a cell value to a CSS background.
 */
function renderGrid(el, grid, colorFn) {
  el.innerHTML = '';
  // Agents start out with no grid/heatmap (null); nothing to draw in that case.
  if (!Array.isArray(grid)) return;
  // Assemble off-document so the container is touched once per render.
  const frag = document.createDocumentFragment();
  for (const row of grid) {
    const rowEl = document.createElement('div');
    rowEl.className = 'grid-row';
    for (const v of row) {
      const cell = document.createElement('div');
      cell.className = 'grid-cell';
      cell.style.background = colorFn(v);
      cell.title = v;   // tooltip shows the raw cell value
      rowEl.appendChild(cell);
    }
    frag.appendChild(rowEl);
  }
  el.appendChild(frag);
}
// ---- Cloud vs local classification ----
// Substrings that mark an agent id as a cloud-hosted model.
const CLOUD_PATTERNS = ['sonnet','haiku','opus','minimax','MiniMax','claude','gpt-4','gpt-3'];

/**
 * Decide whether an agent id refers to a locally hosted model.
 *
 * @param {string} id  Agent id (any prefix/problem part is simply searched too).
 * @returns {boolean} false when the id contains any CLOUD_PATTERNS entry
 *   (case-insensitive), true otherwise.
 */
function isLocalModel(id) {
  const haystack = id.toLowerCase();
  const isCloud = CLOUD_PATTERNS.some(pat => haystack.includes(pat.toLowerCase()));
  return !isCloud;
}
// ---- Parse agent ID into {problem, model} ----
// IDs may be prefixed: "v1::", "v2::", "lk1::", "lk2::" (lk = leakage/contaminated)
/**
 * Split an agent id into its problem and model parts.
 *
 * @param {string} id  Agent id, optionally carrying one of the prefixes above.
 * @returns {{problem: string, model: string}} For "G<digits>_<model>" the two
 *   halves; otherwise problem 'OTHER' and the un-prefixed id as the model.
 */
function parseAgentId(id) {
  const prefixed = id.match(/^(v[12]|lk[12])::(.*)/);
  const bare = prefixed ? prefixed[2] : id;
  const parts = bare.match(/^(G\d+)_(.+)$/);
  if (!parts) return { problem: 'OTHER', model: bare };
  return { problem: parts[1], model: parts[2] };
}
// ---- Normalize accuracy (some logs store as 0-100, we want 0-1) ----
/**
 * Bring an accuracy value into the 0-1 range.
 *
 * @param {number} v  Accuracy as a fraction or as a percentage.
 * @returns {number} 0 for falsy input; v / 100 when v is above 1.5 (taken to
 *   be a percentage); otherwise v unchanged.
 */
function normAcc(v) {
  if (!v) return 0;
  if (v > 1.5) return v / 100;   // >1.5 means stored as percentage
  return v;
}
// ---- State ----
// Per-project UI state, keyed by the data-project value of the project buttons.
//   type          'env' or 'mixed'; a 'mixed' project keeps both `runs` and `agents`
//   agents / runs items keyed by agent / run id
//   active        id of the currently selected item (set by switchTab), or null
//   knownIds      ids that already have a hidden bookkeeping tab button
//   filter        'all' | 'cloud' | 'local' (set by setFilter)
//   activeProblem problem currently selected in the problem bar, or null
const projects = {
  base_client: { type:'env', agents:{}, active:null, knownIds:new Set(), filter:'all', activeProblem:null },
  alpha_evolve: { type:'mixed', runs:{}, agents:{}, active:null, knownIds:new Set(), filter:'all', activeProblem:null },
};
// Key into `projects` for the project currently shown.
let activeProject = 'base_client';
// Map a prefixed merged ID to its source API project.
/**
 * @param {string} id  Agent id, possibly prefixed with "v1::", "v2::",
 *   "lk1::" or "lk2::".
 * @returns {string} The source project name for a prefixed id, or the
 *   current `activeProject` when the id carries no known prefix.
 */
function agentSourceProject(id) {
  const PREFIX_TO_PROJECT = {
    v1: 'base_client_v1',
    v2: 'base_client_v2',
    lk1: 'leakage_v1',
    lk2: 'leakage_v2',
  };
  const found = id.match(/^(v[12]|lk[12])::/);
  if (!found) return activeProject;
  return PREFIX_TO_PROJECT[found[1]] || activeProject;
}
// ---- Get best accuracy for a run/agent (0-1 range) ----
/**
 * @param {?Object} item  Agent or run state object.
 * @returns {number} 0 for a missing item. Without a final result, the item's
 *   `bestAccuracy` (or 0). With one, the larger of the normalised
 *   `finalResult.result.functional_accuracy` and `bestAccuracy`.
 */
function getBestAcc(item) {
  if (!item) return 0;
  const bestSeen = item.bestAccuracy || 0;
  if (!item.finalResult) return bestSeen;
  // The final result may be lower than the best earlier submit, so take the max.
  const result = item.finalResult.result || {};
  return Math.max(normAcc(result.functional_accuracy || 0), bestSeen);
}
// ---- Problem helpers ----
/**
 * List the distinct problem ids present in the active project.
 *
 * @returns {string[]} Problem ids (as produced by parseAgentId), sorted.
 */
function getProblems() {
  const proj = projects[activeProject];
  // NOTE(review): no project is declared with type 'evolve' (only 'env' and
  // 'mixed'), so `runs` is never the store here - confirm this is intended.
  const store = proj.type === 'evolve' ? proj.runs : proj.agents;
  const unique = new Set(Object.keys(store).map(id => parseAgentId(id).problem));
  return Array.from(unique).sort();
}
/**
 * Collect the ids in the active project that belong to `problem`.
 *
 * @param {string} problem  Problem id as returned by parseAgentId.
 * @returns {string[]} Matching ids, in store key order.
 */
function getAgentsForProblem(problem) {
  const proj = projects[activeProject];
  // NOTE(review): for a 'mixed' project only `agents` is consulted - confirm.
  const store = proj.type === 'evolve' ? proj.runs : proj.agents;
  const matches = [];
  for (const id of Object.keys(store)) {
    if (parseAgentId(id).problem === problem) matches.push(id);
  }
  return matches;
}
// ---- Rebuild problem tab bar ----
/**
 * Redraw the problem buttons for the active project.
 *
 * Resets the bar to its label, then adds one button per problem showing the
 * problem id and the number of agents for it. If the project's activeProblem
 * is unset or no longer present, the first problem becomes active.
 */
function rebuildProblemBar() {
  const bar = document.getElementById('problem-bar');
  const problems = getProblems();
  bar.innerHTML = '<span class="problem-lbl">Problem:</span>';
  if (problems.length === 0) return;

  const proj = projects[activeProject];
  const stillValid = proj.activeProblem && problems.includes(proj.activeProblem);
  if (!stillValid) proj.activeProblem = problems[0];

  problems.forEach(prob => {
    const count = getAgentsForProblem(prob).length;
    const btn = document.createElement('button');
    btn.className = 'problem-btn';
    if (prob === proj.activeProblem) btn.className += ' active';
    btn.innerHTML = prob + `<span class="pb-count">${count}</span>`;
    btn.onclick = () => switchProblem(prob);
    bar.appendChild(btn);
  });
}
/**
 * Make `prob` the active problem of the current project, update the button
 * highlight, redraw the model grid and auto-select a model if needed.
 *
 * @param {string} prob  Problem id, e.g. "G1".
 */
function switchProblem(prob) {
  const p = projects[activeProject];
  p.activeProblem = prob;
  document.querySelectorAll('.problem-btn').forEach(b => {
    // A button holds the problem id as its first text node, followed by the
    // count span. Compare that node exactly: a prefix test on textContent
    // would also highlight "G10", "G11", ... when "G1" is selected.
    const label = b.firstChild ? b.firstChild.textContent : '';
    b.classList.toggle('active', label === prob);
  });
  rebuildModelGrid();
  selectTopModel();
}
// ---- Rebuild model card grid ----
/**
 * Redraw the model cards for the active project's current problem.
 *
 * Cards are filtered by the project's cloud/local filter, sorted by best
 * accuracy (descending) and ranked in that order. Each card shows the model
 * name, accuracy, status and, for leakage runs, a warning badge. Clicking a
 * card calls switchTab with that agent id.
 */
function rebuildModelGrid() {
  const p = projects[activeProject];
  const grid = document.getElementById('model-grid');
  grid.innerHTML = '';
  if (!p.activeProblem) { grid.innerHTML = '<div class="no-models">No problems found</div>'; return; }
  const f = p.filter || 'all';
  let agents = getAgentsForProblem(p.activeProblem);
  if (f !== 'all') agents = agents.filter(id => f === 'local' ? isLocalModel(id) : !isLocalModel(id));
  if (!agents.length) { grid.innerHTML = '<div class="no-models">No agents match current filter</div>'; return; }
  const store = p.type === 'evolve' ? p.runs : p.agents;
  // Sort by best accuracy descending
  agents.sort((a, b) => getBestAcc(store[b]) - getBestAcc(store[a]));
  agents.forEach((id, idx) => {
    const item = store[id];
    if (!item) return;
    const { model } = parseAgentId(id);
    const acc = getBestAcc(item);
    const hasFinal = !!item.finalResult;
    const isIncomplete = item.done && !hasFinal;
    const isActive = p.active === id;
    const isLeakage = !!item.leakage;
    const accPct = acc > 0 ? (acc * 100).toFixed(1) + '%' : '--';
    // Green at >= 80%, amber at >= 40%, red below; grey when there is no score.
    const accClass = acc === 0 ? ' none' : acc >= 0.8 ? '' : acc >= 0.4 ? ' warn' : ' fail';
    const dotCls = item.done ? 'done' : 'live';
    const statusTxt = item.done ? (hasFinal ? 'complete' : 'stopped early') : 'running';
    const subTxt = hasFinal ? 'final result' : (acc > 0 ? 'best submit' : 'no submits yet');
    const rankCls = idx === 0 ? ' gold' : '';
    // \u26A0 is the warning sign; written as an escape so it survives re-encoding.
    const badgeHtml = isLeakage ? `<span class="mc-leakage">\u26A0 LEAKAGE</span>` : '';
    const card = document.createElement('div');
    card.className = 'model-card' + (isActive ? ' active' : '') + (isLeakage ? ' leakage' : '');
    card.title = isLeakage ? id + ' \u2014 run before hint-leakage fix (results may be biased)' : id;
    card.onclick = () => switchTab(id);
    card.innerHTML = `
      <span class="mc-rank${rankCls}">#${idx + 1}</span>
      <div class="mc-name"></div>
      ${badgeHtml}
      <div class="mc-acc${accClass}">${accPct}</div>
      <div class="mc-sub">${subTxt}</div>
      <div class="mc-status">
        <span class="mc-dot ${dotCls}"></span>${statusTxt}
        ${isIncomplete ? '<span class="mc-flag">INCOMPLETE</span>' : ''}
      </div>`;
    // The model name comes from the agent id, so insert it as text rather
    // than markup: a name containing '<' or '&' must not be parsed as HTML.
    card.querySelector('.mc-name').textContent = model;
    grid.appendChild(card);
  });
}
// ---- Auto-select top performing model in current problem ----
/**
 * Switch to the best-scoring agent of the active problem, but only when the
 * current selection is empty or belongs to a different problem. Respects the
 * project's cloud/local filter; does nothing when no agent qualifies.
 */
function selectTopModel() {
  const proj = projects[activeProject];
  if (!proj.activeProblem) return;

  const mode = proj.filter || 'all';
  const wantLocal = mode === 'local';
  const candidates = getAgentsForProblem(proj.activeProblem)
    .filter(id => mode === 'all' || isLocalModel(id) === wantLocal);
  if (candidates.length === 0) return;

  const store = proj.type === 'evolve' ? proj.runs : proj.agents;
  candidates.sort((a, b) => getBestAcc(store[b]) - getBestAcc(store[a]));

  // Keep the current selection when it already belongs to this problem.
  const currentProblem = proj.active ? parseAgentId(proj.active).problem : null;
  if (proj.active && currentProblem === proj.activeProblem) return;
  switchTab(candidates[0]);
}
/**
 * Create the initial state object for an env-type agent.
 *
 * @param {string} id              Agent id.
 * @param {string} [sourceProject] Source project; defaults to `activeProject`.
 * @param {*} [leakage]            Truthy marks the agent as a leakage run.
 * @returns {Object} Fresh agent state with zeroed counters and empty logs.
 */
function newEnvAgent(id, sourceProject, leakage) {
  return {
    id,
    sourceProject: sourceProject || activeProject,
    leakage: Boolean(leakage),
    envOffset: 0,
    cmdOffset: 0,
    cmdTotal: 0,
    maxVal: 1,
    gridRows: 20,
    gridCols: 20,
    heatmap: null,
    latestState: null,
    sessionStart: null,
    problemId: '?',
    modelId: '?',
    queryCount: 0,
    elapsed: '0s',
    lastScore: null,
    bestAccuracy: 0,
    finalResult: null,
    done: false,
    incomplete: false,
    envEntries: [],
    cmdEntries: [],
    claudeIn: 0,
    claudeOut: 0,
  };
}
/**
 * Create the initial state object for an evolve run.
 *
 * @param {string} id  Run id.
 * @returns {Object} Fresh run state with zeroed counters and no events.
 */
function newEvolveRun(id) {
  return {
    id,
    offset: 0,
    problem: '?',
    gen: 0,
    bestAcc: 0,
    bestTotal: 0,
    variants: 0,
    elapsed: '0s',
    done: false,
    bestCode: '',
    events: [],
    solvedGen: null,
    bestAccuracy: 0,
  };
}
// ---- Project / tab switching ----
/**
 * Store the cloud/local filter on the active project, highlight the matching
 * filter button and redraw the model grid.
 *
 * @param {string} f  Filter value; the buttons use 'all', 'cloud' and 'local'.
 */
function setFilter(f) {
  projects[activeProject].filter = f;
  for (const btn of document.querySelectorAll('.filter-btn')) {
    btn.classList.toggle('active', btn.dataset.filter === f);
  }
  rebuildModelGrid();
}
/**
 * Show project `proj`: highlight its button, choose between the env and
 * evolve views, restore its filter, rebuild the problem bar and model grid,
 * then re-render its active item (or auto-select one if none is active).
 *
 * @param {string} proj  Key into `projects`.
 */
function switchProject(proj) {
  activeProject = proj;
  for (const btn of document.querySelectorAll('.proj-btn')) {
    btn.classList.toggle('active', btn.dataset.project === proj);
  }

  const state = projects[proj];
  // A 'mixed' project uses the evolve view only while its active item is a run.
  const activeIsRun = state.type === 'mixed' && state.active && !!state.runs[state.active];
  const useEvolve = state.type === 'evolve' || activeIsRun;
  document.getElementById('env-view').style.display = useEvolve ? 'none' : 'block';
  document.getElementById('evolve-view').style.display = useEvolve ? 'block' : 'none';

  const currentFilter = state.filter || 'all';
  for (const btn of document.querySelectorAll('.filter-btn')) {
    btn.classList.toggle('active', btn.dataset.filter === currentFilter);
  }

  rebuildProblemBar();
  rebuildModelGrid();

  if (!state.active) {
    selectTopModel();
  } else if (useEvolve) {
    renderEvolveRun(state.active);
  } else {
    renderEnvAgent(state.active);
  }
}
| function rebuildTabBar() { | |
| const p = projects[activeProject]; | |
| document.getElementById('tab-bar').innerHTML = ''; | |
| p.knownIds = new Set(); | |
| const itemSets = p.type==='mixed' ? [p.runs, p.agents] : [p.type==='evolve' ? p.runs : p.agents]; | |
| for (const items of itemSets) for (const id of Object.keys(items)) _addTabInternal(id); | |
| rebuildProblemBar(); | |
| rebuildModelGrid(); | |
| if (!p.active) selectTopModel(); | |
| } | |
| function _addTabInternal(id) { | |
| const p = projects[activeProject]; | |
| if (p.knownIds.has(id)) return; | |
| p.knownIds.add(id); | |
| const btn = document.createElement('button'); | |
| btn.className = 'tab-btn'; btn.dataset.agent = id; | |
| btn.onclick = () => switchTab(id); | |
| document.getElementById('tab-bar').appendChild(btn); | |
| } | |
| function addTab(id) { | |
| const p = projects[activeProject]; | |
| const isNew = !p.knownIds.has(id); | |
| _addTabInternal(id); | |
| if (isNew) { | |
| rebuildProblemBar(); | |
| rebuildModelGrid(); | |
| if (!p.active) selectTopModel(); | |
| } else { | |
| rebuildModelGrid(); // refresh scores | |
| } | |
| } | |
| function updateTabDot(id) { | |
| rebuildModelGrid(); // model cards show live/done state | |
| } | |
// Make `id` the active item of the current project and render it.
// A 'mixed' project holds both evolve runs (p.runs) and env agents (p.agents),
// so the renderer and the visible view are chosen by which store owns the id,
// using the same rule switchProject() and pollActive() apply.
function switchTab(id) {
  const p = projects[activeProject];
  p.active = id;
  rebuildModelGrid(); // update active card highlight
  const useEvolve = p.type === 'evolve' || (p.type === 'mixed' && !!p.runs[id]);
  // Keep the visible panel in step with the kind of item selected.
  document.getElementById('env-view').style.display = useEvolve ? 'none' : 'block';
  document.getElementById('evolve-view').style.display = useEvolve ? 'block' : 'none';
  if (useEvolve) renderEvolveRun(id);
  else renderEnvAgent(id);
}
| // ββ ENV rendering ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| function processEnvEntry(ag, entry) { | |
| const act = entry.action || ''; | |
| if (act === 'session_start') { | |
| ag.problemId = entry.problem_id || '?'; | |
| const info = entry.shape_info || {}; | |
| const vals = (info.values||'0-1').split('-'); | |
| ag.maxVal = parseInt(vals[1]||'1'); | |
| ag.gridRows = info.rows||20; ag.gridCols = info.cols||20; | |
| ag.heatmap = Array.from({length:ag.gridRows}, ()=>new Array(ag.gridCols).fill(0)); | |
| ag.sessionStart = entry.t !== undefined ? (Date.now()/1000 - entry.t) : Date.now()/1000; | |
| } | |
| if (entry.query_num !== undefined) ag.queryCount = entry.query_num; | |
| if (entry.t !== undefined) ag.elapsed = entry.t.toFixed(0)+'s'; | |
| if (entry.state) { | |
| const prev = ag.latestState; | |
| ag.latestState = entry.state; | |
| if (act==='step' && ag.heatmap && prev) { | |
| for (let r=0; r<Math.min(entry.state.length,ag.gridRows); r++) | |
| for (let c=0; c<Math.min(entry.state[r].length,ag.gridCols); c++) | |
| if (entry.state[r][c] !== (prev[r]?.[c]??-1)) ag.heatmap[r][c]++; | |
| } | |
| } | |
| if (act==='submit' && entry.result) { | |
| ag.lastScore = entry.result.total || 0; | |
| // Track best accuracy seen from any submit | |
| const submitAcc = normAcc(entry.result.functional_accuracy || 0); | |
| if (submitAcc > ag.bestAccuracy) ag.bestAccuracy = submitAcc; | |
| } | |
| if (act==='final_result' && entry.result) { | |
| ag.finalResult = entry; ag.done = true; | |
| const finalAcc = normAcc(entry.result.functional_accuracy || 0); | |
| if (finalAcc > ag.bestAccuracy) ag.bestAccuracy = finalAcc; | |
| } | |
| ag.envEntries.push(entry); | |
| if (ag.envEntries.length > 300) ag.envEntries.shift(); | |
| } | |
| function renderEnvAgent(id) { | |
| const p = projects[activeProject]; | |
| const ag = p.agents[id]; if (!ag) return; | |
| const parts = id.split('_'); | |
| ag.modelId = parts.length>1 ? parts[parts.length-1] : '?'; | |
| document.getElementById('problem-id').textContent = ag.problemId; | |
| document.getElementById('model-id').textContent = ag.modelId; | |
| document.getElementById('query-count').textContent = ag.queryCount; | |
| document.getElementById('elapsed').textContent = ag.elapsed; | |
| document.getElementById('cmd-count').textContent = ag.cmdTotal; | |
| const se = document.getElementById('last-score'); | |
| if (ag.lastScore !== null) { | |
| se.textContent = ag.lastScore.toFixed(3); | |
| se.className = 'stat-value ' + (ag.lastScore>=0.8?'score':'warn'); | |
| } else { se.textContent='--'; se.className='stat-value score'; } | |
| const tot = ag.claudeIn + ag.claudeOut; | |
| document.getElementById('claude-tokens').textContent = tot>0 ? (tot>9999?(tot/1000).toFixed(1)+'k':tot) : '--'; | |
| document.getElementById('claude-tokens-detail').textContent = tot>0 ? `in:${ag.claudeIn} out:${ag.claudeOut}` : ''; | |
| if (ag.latestState) renderGrid(document.getElementById('grid-container'), ag.latestState, v=>getColor(v,ag.maxVal)); | |
| if (ag.heatmap) { | |
| const maxH = Math.max(...ag.heatmap.flat()); | |
| renderGrid(document.getElementById('heatmap-container'), ag.heatmap, v=>heatColor(v,maxH)); | |
| } | |
| const tl = document.getElementById('env-timeline'); | |
| tl.innerHTML = ''; | |
| for (let i=ag.envEntries.length-1; i>=Math.max(0,ag.envEntries.length-200); i--) | |
| tl.appendChild(makeEnvEntry(ag.envEntries[i])); | |
| const ctl = document.getElementById('cmd-timeline'); | |
| ctl.innerHTML = ''; | |
| for (let i=ag.cmdEntries.length-1; i>=Math.max(0,ag.cmdEntries.length-300); i--) | |
| ctl.appendChild(makeCmdEntry(ag.cmdEntries[i], ag.sessionStart)); | |
| const banner = document.getElementById('final-banner'); | |
| if (ag.finalResult) showFinalBanner(ag.finalResult, ag.problemId, ag); | |
| else if (ag.incomplete) showIncompleteBanner(ag); | |
| else banner.className='final-banner'; | |
| document.getElementById('dot').className='status-dot '+(ag.done?'done':'live'); | |
| } | |
| function makeEnvEntry(e) { | |
| const div = document.createElement('div'); | |
| const act = e.action||'?'; | |
| const cls = act.replace('_',''); | |
| div.className = 'tl-entry '+cls; | |
| let detail = ''; | |
| if (act==='step') detail = `n=${e.n} | ${e.cells_changed} changed`; | |
| else if (act==='submit') { const r=e.result||{}; detail=`acc=${(r.functional_accuracy||0).toFixed(3)} total=${(r.total||0).toFixed(3)}`; } | |
| else if (act==='session_start') detail = `problem=${e.problem_id}`; | |
| else if (act==='random_state') detail = `seed=${e.seed}`; | |
| else if (act==='final_result') { const r=e.result||{}; detail=`acc=${(normAcc(r.functional_accuracy||0)*100).toFixed(1)}% src=${e.source||'?'}`; } | |
| div.innerHTML = `<span class="tl-time">${(e.t||0).toFixed(1)}s</span><span class="tl-act">${act}</span><span>${detail}</span>`; | |
| return div; | |
| } | |
// Build one row of the command timeline for a logged tool call.
//   e            - parsed agent_commands entry; reads e.tool, e.ts and e.detail
//   sessionStart - session start time used to show e.ts as an elapsed offset
// Returns a detached <div>; the caller appends it to the timeline.
function makeCmdEntry(e, sessionStart) {
  // Log content is untrusted text: escape it before it reaches innerHTML.
  // '&' is replaced first so the entities emitted for '<' and '>' survive.
  const esc = s => String(s).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
  const div = document.createElement('div');
  const tool = e.tool || '?';
  div.className = 'cmd-entry ' + tool;
  const elapsed = (sessionStart && e.ts) ? (e.ts - sessionStart).toFixed(1) + 's' : '';
  // String() guards against a non-string detail (object/number), which has no substring().
  const detail = esc(String(e.detail || '').substring(0, 180));
  div.innerHTML = `<span class="cmd-time">${elapsed}</span><span class="cmd-tool">${esc(tool)}</span><span class="cmd-detail">${detail}</span>`;
  return div;
}
| function showFinalBanner(entry, problemId, ag) { | |
| const banner = document.getElementById('final-banner'); | |
| const metrics = document.getElementById('final-metrics'); | |
| const r = entry.result||{}; | |
| const acc = normAcc(r.functional_accuracy || 0); | |
| const pars = r.parsimony_bonus || 0; | |
| const eff = r.efficiency_bonus || 0; | |
| const total = r.total || 0; | |
| const incomplete = ag && ag.incomplete; | |
| const cls = incomplete ? 'warn' : acc < 0.5 ? 'fail' : 'ok'; | |
| banner.className = `final-banner visible ${cls}`; | |
| const incFlag = incomplete ? '<span class="incomplete-flag">INCOMPLETE RUN</span>' : ''; | |
| banner.querySelector('h2').innerHTML = `Final Result β ${problemId}${incFlag}`; | |
| function vc(v,lo,hi){return v>=hi?'':v>=lo?' warn':' low'} | |
| metrics.innerHTML = ` | |
| <div class="final-metric"><div class="fm-label">Accuracy</div><div class="fm-value${vc(acc,.5,.8)}">${(acc*100).toFixed(1)}%</div><div class="fm-sub">${r.correct_states??'?'}/${r.total_states??'?'} states</div></div> | |
| <div class="final-metric"><div class="fm-label">Total Score</div><div class="fm-value${vc(total,.5,.8)}">${total.toFixed(4)}</div><div class="fm-sub">acc+pars+eff</div></div> | |
| <div class="final-metric"><div class="fm-label">Parsimony</div><div class="fm-value" style="font-size:20px;color:#4ecdc4">+${pars.toFixed(4)}</div><div class="fm-sub">ΞDL:${r.delta_dl??'?'}</div></div> | |
| <div class="final-metric"><div class="fm-label">Efficiency</div><div class="fm-value" style="font-size:20px;color:#5bc0de">+${eff.toFixed(4)}</div><div class="fm-sub">${r.queries_used??'?'} queries</div></div>`; | |
| } | |
| function showIncompleteBanner(ag) { | |
| const banner = document.getElementById('final-banner'); | |
| const metrics = document.getElementById('final-metrics'); | |
| const bestAcc = ag.bestAccuracy || 0; | |
| banner.className = 'final-banner visible warn'; | |
| banner.querySelector('h2').innerHTML = `Run Stopped Early β ${ag.problemId} <span class="incomplete-flag">INCOMPLETE</span>`; | |
| metrics.innerHTML = ` | |
| <div class="final-metric"><div class="fm-label">Best Accuracy</div><div class="fm-value warn">${bestAcc > 0 ? (bestAcc*100).toFixed(1)+'%' : '--'}</div><div class="fm-sub">from submits</div></div> | |
| <div class="final-metric"><div class="fm-label">Last Score</div><div class="fm-value" style="font-size:20px">${ag.lastScore !== null ? ag.lastScore.toFixed(4) : '--'}</div><div class="fm-sub">total</div></div> | |
| <div class="final-metric"><div class="fm-label">Queries</div><div class="fm-value" style="font-size:20px">${ag.queryCount}</div><div class="fm-sub">before stop</div></div>`; | |
| } | |
| // ββ EVOLVE rendering (alpha_evolve_gpt) βββββββββββββββββββββββββββββββββββ | |
| function processEvolveEntry(run, entry) { | |
| const t = entry.type||''; | |
| if (t==='start') { run.problem = entry.problem||'?'; } | |
| if (t==='gen_start') { run.gen = entry.gen||0; } | |
| if (entry.t !== undefined) run.elapsed = entry.t.toFixed(0)+'s'; | |
| if (t==='variant') { run.variants++; if (entry.acc > run.bestAcc) run.bestAcc=entry.acc; if ((entry.total||0)>run.bestTotal) { run.bestTotal=entry.total||0; if(entry.code) run.bestCode=entry.code; } } | |
| if (t==='gen_end') { if(entry.best_acc>run.bestAcc) run.bestAcc=entry.best_acc; if((entry.best_total||0)>run.bestTotal) run.bestTotal=entry.best_total||0; } | |
| if (t==='done') { run.done=true; run.bestAcc=entry.best_acc||run.bestAcc; run.bestTotal=entry.best_total||run.bestTotal; run.solvedGen=entry.solved_gen||null; if(entry.best_code) run.bestCode=entry.best_code; if(entry.elapsed) run.elapsed=entry.elapsed.toFixed(0)+'s'; } | |
| run.bestAccuracy = run.bestAcc; // kept in sync for model cards | |
| run.events.push(entry); | |
| if (run.events.length > 500) run.events.shift(); | |
| } | |
| function renderEvolveRun(id) { | |
| const run = projects.alpha_evolve_gpt.runs[id]; if (!run) return; | |
| document.getElementById('ev-problem').textContent = run.problem; | |
| document.getElementById('ev-gen').textContent = run.gen; | |
| document.getElementById('ev-best-acc').textContent = run.bestAcc ? (run.bestAcc*100).toFixed(1)+'%' : '--'; | |
| document.getElementById('ev-best-total').textContent = run.bestTotal ? run.bestTotal.toFixed(4) : '--'; | |
| document.getElementById('ev-variants').textContent = run.variants; | |
| document.getElementById('ev-elapsed').textContent = run.elapsed; | |
| document.getElementById('dot').className = 'status-dot '+(run.done?'done':'live'); | |
| const tl = document.getElementById('evolve-timeline'); | |
| tl.innerHTML = ''; | |
| let curGen = null; | |
| for (let i=run.events.length-1; i>=Math.max(0,run.events.length-300); i--) { | |
| const e = run.events[i]; | |
| const t = e.type||''; | |
| if (t==='gen_end' || t==='gen_start') { | |
| const hdr = document.createElement('div'); hdr.className='gen-header'; | |
| if (t==='gen_end') hdr.innerHTML = `Gen ${e.gen} β best acc <b>${((e.best_acc||0)*100).toFixed(1)}%</b> total <b>${(e.best_total||0).toFixed(4)}</b> (${e.n_programs||0} programs)`; | |
| else hdr.textContent = `βΆ Generation ${e.gen} startingβ¦`; | |
| tl.appendChild(hdr); | |
| } else if (t==='variant') { | |
| const d = document.createElement('div'); | |
| const cls = e.acc>=1.0 ? 'best' : e.status==='ok' ? 'ok' : 'error'; | |
| d.className = 'variant-row '+cls; | |
| d.innerHTML = `<span class="variant-score">${(e.acc*100).toFixed(1)}%</span><span class="variant-strategy">${(e.strategy||'').substring(0,100)}</span> <span style="color:#555;font-size:10px">len=${e.code_len||'?'} t=${(e.t||0).toFixed(1)}s</span>`; | |
| tl.appendChild(d); | |
| } else if (t==='done') { | |
| const d = document.createElement('div'); d.className='gen-header'; | |
| d.style.color='#6fdb6f'; | |
| d.innerHTML = run.solvedGen ? `β Solved at generation ${run.solvedGen}! elapsed=${run.elapsed}` : `Done β best acc ${(run.bestAcc*100).toFixed(1)}%`; | |
| tl.appendChild(d); | |
| } else if (t==='start') { | |
| const d = document.createElement('div'); d.className='gen-header'; d.style.color='#888'; | |
| d.textContent = `Evolution started β problem ${e.problem}`; | |
| tl.appendChild(d); | |
| } | |
| } | |
| // Best code | |
| const codeEl = document.getElementById('best-code'); | |
| codeEl.textContent = run.bestCode || 'No code yetβ¦'; | |
| // Final evolve banner | |
| const banner = document.getElementById('evolve-final-banner'); | |
| const metrics = document.getElementById('evolve-final-metrics'); | |
| if (run.done) { | |
| banner.className = 'final-banner visible '+(run.bestAcc>=0.8?'ok':'fail'); | |
| banner.querySelector('h2').textContent = run.solvedGen ? `β Solved gen ${run.solvedGen} β ${run.problem}` : `Evolution complete β ${run.problem}`; | |
| metrics.innerHTML = ` | |
| <div class="final-metric"><div class="fm-label">Best Accuracy</div><div class="fm-value" style="color:#6fdb6f">${(run.bestAcc*100).toFixed(1)}%</div></div> | |
| <div class="final-metric"><div class="fm-label">Best Total</div><div class="fm-value" style="color:#6fdb6f">${run.bestTotal.toFixed(4)}</div></div> | |
| <div class="final-metric"><div class="fm-label">Generations</div><div class="fm-value">${run.gen}</div></div> | |
| <div class="final-metric"><div class="fm-label">Variants</div><div class="fm-value">${run.variants}</div></div> | |
| <div class="final-metric"><div class="fm-label">Elapsed</div><div class="fm-value" style="font-size:18px">${run.elapsed}</div></div>`; | |
| } else { banner.className='final-banner'; } | |
| } | |
| // ββ Memory tab switch βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| function switchMem(tab) { | |
| document.getElementById('mem-I-content').style.display = tab==='I' ? '' : 'none'; | |
| document.getElementById('mem-II-content').style.display = tab==='II' ? '' : 'none'; | |
| document.querySelectorAll('.mem-tab').forEach((b,i) => b.classList.toggle('active', (i===0?'I':'II')===tab)); | |
| } | |
| // ββ Phase rendering βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| const PHASE_ORDER = ['BOOT','OBSERVE','HYPOTHESIZE','BATCH_TEST','ANALYZE','SOLVED','DONE']; | |
| function renderPhases(phases) { | |
| const hist = document.getElementById('phase-history'); | |
| hist.innerHTML = ''; | |
| // Reset all chips | |
| PHASE_ORDER.forEach(p => { | |
| const chip = document.getElementById('chip-' + p); | |
| if (chip) chip.className = 'phase-chip'; | |
| }); | |
| let latestCycle = 0; | |
| let lastPhase = null; | |
| for (const e of phases) { | |
| latestCycle = Math.max(latestCycle, e.cycle||0); | |
| lastPhase = e; | |
| // Phase history entry | |
| const div = document.createElement('div'); | |
| div.className = 'ph-entry ' + (e.status||''); | |
| const mins = Math.floor((e.t||0)/60), secs = Math.floor((e.t||0)%60); | |
| div.innerHTML = ` | |
| <span class="ph-time">${mins>0?mins+'m':''}${secs}s</span> | |
| <span class="ph-cycle">C${e.cycle}</span> | |
| <span class="ph-phase">${e.phase}</span> | |
| <span class="ph-status">${e.status}</span> | |
| <span class="ph-sum">${(e.summary||'').substring(0,80)}</span>`; | |
| hist.insertBefore(div, hist.firstChild); | |
| // Update chip | |
| const chip = document.getElementById('chip-' + e.phase); | |
| if (chip) { | |
| if (e.phase === 'SOLVED' || e.phase === 'DONE') { | |
| chip.className = 'phase-chip solved'; | |
| } else if (e.status === 'started') { | |
| chip.className = 'phase-chip active'; | |
| } else if (e.status === 'forced') { | |
| chip.className = 'phase-chip forced'; | |
| } else if (e.status === 'completed') { | |
| chip.className = 'phase-chip done'; | |
| } | |
| } | |
| } | |
| const badge = document.getElementById('cycle-badge'); | |
| if (badge) badge.textContent = latestCycle > 0 ? `cycle ${latestCycle}` : ''; | |
| } | |
// Escape text for safe insertion via innerHTML.
// Falsy input (null, undefined, '') yields ''; other values are coerced to a string.
// '&' is replaced first so the entities emitted for '<' and '>' are not re-escaped.
function escHtml(s) {
  return String(s || '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
| function renderDoc(elId, content) { | |
| const el = document.getElementById(elId); | |
| if (!el) return; | |
| if (!content || !content.trim()) { | |
| el.innerHTML = '<span class="doc-empty">Not written yet</span>'; | |
| return; | |
| } | |
| // Light markdown-ish highlighting for ## headers | |
| const html = content.split('\n').map(line => { | |
| if (line.startsWith('## ')) return `<b style="color:#7ecfff">${escHtml(line)}</b>`; | |
| if (line.startsWith('### ')) return `<b style="color:#aaa">${escHtml(line)}</b>`; | |
| if (line.startsWith('| ')) return `<span style="color:#888">${escHtml(line)}</span>`; | |
| if (line.match(/^\d+\. /)) return `<span style="color:#c0d0ff">${escHtml(line)}</span>`; | |
| if (line.startsWith('- ')) return `<span style="color:#b0d0b0">${escHtml(line)}</span>`; | |
| return escHtml(line); | |
| }).join('\n'); | |
| // Preserve scroll position across updates | |
| const prevScroll = el.scrollTop; | |
| el.innerHTML = html; | |
| el.scrollTop = prevScroll; | |
| } | |
| async function pollPhasesAndDocs() { | |
| const p = projects[activeProject]; | |
| const isEnvActive = p && (p.type==='env' || (p.type==='mixed' && p.active && !p.runs[p.active])); | |
| if (!isEnvActive) { setTimeout(pollPhasesAndDocs, 2000); return; } | |
| const id = p.active; if (!id) { setTimeout(pollPhasesAndDocs, 2000); return; } | |
| const ag = p.agents[id]; | |
| const srcProj = (ag && ag.sourceProject) || activeProject; | |
| const rawId = id.replace(/^(?:v[12]|lk[12])::/, ''); | |
| try { | |
| const [prResp, planResp, memResp] = await Promise.all([ | |
| fetch(`/api/phases?project=${srcProj}&agent=${encodeURIComponent(rawId)}`), | |
| fetch(`/api/plan?project=${srcProj}&agent=${encodeURIComponent(rawId)}`), | |
| fetch(`/api/memories?project=${srcProj}&agent=${encodeURIComponent(rawId)}`), | |
| ]); | |
| if (prResp.ok) { | |
| const d = await prResp.json(); | |
| renderPhases(d); | |
| // Extract best accuracy from phase summaries (catches stopped-early runs with no submit in env log) | |
| if (ag) { | |
| for (const e of d) { | |
| if (e.phase === 'BATCH_TEST' && e.status === 'completed' && e.summary) { | |
| const m = e.summary.match(/all_time_best=([\d.]+)%/); | |
| if (m) { const acc = parseFloat(m[1]) / 100; if (acc > ag.bestAccuracy) ag.bestAccuracy = acc; } | |
| } | |
| } | |
| if (ag.incomplete) showIncompleteBanner(ag); | |
| rebuildModelGrid(); // refresh cards after accuracy backfill from phases | |
| } | |
| } | |
| if (planResp.ok){ const d = await planResp.json(); renderDoc('plan-content', d.content); } | |
| if (memResp.ok) { | |
| const d = await memResp.json(); | |
| renderDoc('mem-I-content', d.memoryI); | |
| renderDoc('mem-II-content', d.memoryII); | |
| } | |
| } catch(e) {} | |
| setTimeout(pollPhasesAndDocs, 2500); | |
| } | |
| // ββ Polling ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
// Poll /api/state every 2s, register any newly discovered agents/runs in the
// matching project store, refresh the per-project count badges and the global
// "N agent(s) running" status line.
async function discoverAll() {
  try {
    const resp = await fetch('/api/state');
    // A non-OK response skips this round only; the finally block below still
    // schedules the next poll, so a transient server error cannot stop discovery.
    if (!resp.ok) return;
    const data = await resp.json();
    for (const [proj, ids] of Object.entries(data)) {
      const isLeakage = proj === 'leakage_v1' || proj === 'leakage_v2';
      const isBase = proj === 'base_client_v1' || proj === 'base_client_v2' || isLeakage;
      const isEvolveDir = proj === 'alpha_evolve_gpt';
      // Several server-side log directories are shown under one UI project.
      const mergedProj = isBase ? 'base_client' : isEvolveDir ? 'alpha_evolve' : proj;
      // Ids from merged directories get a prefix so they cannot collide.
      const prefix = proj === 'base_client_v1' ? 'v1' : proj === 'base_client_v2' ? 'v2'
        : proj === 'leakage_v1' ? 'lk1' : proj === 'leakage_v2' ? 'lk2' : null;
      const p = projects[mergedProj]; if (!p) continue;
      const store = (p.type === 'evolve' || isEvolveDir) ? p.runs : p.agents;
      for (const rawId of ids) {
        const id = prefix ? `${prefix}::${rawId}` : rawId;
        if (!store[id]) store[id] = isEvolveDir ? newEvolveRun(id) : newEnvAgent(id, proj, isLeakage);
        if (mergedProj === activeProject) addTab(id);
      }
      // Update count badge
      if (isBase) {
        const cnt = document.getElementById('cnt-base_client');
        if (cnt) { const t = Object.keys(projects.base_client.agents).length; cnt.textContent = t ? `(${t})` : ''; }
      } else {
        const cnt2 = document.getElementById('cnt-' + mergedProj);
        if (cnt2) {
          const mp = projects[mergedProj];
          const t = mp.type === 'mixed'
            ? Object.keys(mp.runs).length + Object.keys(mp.agents).length
            : ids.length;
          cnt2.textContent = t ? `(${t})` : '';
        }
      }
    }
    const live = Object.values(projects).flatMap(p => {
      const stores = p.type === 'mixed' ? [p.runs, p.agents] : [p.type === 'evolve' ? p.runs : p.agents];
      return stores.flatMap(s => Object.values(s).filter(x => !x.done));
    });
    document.getElementById('status').innerHTML =
      `<span class="status-dot ${live.length ? 'live' : 'done'}" id="dot"></span>${live.length} agent(s) running`;
  } catch (e) {
    // Best-effort: network or parse errors are retried on the next tick.
  } finally {
    setTimeout(discoverAll, 2000);
  }
}
| async function pollActive() { | |
| const p = projects[activeProject]; | |
| const id = p.active; if (!id) { setTimeout(pollActive,700); return; } | |
| const isEvolveItem = p.type==='evolve' || (p.type==='mixed' && !!p.runs[id]); | |
| if (isEvolveItem) { | |
| const run = p.runs[id]; if (!run || run.done) { setTimeout(pollActive,700); return; } | |
| try { | |
| const r = await fetch(`/api/evolve-log?run=${encodeURIComponent(id)}&offset=${run.offset}`); | |
| if (r.ok) { | |
| const d = await r.json(); | |
| for (const line of (d.lines||[])) { try { processEvolveEntry(run, JSON.parse(line)); } catch(e){} } | |
| run.offset += (d.lines||[]).length; | |
| if (d.done) run.done = true; | |
| } | |
| } catch(e){} | |
| renderEvolveRun(id); | |
| } else { | |
| const ag = p.agents[id]; if (!ag || ag.done) { setTimeout(pollActive,700); return; } | |
| const srcProj = ag.sourceProject || (p.type==='mixed' ? 'alpha_evolve' : activeProject); | |
| const rawId = id.replace(/^(?:v[12]|lk[12])::/, ''); | |
| try { | |
| const r = await fetch(`/api/log?project=${srcProj}&agent=${encodeURIComponent(rawId)}&offset=${ag.envOffset}`); | |
| if (r.ok) { | |
| const d = await r.json(); | |
| for (const line of (d.lines||[])) { try { processEnvEntry(ag, JSON.parse(line)); } catch(e){} } | |
| ag.envOffset += (d.lines||[]).length; | |
| if (d.done) { ag.done = true; if (!ag.finalResult) ag.incomplete = true; } | |
| } | |
| } catch(e){} | |
| try { | |
| const r = await fetch(`/api/cmd-log?project=${srcProj}&agent=${encodeURIComponent(rawId)}&offset=${ag.cmdOffset}`); | |
| if (r.ok) { | |
| const d = await r.json(); | |
| for (const line of (d.lines||[])) { try { ag.cmdEntries.push(JSON.parse(line)); ag.cmdTotal++; } catch(e){} } | |
| ag.cmdOffset += (d.lines||[]).length; | |
| if (ag.cmdEntries.length>500) ag.cmdEntries=ag.cmdEntries.slice(-400); | |
| } | |
| } catch(e){} | |
| try { | |
| const r = await fetch(`/api/claude-usage?project=${srcProj}&agent=${encodeURIComponent(rawId)}`); | |
| if (r.ok) { const d=await r.json(); ag.claudeIn=d.input_tokens||0; ag.claudeOut=d.output_tokens||0; } | |
| } catch(e){} | |
| renderEnvAgent(id); | |
| } | |
| updateTabDot(id); | |
| setTimeout(pollActive, 700); | |
| } | |
| // ββ Poll all projects in background (not just active) βββββββββββββββββββββ | |
// Every 3s, pull new log lines for items that pollActive() does not cover:
// the non-selected env agents of the active project (so their cards show
// data without a click) and every unfinished item of the other projects.
async function pollBackground() {
  // Fetch and apply new env-log lines for one agent.
  // The server is keyed by the agent's source log directory and its raw id,
  // so the UI-only "v1::"/"lk1::" prefix is stripped and item.sourceProject
  // is preferred over the merged UI project name.
  const pollEnvItem = async (id, item, fallbackProj) => {
    const srcProj = item.sourceProject || fallbackProj;
    const rawId = id.replace(/^(?:v[12]|lk[12])::/, '');
    try {
      const r = await fetch(`/api/log?project=${srcProj}&agent=${encodeURIComponent(rawId)}&offset=${item.envOffset}`);
      if (r.ok) {
        const d = await r.json();
        for (const line of (d.lines || [])) { try { processEnvEntry(item, JSON.parse(line)); } catch (e) {} }
        item.envOffset += (d.lines || []).length;
        // A finished log without a final_result entry is a run that stopped early.
        if (d.done) { item.done = true; if (!item.finalResult) item.incomplete = true; }
      }
    } catch (e) {}
  };

  // Fetch and apply new evolve-log lines for one run.
  const pollEvolveItem = async (id, item) => {
    try {
      const r = await fetch(`/api/evolve-log?run=${encodeURIComponent(id)}&offset=${item.offset}`);
      if (r.ok) {
        const d = await r.json();
        for (const line of (d.lines || [])) { try { processEvolveEntry(item, JSON.parse(line)); } catch (e) {} }
        item.offset += (d.lines || []).length;
        if (d.done) item.done = true;
      }
    } catch (e) {}
  };

  // Non-active agents within the active project.
  const ap = projects[activeProject];
  if (ap && ap.type !== 'evolve') {
    for (const [id, item] of Object.entries(ap.agents)) {
      if (id === ap.active || item.done) continue;
      await pollEnvItem(id, item, activeProject);
    }
    rebuildModelGrid();
  }

  // Everything unfinished in the other projects.
  for (const [proj, p] of Object.entries(projects)) {
    if (proj === activeProject) continue;
    const bgStores = p.type === 'mixed'
      ? [[p.runs, true], [p.agents, false]]
      : [[p.type === 'evolve' ? p.runs : p.agents, p.type === 'evolve']];
    for (const [store, isEv] of bgStores) {
      for (const [id, item] of Object.entries(store)) {
        if (item.done) continue;
        if (isEv) await pollEvolveItem(id, item);
        else await pollEnvItem(id, item, proj);
      }
    }
  }
  setTimeout(pollBackground, 3000);
}
| discoverAll(); | |
| setTimeout(pollActive, 600); | |
| setTimeout(pollBackground, 2000); | |
| setTimeout(pollPhasesAndDocs, 1200); | |
| </script> | |
| </body> | |
| </html>""" | |
# ─────────────────────────────────────────────────────────────────────────────
# Backend helpers
# ─────────────────────────────────────────────────────────────────────────────
def discover_agents(project: str) -> list[str]:
    """Return the sorted agent/run IDs found under a project's log directory.

    An unknown project, or one whose directory does not exist, yields ``[]``.

    * ``alpha_evolve_gpt``: every subdirectory holding ``evolve_log.jsonl``
      is a run, identified by the directory name.
    * any other project: a non-hidden subdirectory holding at least one
      ``*.jsonl`` file is an agent. A subdirectory with none is treated as a
      run directory and its own children are checked instead, producing
      ``"<run>/<agent>"`` IDs.
    * if nothing was found but the project directory itself holds a
      ``*.jsonl`` file, the single ID ``"default"`` is returned.
    """
    base = LOG_DIRS.get(project)
    if not (base and base.exists()):
        return []

    children = sorted(base.iterdir())

    if project == "alpha_evolve_gpt":
        return [
            child.name
            for child in children
            if child.is_dir() and (child / "evolve_log.jsonl").exists()
        ]

    # Remaining projects share the env-log layout.
    found: list[str] = []
    for child in children:
        if child.name.startswith(".") or not child.is_dir():
            continue
        if any(child.glob("*.jsonl")):
            found.append(child.name)
            continue
        # Nested layout: run_dir/agent_dir
        found.extend(
            f"{child.name}/{grandchild.name}"
            for grandchild in sorted(child.iterdir())
            if grandchild.is_dir() and any(grandchild.glob("*.jsonl"))
        )

    # Fallback: logs written straight into the project directory.
    if not found and any(base.glob("*.jsonl")):
        found.append("default")
    return found
| def resolve_env_log(project: str, agent_id: str) -> str | None: | |
| """Find the main env JSONL log for an env-type agent.""" | |
| d = LOG_DIRS[project] | |
| if agent_id == "default": | |
| agent_dir = d | |
| else: | |
| agent_dir = d / agent_id | |
| pointer = agent_dir / "LATEST.txt" | |
| if pointer.exists(): | |
| fname = Path(pointer.read_text().strip()).name | |
| candidate = agent_dir / fname | |
| if candidate.exists(): | |
| return str(candidate) | |
| skip = {"agent_commands.jsonl", "gpt_usage.jsonl"} | |
| logs = sorted(agent_dir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True) | |
| for log in logs: | |
| if log.name not in skip: | |
| return str(log) | |
| return None | |
| def resolve_cmd_log(project: str, agent_id: str) -> str | None: | |
| d = LOG_DIRS[project] | |
| agent_dir = d if agent_id == "default" else d / agent_id | |
| p = agent_dir / "agent_commands.jsonl" | |
| return str(p) if p.exists() else None | |
| def resolve_evolve_log(run_id: str) -> str | None: | |
| p = LOG_DIRS["alpha_evolve_gpt"] / run_id / "evolve_log.jsonl" | |
| return str(p) if p.exists() else None | |
# Result of the last successful `docker ps`, and when it was last attempted.
_live_containers_cache: set[str] = set()
_live_containers_ts: float = 0.0


def get_live_containers() -> set[str]:
    """Return the names of the running Docker containers, cached for 3 seconds.

    ``docker ps`` is invoked at most once per 3-second window. If the command
    fails for any reason the previous result is kept, and the window is still
    restarted so a failing command is not retried on every call.
    """
    global _live_containers_cache, _live_containers_ts

    timestamp = time.time()
    cache_is_fresh = timestamp - _live_containers_ts < 3.0
    if not cache_is_fresh:
        try:
            raw = subprocess.check_output(
                ["docker", "ps", "--format", "{{.Names}}"],
                stderr=subprocess.DEVNULL,
                timeout=5,
            )
            # One container name per output line; empty output gives an empty set.
            _live_containers_cache = set(raw.decode().strip().splitlines())
        except Exception:
            pass  # keep stale cache on error
        _live_containers_ts = timestamp
    return _live_containers_cache
def is_container_running(agent_id: str) -> bool:
    """Tell whether the Docker container belonging to ``agent_id`` is running.

    The container name is ``agent-`` followed by the agent ID with every
    ``/`` replaced by ``-``; it is looked up in the set returned by
    ``get_live_containers()``.
    """
    expected_name = "agent-" + agent_id.replace("/", "-")
    return expected_name in get_live_containers()
| def read_log_lines(fpath: str | None, offset: int, agent_id: str = "") -> tuple[list[str], bool]: | |
| if not fpath or not os.path.exists(fpath): | |
| return [], False | |
| with open(fpath, "r", encoding="utf-8", errors="replace") as f: | |
| all_lines = f.readlines() | |
| lines = all_lines[offset:] | |
| # Done if log says so, OR if the Docker container is no longer running | |
| log_done = bool(all_lines) and ('"final_result"' in all_lines[-1] or '"done"' in all_lines[-1]) | |
| container_gone = bool(agent_id) and not is_container_running(agent_id) | |
| done = log_done or container_gone | |
| return [l.strip() for l in lines if l.strip()], done | |
def read_phases(project: str, agent_id: str) -> list:
    """Return the parsed phase events from an agent's ``phases.jsonl``.

    Args:
        project: Key into ``LOG_DIRS``.
        agent_id: Agent directory relative to the project log directory, or
            ``"default"`` for the project directory itself.

    Returns:
        One dict per valid line, in file order. Blank lines, lines that are
        not valid JSON and JSON values that are not objects are skipped. A
        missing or unreadable file, or an ``agent_id`` pointing outside the
        project log directory, gives ``[]``.

    Raises:
        KeyError: If ``project`` is not a key of ``LOG_DIRS``.
    """
    root = LOG_DIRS[project]
    agent_dir = root if agent_id == "default" else root / agent_id
    # agent_id comes from the request query string: refuse values that
    # resolve outside the project's log directory.
    if not agent_dir.resolve().is_relative_to(root.resolve()):
        return []

    entries = []
    try:
        with open(agent_dir / "phases.jsonl", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    event = json.loads(line)
                except json.JSONDecodeError:
                    continue  # e.g. a line still being written
                # The dashboard reads fields off every event, so only JSON
                # objects are passed on.
                if isinstance(event, dict):
                    entries.append(event)
    except OSError:
        pass  # missing/unreadable file: report what was read so far
    return entries
def read_doc(project: str, agent_id: str, filename: str) -> str:
    """Read a synced doc (PLAN.md, memoryI/II) from the agent's log dir.

    Args:
        project: Key into ``LOG_DIRS``.
        agent_id: Agent directory relative to the project log directory, or
            ``"default"`` for the project directory itself.
        filename: Name of the document inside the agent directory.

    Returns:
        The file content decoded as UTF-8 (undecodable bytes replaced), or
        ``""`` when the file is missing or unreadable, or when the resulting
        path points outside the project log directory.

    Raises:
        KeyError: If ``project`` is not a key of ``LOG_DIRS``.
    """
    root = LOG_DIRS[project]
    agent_dir = root if agent_id == "default" else root / agent_id
    doc = agent_dir / filename
    # agent_id comes from the request query string: refuse paths that
    # resolve outside the project's log directory.
    if not doc.resolve().is_relative_to(root.resolve()):
        return ""
    try:
        return doc.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
def _token_count(value) -> int:
    """Return *value* if it is a plain integer token count, else 0."""
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    return 0


def parse_claude_usage(project: str, agent_id: str) -> dict:
    """Sum the token usage recorded in an agent's ``claude_logs/*.jsonl`` files.

    Only JSON objects whose ``"type"`` is ``"assistant"`` are counted; their
    usage is read from ``entry["message"]["usage"]``. The input total adds
    ``input_tokens``, ``cache_creation_input_tokens`` and
    ``cache_read_input_tokens``; the output total adds ``output_tokens``.

    Lines that are not valid JSON, are not JSON objects, or carry a
    malformed ``message``/``usage`` are skipped rather than aborting the
    scan, and unreadable files are ignored.

    Args:
        project: Key into ``LOG_DIRS``.
        agent_id: Agent sub-directory name, or ``"default"`` for the project
            directory itself.

    Returns:
        ``{"input_tokens": int, "output_tokens": int}``; both zero when the
        agent has no ``claude_logs`` directory.

    Raises:
        KeyError: If *project* is not a key of ``LOG_DIRS``.
    """
    d = LOG_DIRS[project]
    agent_dir = d if agent_id == "default" else d / agent_id
    claude_dir = agent_dir / "claude_logs"
    result = {"input_tokens": 0, "output_tokens": 0}
    if not claude_dir.is_dir():
        return result

    input_keys = ("input_tokens", "cache_creation_input_tokens", "cache_read_input_tokens")
    for fpath in claude_dir.glob("*.jsonl"):
        try:
            with open(fpath, encoding="utf-8", errors="replace") as f:
                for line in f:
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # A line can be valid JSON without being an object
                    # (e.g. a bare string or list); those carry no usage.
                    if not isinstance(entry, dict) or entry.get("type") != "assistant":
                        continue
                    message = entry.get("message")
                    usage = message.get("usage") if isinstance(message, dict) else None
                    if not isinstance(usage, dict):
                        continue
                    for key in input_keys:
                        result["input_tokens"] += _token_count(usage.get(key))
                    result["output_tokens"] += _token_count(usage.get("output_tokens"))
        except OSError:
            # Best effort: an unreadable log file just contributes nothing.
            pass
    return result
# ──────────────────────────────────────────────────────────────────────────────
# HTTP handler
# ──────────────────────────────────────────────────────────────────────────────
| class DashboardHandler(SimpleHTTPRequestHandler): | |
| def do_GET(self): | |
| parsed = urlparse(self.path) | |
| qs = parse_qs(parsed.query) | |
| if parsed.path in ("/", "/index.html"): | |
| self._html(DASHBOARD_HTML) | |
| elif parsed.path == "/api/state": | |
| state = {proj: discover_agents(proj) for proj in LOG_DIRS} | |
| self._json(state) | |
| elif parsed.path == "/api/log": | |
| project = qs.get("project", [None])[0] | |
| agent = qs.get("agent", [None])[0] | |
| offset = int(qs.get("offset", [0])[0]) | |
| if project and agent: | |
| fpath = resolve_env_log(project, agent) | |
| lines, done = read_log_lines(fpath, offset, agent_id=agent) | |
| self._json({"lines": lines, "done": done}) | |
| else: | |
| self._json({"lines": [], "done": False}) | |
| elif parsed.path == "/api/cmd-log": | |
| project = qs.get("project", [None])[0] | |
| agent = qs.get("agent", [None])[0] | |
| offset = int(qs.get("offset", [0])[0]) | |
| if project and agent: | |
| fpath = resolve_cmd_log(project, agent) | |
| lines, _ = read_log_lines(fpath, offset) | |
| self._json({"lines": lines}) | |
| else: | |
| self._json({"lines": []}) | |
| elif parsed.path == "/api/evolve-log": | |
| run = qs.get("run", [None])[0] | |
| offset = int(qs.get("offset", [0])[0]) | |
| if run: | |
| fpath = resolve_evolve_log(run) | |
| lines, done = read_log_lines(fpath, offset, agent_id=run) | |
| self._json({"lines": lines, "done": done}) | |
| else: | |
| self._json({"lines": [], "done": False}) | |
| elif parsed.path == "/api/claude-usage": | |
| project = qs.get("project", [None])[0] | |
| agent = qs.get("agent", [None])[0] | |
| if project and agent: | |
| self._json(parse_claude_usage(project, agent)) | |
| else: | |
| self._json({"input_tokens": 0, "output_tokens": 0}) | |
| elif parsed.path == "/api/phases": | |
| project = qs.get("project", [None])[0] | |
| agent = qs.get("agent", [None])[0] | |
| if project and agent: | |
| self._json(read_phases(project, agent)) | |
| else: | |
| self._json([]) | |
| elif parsed.path == "/api/plan": | |
| project = qs.get("project", [None])[0] | |
| agent = qs.get("agent", [None])[0] | |
| if project and agent: | |
| self._json({"content": read_doc(project, agent, "PLAN.md")}) | |
| else: | |
| self._json({"content": ""}) | |
| elif parsed.path == "/api/memories": | |
| project = qs.get("project", [None])[0] | |
| agent = qs.get("agent", [None])[0] | |
| if project and agent: | |
| self._json({ | |
| "memoryI": read_doc(project, agent, "memoryI_small_tests.md"), | |
| "memoryII": read_doc(project, agent, "memoryII_rules_tested.md"), | |
| }) | |
| else: | |
| self._json({"memoryI": "", "memoryII": ""}) | |
| else: | |
| self.send_error(404) | |
| def _html(self, html: str): | |
| b = html.encode() | |
| self.send_response(200) | |
| self.send_header("Content-Type", "text/html; charset=utf-8") | |
| self.send_header("Content-Length", len(b)) | |
| self.end_headers() | |
| self.wfile.write(b) | |
| def _json(self, obj): | |
| b = json.dumps(obj).encode() | |
| self.send_response(200) | |
| self.send_header("Content-Type", "application/json") | |
| self.send_header("Access-Control-Allow-Origin", "*") | |
| self.send_header("Content-Length", len(b)) | |
| self.end_headers() | |
| self.wfile.write(b) | |
| def log_message(self, *_): | |
| pass | |
# ──────────────────────────────────────────────────────────────────────────────
# Entry point
# ──────────────────────────────────────────────────────────────────────────────
def main():
    """Parse the command line, start the dashboard server, and serve until Ctrl-C.

    Options:
        --port        TCP port to listen on (default: the module-level PORT).
        --host        Interface to bind (default: 127.0.0.1, loopback only).
        --no-browser  Do not open the dashboard in a web browser on startup.

    The chosen port is written back to the module-level ``PORT``. The
    listening socket is closed on every exit path, not only after Ctrl-C.
    """
    global PORT
    import argparse

    parser = argparse.ArgumentParser(description="Unified discovery agent dashboard")
    parser.add_argument("--port", type=int, default=PORT)
    parser.add_argument("--host", default="127.0.0.1",
                        help="interface to bind (default: 127.0.0.1, loopback only)")
    parser.add_argument("--no-browser", action="store_true")
    args = parser.parse_args()
    PORT = args.port

    # A wildcard bind address is not a URL a browser can open, so the
    # printed/opened link falls back to loopback in that case.
    url_host = "127.0.0.1" if args.host in ("", "0.0.0.0") else args.host
    url = f"http://{url_host}:{PORT}"

    print(f"Dashboard: {url}")
    print("Watching:")
    for name, path in LOG_DIRS.items():
        print(f" {name:20s} {path}")

    # The context manager calls server_close() even if serve_forever()
    # exits with something other than KeyboardInterrupt.
    with HTTPServer((args.host, PORT), DashboardHandler) as server:
        if not args.no_browser:
            webbrowser.open(url)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("\nDashboard stopped.")
# Start the dashboard only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()