v121rc_exp1 / generate_html.py
Linksome's picture
Add files using upload-large-folder tool
ac94d57 verified
#!/usr/bin/env python3
import json
import re
from pathlib import Path
from typing import Any, Dict, List, Tuple, Optional
# Root of the experiment tree; result files are looked up under ROOT/<config>/<dataset dir>.
ROOT = Path("/workspace/v121rc_exp1")

# Destination of the generated standalone report.
OUT_HTML = ROOT.joinpath("heatmap.html")

# Single-letter config names; they are emitted as the heatmap column labels.
CONFIGS = ["A", "B", "C", "D", "E", "F", "G", "H", "I"]

# Dataset display name -> name of its results directory inside each config.
DATASETS = dict(
    PandaEval12_1="PandaEval12_1_results",
    PandaEval12_2="PandaEval12_2_results",
)

# Trailing "_<kind><index>" tag of a result-file stem, where kind is P, R or A.
TEMPLATE_SUFFIX_RE = re.compile(r"_(?P<kind>[PRA])(?P<idx>\d+)$")
def template_sort_key(t: str) -> int:
    """Return the integer used to order template names in the report.

    "BASE" sorts first (0). Names of the form P<n>, R<n> and A<n> sort by
    family in that order (offsets 100, 200, 300) and then by <n>. Any other
    name gets 9999.
    """
    if t == "BASE":
        return 0

    parsed = re.match(r"([PRA])(\d+)$", t)
    if parsed is None:
        return 9999

    family, number = parsed.groups()
    family_offset = {"P": 100, "R": 200, "A": 300}
    # The 1000 fallback is purely defensive: the regex only admits P, R and A.
    return family_offset.get(family, 1000) + int(number)
def parse_template_from_stem(stem: str) -> str:
    """Derive the template name from a result-file stem.

    A stem ending in "_<P|R|A><digits>" yields that tag with the number
    normalised through int() (so "_P07" becomes "P7"). A stem without such a
    suffix is reported as "BASE".
    """
    found = TEMPLATE_SUFFIX_RE.search(stem)
    if found is None:
        return "BASE"
    kind = found.group("kind")
    number = int(found.group("idx"))
    return kind + str(number)
def safe_load_json(path: Path) -> Optional[Any]:
    """Best-effort JSON loader: return the parsed content of *path*, or None.

    None is returned when the file cannot be read (missing, a directory,
    permission denied), is not valid UTF-8, or does not contain valid JSON.
    Other exceptions, such as an AttributeError from passing something that
    is not a Path, are allowed to propagate so that programming errors are
    not silently reported as "no data".
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: unreadable path. ValueError: covers both UnicodeDecodeError
        # and json.JSONDecodeError. RecursionError: pathologically nested JSON.
        return None
def infer_steps(entries: List[Dict[str, Any]]) -> List[int]:
    """Return the sorted, de-duplicated step numbers found in *entries*.

    Every dict entry is scanned for keys of the form "step_<int>". Entries
    that are not dicts, and keys whose suffix is not an integer, are ignored.
    """
    found = set()
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        for name in entry:
            if not name.startswith("step_"):
                continue
            # Everything after the first underscore must parse as an int.
            _, _, suffix = name.partition("_")
            try:
                found.add(int(suffix))
            except ValueError:
                continue
    return sorted(found)
def compute_acc_invalid(entries: List[Dict[str, Any]], step: int) -> Tuple[float, float]:
    """Compute mean accuracy and invalid-label rate for one step.

    For each entry the payload stored under "step_<step>" is inspected:
    its "accuracy" is added to the accuracy total, and the entry counts as
    valid when its "label" is exactly "Yes" or "No". Both results are
    normalised by the total number of entries, so an entry with no usable
    payload contributes zero accuracy and counts as invalid.

    Args:
        entries: Parsed result records; non-dict items are tolerated.
        step: Step number whose "step_<step>" payload is evaluated.

    Returns:
        (accuracy, invalid_rate). Both are NaN when *entries* is empty.
    """
    key = f"step_{step}"
    n = len(entries)
    if n == 0:
        return float("nan"), float("nan")

    acc_sum = 0.0
    valid_cnt = 0
    for e in entries:
        v = e.get(key) if isinstance(e, dict) else None
        if not isinstance(v, dict):
            # Missing, null or malformed payload (e.g. a bare string or list):
            # treat it like an absent step instead of crashing on v.get().
            continue
        if v.get("label", "") in ("Yes", "No"):
            valid_cnt += 1
        try:
            acc_sum += float(v.get("accuracy", 0.0))
        except (TypeError, ValueError, OverflowError):
            # An accuracy that cannot be converted to float counts as 0.
            pass

    acc = acc_sum / n
    invalid_rate = 1.0 - (valid_cnt / n)
    return acc, invalid_rate
def collect_dataset(ds_dirname: str) -> Dict[str, Any]:
    """Aggregate per-step metrics for one dataset across all configs.

    Every "*_results.json" file below ROOT/<config>/<ds_dirname> is read. For
    each step found in a file, its accuracy and invalid rate are computed, and
    values that share the same (step, template, config) cell are averaged
    over files.

    Returns:
        A dict with
          "templates": template names ordered by template_sort_key,
          "steps":     sorted step numbers,
          "by_step":   step -> {"accuracy": matrix, "invalid_rate": matrix},
        where each matrix has one row per template and one column per entry
        of CONFIGS, with None in cells that have no data.
    """
    # (step, template, config) -> [running sum, number of contributing files]
    acc_totals: Dict[Tuple[int, str, str], List[float]] = {}
    inv_totals: Dict[Tuple[int, str, str], List[float]] = {}
    seen_templates = set()
    seen_steps = set()
    suffix = "_results.json"

    for cfg in CONFIGS:
        cfg_dir = ROOT / cfg / ds_dirname
        if not cfg_dir.exists():
            continue
        for result_path in cfg_dir.rglob("*_results.json"):
            template = parse_template_from_stem(result_path.name[: -len(suffix)])
            # The template is recorded before the file is validated, so an
            # unreadable file still produces an (empty) row.
            seen_templates.add(template)
            records = safe_load_json(result_path)
            if not isinstance(records, list):
                continue
            for step in infer_steps(records):
                seen_steps.add(step)
                acc, inv = compute_acc_invalid(records, step)
                cell = (step, template, cfg)
                for value, totals in ((acc, acc_totals), (inv, inv_totals)):
                    if value != value:  # NaN never equals itself; skip it
                        continue
                    bucket = totals.setdefault(cell, [0.0, 0])
                    bucket[0] += value
                    bucket[1] += 1

    templates = sorted(seen_templates, key=template_sort_key)
    steps = sorted(seen_steps)

    def mean_matrix(totals, step: int) -> List[List[Optional[float]]]:
        # Template x config grid of averages for one step; None where empty.
        grid: List[List[Optional[float]]] = []
        for t in templates:
            row: List[Optional[float]] = []
            for cfg in CONFIGS:
                bucket = totals.get((step, t, cfg))
                row.append(None if bucket is None else bucket[0] / bucket[1])
            grid.append(row)
        return grid

    by_step: Dict[int, Dict[str, List[List[Optional[float]]]]] = {
        step: {
            "accuracy": mean_matrix(acc_totals, step),
            "invalid_rate": mean_matrix(inv_totals, step),
        }
        for step in steps
    }
    return {"templates": templates, "steps": steps, "by_step": by_step}
def main() -> None:
    """Collect every dataset in DATASETS and write the heatmap report.

    The per-dataset results are re-indexed onto one shared template axis and
    one shared step axis, so that all panels have the same rows and the
    single slider can drive them together. The resulting payload is embedded
    as JSON in a standalone HTML page written to OUT_HTML.

    If no step data was found at all, the page is still written; its script
    then shows "no data" and disables the slider instead of failing.
    """
    collected = {}
    all_steps = set()
    all_templates = set()
    for ds_name, ds_dir in DATASETS.items():
        collected[ds_name] = collect_dataset(ds_dir)
        all_steps.update(collected[ds_name]["steps"])
        all_templates.update(collected[ds_name]["templates"])
    templates = sorted(all_templates, key=template_sort_key)
    steps = sorted(all_steps)

    datasets_payload = {}
    for ds_name, ds in collected.items():
        # Row index of each template inside this dataset's own matrices.
        ds_template_to_idx = {t: i for i, t in enumerate(ds["templates"])}
        by_step_unified = {}
        for step in steps:
            acc_mat = [[None for _ in CONFIGS] for _ in templates]
            inv_mat = [[None for _ in CONFIGS] for _ in templates]
            old = ds["by_step"].get(step)
            if old is not None:
                for ti, t in enumerate(templates):
                    oti = ds_template_to_idx.get(t)
                    if oti is None:
                        # Template absent from this dataset: keep the None row.
                        continue
                    acc_mat[ti] = list(old["accuracy"][oti])
                    inv_mat[ti] = list(old["invalid_rate"][oti])
            by_step_unified[step] = {"accuracy": acc_mat, "invalid_rate": inv_mat}
        datasets_payload[ds_name] = {"by_step": by_step_unified}

    payload = {
        "configs": CONFIGS,
        "templates": templates,
        "steps": steps,
        "datasets": datasets_payload,
    }

    # NOTE: this is an f-string, so literal braces in the CSS/JS are doubled;
    # the only interpolation is the JSON payload assigned to DATA.
    html = f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<title>PandaEval12 Heatmaps</title>
<style>
:root {{
--bg: #0b0f14;
--card: rgba(255,255,255,0.06);
--card2: rgba(255,255,255,0.08);
--stroke: rgba(255,255,255,0.10);
--text: rgba(255,255,255,0.92);
--muted: rgba(255,255,255,0.70);
--muted2: rgba(255,255,255,0.55);
--shadow: 0 10px 30px rgba(0,0,0,0.35);
--radius: 16px;
}}
body {{
margin: 0;
background: radial-gradient(1000px 500px at 15% 0%, rgba(120,80,255,0.18), transparent 60%),
radial-gradient(900px 500px at 85% 0%, rgba(40,200,160,0.16), transparent 65%),
var(--bg);
color: var(--text);
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial;
}}
/* Wider, slightly tighter vertically */
.wrap {{ max-width: 1560px; margin: 16px auto; padding: 0 14px 16px; }}
.header {{
display:flex; align-items:flex-end; justify-content:space-between; gap: 16px; flex-wrap:wrap;
margin-bottom: 10px;
}}
h1 {{ margin: 0; font-size: 16px; letter-spacing: 0.2px; }}
.sub {{ color: var(--muted2); font-size: 11px; margin-top: 2px; }}
.chips {{ display:flex; gap: 8px; flex-wrap:wrap; }}
.chip {{
font-size: 10px; color: var(--muted); border: 1px solid var(--stroke);
padding: 5px 9px; border-radius: 999px; background: rgba(255,255,255,0.04);
}}
.grid {{
display:grid;
grid-template-columns: 1fr 1fr;
gap: 12px;
}}
.card {{
background: linear-gradient(180deg, rgba(255,255,255,0.08), rgba(255,255,255,0.05));
border: 1px solid var(--stroke);
border-radius: var(--radius);
box-shadow: var(--shadow);
overflow: hidden;
}}
.cardHead {{
display:flex; justify-content:space-between; align-items:center;
padding: 10px 12px;
border-bottom: 1px solid rgba(255,255,255,0.08);
}}
.title {{
font-size: 13px;
color: var(--text);
display:flex; gap:10px; align-items:center;
}}
.badge {{
font-size: 10px; color: rgba(255,255,255,0.72);
border: 1px solid rgba(255,255,255,0.12);
padding: 3px 8px; border-radius: 999px;
background: rgba(0,0,0,0.18);
}}
.body {{
padding: 8px 10px 10px;
}}
.svgWrap {{
width: 100%;
overflow: auto;
border-radius: 12px;
background: rgba(0,0,0,0.18);
border: 1px solid rgba(255,255,255,0.08);
}}
.controls {{
margin-top: 10px;
background: rgba(255,255,255,0.06);
border: 1px solid var(--stroke);
border-radius: var(--radius);
padding: 10px 12px;
box-shadow: var(--shadow);
display:flex; gap: 12px; align-items:center; flex-wrap:wrap;
}}
.mono {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
.lbl {{ font-size: 12px; color: var(--muted); }}
input[type="range"] {{
width: min(980px, 100%);
accent-color: rgba(120,80,255,0.9);
}}
.right {{ margin-left:auto; color: var(--muted2); font-size: 11px; }}
/* Tooltip */
.tip {{
position: fixed;
pointer-events: none;
background: rgba(10,14,20,0.92);
border: 1px solid rgba(255,255,255,0.14);
color: rgba(255,255,255,0.92);
padding: 8px 10px;
border-radius: 12px;
box-shadow: 0 14px 40px rgba(0,0,0,0.40);
font-size: 12px;
transform: translate(10px, 10px);
z-index: 10;
max-width: 260px;
display: none;
}}
.tip .k {{ color: rgba(255,255,255,0.65); }}
.tip .v {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; }}
@media (max-width: 980px) {{
.grid {{ grid-template-columns: 1fr; }}
}}
</style>
</head>
<body>
<div class="wrap">
<div class="header">
<div>
<h1>PandaEval12 — Template × Config Heatmaps</h1>
<div class="sub">Standalone HTML (embedded data). Slider sweeps checkpoint steps.</div>
</div>
<div class="chips">
<div class="chip">Top: Accuracy</div>
<div class="chip">Bottom: Invalid rate</div>
<div class="chip">Rows: BASE, P*, R*, A*</div>
<div class="chip">Cols: A..I</div>
</div>
</div>
<div class="grid">
<div class="card">
<div class="cardHead">
<div class="title">Accuracy <span class="badge">PandaEval12_1</span></div>
<div class="lbl mono" id="acc1Legend"></div>
</div>
<div class="body"><div class="svgWrap" id="acc1"></div></div>
</div>
<div class="card">
<div class="cardHead">
<div class="title">Accuracy <span class="badge">PandaEval12_2</span></div>
<div class="lbl mono" id="acc2Legend"></div>
</div>
<div class="body"><div class="svgWrap" id="acc2"></div></div>
</div>
<div class="card">
<div class="cardHead">
<div class="title">Invalid rate <span class="badge">PandaEval12_1</span></div>
<div class="lbl mono" id="inv1Legend"></div>
</div>
<div class="body"><div class="svgWrap" id="inv1"></div></div>
</div>
<div class="card">
<div class="cardHead">
<div class="title">Invalid rate <span class="badge">PandaEval12_2</span></div>
<div class="lbl mono" id="inv2Legend"></div>
</div>
<div class="body"><div class="svgWrap" id="inv2"></div></div>
</div>
</div>
<div class="controls">
<div class="lbl"><b>Step</b>: <span id="stepLabel" class="mono">—</span></div>
<input id="stepSlider" type="range" min="0" max="0" value="0" step="1"/>
<div class="right mono" id="stepRange">—</div>
</div>
</div>
<div class="tip" id="tip"></div>
<script>
const DATA = {json.dumps(payload)};
// --- viridis-like palette ---
const STOPS = [
[0.00,[68,1,84]],
[0.25,[59,82,139]],
[0.50,[33,145,140]],
[0.75,[94,201,98]],
[1.00,[253,231,37]],
];
const clamp=(x,a,b)=>Math.max(a,Math.min(b,x));
function color(t){{
t=clamp(t,0,1);
for(let i=0;i<STOPS.length-1;i++){{
const [ta,ca]=STOPS[i], [tb,cb]=STOPS[i+1];
if(t>=ta && t<=tb){{
const u=(t-ta)/((tb-ta)||1e-9);
const r=Math.round(ca[0]+(cb[0]-ca[0])*u);
const g=Math.round(ca[1]+(cb[1]-ca[1])*u);
const b=Math.round(ca[2]+(cb[2]-ca[2])*u);
return `rgb(${{r}},${{g}},${{b}})`;
}}
}}
const last=STOPS[STOPS.length-1][1];
return `rgb(${{last[0]}},${{last[1]}},${{last[2]}})`;
}}
function svgHeatmap({{
titleLegendEl,
containerId,
matrix,
rowLabels,
colLabels,
vmin,
vmax,
formatFn,
panelName,
}}){{
const container=document.getElementById(containerId);
container.innerHTML="";
// Layout (wider + shorter)
const cellW=60, cellH=22;
const padL=80, padT=10, padR=84, padB=56;
const rows=rowLabels.length, cols=colLabels.length;
const w=padL + cols*cellW + padR;
const h=padT + rows*cellH + padB;
titleLegendEl.textContent = `${{vmin.toFixed(2)}} … ${{vmax.toFixed(2)}}`;
// Build SVG
const svgNS="http://www.w3.org/2000/svg";
const svg=document.createElementNS(svgNS,"svg");
svg.setAttribute("width", w);
svg.setAttribute("height", h);
svg.setAttribute("viewBox", `0 0 ${{w}} ${{h}}`);
// Background
const bg=document.createElementNS(svgNS,"rect");
bg.setAttribute("x",0); bg.setAttribute("y",0);
bg.setAttribute("width",w); bg.setAttribute("height",h);
bg.setAttribute("fill","rgba(0,0,0,0)");
svg.appendChild(bg);
// Grid cells
for(let r=0;r<rows;r++){{
for(let c=0;c<cols;c++){{
const v=matrix[r][c];
const x=padL + c*cellW;
const y=padT + r*cellH;
const rect=document.createElementNS(svgNS,"rect");
rect.setAttribute("x",x);
rect.setAttribute("y",y);
rect.setAttribute("width",cellW);
rect.setAttribute("height",cellH);
if(Number.isFinite(v)){{
const t=(v-vmin)/((vmax-vmin)||1e-9);
rect.setAttribute("fill", color(t));
}} else {{
rect.setAttribute("fill","rgba(255,255,255,0.08)");
}}
rect.setAttribute("stroke","rgba(255,255,255,0.10)");
rect.setAttribute("stroke-width","1");
// tooltip
rect.dataset.panel = panelName;
rect.dataset.row = rowLabels[r];
rect.dataset.col = colLabels[c];
rect.dataset.val = Number.isFinite(v) ? String(v) : "NA";
svg.appendChild(rect);
// value text
if(Number.isFinite(v)){{
const txt=document.createElementNS(svgNS,"text");
txt.setAttribute("x", x+6);
txt.setAttribute("y", y+15);
txt.setAttribute("fill","rgba(0,0,0,0.85)");
txt.setAttribute("font-size","11");
txt.setAttribute("font-family","ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace");
txt.textContent = formatFn(v);
svg.appendChild(txt);
}}
}}
}}
// Row labels
for(let r=0;r<rows;r++){{
const t=document.createElementNS(svgNS,"text");
t.setAttribute("x", 12);
t.setAttribute("y", padT + r*cellH + 15);
t.setAttribute("fill","rgba(255,255,255,0.82)");
t.setAttribute("font-size","11");
t.setAttribute("font-family","ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace");
t.textContent = rowLabels[r];
svg.appendChild(t);
}}
// Col labels (rotated 30°)
for(let c=0;c<cols;c++){{
const t=document.createElementNS(svgNS,"text");
const x = padL + c*cellW + 18;
const y = padT + rows*cellH + 22;
t.setAttribute("fill","rgba(255,255,255,0.82)");
t.setAttribute("font-size","11");
t.setAttribute("font-family","ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace");
t.setAttribute("transform", `translate(${{x}},${{y}}) rotate(-30)`);
t.textContent = colLabels[c];
svg.appendChild(t);
}}
// Colorbar
const cbX = padL + cols*cellW + 28;
const cbY = padT;
const cbW = 14;
const cbH = rows*cellH;
// Gradient def
const defs=document.createElementNS(svgNS,"defs");
const grad=document.createElementNS(svgNS,"linearGradient");
grad.setAttribute("id", `grad_${{containerId}}`);
grad.setAttribute("x1","0"); grad.setAttribute("y1","1");
grad.setAttribute("x2","0"); grad.setAttribute("y2","0");
for(const [tt,cc] of STOPS){{
const stop=document.createElementNS(svgNS,"stop");
stop.setAttribute("offset", `${{Math.round(tt*100)}}%`);
stop.setAttribute("stop-color", `rgb(${{cc[0]}},${{cc[1]}},${{cc[2]}})`);
grad.appendChild(stop);
}}
defs.appendChild(grad);
svg.appendChild(defs);
const cb=document.createElementNS(svgNS,"rect");
cb.setAttribute("x", cbX);
cb.setAttribute("y", cbY);
cb.setAttribute("width", cbW);
cb.setAttribute("height", cbH);
cb.setAttribute("fill", `url(#grad_${{containerId}})`);
cb.setAttribute("stroke","rgba(255,255,255,0.12)");
svg.appendChild(cb);
const cbTop=document.createElementNS(svgNS,"text");
cbTop.setAttribute("x", cbX + cbW + 8);
cbTop.setAttribute("y", cbY + 10);
cbTop.setAttribute("fill","rgba(255,255,255,0.72)");
cbTop.setAttribute("font-size","11");
cbTop.setAttribute("font-family","ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace");
cbTop.textContent = vmax.toFixed(2);
svg.appendChild(cbTop);
const cbBot=document.createElementNS(svgNS,"text");
cbBot.setAttribute("x", cbX + cbW + 8);
cbBot.setAttribute("y", cbY + cbH);
cbBot.setAttribute("fill","rgba(255,255,255,0.72)");
cbBot.setAttribute("font-size","11");
cbBot.setAttribute("font-family","ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace");
cbBot.textContent = vmin.toFixed(2);
svg.appendChild(cbBot);
container.appendChild(svg);
}}
const tip = document.getElementById("tip");
function showTip(e){{
const t = e.target;
if(!t || !t.dataset || t.dataset.val === undefined) return;
const val = t.dataset.val;
tip.style.display = "block";
tip.innerHTML = `
<div><span class="k">Panel:</span> <span class="v">${{t.dataset.panel}}</span></div>
<div><span class="k">Config:</span> <span class="v">${{t.dataset.col}}</span></div>
<div><span class="k">Template:</span> <span class="v">${{t.dataset.row}}</span></div>
<div><span class="k">Value:</span> <span class="v">${{val === "NA" ? "NA" : Number(val).toFixed(4)}}</span></div>`;
tip.style.left = e.clientX + "px";
tip.style.top = e.clientY + "px";
}}
function moveTip(e) {{
if (tip.style.display !== "block") return;
tip.style.left = e.clientX + "px";
tip.style.top = e.clientY + "px";
}}
function hideTip() {{
tip.style.display = "none";
}}
document.addEventListener("mousemove", moveTip);
document.addEventListener("mouseover", showTip);
document.addEventListener("mouseout", (e)=>{{
if(e.target && e.target.dataset && e.target.dataset.val !== undefined) hideTip();
}});
function render(stepIndex){{
const steps = DATA.steps;
const step = steps[stepIndex];
document.getElementById("stepLabel").textContent = String(step);
const templates = DATA.templates;
const configs = DATA.configs;
const d1 = DATA.datasets["PandaEval12_1"].by_step[step];
const d2 = DATA.datasets["PandaEval12_2"].by_step[step];
svgHeatmap({{
titleLegendEl: document.getElementById("acc1Legend"),
containerId: "acc1",
matrix: d1.accuracy,
rowLabels: templates,
colLabels: configs,
vmin: 0, vmax: 1,
formatFn: v => v.toFixed(2),
panelName: "Accuracy / PandaEval12_1",
}});
svgHeatmap({{
titleLegendEl: document.getElementById("acc2Legend"),
containerId: "acc2",
matrix: d2.accuracy,
rowLabels: templates,
colLabels: configs,
vmin: 0, vmax: 1,
formatFn: v => v.toFixed(2),
panelName: "Accuracy / PandaEval12_2",
}});
svgHeatmap({{
titleLegendEl: document.getElementById("inv1Legend"),
containerId: "inv1",
matrix: d1.invalid_rate,
rowLabels: templates,
colLabels: configs,
vmin: 0, vmax: 1,
formatFn: v => v.toFixed(2),
panelName: "Invalid rate / PandaEval12_1",
}});
svgHeatmap({{
titleLegendEl: document.getElementById("inv2Legend"),
containerId: "inv2",
matrix: d2.invalid_rate,
rowLabels: templates,
colLabels: configs,
vmin: 0, vmax: 1,
formatFn: v => v.toFixed(2),
panelName: "Invalid rate / PandaEval12_2",
}});
}}
(function init(){{
const steps = DATA.steps;
const slider = document.getElementById("stepSlider");
if(steps.length === 0){{
// No checkpoint steps were collected: there is nothing to draw, so
// report that and disable the slider rather than failing in render().
slider.disabled = true;
document.getElementById("stepLabel").textContent = "no data";
document.getElementById("stepRange").textContent = "no data";
return;
}}
slider.min = 0;
slider.max = Math.max(0, steps.length - 1);
slider.value = Math.max(0, steps.length - 1);
document.getElementById("stepRange").textContent = `${{steps[0]}} … ${{steps[steps.length-1]}}`;
slider.addEventListener("input", () => render(parseInt(slider.value, 10)));
render(parseInt(slider.value, 10));
}})();
</script>
</body>
</html>
"""
    OUT_HTML.write_text(html, encoding="utf-8")
    print(f"Wrote: {OUT_HTML}")
if __name__ == "__main__":
main()