// ProcGrep explorer frontend. Talks to the FastAPI backend (/datasets, /query);
// no embedded data, so it queries whole datasets server-side rather than a sample.

// Default (index 0) is a high-hit, behaviorally interesting pattern so first
// contact is not an empty result. Patterns run over the raw spine, which keeps
// think/other, so literally-consecutive action patterns are rare.
const SAMPLES = [
  { label: "submitted without testing", pat: "^(?:(?!run_test).)*submit" },
  { label: "never searched the repo", pat: "^(?:(?!search_repo).)*$" },
  { label: "stuck reading", pat: "(read_file (?:think )?){4,}" },
  { label: "edit-streak ≥5", pat: "(edit (?:think |other )?){5,}" },
  { label: "recovered from an error", pat: "error (?:think |other )?edit" },
];

/** Shorthand for document.getElementById. */
const $ = (id) => document.getElementById(id);

/**
 * Turn a raw agent/model identifier into a display name.
 *
 * Strips a leading scaffold prefix (swe-agent, agentless, moatless, dars,
 * mini-swe-agent followed by "-" or "+"), rewrites the first occurrence of
 * each known model family to its canonical spelling, then turns remaining
 * hyphens into spaces.
 *
 * @param {*} a - Raw identifier; coerced with String().
 * @returns {string} Display name, or String(a) when the cleaned name is empty.
 */
function prettyModel(a) {
  const raw = String(a);
  const named = raw
    .replace(/^(swe-agent|agentless|moatless|dars|mini-swe-agent)[-+]/i, "")
    .replace(/llama-?(\d+)b/i, (_, n) => `Llama ${n}B`)
    .replace(/\bgpt-?4o\b/i, "GPT-4o")
    .replace(/\bgpt-?4\b/i, "GPT-4")
    .replace(/claude-?([\d.]+)([a-z-]*)/i, (_, v, sfx) =>
      `Claude ${v}${sfx ? ` ${sfx.replace(/-/g, " ").trim()}` : ""}`)
    .replace(/deepseek-?(v?\d+|r\d+)/i, (_, v) => `DeepSeek ${v.toUpperCase()}`);
  // Hyphens become spaces, except the one inside the canonical "GPT-4" /
  // "GPT-4o" tokens inserted above; a blanket replace would undo that
  // spelling and show "GPT 4o".
  const spaced = named.replace(/GPT-4o?\b|-/g, (tok) => (tok === "-" ? " " : tok));
  return spaced.trim() || raw;
}

/**
 * Format a task id shaped like "<owner>__<repo>-<number>" as
 * "<owner>/<repo> #<number>".
 *
 * @param {*} t - Task id; falsy values yield "".
 * @returns {string} Formatted id, or String(t) when the shape does not match.
 */
function prettyTask(t) {
  if (!t) return "";
  const m = String(t).match(/^(.+?)__(.+?)-(\d+)$/);
  return m ? `${m[1]}/${m[2]} #${m[3]}` : String(t);
}

// Toggled by the "noisetog" control; mirrored as the body class "hide-noise".
let HIDE_NOISE = false;

/**
 * Compact count: values >= 1000 render as thousands with one decimal and a
 * "k" suffix; anything else renders as-is, with falsy values shown as "0".
 */
const fmtTok = (n) => (n >= 1000 ? (n / 1000).toFixed(1) + "k" : String(n || 0));

let DSETS = []; // store-backed dataset ids (shared by query + compare)
let OUTCOME_DS = []; // datasets that carry a genuine resolution label
let QTIMER; // debounce handle for live-as-you-type
// current query hits, for click + paginate
let QHITS = [];
let QCOLOR = {};
let QNH = 0;
let QPAT = "";
// Comparator state: the active axis, the dataset, the two sides being
// compared, and the last compare response.
const CMP = { axis: "agent", ds: null, left: null, right: null, data: null };

// Brief provenance: link the selected dataset to its Hugging Face source page.
function setSource(ds) { const a = $("dsource"); if (!a) return; a.href = "https://huggingface.co/datasets/" + ds; a.textContent = "source: " + ds + " ↗"; } async function loadDatasets() { const d = await (await fetch("datasets")).json(); DSETS = d.suggested; OUTCOME_DS = d.outcome_datasets || []; CMP.ds = DSETS[0]; $("ds").innerHTML = d.suggested.map((s) => ``).join(""); $("trychips").innerHTML = SAMPLES.map((s, i) => `${s.label}`).join(""); setSource(DSETS[0]); } // Collapse runs of think/other into a gap marker; run-length the signal atoms. // hi (optional) is an [startAtom, endAtom] index range to outline as the match. function spine(atoms, color, hi) { const runs = []; let idx = 0; for (const a of atoms) { const noise = a === "think" || a === "other"; const t = runs[runs.length - 1]; if (noise) { if (t && t.gap) { t.n++; t.end = idx; } else runs.push({ gap: true, n: 1, start: idx, end: idx }); } else { if (t && t.a === a) { t.n++; t.end = idx; } else runs.push({ a, n: 1, start: idx, end: idx }); } idx++; } const lit = (it) => (hi && it.end >= hi[0] && it.start <= hi[1]) ? " hit" : ""; return '' + runs.slice(0, 90).map((it) => it.gap ? `` : `` ).join("") + ""; } // Locate the matched atom span in a hit, for highlighting. Returns [start,end] // atom indices or null. Best-effort over the rendered (capped) atom list. 
/**
 * Find which atoms a pattern's first match covers.
 *
 * @param {string[]} atoms - Atom names; joined with single spaces.
 * @param {string} pattern - Regular-expression source.
 * @returns {number[]|null} [start, end] atom indices (inclusive), or null
 *   when there is no match, the match is empty/whitespace, or the pattern
 *   does not compile.
 */
function matchSpan(atoms, pattern) {
  try {
    // A trailing space is appended after the last atom (presumably so patterns
    // that expect a separator after every atom can match the final one).
    const sp = atoms.join(" ") + " ";
    const m = new RegExp(pattern).exec(sp);
    if (!m || !m[0].trim()) return null;
    // Count of spaces before the match start = index of the atom it starts in.
    const start = sp.slice(0, m.index).split(" ").length - 1;
    // Number of whitespace-separated tokens in the trimmed match.
    const len = m[0].trim().split(/\s+/).length;
    return [start, start + len - 1];
  } catch { return null; }
}

// Render an action-mix bar from a { atom: value } map, largest value first.
// NOTE(review): the per-entry template literal is empty in this copy, so
// `color` and the entry values are unused — markup appears to be missing.
function mixbar(mix, color) {
  return '' + Object.entries(mix).sort((a, b) => b[1] - a[1]).map(
    ([a, v]) => ``
  ).join("") + "";
}

/**
 * Run a pattern against the selected dataset via POST "query" and render the
 * result summary into "res", then the hit list via renderHits().
 *
 * Also writes the pattern back into the "q" input and updates QHITS, QCOLOR,
 * QNH and QPAT from the response.
 *
 * NOTE(review): r.error, r.pattern and the caught error are interpolated
 * into innerHTML without escaping; a pattern containing "<" (e.g. a
 * lookbehind) would be parsed as markup — confirm and escape if so.
 * NOTE(review): responses are applied in arrival order with no request
 * sequencing, so a slow earlier query could overwrite a newer one — confirm.
 *
 * @param {string} pattern - Regular-expression source sent to the server.
 */
async function run(pattern) {
  const ds = $("ds").value;
  $("res").innerHTML = 'scanning the full dataset on the server…';
  $("q").value = pattern;
  let r;
  try {
    r = await (await fetch("query", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ dataset: ds, pattern }),
    })).json();
  } catch (e) { $("res").innerHTML = `request failed: ${e}`; return; }
  if (r.error) { $("res").innerHTML = `${r.error}`; return; }
  $("dsmeta").textContent = `· ${r.n_traces} traces${r.truncated ? " (capped)" : ""}`;
  QHITS = r.hits; QCOLOR = r.atom_color; QNH = r.n_hits; QPAT = r.pattern;
  const col = r.atom_color;
  // First by_model entry is the one called out as "most affected".
  const top = r.by_model[0];
  const s = r.stats || {};
  // Stats line is only shown when the response reports a model count.
  const statline = s.n_models ? `
diversity ${s.diversity_bits} bits · median ${s.median_len} steps · CoT ${s.median_cot} steps${s.median_cot_tokens ? `, ~${fmtTok(s.median_cot_tokens)} tokens` : ""} · ${(s.exact_dup_rate * 100).toFixed(0)}% exact-duplicate · ${s.n_models} models
` : "";
  $("res").innerHTML = `
${r.n_hits} / ${r.n_traces} traces match /${r.pattern}/
scanned in ${r.elapsed_ms} ms, no model call${top ? ` · ${prettyModel(top.model)} most affected at ${(top.rate * 100).toFixed(0)}%` : ""}
${statline}
which models
${r.by_model.map((m) => `
${prettyModel(m.model)}${(m.rate * 100).toFixed(0)}%
`).join("")}
action mix · matched vs. all
matched${r.n_hits ? mixbar(r.mix_hits, col) : 'no matches'}
all traces${mixbar(r.mix_all, col)}
matching traces · click to open · matched span outlined
`;
  renderHits();
}

// Delegated clicks: sample chips run their pattern; "noisetog" flips
// HIDE_NOISE and the matching body class.
document.addEventListener("click", (e) => {
  const c = e.target.closest(".chip[data-i]");
  if (c) run(SAMPLES[+c.dataset.i].pat);
  if (e.target.id === "noisetog") {
    HIDE_NOISE = !HIDE_NOISE;
    document.body.classList.toggle("hide-noise", HIDE_NOISE);
  }
});

// Enter runs immediately and cancels any pending debounced run.
$("q").addEventListener("keydown", (e) => {
  if (e.key === "Enter") { clearTimeout(QTIMER); run($("q").value); }
});

// Live-as-you-type: debounce, and only fire on a valid regex so partial patterns
// (e.g. an open paren mid-type) never clobber the last results with an error.
$("q").addEventListener("input", () => {
  clearTimeout(QTIMER);
  const v = $("q").value.trim();
  if (!v) return;
  try { new RegExp(v); } catch { return; }
  QTIMER = setTimeout(() => run(v), 250);
});

// Switching dataset re-runs the current pattern (or the default sample).
$("ds").addEventListener("change", () => {
  setSource($("ds").value);
  run($("q").value || SAMPLES[0].pat);
});

// Free-text: query any HF dataset id (live-ingested on the server, slower first load).
$("dsfree").addEventListener("keydown", (e) => {
  if (e.key !== "Enter") return;
  const v = e.target.value.trim();
  if (!v) return;
  // Add the id to the picker once, labelled " · live".
  if (![...$("ds").options].some((o) => o.value === v)) {
    const o = document.createElement("option");
    o.value = v;
    o.textContent = v + " · live";
    $("ds").appendChild(o);
  }
  $("ds").value = v;
  setSource(v);
  e.target.value = "";
  run($("q").value || SAMPLES[0].pat);
});

// Comparator.

// A trail-as-thread: collapse think/other runs into fold lines, render signal
// atoms as labeled colored steps. The same skeleton the spine() bars draw,
// expanded into a readable column. This is how we show a "conversation":
// procgrep keeps the action structure, not the raw text.
// NOTE(review): `color` is not referenced in the visible body — the markup
// that used it appears to be missing from this copy.
function threadHTML(atoms, color) {
  const out = [];
  let i = 0;
  while (i < atoms.length) {
    const a = atoms[i];
    if (a === "think" || a === "other") {
      // Consume the whole think/other run and emit one fold line for it.
      let n = 0;
      while (i < atoms.length && (atoms[i] === "think" || atoms[i] === "other")) { n++; i++; }
      out.push(`
⋯ ${n} reasoning ${n > 1 ? "steps" : "step"}
`);
    } else {
      // Count consecutive repeats of the same atom; shown as " ×n".
      let n = 1;
      while (i + n < atoms.length && atoms[i + n] === a) n++;
      out.push(`
${i + 1}${a}${n > 1 ? " ×" + n : ""}
`);
      i += n;
    }
  }
  return out.join("");
}

// A contiguous trajectory barcode: one cell per atom, reasoning pale and
// actions vivid, so trail length is proportional and behavior reads as color
// density. Used by the comparator (vs the collapsing spine() the query uses).
// hi (optional) is an [start,end] atom range; cells outside it dim so the
// matched span pops. Noise cells get class nz so "hide think/other" can drop them.
// NOTE(review): the per-cell template literal is empty in this copy, so
// `noise`, `dim` and `color` are unused as written.
function barcode(atoms, color, hi) {
  return '' + atoms.map((a, idx) => {
    const noise = a === "think" || a === "other";
    const dim = hi && (idx < hi[0] || idx > hi[1]) ? ";opacity:.3" : "";
    return ``;
  }).join("") + "";
}

// Open any trace as a thread sheet. Shared by the comparator trails and the
// query hits, so "select a trace to inspect" works the same everywhere.

// Present-tense phrase per atom, for the terminal-style replay lines.
const ATOM_PHRASE = {
  search_repo: "grepped the repo",
  read_file: "opened a file",
  edit: "edited code",
  create_file: "created a file",
  delete_file: "deleted a file",
  run_test: "ran the tests",
  submit: "submitted the patch",
  localize: "localized the fault",
  error: "hit an error",
  think: "reasoning",
  other: "other",
};

// Active replay state for the open sheet. One sheet at a time.
// Shape (set in openTraceData): { atoms, color, n, max, playing, timer, speed }.
let RP = null;

// Mini barcode for the replay sheet.
// NOTE(review): the per-cell template literal is empty in this copy, so `n`,
// `color` and `noise` are unused as written.
function rpMini(atoms, n, color) {
  return '' + atoms.map((a, idx) => {
    const noise = a === "think" || a === "other";
    return ``;
  }).join("") + "";
}

// Terminal-style replay lines for atoms[0..n]: think/other runs fold into one
// line, every other atom gets its ATOM_PHRASE (or the raw atom name).
function rpTerminal(atoms, n, color) {
  const out = [];
  let i = 0;
  while (i <= n && i < atoms.length) {
    const a = atoms[i];
    if (a === "think" || a === "other") {
      let k = 0;
      while (i <= n && i < atoms.length && (atoms[i] === "think" || atoms[i] === "other")) { k++; i++; }
      out.push(`
┄ ${k} reasoning ${k > 1 ? "steps" : "step"}
`);
    } else {
      // Marks the line for the current replay position.
      const now = i === n ? " rpnow" : "";
      out.push(`
${i + 1}${ATOM_PHRASE[a] || a}${now ? ' ' : ""}
`);
      i++;
    }
  }
  return out.join("");
}

// Evaluate the sheet's live pattern ("rp-q") against the atoms replayed so
// far and report into "rp-fire": the 1-based step where it first matches, or
// 'no match yet'. An uncompilable pattern clears the indicator.
function rpFire() {
  if (!RP) return;
  const pat = $("rp-q").value.trim(), el = $("rp-fire");
  if (!pat) { el.textContent = ""; return; }
  let rx;
  try { rx = new RegExp(pat); } catch { el.innerHTML = ''; return; }
  const sp = RP.atoms.slice(0, RP.n + 1).join(" ") + " ";
  const m = rx.exec(sp);
  el.innerHTML = m ? `● fired at step ${sp.slice(0, m.index).split(" ").length}` : 'no match yet';
}

// Redraw the replay sheet for the current position RP.n and keep the
// terminal scrolled to the bottom.
function rpDraw() {
  $("rp-mini").innerHTML = rpMini(RP.atoms, RP.n, RP.color);
  $("rp-term").innerHTML = rpTerminal(RP.atoms, RP.n, RP.color);
  $("rp-seek").value = RP.n;
  $("rp-step").textContent = `${RP.n + 1} / ${RP.max + 1}`;
  rpFire();
  const tm = $("rp-term");
  if (tm) tm.scrollTop = tm.scrollHeight;
}

// Stop playback: clear the interval and reset the play button label.
function rpPause() {
  if (!RP) return;
  RP.playing = false;
  if (RP.timer) { clearInterval(RP.timer); RP.timer = null; }
  const b = $("rp-play");
  if (b) b.textContent = "▶ play";
}

// Play/pause. Starting from the end rewinds to step 0 first; playback
// advances one atom every RP.speed ms and pauses itself at the end.
function rpToggle() {
  if (!RP) return;
  if (RP.playing) { rpPause(); return; }
  if (RP.n >= RP.max) RP.n = 0;
  RP.playing = true;
  $("rp-play").textContent = "❚❚ pause";
  RP.timer = setInterval(() => {
    if (RP.n < RP.max) { RP.n++; rpDraw(); } else { rpPause(); }
  }, RP.speed);
}

// Jump to a position (value is coerced to a number); always pauses first.
function rpSeek(v) { rpPause(); RP.n = +v; rpDraw(); }

// Change the step interval. 0 means "jump to the end". Otherwise playback is
// restarted if it was running, so the new interval takes effect.
function rpSpeed(ms) {
  if (ms === 0) { rpPause(); RP.n = RP.max; rpDraw(); return; } // instant: jump to end
  const was = RP.playing;
  RP.speed = ms;
  rpPause();
  if (was) rpToggle();
}

// Close the sheet: stop playback, drop the replay state, remove the element.
function rpClose() {
  rpPause();
  RP = null;
  const s = document.querySelector(".sheet");
  if (s) s.remove();
}

/**
 * Open a trace in a replay sheet appended to document.body.
 *
 * Replaces RP with fresh state positioned at the last atom (not playing,
 * 380 ms step) and draws it. Clicking the sheet backdrop itself closes it.
 *
 * NOTE(review): `src` is computed but not referenced in the visible
 * template, and t.outcome / ds are interpolated into innerHTML unescaped —
 * the link markup appears to be missing from this copy; confirm.
 * NOTE(review): an already-open sheet is not removed before a new one is
 * appended — confirm whether that can happen.
 *
 * @param {object} t - Trace; reads atoms, model, task, trace_id, outcome,
 *   steps and cot_tokens.
 * @param {string} ds - Dataset id shown in the footer.
 * @param {object} color - Atom color map, stored on RP.
 */
function openTraceData(t, ds, color) {
  rpPause();
  const src = `https://huggingface.co/datasets/${ds}`;
  const prob = t.task || t.trace_id || "";
  const oc = t.outcome ? ` ${t.outcome}` : "";
  const max = t.atoms.length - 1;
  RP = { atoms: t.atoms, color, n: max, max, playing: false, timer: null, speed: 380 };
  const sheet = document.createElement("div");
  sheet.className = "sheet";
  sheet.onclick = (e) => { if (e.target === sheet) rpClose(); };
  sheet.innerHTML = `
close ✕
${prettyModel(t.model)}${prob ? ` · ${prettyTask(prob)}` : ""}${oc}
${t.steps ?? t.atoms.length} steps${t.cot_tokens ? ` · ~${fmtTok(t.cot_tokens)} reasoning tokens` : ""} · structural trail, no raw text stored · press play to replay
▶ play 1x 4x end
live query
view full trace at source ↗ · ${ds}
`;
  document.body.appendChild(sheet);
  rpDraw();
}

// Open comparator trail i from the given side. On the "eval" axis each side
// is its own dataset (CMP.left / CMP.right); otherwise both share CMP.ds.
function openTrace(side, i) {
  const r = CMP.data;
  if (!r) return;
  const ds = CMP.axis === "eval" ? (side === "left" ? CMP.left : CMP.right) : CMP.ds;
  openTraceData(r[side].trails[i], ds, r.atom_color);
}

// Open query hit i (index into QHITS) from the currently selected dataset.
function openQHit(i) { openTraceData(QHITS[i], $("ds").value, QCOLOR); }

// Render the matched-trace list with a "show more" control so the full match
// set is browsable, not just the first server page.
// Filters loaded hits by the "qfilter" text (case-insensitive, against task
// or trace_id) and orders them by the "qsort" value; "found" (the default)
// keeps server order. Each row keeps its original QHITS index alongside it.
// NOTE(review): the filter text `f` is interpolated into innerHTML unescaped.
function renderHits() {
  const f = ($("qfilter") && $("qfilter").value || "").trim().toLowerCase();
  const s = ($("qsort") && $("qsort").value) || "found";
  const rows = QHITS.map((h, i) => [h, i]).filter(([h]) => !f || (h.task || h.trace_id || "").toLowerCase().includes(f));
  const len = (h) => h.steps ?? h.atoms.length;
  const prob = (h) => h.task || h.trace_id || "";
  if (s === "len-desc") rows.sort((a, b) => len(b[0]) - len(a[0]));
  else if (s === "len-asc") rows.sort((a, b) => len(a[0]) - len(b[0]));
  else if (s === "model") rows.sort((a, b) => prettyModel(a[0].model).localeCompare(prettyModel(b[0].model)));
  // "~" is the sort key for hits with no outcome.
  else if (s === "outcome") rows.sort((a, b) => (a[0].outcome || "~").localeCompare(b[0].outcome || "~"));
  else if (s === "prob") rows.sort((a, b) => prob(a[0]).localeCompare(prob(b[0])));
  $("qhits").innerHTML = rows.map(([h, i]) => {
    const prob = h.task || h.trace_id || "";
    const oc = h.outcome ? `${h.outcome}` : "";
    return `
${prettyModel(h.model)}${prettyTask(prob)}${oc}${h.steps ?? h.atoms.length} steps${barcode(h.atoms, QCOLOR, matchSpan(h.atoms, QPAT))}
`;
  }).join("");
  // Hits the server reported but that are not loaded yet.
  const left = QNH - QHITS.length;
  let more = "";
  if (f) more += `
${rows.length} of ${QHITS.length} loaded match "${f}"
`;
  if (left > 0) more += `show more · ${QHITS.length} of ${QNH}`;
  else if (QNH && !f) more += `
showing all ${QNH}
`;
  $("qmore").innerHTML = more;
}

// Fetch the next page of hits for the current pattern (offset = hits already
// loaded), append them to QHITS and re-render. Request failures are ignored.
async function moreHits() {
  let r;
  try {
    r = await (await fetch("query", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ dataset: $("ds").value, pattern: QPAT, offset: QHITS.length }),
    })).json();
  } catch { return; }
  if (r.hits && r.hits.length) { QHITS = QHITS.concat(r.hits); renderHits(); }
}

// Render one comparator column ("left" or "right") from CMP.data: header,
// summary stats, then one row per trail.
// NOTE(review): `ds` is computed but not referenced in the visible template —
// the "source ↗" link markup appears to be missing from this copy.
function trailStack(side) {
  const r = CMP.data, col = r[side], c = r.atom_color;
  const s = col.stats;
  const ds = CMP.axis === "eval" ? (side === "left" ? CMP.left : CMP.right) : CMP.ds;
  return `
${prettyModel(col.label)} source ↗
${col.n.toLocaleString()} traces · median ${s.median_len} steps · ${s.median_cot} reasoning${s.median_cot_tokens ? `, ~${fmtTok(s.median_cot_tokens)} tok` : ""} · diversity ${s.diversity_bits} bits
${col.trails.map((t, i) => { const prob = t.task || t.trace_id || ""; return `
${t.steps} st${prettyTask(prob)}${barcode(t.atoms, c)}
`; }).join("")}
`;
}

// Render the comparator summary from CMP.data.diff: JSD, median length and
// reasoning-step deltas, and up to 7 distinguishing procedures, each tagged
// with the side it leans toward (log_odds >= 0 leans left).
function diffStrip() {
  const r = CMP.data, c = r.atom_color, d = r.diff;
  const L = prettyModel(r.left.label), R = prettyModel(r.right.label);
  const procs = (d.procedures || []).slice(0, 7).map((p) => {
    const lean = p.log_odds >= 0 ? `${L} ◂` : `▸ ${R}`;
    return `
${lean}${barcode(p.atoms, c)}
`;
  }).join("");
  return `
${d.jsd == null ? "—" : d.jsd}
action-mix JSD, 0 same to 1 disjoint
${d.len_delta > 0 ? "+" : ""}${d.len_delta}
median length, ${L} minus ${R}
${d.cot_delta > 0 ? "+" : ""}${d.cot_delta}
reasoning steps, same direction
procedures that most separate them
${procs || 'no distinguishing procedures found'}
◂ leans ${L} · leans ${R} ▸ · think/other runs collapse to gaps
`;
}

// POST the current comparator selection to "compare", store the response in
// CMP.data and render it into "cmp-res". No-op until both sides are set.
// NOTE(review): r.error and the caught error go into innerHTML unescaped.
async function runCompare() {
  if (!CMP.left || !CMP.right) return;
  $("cmp-res").innerHTML = 'diffing the two groups on the server…';
  let r;
  try {
    r = await (await fetch("compare", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ axis: CMP.axis, dataset: CMP.ds, left: CMP.left, right: CMP.right }),
    })).json();
  } catch (e) { $("cmp-res").innerHTML = `request failed: ${e}`; return; }
  if (r.error) { $("cmp-res").innerHTML = `${r.error}`; return; }
  CMP.data = r;
  $("cmp-res").innerHTML = `${diffStrip()}
${trailStack("left")}${trailStack("right")}
`;
}

// NOTE(review): returns an empty string in this copy. Callers pass an inline
// handler source (sel), an option list and the selected value, so this
// presumably built a <select>; the markup is missing here — confirm.
function opt(sel, list, val) { return ``; }

// Rebuild the comparator controls for the active axis and kick off a compare.
//  - "eval": compares two datasets; resets to the first two when the current
//    left side is not a known dataset or both sides are equal.
//  - "outcome": resolved vs unresolved within one dataset from OUTCOME_DS;
//    shows an explanatory message when no such dataset exists.
//  - otherwise (agent): fetches "groups" for CMP.ds and compares the first
//    two agents; needs at least two.
// NOTE(review): the agent branch writes to "agentpick", which is not created
// by any visible markup — presumably part of the missing template; confirm.
async function renderCmpControls() {
  const box = $("cmp-controls");
  if (CMP.axis === "eval") {
    if (!DSETS.includes(CMP.left) || CMP.left === CMP.right) { CMP.left = DSETS[0]; CMP.right = DSETS[1]; }
    box.innerHTML = `${opt("CMP.left=this.value;runCompare()", DSETS, CMP.left)} vs${opt("CMP.right=this.value;runCompare()", DSETS, CMP.right)}`;
    runCompare();
  } else if (CMP.axis === "outcome") {
    if (!OUTCOME_DS.length) {
      box.innerHTML = 'no dataset in the store carries a resolved label';
      $("cmp-res").innerHTML = 'The outcome axis needs a genuine resolved/unresolved label, which only some datasets provide.';
      return;
    }
    if (!OUTCOME_DS.includes(CMP.ds)) CMP.ds = OUTCOME_DS[0];
    CMP.left = "resolved"; CMP.right = "unresolved";
    box.innerHTML = `${opt("CMP.ds=this.value;renderCmpControls()", OUTCOME_DS, CMP.ds)} · resolved vs unresolved`;
    runCompare();
  } else {
    box.innerHTML = `${opt("CMP.ds=this.value;renderCmpControls()", DSETS, CMP.ds)} · loading agents…`;
    const g = await (await fetch(`groups?dataset=${encodeURIComponent(CMP.ds)}`)).json();
    const agents = (g.agents || []).map((a) => a.agent);
    if (agents.length < 2) {
      $("agentpick").innerHTML = 'only one agent in this dataset; pick another or use by eval';
      return;
    }
    CMP.left = agents[0]; CMP.right = agents[1];
    $("agentpick").innerHTML = `${opt("CMP.left=this.value;runCompare()", agents, CMP.left)} vs${opt("CMP.right=this.value;runCompare()", agents, CMP.right)}`;
    runCompare();
  }
}

// Switch the comparator axis, update the segment buttons, rebuild controls.
function setAxis(axis) {
  CMP.axis = axis;
  $("seg-agent").classList.toggle("act", axis === "agent");
  $("seg-eval").classList.toggle("act", axis === "eval");
  $("seg-outcome").classList.toggle("act", axis === "outcome");
  renderCmpControls();
}

// Whether the compare view has been initialised at least once.
let CMP_INIT = false;

// Show the "query" or "compare" view. The first time compare is shown it
// initialises the axis (default "outcome"); later calls only change the axis
// when one is passed.
function showView(name, axis) {
  $("view-query").classList.toggle("hidden", name !== "query");
  $("view-compare").classList.toggle("hidden", name !== "compare");
  $("tab-query").classList.toggle("act", name === "query");
  $("tab-compare").classList.toggle("act", name === "compare");
  if (name === "compare") {
    if (!CMP_INIT) { CMP_INIT = true; setAxis(axis || "outcome"); }
    else if (axis) setAxis(axis);
  }
}

// Bootstrap: load the catalogue, run the default sample, honor the hash.
loadDatasets().then(() => {
  run(SAMPLES[0].pat);
  // deep-link: #compare or #compare:agent|eval|outcome
  const h = (location.hash || "").replace("#", "");
  if (h.startsWith("compare")) showView("compare", h.split(":")[1]);
});