Spaces:
Running on Zero
Running on Zero
"""guidance.py — Guides (per-turn advice) + Findings (graph-level claims).

NON-NEGOTIABLE #6: the advisor stays SILENT unless a NAMED, FIXABLE pattern fires.
Cost alone is never advice. A turn gets a Guide ONLY when:
  - a re-read pattern fired                               → kind 'reread'
  - a real retry loop fired                               → kind 'loop'
  - heavy AND over the absolute cost floor, with NO churn → kind 'big'
    ("expensive because big, not wasteful; checkpoint to cap, but don't
    restart and lose context")
Otherwise `guide` stays None. "Expensive but clean" is a valid, important output —
and a cheap turn (a query/reply, a failed round-trip) gets NO 'big' card at all:
the absolute-cost gate stops the relative top-N rank from faking expense (#6).

NON-NEGOTIABLE #4/#7: Findings separate PROVEN from HYPOTHESIS. A value-flow edge
(a distinctive value reappears verbatim) is `proven: true`. Temporal proximity is a
hypothesis (`proven: false`). The guide/finding text SUGGESTS, never asserts a fix.

Pure code, NO model. (The narrator turns these into prose later; it never invents
new findings.)
"""
| from __future__ import annotations | |
| from dataclasses import asdict | |
| from typing import Any, Optional | |
| from engine.core.best_practices import practice_for, practices_for | |
| from engine.core.loops import TurnLoops | |
| from engine.core.rereads import Reread | |
# Per-turn guide kinds that map to a fixable best practice. 'big' is EXCLUDED on
# purpose: it fires only on heavy-with-no-churn ("expensive because big, not
# wasteful"), so surfacing it as something to "do better" would manufacture advice
# on a clean turn (build rule #6).
_GUIDE_TO_SIGNAL: dict[str, str] = {"loop": "loop", "reread": "reread"}
# --------------------------------------------------------------------------- #
# Guides (attached to a turn ONLY when a pattern fires)
# --------------------------------------------------------------------------- #
def build_guide(
    turn,
    rereads: Optional[list[Reread]],
    loops: Optional[TurnLoops],
) -> Optional[dict[str, str]]:
    """Return a Guide dict for a turn, or None (silence) if no pattern fires.

    Priority when several could apply: a real loop (correctness) > re-read
    (wasted work) > big (informational). At most one guide per turn.

    Args:
        turn: The turn being advised. Reads ``turn.heavy`` and, when present,
            ``turn.overBudget`` (a missing attribute is treated as False).
        rereads: Re-read hits for this turn, or None/empty when none fired.
            Only the first entry is used to word the card.
        loops: Loop-detection result for this turn, or None. Its ``.loops``
            entries expose ``count`` and ``errored``.

    Returns:
        ``{"kind", "head", "body"}`` with kind 'loop', 'reread' or 'big',
        or None when the advisor should stay silent.
    """
    # 1) real retry loop — the strongest, most actionable signal
    if loops and loops.loops:
        # Several loops in one turn: report the one that repeated the most.
        lp = max(loops.loops, key=lambda x: x.count)
        head = f"Retry loop: same command ran {lp.count}x, {lp.errored} errored"
        body = (
            "The exact same command was re-run after it errored. Worth checking "
            "whether the command needs a fix (quoting, path, missing arg) rather "
            "than another retry."
        )
        return {"kind": "loop", "head": head, "body": body}

    # 2) re-read — the same file opened >= 3x in one turn
    if rereads:
        rr = rereads[0]
        head = f"Re-read: {rr.file} opened {rr.count}x in this turn"
        body = (
            f"{rr.file} was read {rr.count} times here. If the content is stable, "
            "reading it once and keeping it in context would avoid the repeat token "
            "cost — worth a look."
        )
        return {"kind": "reread", "head": head, "body": body}

    # 3) heavy with no churn → 'big' (expensive because big, not wasteful).
    # Reaching this point already means "no churn": both churn branches above
    # return, so no separate churn flag is needed here.
    # Gate on the ABSOLUTE cost floor (overBudget), not merely the relative top-N
    # rank: in a tiny/cheap session every turn is "top-N", so rank alone would
    # paste "expensive because big" onto a turn that cost ~nothing — a single
    # query/reply, a failed auth round-trip. That is exactly the "cost alone is
    # never advice" rule (#6): calling a cheap turn expensive is simply false.
    if turn.heavy and getattr(turn, "overBudget", False):
        head = "Expensive because big, not wasteful"
        body = (
            "This is one of the heaviest turns by cache-read, but there's no loop or "
            "re-read churn driving it — the cost is the size of the work, not waste. "
            "If you want to cap spend, checkpoint here; don't restart the session and "
            "lose the accumulated context."
        )
        return {"kind": "big", "head": head, "body": body}

    return None
# --------------------------------------------------------------------------- #
# Findings (graph-level claims; proven vs hypothesis ALWAYS separated)
# --------------------------------------------------------------------------- #
def build_findings(
    turns,
    rereads_by_turn: dict[int, list[Reread]],
    loops_by_turn: dict[int, TurnLoops],
    heavy_indices: list[int],
    tool_clusters: Optional[list[dict[str, Any]]] = None,
) -> list[dict[str, Any]]:
    """Build the graph-level Finding objects for a session.

    Finding = { id, kind, severity, nodes:[id], edges:[id], proven:bool, text }

    Every finding emitted here carries ``proven: True`` because each one is
    directly observed or counted (a verbatim value reappearance, a counted
    repeat, a computed ranking). No inferred/hypothesis finding is produced by
    this function. The text only SUGGESTS a fix; it never asserts one.

    Args:
        turns: Sequence of turn objects. Reads ``t.i``, ``t.tools`` (each tool
            exposing ``provenance``, ``flowValue``, ``id``, ``name``,
            ``sourceTool``) and ``t.tokens.cacheRead``.
        rereads_by_turn: Re-read hits keyed by turn index.
        loops_by_turn: Loop-detection results keyed by turn index.
        heavy_indices: Indices of the heavy turns.
            NOTE(review): used to index ``turns`` positionally — assumes list
            position coincides with the turn index; confirm against the caller.
        tool_clusters: Optional cluster dicts with ``binary``, ``calls``,
            ``errored`` and optionally ``turns``, ``toolIds``, ``fix``,
            ``practice``, ``source``.

    Returns:
        Findings in emission order (value-flow, loops, re-reads, heavy, tool
        clusters), with ids ``F1``, ``F2``, … assigned in that order.
    """
    findings: list[dict[str, Any]] = []

    def _add(kind: str, severity: str, nodes: Any, text: str) -> None:
        # Ids are 1-based and sequential in emission order.
        findings.append(
            {
                "id": f"F{len(findings) + 1}",
                "kind": kind,
                "severity": severity,
                "nodes": nodes,
                "edges": [],
                "proven": True,  # observed/counted, never inferred (see docstring)
                "text": text,
            }
        )

    # --- value-flow edges (a distinctive value reappeared verbatim) --------- #
    for t in turns:
        for tc in t.tools:
            if tc.provenance == "indirect" and tc.flowValue:
                _add(
                    "value_flow",
                    "info",
                    [tc.id] if tc.id else [],
                    f"Turn {t.i}: {tc.name} used '{tc.flowValue}', which first "
                    f"appeared in an earlier {tc.sourceTool} result — a proven "
                    f"value-flow (agent-driven, not from the human's prompt).",
                )

    # --- NAMED patterns: loops, re-reads, heavy ----------------------------- #
    # exact-cmd repeat + error is observed, not inferred
    for ti, tl in sorted(loops_by_turn.items()):
        for lp in tl.loops:
            _add(
                "loop",
                "warn",
                [],
                f"Turn {ti}: the same command ran {lp.count}x with "
                f"{lp.errored} errored — looks like a retry loop worth checking.",
            )

    # the >=3 reads are counted, not inferred
    for ti, rrs in sorted(rereads_by_turn.items()):
        for rr in rrs:
            _add(
                "reread",
                "warn",
                [],
                f"Turn {ti}: {rr.file} was read {rr.count}x — a re-read pattern; "
                "caching it in context could avoid the repeat cost.",
            )

    # cacheRead ranking is computed, not inferred
    for ti in heavy_indices:
        t = turns[ti]
        turn_loops = loops_by_turn.get(ti)
        churn = bool(rereads_by_turn.get(ti)) or bool(turn_loops and turn_loops.loops)
        if churn:
            tail = "; see the loop/re-read finding on this turn."
        else:
            tail = "; no loop/re-read churn — expensive because big, not wasteful."
        _add(
            "heavy",
            "info",
            [],
            f"Turn {ti} is among the top-3 by cache-read "
            f"({t.tokens.cacheRead:,} tokens)" + tail,
        )

    # --- NAMED pattern: tool_cluster (CLI flailing, no skill) --------------- #
    # The call/error counts are OBSERVED (like a loop); the fix is a cited
    # SUGGESTION (build rule #7). The citation rides on the finding text.
    for c in tool_clusters or []:
        cluster_turns = c.get("turns") or []
        # Only mention turns when there are some; otherwise the sentence would
        # read "across  with no skill".
        across = ""
        if cluster_turns:
            across = " across " + ", ".join(f"turn {i}" for i in cluster_turns)
        observed = (
            f"Ran `{c['binary']}` {c['calls']}x ({c['errored']} errored)"
            f"{across} with no skill loaded for it"
        )
        fix = c.get("fix")
        if fix:
            practice = c.get("practice") or "Use CLI tools / Create skills"
            citation = f"Best practice: {practice}"
            source = c.get("source")
            if source:
                # No dangling dash when the source is unknown.
                citation += f" — {source}"
            text = f"{observed}. {fix} ({citation})"
        else:
            # knowledge file absent — state the observation, suggest plainly, no citation
            text = (
                f"{observed} — worth giving the agent that context up front "
                f"(a project skill or a service CLI) so it doesn't rediscover it by trial."
            )
        _add("tool_cluster", "warn", c.get("toolIds", []), text)

    return findings
# --------------------------------------------------------------------------- #
# Recommendations (session-level "what could have been better")
# --------------------------------------------------------------------------- #
def build_recommendations(
    turns,
    tool_clusters: Optional[list[dict[str, Any]]] = None,
    read_bursts: Optional[list[dict[str, Any]]] = None,
    unverified: Optional[dict[str, Any]] = None,
    near_repeats: Optional[list[dict[str, Any]]] = None,
    unloaded_mcp: Optional[dict[str, Any]] = None,
    npx_unpinned: Optional[dict[str, Any]] = None,
) -> list[dict[str, Any]]:
    """Abstract the fired, FIXABLE signals into a session-level list.

    Item = { kind, turns:[i], headline, advice, practice, source, attribution }.

    Pure code: it reads the guides already attached to turns + the deterministic
    signal results, and pulls the fix text from the knowledge files (best_practices).

    Two attributions surface side by side:
      * "Anthropic" — the cited fix transcribed from Anthropic's doc.
      * "Generally recommended" — custom, editable, non-Anthropic craft. For every
        fired signal that ALSO has community practices, an extra rec is appended
        carrying the SAME turns, so e.g. a re-read shows the Anthropic rec AND the
        generally-recommended "read once, keep it in context" rec.

    Which signals fire is NOT decided here (deterministic detection upstream is
    untouched) — this only enriches the recommendation list. Empty list => silence
    is the honest result ("expensive but clean"). The narrator/UI may show ONLY
    these — no inventing a best practice that isn't in the knowledge files.

    Args:
        turns: Turn objects; reads ``t.guide`` (if present) and ``t.i``.
        tool_clusters: Cluster dicts (``binary``, ``calls``, ``errored``, optional
            ``turns``/``fix``/``practice``/``source``).
        read_bursts: Dicts with ``turn`` and ``files``.
        unverified: Dict with ``edits`` and optional ``turns``, or None.
        near_repeats: Dicts with ``turn`` and ``count``.
        unloaded_mcp: Dict with optional ``turns``/``probes``, or None.
        npx_unpinned: Dict with optional ``turns``, or None.

    Returns:
        Recommendation items: the "Anthropic" items first, in signal order,
        followed by the "Generally recommended" supplements. Each item owns its
        ``turns`` list (the caller's lists are copied, not aliased).
    """
    recs: list[dict[str, Any]] = []
    # (kind, turns) for every signal that actually fired — drives the community
    # supplements appended at the end (same kind, same turns, distinct attribution).
    fired: list[tuple[str, list[int]]] = []

    # per-turn guides (loop / reread) — 'big'/heavy intentionally excluded
    for t in turns:
        g = getattr(t, "guide", None)
        if not g:
            continue
        sk = _GUIDE_TO_SIGNAL.get(g.get("kind"))
        if not sk:
            continue
        bp = practice_for(sk)
        recs.append(
            {
                "kind": sk,
                "turns": [t.i],
                "headline": g.get("head", ""),
                # No knowledge entry: fall back to the guide's own body text.
                "advice": (bp.get("fix") if bp else g.get("body", "")),
                "practice": bp.get("practice") if bp else None,
                "source": bp.get("source") if bp else None,
                "attribution": "Anthropic",
            }
        )
        fired.append((sk, [t.i]))

    # tool clusters (CLI flailing, no skill) — already carry the cited fix.
    # The practice is looked up only when the signal fired, so the silent path
    # never touches the knowledge files.
    if tool_clusters:
        bp_tc = practice_for("tool_cluster")
        for c in tool_clusters:
            # Copy so the rec does not alias the detector's own list.
            cluster_turns = list(c.get("turns") or [])
            recs.append(
                {
                    "kind": "tool_cluster",
                    "turns": cluster_turns,
                    "headline": (
                        f"Flailed on `{c['binary']}` — {c['calls']}x "
                        f"({c['errored']} errored), no skill loaded"
                    ),
                    "advice": c.get("fix") or (bp_tc.get("fix") if bp_tc else ""),
                    "practice": c.get("practice") or (bp_tc.get("practice") if bp_tc else None),
                    "source": c.get("source") or (bp_tc.get("source") if bp_tc else None),
                    "attribution": "Anthropic",
                }
            )
            fired.append(("tool_cluster", cluster_turns))

    # read-bursts (many files in one turn -> use a subagent)
    if read_bursts:
        bp_rb = practice_for("read_burst")
        for rb in read_bursts:
            recs.append(_named_rec(
                "read_burst", [rb["turn"]],
                f"Read {rb['files']} different files in one turn", bp_rb,
            ))
            fired.append(("read_burst", [rb["turn"]]))

    # unverified edits (session-level: edits, no test/build/lint anywhere)
    if unverified:
        uv_turns = list(unverified.get("turns") or [])
        recs.append(_named_rec(
            "unverified_edit", uv_turns,
            f"{unverified['edits']} edits, but no test/build/lint ran in the session",
            practice_for("unverified_edit"),
        ))
        fired.append(("unverified_edit", uv_turns))

    # near-identical command repeats (circling -> redirect)
    if near_repeats:
        bp_nr = practice_for("near_repeat")
        for nr in near_repeats:
            recs.append(_named_rec(
                "near_repeat", [nr["turn"]],
                f"Re-ran a near-identical command {nr['count']}x in one turn", bp_nr,
            ))
            fired.append(("near_repeat", [nr["turn"]]))

    # unloaded MCP probed manually (can't load mid-session -> restart, don't probe)
    if unloaded_mcp:
        um_turns = list(unloaded_mcp.get("turns") or [])
        recs.append(_named_rec(
            "unloaded_mcp", um_turns,
            f"Probed an MCP server that isn't loaded in this session "
            f"({unloaded_mcp.get('probes', 0)} manual reach attempts) — it can't appear mid-session",
            practice_for("unloaded_mcp"),
        ))
        fired.append(("unloaded_mcp", um_turns))

    # unpinned package runners (npx pkg with no @version) — there is NO Anthropic
    # practice for this kind, so it surfaces only as a generally-recommended rec
    # below; we just record it as fired (turns named for the cards).
    if npx_unpinned:
        fired.append(("npx_unpinned", list(npx_unpinned.get("turns") or [])))

    # --- community supplements: for each fired kind that has 'Generally
    # recommended' practices, append one rec per practice with the SAME turns.
    # Pure enrichment — it never adds a new kind that didn't already fire. --- #
    for kind, turns_ in fired:
        for cp in _community_for(kind):
            recs.append(
                {
                    "kind": kind,
                    # Fresh copy per rec: several supplements may share one signal.
                    "turns": list(turns_),
                    "headline": cp.get("practice", ""),
                    "advice": cp.get("fix", ""),
                    "practice": cp.get("practice"),
                    "source": cp.get("source"),
                    "attribution": "Generally recommended",
                }
            )
    return recs
def _community_for(kind: str) -> list[dict[str, Any]]:
    """Return the 'Generally recommended' (non-Anthropic) practices for a fired kind.

    Reuses ``best_practices.practices_for`` and keeps only the entries whose
    ``attribution`` is exactly "Generally recommended", so the Anthropic entry
    is dropped and only the community supplements remain.

    Args:
        kind: The signal kind that fired (e.g. "reread", "tool_cluster").

    Returns:
        The matching practice dicts, in the order ``practices_for`` yields them;
        empty when the kind has no community practices.
    """
    return [
        p for p in practices_for(kind)
        if p.get("attribution") == "Generally recommended"
    ]
def _named_rec(
    kind: str,
    turns_,
    headline: str,
    bp: Optional[dict[str, Any]],
) -> dict[str, Any]:
    """Assemble an "Anthropic"-attributed recommendation item.

    Args:
        kind: Signal kind the item belongs to.
        turns_: Turn indices the signal touched; stored as given.
        headline: One-line, observation-only headline for the card.
        bp: The cited best-practice entry, or None/empty when the knowledge
            file has no entry for this kind.

    Returns:
        ``{kind, turns, headline, advice, practice, source, attribution}``.
        Without a practice, ``advice`` is the empty string and
        ``practice``/``source`` are None. With one, each field is whatever the
        entry holds (None for a missing key).
    """
    if bp:
        advice, practice, source = bp.get("fix"), bp.get("practice"), bp.get("source")
    else:
        # Bare item: state the observation, cite nothing.
        advice, practice, source = "", None, None
    return {
        "kind": kind,
        "turns": turns_,
        "headline": headline,
        "advice": advice,
        "practice": practice,
        "source": source,
        "attribution": "Anthropic",
    }
# --------------------------------------------------------------------------- #
# orchestration helper: attach guides in place
# --------------------------------------------------------------------------- #
def attach_guides(
    turns,
    rereads_by_turn: dict[int, list[Reread]],
    loops_by_turn: dict[int, TurnLoops],
) -> None:
    """Set ``guide`` on every turn, in place.

    Each turn's ``guide`` is assigned the result of ``build_guide`` for that
    turn, which is None when no pattern fired. Signal results are looked up by
    ``t.i``; a turn with no entry in a mapping is passed None for that signal.

    Args:
        turns: Turn objects to annotate; each must expose ``i``.
        rereads_by_turn: Re-read hits keyed by turn index.
        loops_by_turn: Loop-detection results keyed by turn index.
    """
    for t in turns:
        t.guide = build_guide(
            t, rereads_by_turn.get(t.i), loops_by_turn.get(t.i)
        )