# her/engine/core/guidance.py
# geekwrestler's picture
# Squash history (purge pre-scrub demo session blobs)
# 5f43c7d
"""guidance.py — Guides (per-turn advice) + Findings (graph-level claims).
NON-NEGOTIABLE #6: the advisor stays SILENT unless a NAMED, FIXABLE pattern fires.
Cost alone is never advice. A turn gets a Guide ONLY when:
- a re-read pattern fired → kind 'reread'
- a real retry loop fired → kind 'loop'
- heavy AND over the absolute cost floor, with NO churn → kind 'big'
  ("expensive because big, not wasteful; checkpoint to cap, but don't
  restart and lose context")
Otherwise `guide` stays None. "Expensive but clean" is a valid, important output —
and a cheap turn (a query/reply, a failed round-trip) gets NO 'big' card at all:
the absolute-cost gate stops the relative top-N rank from faking expense (#6).
NON-NEGOTIABLE #4/#7: Findings separate PROVEN from HYPOTHESIS. A value-flow edge
(a distinctive value reappears verbatim) is `proven: true`. Temporal proximity is a
hypothesis (`proven: false`). The guide/finding text SUGGESTS, never asserts a fix.
Pure code, NO model. (The narrator turns these into prose later; it never invents
new findings.)
"""
from __future__ import annotations
from dataclasses import asdict
from typing import Any, Optional
from engine.core.best_practices import practice_for, practices_for
from engine.core.loops import TurnLoops
from engine.core.rereads import Reread
# Per-turn guide kinds that map to a fixable best practice. build_recommendations
# looks a turn's guide kind up here and passes the mapped signal name to
# practice_for; a kind that is absent from this map yields no recommendation.
# 'big' is EXCLUDED on purpose: it fires only on heavy-with-no-churn ("expensive
# because big, not wasteful"), so surfacing it as something to "do better" would
# manufacture advice on a clean turn (build rule #6).
_GUIDE_TO_SIGNAL: dict[str, str] = {"loop": "loop", "reread": "reread"}
# --------------------------------------------------------------------------- #
# Guides (attached to a turn ONLY when a pattern fires)
# --------------------------------------------------------------------------- #
def build_guide(
    turn,
    rereads: Optional[list[Reread]],
    loops: Optional[TurnLoops],
) -> Optional[dict[str, str]]:
    """Pick the single Guide for one turn, or stay silent (None).

    The checks run in priority order and the first match wins, so a turn
    carries at most one guide:

      1. 'loop'   — ``loops.loops`` is non-empty; the entry with the highest
                    ``count`` is reported (first one wins on a tie).
      2. 'reread' — ``rereads`` is non-empty; its first entry is reported.
      3. 'big'    — ``turn.heavy`` is truthy AND ``turn.overBudget`` is truthy
                    (a missing ``overBudget`` attribute counts as False).

    Args:
        turn: the turn being inspected; ``heavy`` is read directly and
            ``overBudget`` via ``getattr``.
        rereads: re-read records for this turn, or None.
        loops: loop container for this turn (its ``.loops`` is read), or None.

    Returns:
        ``{"kind", "head", "body"}`` when a pattern fires, otherwise None.
    """
    # 1) Retry loop: the strongest, most actionable signal.
    retry_loops = loops.loops if loops else None
    if retry_loops:
        worst = max(retry_loops, key=lambda entry: entry.count)
        return {
            "kind": "loop",
            "head": f"Retry loop: same command ran {worst.count}x, {worst.errored} errored",
            "body": (
                "The exact same command was re-run after it errored. Worth checking "
                "whether the command needs a fix (quoting, path, missing arg) rather "
                "than another retry."
            ),
        }

    # 2) Re-read: report the first recorded re-read for this turn.
    if rereads:
        first = rereads[0]
        return {
            "kind": "reread",
            "head": f"Re-read: {first.file} opened {first.count}x in this turn",
            "body": (
                f"{first.file} was read {first.count} times here. If the content is stable, "
                "reading it once and keeping it in context would avoid the repeat token "
                "cost — worth a look."
            ),
        }

    # 3) Heavy with no churn. Getting this far already means neither a loop nor
    #    a re-read fired, so only the cost gates remain. The ABSOLUTE floor
    #    (overBudget) is required on top of the relative rank (heavy): in a
    #    tiny session every turn is "top-N", and calling a cheap turn expensive
    #    would be false (rule #6: cost alone is never advice).
    if not turn.heavy or not getattr(turn, "overBudget", False):
        return None

    return {
        "kind": "big",
        "head": "Expensive because big, not wasteful",
        "body": (
            "This is one of the heaviest turns by cache-read, but there's no loop or "
            "re-read churn driving it — the cost is the size of the work, not waste. "
            "If you want to cap spend, checkpoint here; don't restart the session and "
            "lose the accumulated context."
        ),
    }
# --------------------------------------------------------------------------- #
# Findings (graph-level claims; proven vs hypothesis ALWAYS separated)
# --------------------------------------------------------------------------- #
def build_findings(
    turns,
    rereads_by_turn: dict[int, list[Reread]],
    loops_by_turn: dict[int, TurnLoops],
    heavy_indices: list[int],
    tool_clusters: Optional[list[dict[str, Any]]] = None,
) -> list[dict[str, Any]]:
    """Build the graph-level Finding list for a session.

    Finding = { id, kind, severity, nodes:[id], edges:[id], proven:bool, text }

    Every finding emitted here carries ``proven: True``: each one reports
    something matched or counted directly (a value reappearing verbatim,
    call/error counts, read counts, cache-read tokens). This function emits no
    hypothesis (``proven: False``) findings. Any remedy in the text is worded
    as a suggestion, never as an asserted fix.

    Emission order (ids are ``F1``, ``F2``, ... in this order):
      1. ``value_flow``   — one per tool call with ``provenance == "indirect"``
                            and a truthy ``flowValue``.
      2. ``loop``         — one per loop, turns visited in sorted key order.
      3. ``reread``       — one per re-read, turns visited in sorted key order.
      4. ``heavy``        — one per entry of ``heavy_indices``.
      5. ``tool_cluster`` — one per entry of ``tool_clusters``.

    Args:
        turns: sequence of turns; each exposes ``i``, ``tools`` and
            ``tokens.cacheRead``. It is indexed positionally by the values in
            ``heavy_indices``.
        rereads_by_turn: turn index -> re-read records (``file``, ``count``).
        loops_by_turn: turn index -> object whose ``.loops`` entries expose
            ``count`` and ``errored``.
        heavy_indices: positions in ``turns`` to report as heavy.
        tool_clusters: optional cluster dicts. ``binary``, ``calls`` and
            ``errored`` are required keys; ``turns``, ``toolIds``, ``fix``,
            ``practice`` and ``source`` are optional.

    Returns:
        The findings, in emission order.

    Raises:
        KeyError: if a cluster dict lacks ``binary``, ``calls`` or ``errored``.
        IndexError: if a heavy index is out of range for ``turns``.
    """
    findings: list[dict[str, Any]] = []

    def _add(kind: str, severity: str, text: str, nodes: Optional[list] = None) -> None:
        # Single place that shapes a Finding. Ids are sequential because this is
        # the only code that appends to `findings`.
        findings.append(
            {
                "id": f"F{len(findings) + 1}",
                "kind": kind,
                "severity": severity,
                "nodes": nodes if nodes is not None else [],
                "edges": [],
                "proven": True,  # observed / counted, never inferred
                "text": text,
            }
        )

    # --- value-flow edges (a distinctive value reappeared verbatim) --------- #
    for t in turns:
        for tc in t.tools:
            if tc.provenance == "indirect" and tc.flowValue:
                _add(
                    "value_flow",
                    "info",
                    (
                        f"Turn {t.i}: {tc.name} used '{tc.flowValue}', which first "
                        f"appeared in an earlier {tc.sourceTool} result — a proven "
                        f"value-flow (agent-driven, not from the human's prompt)."
                    ),
                    nodes=[tc.id] if tc.id else [],
                )

    # --- NAMED patterns: loops, re-reads, heavy ----------------------------- #
    # exact-cmd repeat + error is observed, not inferred
    for ti, tl in sorted(loops_by_turn.items()):
        for lp in tl.loops:
            _add(
                "loop",
                "warn",
                (
                    f"Turn {ti}: the same command ran {lp.count}x with "
                    f"{lp.errored} errored — looks like a retry loop worth checking."
                ),
            )

    # the >=3 reads are counted, not inferred
    for ti, rrs in sorted(rereads_by_turn.items()):
        for rr in rrs:
            _add(
                "reread",
                "warn",
                (
                    f"Turn {ti}: {rr.file} was read {rr.count}x — a re-read pattern; "
                    "caching it in context could avoid the repeat cost."
                ),
            )

    # cacheRead ranking is computed, not inferred.
    # NOTE(review): the text hard-codes "top-3" whatever len(heavy_indices) is,
    # and `turns` is indexed by position rather than by Turn.i — confirm the
    # caller always passes at most three positional indices.
    for ti in heavy_indices:
        t = turns[ti]
        turn_loops = loops_by_turn.get(ti)
        churn = bool(rereads_by_turn.get(ti)) or bool(turn_loops and turn_loops.loops)
        tail = (
            "; see the loop/re-read finding on this turn."
            if churn
            else "; no loop/re-read churn — expensive because big, not wasteful."
        )
        _add(
            "heavy",
            "info",
            f"Turn {ti} is among the top-3 by cache-read "
            f"({t.tokens.cacheRead:,} tokens)" + tail,
        )

    # --- NAMED pattern: tool_cluster (CLI flailing, no skill) --------------- #
    # The call/error counts are OBSERVED; the fix is a cited SUGGESTION (build
    # rule #7). The citation rides on the finding text.
    for c in tool_clusters or []:
        observed = f"Ran `{c['binary']}` {c['calls']}x ({c['errored']} errored)"
        # Name the turns only when the cluster actually lists some; an empty or
        # missing list used to leave a dangling "across " in the sentence.
        where = ", ".join(f"turn {i}" for i in (c.get("turns") or []))
        if where:
            observed += f" across {where}"
        observed += " with no skill loaded for it"

        fix = c.get("fix")
        if fix:
            text = (
                f"{observed}. {fix} "
                f"(Best practice: {c.get('practice', 'Use CLI tools / Create skills')} — "
                f"{c.get('source', '')})"
            )
        else:
            # knowledge file absent — state the observation, suggest plainly, no citation
            text = (
                f"{observed} — worth giving the agent that context up front "
                f"(a project skill or a service CLI) so it doesn't rediscover it by trial."
            )
        # Copy the ids so the finding never aliases the caller's cluster list.
        _add("tool_cluster", "warn", text, nodes=list(c.get("toolIds") or []))

    return findings
# --------------------------------------------------------------------------- #
# Recommendations (session-level "what could have been better")
# --------------------------------------------------------------------------- #
def build_recommendations(
    turns,
    tool_clusters: Optional[list[dict[str, Any]]] = None,
    read_bursts: Optional[list[dict[str, Any]]] = None,
    unverified: Optional[dict[str, Any]] = None,
    near_repeats: Optional[list[dict[str, Any]]] = None,
    unloaded_mcp: Optional[dict[str, Any]] = None,
    npx_unpinned: Optional[dict[str, Any]] = None,
) -> list[dict[str, Any]]:
    """Turn the fired, FIXABLE signals into a session-level recommendation list.

    Item = { kind, turns:[i], headline, advice, practice, source, attribution }.

    Pure code: it reads the guides already attached to turns plus the signal
    results passed in, and takes the fix text from ``practice_for`` /
    ``_community_for``. Which signals fire is NOT decided here.

    Two attributions appear side by side:
      * "Anthropic"             — emitted first, one item per fired signal.
      * "Generally recommended" — appended afterwards: for every fired
        (kind, turns) pair, one extra item per practice that
        ``_community_for(kind)`` returns, carrying the SAME turns.

    ``npx_unpinned`` produces no "Anthropic" item; it is only recorded as fired
    so that its community practices (if any) are appended.

    Args:
        turns: turns whose optional ``guide`` dict and ``i`` are read; only
            guide kinds present in ``_GUIDE_TO_SIGNAL`` count ('big' does not).
        tool_clusters: cluster dicts (``binary``, ``calls``, ``errored``
            required; ``turns``, ``fix``, ``practice``, ``source`` optional).
        read_bursts: dicts with ``turn`` and ``files``.
        unverified: dict with ``edits`` and optional ``turns``.
        near_repeats: dicts with ``turn`` and ``count``.
        unloaded_mcp: dict with optional ``turns`` and ``probes``.
        npx_unpinned: dict with optional ``turns``.

    Returns:
        Cited items followed by community supplements. An empty list means
        silence is the honest result ("expensive but clean").
    """
    items: list[dict[str, Any]] = []
    # (kind, turns) for each signal that fired; drives the supplements below.
    hits: list[tuple[str, list[int]]] = []

    # Per-turn guides (loop / reread); 'big' has no mapping and is skipped.
    for turn in turns:
        guide = getattr(turn, "guide", None)
        signal = _GUIDE_TO_SIGNAL.get(guide.get("kind")) if guide else None
        if not signal:
            continue
        cited = practice_for(signal)
        if cited:
            advice = cited.get("fix")
            practice = cited.get("practice")
            source = cited.get("source")
        else:
            # No cited practice: fall back to the guide's own body text.
            advice = guide.get("body", "")
            practice = source = None
        items.append(
            {
                "kind": signal,
                "turns": [turn.i],
                "headline": guide.get("head", ""),
                "advice": advice,
                "practice": practice,
                "source": source,
                "attribution": "Anthropic",
            }
        )
        hits.append((signal, [turn.i]))

    # Tool clusters (CLI flailing, no skill): the cluster's own fix/practice/
    # source win; the looked-up practice only fills the gaps.
    cluster_bp = practice_for("tool_cluster")
    for cluster in tool_clusters or []:
        item = {
            "kind": "tool_cluster",
            "turns": cluster.get("turns", []),
            "headline": (
                f"Flailed on `{cluster['binary']}` — {cluster['calls']}x "
                f"({cluster['errored']} errored), no skill loaded"
            ),
            "advice": cluster.get("fix") or (cluster_bp.get("fix") if cluster_bp else ""),
            "practice": cluster.get("practice")
            or (cluster_bp.get("practice") if cluster_bp else None),
            "source": cluster.get("source")
            or (cluster_bp.get("source") if cluster_bp else None),
            "attribution": "Anthropic",
        }
        items.append(item)
        hits.append(("tool_cluster", cluster.get("turns", [])))

    # Read-bursts (many files in one turn).
    burst_bp = practice_for("read_burst")
    for burst in read_bursts or []:
        item = _named_rec(
            "read_burst",
            [burst["turn"]],
            f"Read {burst['files']} different files in one turn",
            burst_bp,
        )
        items.append(item)
        hits.append(("read_burst", [burst["turn"]]))

    # Unverified edits (session-level: edits, no test/build/lint anywhere).
    unverified_bp = practice_for("unverified_edit")
    if unverified:
        item = _named_rec(
            "unverified_edit",
            unverified.get("turns", []),
            f"{unverified['edits']} edits, but no test/build/lint ran in the session",
            unverified_bp,
        )
        items.append(item)
        hits.append(("unverified_edit", unverified.get("turns", [])))

    # Near-identical command repeats.
    repeat_bp = practice_for("near_repeat")
    for repeat in near_repeats or []:
        item = _named_rec(
            "near_repeat",
            [repeat["turn"]],
            f"Re-ran a near-identical command {repeat['count']}x in one turn",
            repeat_bp,
        )
        items.append(item)
        hits.append(("near_repeat", [repeat["turn"]]))

    # Unloaded MCP probed manually; the practice is looked up only when it fired.
    if unloaded_mcp:
        mcp_bp = practice_for("unloaded_mcp")
        item = _named_rec(
            "unloaded_mcp",
            unloaded_mcp.get("turns", []),
            f"Probed an MCP server that isn't loaded in this session "
            f"({unloaded_mcp.get('probes', 0)} manual reach attempts) — it can't appear mid-session",
            mcp_bp,
        )
        items.append(item)
        hits.append(("unloaded_mcp", unloaded_mcp.get("turns", [])))

    # Unpinned package runners: no cited item is built for this kind, so it is
    # only recorded as fired and can surface through the supplements below.
    if npx_unpinned:
        hits.append(("npx_unpinned", npx_unpinned.get("turns", [])))

    # Community supplements: pure enrichment of kinds that already fired. Each
    # supplement reuses the fired entry's turns list.
    items.extend(
        {
            "kind": fired_kind,
            "turns": fired_turns,
            "headline": extra.get("practice", ""),
            "advice": extra.get("fix", ""),
            "practice": extra.get("practice"),
            "source": extra.get("source"),
            "attribution": "Generally recommended",
        }
        for fired_kind, fired_turns in hits
        for extra in _community_for(fired_kind)
    )
    return items
def _community_for(kind: str) -> list[dict[str, Any]]:
    """Return the 'Generally recommended' practices registered for ``kind``.

    Filters ``practices_for(kind)`` down to the entries whose ``attribution``
    is exactly "Generally recommended"; every other entry (e.g. the Anthropic
    one, which the caller emits separately) is dropped. Order is preserved.
    """

    def _is_community(entry: dict[str, Any]) -> bool:
        return entry.get("attribution") == "Generally recommended"

    return list(filter(_is_community, practices_for(kind)))
def _named_rec(kind: str, turns_, headline: str, bp: Optional[dict]) -> dict[str, Any]:
    """Assemble one "Anthropic"-attributed recommendation item.

    Args:
        kind: signal kind the item belongs to.
        turns_: turn indices the item names; stored as given (not copied).
        headline: short description of what was observed.
        bp: the cited best practice, or a falsy value when none is available.

    Returns:
        The item dict. With a truthy ``bp`` the advice/practice/source come
        from its ``fix``/``practice``/``source`` keys; otherwise advice is ""
        and practice/source are None.
    """
    if bp:
        advice = bp.get("fix")
        practice = bp.get("practice")
        source = bp.get("source")
    else:
        advice, practice, source = "", None, None

    return {
        "kind": kind,
        "turns": turns_,
        "headline": headline,
        "advice": advice,
        "practice": practice,
        "source": source,
        "attribution": "Anthropic",
    }
# --------------------------------------------------------------------------- #
# orchestration helper: attach guides in place
# --------------------------------------------------------------------------- #
def attach_guides(
    turns,
    rereads_by_turn: dict[int, list[Reread]],
    loops_by_turn: dict[int, TurnLoops],
) -> None:
    """Assign ``guide`` on every turn, in place.

    Each turn's re-reads and loops are looked up by ``turn.i`` (None when the
    turn has no entry) and handed to ``build_guide``; whatever it returns — a
    guide dict or None — is stored on ``turn.guide``.
    """
    for turn in turns:
        turn_rereads = rereads_by_turn.get(turn.i)
        turn_loops = loops_by_turn.get(turn.i)
        turn.guide = build_guide(turn, turn_rereads, turn_loops)