Spaces:

build-small-hackathon
/

her

Running on Zero

App Files Files Community

her / engine /core /guidance.py

geekwrestler

Squash history (purge pre-scrub demo session blobs)

5f43c7d 5 days ago

raw

history blame contribute delete

17.5 kB

	"""guidance.py — Guides (per-turn advice) + Findings (graph-level claims).

	NON-NEGOTIABLE #6: the advisor stays SILENT unless a NAMED, FIXABLE pattern fires.
	Cost alone is never advice. A turn gets a Guide ONLY when:
	- a re-read pattern fired → kind 'reread'
	- a real retry loop fired → kind 'loop'
	- heavy AND over the absolute cost floor, with NO churn → kind 'big'
	("expensive because big, not wasteful; checkpoint to cap, but don't
	restart and lose context")
	Otherwise `guide` stays None. "Expensive but clean" is a valid, important output —
	and a cheap turn (a query/reply, a failed round-trip) gets NO 'big' card at all:
	the absolute-cost gate stops the relative top-N rank from faking expense (#6).

	NON-NEGOTIABLE #4/#7: Findings separate PROVEN from HYPOTHESIS. A value-flow edge
	(a distinctive value reappears verbatim) is `proven: true`. Temporal proximity is a
	hypothesis (`proven: false`). The guide/finding text SUGGESTS, never asserts a fix.

	Pure code, NO model. (The narrator turns these into prose later; it never invents
	new findings.)
	"""
	from __future__ import annotations

	from dataclasses import asdict
	from typing import Any, Optional

	from engine.core.best_practices import practice_for, practices_for
	from engine.core.loops import TurnLoops
	from engine.core.rereads import Reread

	# Per-turn guide kinds that map to a fixable best practice. 'big' is EXCLUDED on
	# purpose: it fires only on heavy-with-no-churn ("expensive because big, not
	# wasteful"), so surfacing it as something to "do better" would manufacture advice
	# on a clean turn (build rule #6).
	_GUIDE_TO_SIGNAL = {"loop": "loop", "reread": "reread"}


	# --------------------------------------------------------------------------- #
	# Guides (attached to a turn ONLY when a pattern fires)
	# --------------------------------------------------------------------------- #
	def build_guide(
	    turn,
	    rereads: Optional[list[Reread]],
	    loops: Optional[TurnLoops],
	) -> Optional[dict[str, str]]:
	    """Return the Guide dict for one turn, or None (silence) when nothing fires.

	    At most one guide is produced per turn. When several patterns could
	    apply, the first match in this order wins:

	    1. 'loop'   — ``loops.loops`` is non-empty (correctness signal).
	    2. 'reread' — ``rereads`` is non-empty (wasted work).
	    3. 'big'    — ``turn.heavy`` is truthy AND ``turn.overBudget`` is truthy
	       (informational; the turn is expensive but has no churn).

	    Args:
	        turn: Turn object; ``heavy`` is read directly and ``overBudget`` is
	            read with a False default when the attribute is missing.
	        rereads: Re-read records for this turn (``file`` and ``count`` are
	            read from the first one), or None/empty when none fired.
	        loops: Loop container for this turn (each entry of ``loops.loops``
	            exposes ``count`` and ``errored``), or None when none fired.

	    Returns:
	        ``{"kind", "head", "body"}`` with kind in {'loop', 'reread', 'big'},
	        or None when no named pattern fired.
	    """
	    # 1) Real retry loop — the strongest, most actionable signal. When a turn
	    #    has several loops, report the one with the highest repeat count.
	    if loops and loops.loops:
	        worst = max(loops.loops, key=lambda candidate: candidate.count)
	        return {
	            "kind": "loop",
	            "head": (
	                f"Retry loop: same command ran {worst.count}x, "
	                f"{worst.errored} errored"
	            ),
	            "body": (
	                "The exact same command was re-run after it errored. Worth checking "
	                "whether the command needs a fix (quoting, path, missing arg) rather "
	                "than another retry."
	            ),
	        }

	    # 2) Re-read — report the first re-read record supplied for the turn.
	    if rereads:
	        first = rereads[0]
	        return {
	            "kind": "reread",
	            "head": f"Re-read: {first.file} opened {first.count}x in this turn",
	            "body": (
	                f"{first.file} was read {first.count} times here. If the content is stable, "
	                "reading it once and keeping it in context would avoid the repeat token "
	                "cost — worth a look."
	            ),
	        }

	    # 3) Heavy with no churn → 'big' (expensive because big, not wasteful).
	    #    Reaching this point already means no loop and no re-read fired (both
	    #    returned above), so "no churn" holds by construction and needs no
	    #    separate check.
	    #    Gate on the ABSOLUTE cost floor (overBudget), not merely the relative
	    #    top-N rank: in a tiny/cheap session every turn is "top-N", so rank alone
	    #    would label a near-free turn "expensive" — which rule #6 forbids
	    #    ("cost alone is never advice").
	    if turn.heavy and getattr(turn, "overBudget", False):
	        return {
	            "kind": "big",
	            "head": "Expensive because big, not wasteful",
	            "body": (
	                "This is one of the heaviest turns by cache-read, but there's no loop or "
	                "re-read churn driving it — the cost is the size of the work, not waste. "
	                "If you want to cap spend, checkpoint here; don't restart the session and "
	                "lose the accumulated context."
	            ),
	        }

	    # Silence: no named, fixable pattern fired.
	    return None


	# --------------------------------------------------------------------------- #
	# Findings (graph-level claims; proven vs hypothesis ALWAYS separated)
	# --------------------------------------------------------------------------- #
	def build_findings(
	    turns,
	    rereads_by_turn: dict[int, list[Reread]],
	    loops_by_turn: dict[int, TurnLoops],
	    heavy_indices: list[int],
	    tool_clusters: Optional[list[dict[str, Any]]] = None,
	) -> list[dict[str, Any]]:
	    """Build the graph-level Finding objects for a session.

	    Finding = { id, kind, severity, nodes:[id], edges:[id], proven:bool, text }

	    Every finding emitted here carries ``proven: True`` because each one
	    reports something observed or counted (a value that reappeared verbatim,
	    a repeat count, a token ranking), not an inference. The finding TEXT only
	    suggests a fix; it never asserts one.

	    Findings are emitted in this order, with ids F1, F2, ... assigned in
	    emission order:

	    1. 'value_flow'   — one per tool call whose ``provenance`` is "indirect"
	       and whose ``flowValue`` is truthy.
	    2. 'loop'         — one per loop, turns in ascending index order.
	    3. 'reread'       — one per re-read record, turns in ascending index order.
	    4. 'heavy'        — one per entry of ``heavy_indices``, in the given order.
	    5. 'tool_cluster' — one per entry of ``tool_clusters``.

	    Args:
	        turns: Sequence of turn objects; ``heavy_indices`` entries are used as
	            positions into it.
	        rereads_by_turn: Re-read records keyed by turn index.
	        loops_by_turn: Loop containers keyed by turn index.
	        heavy_indices: Positions in ``turns`` of the heaviest turns.
	        tool_clusters: Optional cluster dicts. ``binary``, ``calls`` and
	            ``errored`` are required keys; ``turns``, ``toolIds``, ``fix``,
	            ``practice`` and ``source`` are optional.

	    Returns:
	        The list of finding dicts (possibly empty).
	    """
	    findings: list[dict[str, Any]] = []

	    def _emit(kind: str, severity: str, text: str, nodes: list) -> None:
	        # Ids are sequential in emission order: F1, F2, ...
	        findings.append(
	            {
	                "id": f"F{len(findings) + 1}",
	                "kind": kind,
	                "severity": severity,
	                "nodes": nodes,
	                "edges": [],
	                "proven": True,
	                "text": text,
	            }
	        )

	    # --- value-flow edges (a distinctive value reappeared verbatim) --------- #
	    for t in turns:
	        for tc in t.tools:
	            if tc.provenance == "indirect" and tc.flowValue:
	                _emit(
	                    "value_flow",
	                    "info",
	                    f"Turn {t.i}: {tc.name} used '{tc.flowValue}', which first "
	                    f"appeared in an earlier {tc.sourceTool} result — a proven "
	                    f"value-flow (agent-driven, not from the human's prompt).",
	                    [tc.id] if tc.id else [],
	                )

	    # --- OBSERVED named patterns: loops, re-reads, heavy -------------------- #
	    # Exact-command repeat + error is observed, not inferred.
	    for ti, tl in sorted(loops_by_turn.items()):
	        for lp in tl.loops:
	            _emit(
	                "loop",
	                "warn",
	                f"Turn {ti}: the same command ran {lp.count}x with "
	                f"{lp.errored} errored — looks like a retry loop worth checking.",
	                [],
	            )

	    # The repeated reads are counted, not inferred.
	    for ti, rrs in sorted(rereads_by_turn.items()):
	        for rr in rrs:
	            _emit(
	                "reread",
	                "warn",
	                f"Turn {ti}: {rr.file} was read {rr.count}x — a re-read pattern; "
	                "caching it in context could avoid the repeat cost.",
	                [],
	            )

	    # The cache-read ranking is computed, not inferred. The tail of the text
	    # depends on whether the same turn also has loop/re-read churn.
	    for ti in heavy_indices:
	        t = turns[ti]
	        turn_loops = loops_by_turn.get(ti)
	        churn = bool(rereads_by_turn.get(ti)) or bool(turn_loops and turn_loops.loops)
	        tail = (
	            "; see the loop/re-read finding on this turn."
	            if churn
	            else "; no loop/re-read churn — expensive because big, not wasteful."
	        )
	        _emit(
	            "heavy",
	            "info",
	            f"Turn {ti} is among the top-3 by cache-read "
	            f"({t.tokens.cacheRead:,} tokens)" + tail,
	            [],
	        )

	    # --- NAMED pattern: tool_cluster (CLI flailing, no skill) --------------- #
	    # The call/error counts are OBSERVED; the fix is a cited SUGGESTION (build
	    # rule #7). The citation rides on the finding text.
	    for c in tool_clusters or []:
	        turns_str = ", ".join(f"turn {i}" for i in c.get("turns", []))
	        # Only mention turns when the cluster names any; otherwise the sentence
	        # would read "across  with no skill loaded".
	        where = f" across {turns_str}" if turns_str else ""
	        observed = (
	            f"Ran `{c['binary']}` {c['calls']}x ({c['errored']} errored)"
	            f"{where} with no skill loaded for it"
	        )
	        fix = c.get("fix")
	        if fix:
	            practice = c.get("practice", "Use CLI tools / Create skills")
	            source = c.get("source")
	            # Skip the " — source" suffix when there is no source to cite, so
	            # the citation never ends in a dangling separator.
	            citation = f"{practice} — {source}" if source else f"{practice}"
	            text = f"{observed}. {fix} (Best practice: {citation})"
	        else:
	            # Knowledge file absent — state the observation, suggest plainly,
	            # no citation.
	            text = (
	                f"{observed} — worth giving the agent that context up front "
	                f"(a project skill or a service CLI) so it doesn't rediscover it by trial."
	            )
	        _emit("tool_cluster", "warn", text, c.get("toolIds", []))

	    return findings


	# --------------------------------------------------------------------------- #
	# Recommendations (session-level "what could have been better")
	# --------------------------------------------------------------------------- #
	def build_recommendations(
	    turns,
	    tool_clusters: Optional[list[dict[str, Any]]] = None,
	    read_bursts: Optional[list[dict[str, Any]]] = None,
	    unverified: Optional[dict[str, Any]] = None,
	    near_repeats: Optional[list[dict[str, Any]]] = None,
	    unloaded_mcp: Optional[dict[str, Any]] = None,
	    npx_unpinned: Optional[dict[str, Any]] = None,
	) -> list[dict[str, Any]]:
	    """Collapse the fired, FIXABLE signals into a session-level recommendation list.

	    Item = { kind, turns:[i], headline, advice, practice, source, attribution }.

	    Pure code: nothing is detected here. The function reads the guides already
	    attached to the turns plus the signal results passed in, and takes the fix
	    text from the best-practice lookups. Two attributions can appear:

	    * "Anthropic" — one item per fired signal instance, carrying the cited
	      practice when the lookup returns one.
	    * "Generally recommended" — for each fired signal instance whose kind has
	      such practices, one extra item per practice with the SAME turns.

	    Items come out in this order: per-turn guides (loop / reread), tool
	    clusters, read bursts, unverified edits, near repeats, unloaded MCP, then
	    all "Generally recommended" supplements in the order the signals fired.
	    ``npx_unpinned`` produces no "Anthropic" item; it is only recorded as
	    fired so its supplements can surface.

	    An empty list is a valid result: nothing fixable fired.
	    """
	    recs: list[dict[str, Any]] = []
	    # Every signal instance that fired, as (kind, turns). Consumed at the end
	    # to attach the supplements to the same kind and the same turns.
	    fired: list[tuple[str, list[int]]] = []

	    # --- per-turn guides; kinds outside _GUIDE_TO_SIGNAL ('big') are skipped - #
	    for turn in turns:
	        guide = getattr(turn, "guide", None)
	        if not guide:
	            continue
	        signal = _GUIDE_TO_SIGNAL.get(guide.get("kind"))
	        if not signal:
	            continue
	        cited = practice_for(signal)
	        if cited:
	            advice = cited.get("fix")
	            practice = cited.get("practice")
	            source = cited.get("source")
	        else:
	            # No cited practice: fall back to the guide's own body text.
	            advice = guide.get("body", "")
	            practice = None
	            source = None
	        recs.append(
	            {
	                "kind": signal,
	                "turns": [turn.i],
	                "headline": guide.get("head", ""),
	                "advice": advice,
	                "practice": practice,
	                "source": source,
	                "attribution": "Anthropic",
	            }
	        )
	        fired.append((signal, [turn.i]))

	    # --- tool clusters: the cluster's own fix/practice/source win; the looked-
	    # up practice only fills in what the cluster leaves empty ---------------- #
	    cluster_bp = practice_for("tool_cluster")
	    for cluster in tool_clusters or []:
	        item = {
	            "kind": "tool_cluster",
	            "turns": cluster.get("turns", []),
	            "headline": (
	                f"Flailed on `{cluster['binary']}` — {cluster['calls']}x "
	                f"({cluster['errored']} errored), no skill loaded"
	            ),
	            "advice": cluster.get("fix")
	            or (cluster_bp.get("fix") if cluster_bp else ""),
	            "practice": cluster.get("practice")
	            or (cluster_bp.get("practice") if cluster_bp else None),
	            "source": cluster.get("source")
	            or (cluster_bp.get("source") if cluster_bp else None),
	            "attribution": "Anthropic",
	        }
	        recs.append(item)
	        fired.append(("tool_cluster", cluster.get("turns", [])))

	    # --- read bursts: many different files read in one turn ----------------- #
	    burst_bp = practice_for("read_burst")
	    for burst in read_bursts or []:
	        burst_headline = f"Read {burst['files']} different files in one turn"
	        recs.append(_named_rec("read_burst", [burst["turn"]], burst_headline, burst_bp))
	        fired.append(("read_burst", [burst["turn"]]))

	    # --- unverified edits: a single session-level signal -------------------- #
	    unverified_bp = practice_for("unverified_edit")
	    if unverified:
	        unverified_headline = (
	            f"{unverified['edits']} edits, but no test/build/lint ran in the session"
	        )
	        recs.append(
	            _named_rec(
	                "unverified_edit",
	                unverified.get("turns", []),
	                unverified_headline,
	                unverified_bp,
	            )
	        )
	        fired.append(("unverified_edit", unverified.get("turns", [])))

	    # --- near-identical command repeats within one turn --------------------- #
	    repeat_bp = practice_for("near_repeat")
	    for repeat in near_repeats or []:
	        repeat_headline = f"Re-ran a near-identical command {repeat['count']}x in one turn"
	        recs.append(_named_rec("near_repeat", [repeat["turn"]], repeat_headline, repeat_bp))
	        fired.append(("near_repeat", [repeat["turn"]]))

	    # --- unloaded MCP server probed by hand; practice looked up only on fire - #
	    if unloaded_mcp:
	        mcp_bp = practice_for("unloaded_mcp")
	        mcp_headline = (
	            f"Probed an MCP server that isn't loaded in this session "
	            f"({unloaded_mcp.get('probes', 0)} manual reach attempts) — it can't appear mid-session"
	        )
	        recs.append(
	            _named_rec("unloaded_mcp", unloaded_mcp.get("turns", []), mcp_headline, mcp_bp)
	        )
	        fired.append(("unloaded_mcp", unloaded_mcp.get("turns", [])))

	    # --- unpinned package runners: no "Anthropic" item is emitted for this
	    # kind; it is only marked as fired so the supplements below can surface -- #
	    if npx_unpinned:
	        fired.append(("npx_unpinned", npx_unpinned.get("turns", [])))

	    # --- supplements: one item per 'Generally recommended' practice of each
	    # fired signal, same kind and same turns. Never introduces a new kind. --- #
	    for fired_kind, fired_turns in fired:
	        recs.extend(
	            {
	                "kind": fired_kind,
	                "turns": fired_turns,
	                "headline": extra.get("practice", ""),
	                "advice": extra.get("fix", ""),
	                "practice": extra.get("practice"),
	                "source": extra.get("source"),
	                "attribution": "Generally recommended",
	            }
	            for extra in _community_for(fired_kind)
	        )

	    return recs


	def _community_for(kind: str) -> list[dict[str, Any]]:
	    """Return the practices for ``kind`` attributed "Generally recommended".

	    Filters the result of ``practices_for(kind)`` down to the entries whose
	    ``attribution`` equals "Generally recommended"; every other entry is
	    dropped. Order is preserved.
	    """

	    def _is_community(entry: dict[str, Any]) -> bool:
	        return entry.get("attribution") == "Generally recommended"

	    return list(filter(_is_community, practices_for(kind)))


	def _named_rec(kind: str, turns_, headline: str, bp: Optional[dict]) -> dict[str, Any]:
	"""Assemble a recommendation item from a cited best practice (or bare)."""
	return {
	"kind": kind,
	"turns": turns_,
	"headline": headline,
	"advice": bp.get("fix") if bp else "",
	"practice": bp.get("practice") if bp else None,
	"source": bp.get("source") if bp else None,
	"attribution": "Anthropic",
	}


	# --------------------------------------------------------------------------- #
	# orchestration helper: attach guides in place
	# --------------------------------------------------------------------------- #
	def attach_guides(
	    turns,
	    rereads_by_turn: dict[int, list[Reread]],
	    loops_by_turn: dict[int, TurnLoops],
	) -> None:
	    """Assign ``guide`` on every turn, in place.

	    For each turn, the re-read and loop records stored under its index ``i``
	    (None when the index is absent) are passed to ``build_guide``, and the
	    result — a guide dict or None — is written to ``turn.guide``.
	    """
	    for turn in turns:
	        turn_rereads = rereads_by_turn.get(turn.i)
	        turn_loops = loops_by_turn.get(turn.i)
	        turn.guide = build_guide(turn, turn_rereads, turn_loops)