# Path: her/engine/core/best_practices.py
# Author avatar: geekwrestler
# Commit message: Squash history (purge pre-scrub demo session blobs)
# Commit: 5f43c7d
"""best_practices.py — load the best-practice map for a fired signal.
Two reference files, kept SEPARATE on purpose:
* `narrator/knowledge/signals.json` — the CITED Anthropic guidance (transcribed
from the official Claude Code best-practices doc). This is the canonical, sourced
set; it is NOT edited by this feature.
* `narrator/knowledge/community-practices.json` — custom, NON-Anthropic, "Generally
recommended" practices the OWNER can edit directly (no code change) to add the
well-known craft of agent-driven coding alongside the Anthropic ones.
Both are static reference data, NOT a model and NOT a finding source — they only
supply the citable/suggested *fix text* the narrator attaches once a DETERMINISTIC
signal has already fired. No signal → never consulted.
Build rule #1 (deterministic core) is preserved: nothing here decides whether a
pattern occurred; the engine does that. Build rule #7: the text is suggest-only.
Robust to a missing/corrupt file — Anthropic lookups return None and community
lookups return [] so callers fall back to plain, un-cited wording rather than
crashing.
"""
from __future__ import annotations
import json
import os
from typing import Any, Optional
# Directory holding this module, and the repository root two levels above it.
_HERE = os.path.dirname(os.path.abspath(__file__))
_REPO = os.path.dirname(os.path.dirname(_HERE))

# Both reference files sit side by side under <repo>/narrator/knowledge/.
_KNOWLEDGE_DIR = os.path.join(_REPO, "narrator", "knowledge")
_PATH = os.path.join(_KNOWLEDGE_DIR, "signals.json")
_COMMUNITY_PATH = os.path.join(_KNOWLEDGE_DIR, "community-practices.json")

# Parsed signals.json; stays None until _load() fills it on first use.
_CACHE: Optional[dict[str, Any]] = None

# kind -> [community practice dict, …], lazily built from community-practices.json.
_COMMUNITY_CACHE: Optional[dict[str, list[dict[str, Any]]]] = None
def _load() -> dict[str, Any]:
    """Return the parsed contents of signals.json, reading the file at most once.

    The result is memoized in the module-level ``_CACHE``. If the file is
    missing, unreadable, not valid JSON, or its top-level value is not a JSON
    object, ``{}`` is cached and returned instead, so callers can always use
    mapping methods such as ``.get`` on the result.
    """
    global _CACHE
    if _CACHE is None:
        try:
            with open(_PATH, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
            data = {}
        # Valid JSON need not be an object (a bare list, string, number or null
        # all parse fine). Every caller treats the result as a mapping, so any
        # other top-level shape is handled the same way as a corrupt file.
        _CACHE = data if isinstance(data, dict) else {}
    return _CACHE
def _load_community() -> dict[str, list[dict[str, Any]]]:
    """Build (once) and return the community practices grouped by signal `kind`.

    The file may be either ``{"practices": [...]}`` or a bare top-level list.
    The mapping returned is ``{kind: [practice, …]}`` in file order. Nothing
    here raises for bad data: an unreadable or unparseable file produces ``{}``,
    and any item that is not a dict, or lacks a truthy ``kind`` or ``practice``,
    is dropped. Every kept practice gets its attribution forced to
    'Generally recommended', whatever the file says.
    """
    global _COMMUNITY_CACHE
    if _COMMUNITY_CACHE is None:
        try:
            with open(_COMMUNITY_PATH, encoding="utf-8") as handle:
                payload = json.load(handle)
        except (OSError, ValueError):
            payload = None  # unreadable or malformed: nothing to index

        # Accept either the wrapped form or a bare list; anything else is empty.
        entries = payload.get("practices") if isinstance(payload, dict) else payload
        if not isinstance(entries, list):
            entries = []

        by_kind: dict[str, list[dict[str, Any]]] = {}
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            raw_kind = entry.get("kind")
            if not (raw_kind and entry.get("practice")):
                continue
            kind_key = str(raw_kind)
            normalized = {
                "id": entry.get("id"),
                "kind": kind_key,
                "attribution": "Generally recommended",  # normalized, never trusted from file
                "practice": entry.get("practice"),
                "fix": entry.get("fix", ""),
                "source": entry.get("source"),
            }
            by_kind.setdefault(kind_key, []).append(normalized)

        _COMMUNITY_CACHE = by_kind
    return _COMMUNITY_CACHE
def practice_for(kind: str) -> Optional[dict[str, str]]:
    """Return {practice, fix, section, source} for a fired signal kind, or None.

    `kind` is one of the deterministic signal kinds: 'tool_cluster', 'loop',
    'reread', 'heavy'. Returns None if the knowledge file is absent, is not
    shaped as expected, or has no entry with a non-empty ``fix`` for `kind` —
    callers must treat that as "cite nothing", never as "invent advice".

    The dict returned is the entry held in the loaded knowledge data, not a
    copy; copy it before modifying.
    """
    data = _load()
    # `.get("signals", {})` would only default when the key is absent, so an
    # explicit `"signals": null` (or a list) must be rejected by type instead.
    signals = data.get("signals") if isinstance(data, dict) else None
    if not isinstance(signals, dict):
        return None
    entry = signals.get(kind)
    if isinstance(entry, dict) and entry.get("fix"):
        return entry
    return None
def practices_for(kind: str) -> list[dict[str, Any]]:
    """Every practice attached to a fired signal kind — Anthropic AND community.

    Returns a list (possibly empty): the single cited Anthropic practice first
    (tagged attribution 'Anthropic', `source` = its doc URL unless the entry
    already carries a `source` key) if one exists, then every custom
    'Generally recommended' practice whose `kind` matches, in file order.
    `practice_for` is the back-compat single-Anthropic accessor; this is the
    superset the recommendation builder uses so a signal can carry both.

    Every dict in the result is a shallow copy, so callers may annotate or
    modify the returned practices without altering the cached reference data.
    """
    out: list[dict[str, Any]] = []
    bp = practice_for(kind)
    if bp:
        anthropic = dict(bp)  # don't mutate the cached signals.json entry
        anthropic["kind"] = kind
        anthropic["attribution"] = "Anthropic"
        anthropic.setdefault("source", source_url())
        out.append(anthropic)
    # Copy community entries as well: handing out the cached dicts would let a
    # caller's edits leak into every later lookup for the same kind.
    out.extend(dict(practice) for practice in _load_community().get(kind, []))
    return out
def source_url() -> str:
    """The canonical Anthropic doc URL (for a single citation line).

    Reads ``_provenance.source`` from the loaded signals data. Falls back to
    the built-in default URL when the data is unavailable, ``_provenance`` is
    not an object, or ``source`` is missing, empty, or not a string — so the
    result is always a non-empty string.
    """
    default = "https://code.claude.com/docs/en/best-practices"
    data = _load()
    provenance = data.get("_provenance") if isinstance(data, dict) else None
    if isinstance(provenance, dict):
        source = provenance.get("source")
        # A null/number/empty value would otherwise be returned as the "URL".
        if isinstance(source, str) and source:
            return source
    return default