# composer-replication-framework/composer_replication/tests/test_layered_hint_generator.py
# Author: Codeseys
# Commit 84740d4 - feat(hints): ADR-009 layered HintGenerator; accepted
"""Tests for the layered HintGenerator architecture (ADR-009).

Covers ADR-009 acceptance gates:

- gate 1: HintGenerator Protocol; layers satisfy it (runtime_checkable).
- gate 2: TemplateHintGenerator is byte-identical to the existing dispatch()
  for all 5 registered kinds (no regression).
- gate 3: CompositeHintGenerator tries layers cost-first — a tool_not_found
  site is served by the template layer (no LLM call); a style site falls
  through to the judge layer.
- gate 4: LLMJudgeHintGenerator caches (second identical call = zero
  completions).
- gate 5: as_collator_hook() matches CollatorConfig.hint_generator's
  (error_kind, error_meta) -> str | None signature.

All CPU-only, no network (LLM layer is a stub).
"""
from __future__ import annotations
from composer_replication.hint_generator import (
HINT_TEMPLATES,
CompositeHintGenerator,
HintGenerator,
LLMJudgeHintGenerator,
RawErrorHintGenerator,
TemplateHintGenerator,
default_composite,
dispatch,
)
# --- gate 1: Protocol -------------------------------------------------------
def test_layers_satisfy_protocol():
    """Gate 1: every concrete layer passes an ``isinstance`` check against HintGenerator."""
    # Factories are invoked lazily, one at a time, so each layer is built
    # immediately before its own check.
    layer_factories = (
        TemplateHintGenerator,
        RawErrorHintGenerator,
        LLMJudgeHintGenerator,
        lambda: CompositeHintGenerator([]),
    )
    for make_layer in layer_factories:
        assert isinstance(make_layer(), HintGenerator)
# --- gate 2: template byte-identity ----------------------------------------
def test_template_layer_byte_identical_to_dispatch():
    """Gate 2: the template layer must return exactly what ``dispatch`` returns.

    For every kind yielded by iterating ``HINT_TEMPLATES``, the output of
    ``TemplateHintGenerator.generate(kind, meta)`` is compared for equality
    with ``dispatch(kind, ctx)``, where ``ctx`` is a copy of ``meta`` with
    ``"error_kind"`` defaulted to the kind under test.
    """
    tmpl = TemplateHintGenerator()
    meta = {
        "available_tools": ["read", "write"],
        "tool_name": "frobnicate",
        "tool_schema": {"x": "int"},
        "error_message": "boom",
    }
    # Without this guard an empty registry would make the loop below a no-op
    # and the test would pass without comparing anything.
    assert HINT_TEMPLATES, "HINT_TEMPLATES is empty; nothing to compare against dispatch"
    for kind in HINT_TEMPLATES:
        ctx = dict(meta)
        ctx.setdefault("error_kind", kind)
        expected = dispatch(kind, ctx)
        # Hand the layer its own copy so that, should it write into the dict
        # it receives, later iterations still start from the pristine `meta`.
        got = tmpl.generate(kind, dict(meta))
        assert got == expected, f"template layer drifted from dispatch for {kind}"
def test_template_layer_returns_none_for_unknown_kind():
    """The template layer yields ``None`` for a kind it has no entry for."""
    layer = TemplateHintGenerator()
    result = layer.generate("totally_unknown_kind", {})
    assert result is None
# --- gate 3: cost-ordered composite ----------------------------------------
def test_composite_serves_tool_error_from_template_no_llm():
    """Gate 3: a ``tool_not_found`` site is answered without invoking the LLM stub."""
    completions = 0

    def fake_complete(prompt: str) -> str:
        # Stub completion function: counts how often the judge layer calls it.
        nonlocal completions
        completions += 1
        return "LLM HINT"

    composite = default_composite(llm_complete=fake_complete)
    error_meta = {"available_tools": ["read", "write"]}
    hint = composite.generate("tool_not_found", error_meta)

    assert hint is not None
    assert "Available tools" in hint  # template output
    assert completions == 0, "LLM judge must NOT be called for a template-covered site"
def test_composite_falls_through_to_judge_for_uncovered_site():
    """Gate 3: a site the cheaper layers do not answer is served by the judge stub."""
    canned_reply = "Be more concise; you repeated the same explanation."
    completions = 0

    def fake_complete(prompt: str) -> str:
        # Stub completion function: counts calls and returns a fixed hint.
        nonlocal completions
        completions += 1
        return canned_reply

    composite = default_composite(llm_complete=fake_complete, enable_raw_error=False)
    # 'verbose_communication' has no template and the meta carries no
    # error_message, so the request should reach the judge layer.
    hint = composite.generate("verbose_communication", {})

    assert hint == "Be more concise; you repeated the same explanation."
    assert completions == 1
def test_raw_error_layer_covers_unmatched_site_with_message():
    """An unmapped kind that carries an ``error_message`` still yields a hint quoting it."""
    composite = default_composite()  # no LLM
    error_meta = {"error_message": "Segfault at 0x0"}
    hint = composite.generate("weird_unmapped_error", error_meta)
    assert hint is not None
    assert "Segfault at 0x0" in hint
def test_composite_returns_none_when_all_layers_defer():
    """With no LLM configured, an unknown kind with empty meta produces ``None``."""
    composite = default_composite()  # templates + raw-error, no LLM
    # Unknown kind and no error message: no layer is expected to fire.
    result = composite.generate("unknown", {})
    assert result is None
# --- gate 4: LLM-judge cache ------------------------------------------------
def test_llm_judge_caches_in_memory(tmp_path):
    """Gate 4: two identical ``generate`` calls on one judge trigger one completion."""
    completions = 0

    def fake_complete(prompt: str) -> str:
        # Each real completion returns a distinct string, so a cache miss on
        # the second call would also show up as differing hints.
        nonlocal completions
        completions += 1
        return f"hint #{completions}"

    judge = LLMJudgeHintGenerator(fake_complete, cache_dir=str(tmp_path))
    error_meta = {"error_message": "X"}

    first = judge.generate("k", error_meta)
    second = judge.generate("k", error_meta)  # identical -> cache hit

    assert first == second
    assert completions == 1, "second identical call must hit cache (zero completions)"
def test_llm_judge_disk_cache_survives_new_instance(tmp_path):
    """Gate 4: a second judge sharing ``cache_dir`` reuses the first judge's result."""
    completions = 0

    def fake_complete(prompt: str) -> str:
        # Stub completion function: counts calls and returns a fixed hint.
        nonlocal completions
        completions += 1
        return "persisted hint"

    cache_dir = str(tmp_path)

    writer = LLMJudgeHintGenerator(fake_complete, cache_dir=cache_dir)
    writer.generate("k", {"error_message": "X"})

    # Fresh instance over the same cache dir: expected to be a disk hit,
    # so the stub must not be called a second time.
    reader = LLMJudgeHintGenerator(fake_complete, cache_dir=cache_dir)
    hint = reader.generate("k", {"error_message": "X"})

    assert hint == "persisted hint"
    assert completions == 1
def test_llm_judge_disabled_when_no_complete():
    """A judge constructed with ``None`` as its completion function returns ``None``."""
    judge = LLMJudgeHintGenerator(None)
    result = judge.generate("k", {"error_message": "X"})
    assert result is None
# --- gate 5: collator-hook signature ---------------------------------------
def test_as_collator_hook_matches_collator_signature():
    """Gate 5: the hook is callable as ``(error_kind, error_meta)`` and returns ``str`` or ``None``."""
    hook = default_composite().as_collator_hook()

    # CollatorConfig.hint_generator is Callable[[str, dict], str | None]
    covered = hook("tool_not_found", {"available_tools": ["read"]})
    assert isinstance(covered, str)

    uncovered = hook("unknown", {})
    assert uncovered is None
def test_as_collator_hook_drops_into_collator_config():
    """CollatorConfig accepts the composite's hook as ``hint_generator`` unchanged."""
    from composer_replication.trainer.data_collator import CollatorConfig

    hook = default_composite().as_collator_hook()
    cfg = CollatorConfig(hint_generator=hook)

    assert cfg.hint_generator is not None
    # Calling through the config attribute: the template layer should fire.
    assert cfg.hint_generator("json_decode", {}) is not None  # template fires