feat(hints): ADR-009 layered HintGenerator; accepted

84740d4 about 1 month ago

5.82 kB

	"""Tests for the layered HintGenerator architecture (ADR-009).

	Covers ADR-009 acceptance gates:
	- gate 1: HintGenerator Protocol; layers satisfy it (runtime_checkable).
	- gate 2: TemplateHintGenerator is byte-identical to the existing dispatch()
	for all 5 registered kinds (no regression).
	- gate 3: CompositeHintGenerator tries layers cost-first — a tool_not_found
	site is served by the template layer (no LLM call); a style site falls
	through to the judge layer.
	- gate 4: LLMJudgeHintGenerator caches (second identical call = zero
	completions).
	- gate 5: as_collator_hook() matches CollatorConfig.hint_generator's
	(error_kind, error_meta) -> str | None signature.

	All CPU-only, no network (LLM layer is a stub).
	"""
	from __future__ import annotations

	from composer_replication.hint_generator import (
	HINT_TEMPLATES,
	CompositeHintGenerator,
	HintGenerator,
	LLMJudgeHintGenerator,
	RawErrorHintGenerator,
	TemplateHintGenerator,
	default_composite,
	dispatch,
	)


	# --- gate 1: Protocol -------------------------------------------------------

	def test_layers_satisfy_protocol():
	    """Every concrete layer, and the composite, passes isinstance(HintGenerator)."""
	    # Factories rather than instances so each layer is only constructed
	    # right before its own check, one at a time.
	    layer_factories = (
	        TemplateHintGenerator,
	        RawErrorHintGenerator,
	        LLMJudgeHintGenerator,
	        lambda: CompositeHintGenerator([]),
	    )
	    for make_layer in layer_factories:
	        assert isinstance(make_layer(), HintGenerator)


	# --- gate 2: template byte-identity ----------------------------------------

	def test_template_layer_byte_identical_to_dispatch():
	    """For each key in HINT_TEMPLATES, the template layer equals dispatch()."""
	    template_layer = TemplateHintGenerator()
	    shared_meta = {
	        "available_tools": ["read", "write"],
	        "tool_name": "frobnicate",
	        "tool_schema": {"x": "int"},
	        "error_message": "boom",
	    }
	    for kind in HINT_TEMPLATES:
	        # dispatch() gets a private copy carrying "error_kind"; shared_meta
	        # has no such key, so appending it is the same as setdefault().
	        dispatch_ctx = {**shared_meta, "error_kind": kind}
	        reference = dispatch(kind, dispatch_ctx)
	        produced = template_layer.generate(kind, shared_meta)
	        assert produced == reference, f"template layer drifted from dispatch for {kind}"


	def test_template_layer_returns_none_for_unknown_kind():
	    """The template layer yields None for a kind it is asked about with no meta."""
	    template_layer = TemplateHintGenerator()
	    outcome = template_layer.generate("totally_unknown_kind", {})
	    assert outcome is None


	# --- gate 3: cost-ordered composite ----------------------------------------

	def test_composite_serves_tool_error_from_template_no_llm():
	    """A tool_not_found site gets a hint while the LLM stub is never invoked."""
	    prompts_seen: list[str] = []

	    def stub_llm(prompt: str) -> str:
	        # Record every prompt so the test can prove the judge stayed idle.
	        prompts_seen.append(prompt)
	        return "LLM HINT"

	    composite = default_composite(llm_complete=stub_llm)
	    site_meta = {"available_tools": ["read", "write"]}
	    hint = composite.generate("tool_not_found", site_meta)

	    assert hint is not None
	    assert "Available tools" in hint  # template output
	    assert not prompts_seen, "LLM judge must NOT be called for a template-covered site"


	def test_composite_falls_through_to_judge_for_uncovered_site():
	    """With raw-error disabled, an uncovered kind is answered by the LLM stub once."""
	    judge_reply = "Be more concise; you repeated the same explanation."
	    prompts_seen: list[str] = []

	    def stub_judge(prompt: str) -> str:
	        prompts_seen.append(prompt)
	        return judge_reply

	    composite = default_composite(llm_complete=stub_judge, enable_raw_error=False)
	    # 'verbose_communication' has no template and the meta carries no
	    # error_message, so the judge is expected to produce the hint.
	    hint = composite.generate("verbose_communication", {})

	    assert hint == judge_reply
	    assert len(prompts_seen) == 1


	def test_raw_error_layer_covers_unmatched_site_with_message():
	    """Without an LLM, an unmapped kind with an error_message still gets a hint."""
	    raw_message = "Segfault at 0x0"
	    composite = default_composite()  # no LLM
	    hint = composite.generate("weird_unmapped_error", {"error_message": raw_message})
	    assert hint is not None
	    assert raw_message in hint


	def test_composite_returns_none_when_all_layers_defer():
	    """The default composite (no LLM) returns None for an unknown kind with empty meta."""
	    composite = default_composite()  # templates + raw-error, no LLM
	    # Unknown kind and no message: no layer is expected to produce a hint.
	    outcome = composite.generate("unknown", {})
	    assert outcome is None


	# --- gate 4: LLM-judge cache ------------------------------------------------

	def test_llm_judge_caches_in_memory(tmp_path):
	    """Two identical generate() calls on one judge trigger a single completion."""
	    completions = 0

	    def counting_complete(prompt: str) -> str:
	        nonlocal completions
	        completions += 1
	        # The reply embeds the call number, so a second real completion
	        # would also make the two hints differ.
	        return f"hint #{completions}"

	    judge = LLMJudgeHintGenerator(counting_complete, cache_dir=str(tmp_path))
	    meta = {"error_message": "X"}
	    first_hint = judge.generate("k", meta)
	    second_hint = judge.generate("k", meta)  # identical -> cache hit

	    assert first_hint == second_hint
	    assert completions == 1, "second identical call must hit cache (zero completions)"


	def test_llm_judge_disk_cache_survives_new_instance(tmp_path):
	    """A second judge sharing cache_dir returns the stored hint without completing."""
	    prompts_seen: list[str] = []

	    def stub_complete(prompt: str) -> str:
	        prompts_seen.append(prompt)
	        return "persisted hint"

	    cache_location = str(tmp_path)

	    warm_judge = LLMJudgeHintGenerator(stub_complete, cache_dir=cache_location)
	    warm_judge.generate("k", {"error_message": "X"})

	    # Fresh instance pointed at the same cache dir: expected to be served
	    # from disk, so the stub must not be called a second time.
	    fresh_judge = LLMJudgeHintGenerator(stub_complete, cache_dir=cache_location)
	    hint = fresh_judge.generate("k", {"error_message": "X"})

	    assert hint == "persisted hint"
	    assert len(prompts_seen) == 1


	def test_llm_judge_disabled_when_no_complete():
	    """A judge built with None as its completion callable returns None."""
	    disabled_judge = LLMJudgeHintGenerator(None)
	    outcome = disabled_judge.generate("k", {"error_message": "X"})
	    assert outcome is None


	# --- gate 5: collator-hook signature ---------------------------------------

	def test_as_collator_hook_matches_collator_signature():
	    """The hook is callable as (error_kind, error_meta) and returns str or None."""
	    hook = default_composite().as_collator_hook()
	    # CollatorConfig.hint_generator is Callable[[str, dict], str | None]
	    covered = hook("tool_not_found", {"available_tools": ["read"]})
	    assert isinstance(covered, str)
	    uncovered = hook("unknown", {})
	    assert uncovered is None


	def test_as_collator_hook_drops_into_collator_config():
	    """CollatorConfig takes the composite's hook as hint_generator unchanged."""
	    from composer_replication.trainer.data_collator import CollatorConfig

	    hook = default_composite().as_collator_hook()
	    config = CollatorConfig(hint_generator=hook)

	    assert config.hint_generator is not None
	    hint = config.hint_generator("json_decode", {})
	    assert hint is not None  # template fires