"""Tests for the layered HintGenerator architecture (ADR-009). Covers ADR-009 acceptance gates: - gate 1: HintGenerator Protocol; layers satisfy it (runtime_checkable). - gate 2: TemplateHintGenerator is byte-identical to the existing dispatch() for all 5 registered kinds (no regression). - gate 3: CompositeHintGenerator tries layers cost-first — a tool_not_found site is served by the template layer (no LLM call); a style site falls through to the judge layer. - gate 4: LLMJudgeHintGenerator caches (second identical call = zero completions). - gate 5: as_collator_hook() matches CollatorConfig.hint_generator's (error_kind, error_meta) -> str | None signature. All CPU-only, no network (LLM layer is a stub). """ from __future__ import annotations from composer_replication.hint_generator import ( HINT_TEMPLATES, CompositeHintGenerator, HintGenerator, LLMJudgeHintGenerator, RawErrorHintGenerator, TemplateHintGenerator, default_composite, dispatch, ) # --- gate 1: Protocol ------------------------------------------------------- def test_layers_satisfy_protocol(): assert isinstance(TemplateHintGenerator(), HintGenerator) assert isinstance(RawErrorHintGenerator(), HintGenerator) assert isinstance(LLMJudgeHintGenerator(), HintGenerator) assert isinstance(CompositeHintGenerator([]), HintGenerator) # --- gate 2: template byte-identity ---------------------------------------- def test_template_layer_byte_identical_to_dispatch(): tmpl = TemplateHintGenerator() meta = { "available_tools": ["read", "write"], "tool_name": "frobnicate", "tool_schema": {"x": "int"}, "error_message": "boom", } for kind in HINT_TEMPLATES: ctx = dict(meta) ctx.setdefault("error_kind", kind) expected = dispatch(kind, ctx) got = tmpl.generate(kind, meta) assert got == expected, f"template layer drifted from dispatch for {kind}" def test_template_layer_returns_none_for_unknown_kind(): assert TemplateHintGenerator().generate("totally_unknown_kind", {}) is None # --- gate 3: cost-ordered composite ---------------------------------------- def test_composite_serves_tool_error_from_template_no_llm(): calls = {"n": 0} def fake_complete(prompt: str) -> str: calls["n"] += 1 return "LLM HINT" comp = default_composite(llm_complete=fake_complete) hint = comp.generate("tool_not_found", {"available_tools": ["read", "write"]}) assert hint is not None assert "Available tools" in hint # template output assert calls["n"] == 0, "LLM judge must NOT be called for a template-covered site" def test_composite_falls_through_to_judge_for_uncovered_site(): calls = {"n": 0} def fake_complete(prompt: str) -> str: calls["n"] += 1 return "Be more concise; you repeated the same explanation." comp = default_composite(llm_complete=fake_complete, enable_raw_error=False) # 'verbose_communication' has no template and no error_message -> judge. hint = comp.generate("verbose_communication", {}) assert hint == "Be more concise; you repeated the same explanation." assert calls["n"] == 1 def test_raw_error_layer_covers_unmatched_site_with_message(): comp = default_composite() # no LLM hint = comp.generate("weird_unmapped_error", {"error_message": "Segfault at 0x0"}) assert hint is not None assert "Segfault at 0x0" in hint def test_composite_returns_none_when_all_layers_defer(): comp = default_composite() # templates + raw-error, no LLM # unknown kind + no message -> nothing fires assert comp.generate("unknown", {}) is None # --- gate 4: LLM-judge cache ------------------------------------------------ def test_llm_judge_caches_in_memory(tmp_path): calls = {"n": 0} def fake_complete(prompt: str) -> str: calls["n"] += 1 return f"hint #{calls['n']}" judge = LLMJudgeHintGenerator(fake_complete, cache_dir=str(tmp_path)) meta = {"error_message": "X"} h1 = judge.generate("k", meta) h2 = judge.generate("k", meta) # identical -> cache hit assert h1 == h2 assert calls["n"] == 1, "second identical call must hit cache (zero completions)" def test_llm_judge_disk_cache_survives_new_instance(tmp_path): calls = {"n": 0} def fake_complete(prompt: str) -> str: calls["n"] += 1 return "persisted hint" j1 = LLMJudgeHintGenerator(fake_complete, cache_dir=str(tmp_path)) j1.generate("k", {"error_message": "X"}) # fresh instance, same cache dir -> disk hit, no completion j2 = LLMJudgeHintGenerator(fake_complete, cache_dir=str(tmp_path)) h = j2.generate("k", {"error_message": "X"}) assert h == "persisted hint" assert calls["n"] == 1 def test_llm_judge_disabled_when_no_complete(): assert LLMJudgeHintGenerator(None).generate("k", {"error_message": "X"}) is None # --- gate 5: collator-hook signature --------------------------------------- def test_as_collator_hook_matches_collator_signature(): comp = default_composite() hook = comp.as_collator_hook() # CollatorConfig.hint_generator is Callable[[str, dict], str | None] out = hook("tool_not_found", {"available_tools": ["read"]}) assert isinstance(out, str) out_none = hook("unknown", {}) assert out_none is None def test_as_collator_hook_drops_into_collator_config(): """The hook is accepted by CollatorConfig without changes.""" from composer_replication.trainer.data_collator import CollatorConfig comp = default_composite() cfg = CollatorConfig(hint_generator=comp.as_collator_hook()) assert cfg.hint_generator is not None assert cfg.hint_generator("json_decode", {}) is not None # template fires