Reinforcement Learning
Transformers
English
post-training
distillation
agentic-coding
composer-2.5
cursor
kimi-k2
grpo
dapo
diloco
openenv
trl
verl
research
methodology
Instructions to use Codeseys/composer-replication-framework with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Codeseys/composer-replication-framework with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Codeseys/composer-replication-framework", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
File size: 3,996 Bytes
"""Tests for error-kind hint routing on the DEFAULT composite (ADR-012 #2).
The default composite is template -> raw-error -> judge. Before ADR-012 the
raw-error layer consumed ANY site carrying an `error_message`, including
style/communication/effort sites — exactly the sites the LLM judge exists to
cover. These tests validate the DEFAULT path (raw-error NOT disabled): a
style/communication site WITH an error_message routes through to the judge,
while tool/runtime sites still use the raw-error layer.
"""
from __future__ import annotations
from composer_replication.hint_generator import (
RoutingHintGenerator,
RawErrorHintGenerator,
default_composite,
is_tool_runtime_kind,
)
# --- the headline acceptance: style site reaches judge on the DEFAULT path ---
def test_style_site_with_error_message_reaches_judge_on_default_composite():
    """Headline acceptance: a style site with an error_message reaches the judge.

    The composite is built with its defaults, so the raw-error layer stays
    enabled. The stub judge records each prompt it receives; the test then
    checks that the returned hint is the judge's reply and that the judge was
    invoked exactly once.
    """
    judge_reply = "Be more concise; you repeated the same explanation twice."
    judge_prompts = []

    def fake_complete(prompt: str) -> str:
        # Stand-in for the LLM judge: log the call, answer with a fixed hint.
        judge_prompts.append(prompt)
        return judge_reply

    # NOTE: raw-error is ENABLED (the default). Pre-ADR-012 the raw-error
    # layer would have consumed this site and the judge would never run.
    composite = default_composite(llm_complete=fake_complete)  # enable_raw_error=True
    site_context = {"error_message": "The agent restated the plan three times."}
    hint = composite.generate("verbose_communication", site_context)

    assert hint == judge_reply
    assert len(judge_prompts) == 1, "style site must reach the judge, not the raw-error layer"
def test_effort_site_with_message_routes_to_judge():
    """An effort/style site that carries an error_message is answered by the judge.

    Uses the default composite with a stub judge and checks both the returned
    hint and that the stub was called exactly once.
    """
    judge_reply = "Don't pad the answer; one example suffices."
    judge_prompts = []

    def fake_complete(prompt: str) -> str:
        # Stand-in for the LLM judge: log the call, answer with a fixed hint.
        judge_prompts.append(prompt)
        return judge_reply

    composite = default_composite(llm_complete=fake_complete)
    hint = composite.generate("low_effort_style", {"error_message": "padding detected"})

    assert hint == judge_reply
    assert len(judge_prompts) == 1
# --- tool/runtime sites still served by raw-error (no regression) -----------
def test_tool_runtime_site_still_served_by_raw_error_no_judge():
    """A runtime-error site without a template is served without calling the judge.

    The stub judge records every invocation; the test expects a non-None hint
    that contains the raw error text and expects zero judge calls.
    """
    judge_prompts = []

    def fake_complete(prompt: str) -> str:
        # Any call here is a routing regression; the reply makes that obvious.
        judge_prompts.append(prompt)
        return "JUDGE (should not be called)"

    composite = default_composite(llm_complete=fake_complete)

    # An unmapped *runtime* error (no template) -> raw-error layer, not judge.
    raw_message = "Segfault at 0x0"
    hint = composite.generate("weird_runtime_error", {"error_message": raw_message})

    assert hint is not None
    assert raw_message in hint
    assert len(judge_prompts) == 0, "tool/runtime sites must be served by raw-error, not judge"
def test_template_site_unaffected_by_routing():
    """A templated site still yields its template hint when no judge is configured."""
    composite = default_composite()  # no judge
    site_context = {"available_tools": ["read", "write"]}
    hint = composite.generate("tool_not_found", site_context)

    assert hint is not None
    assert "Available tools" in hint
# --- the route predicate ----------------------------------------------------
def test_route_predicate_classifies_kinds():
    """`is_tool_runtime_kind` is True for tool/runtime kinds and False for style kinds.

    The offending kind is passed as the assertion message so a failure names
    the exact string that was misclassified.
    """
    tool_runtime_kinds = (
        "tool_not_found",
        "json_decode",
        "type_error",
        "runtime_error",
        "repeated_failure",
        "weird_runtime_error",
        "some_exception",
        "weird_unmapped_error",
    )
    style_kinds = (
        "verbose_communication",
        "low_effort_style",
        "tone_violation",
        "rambling_explanation",
        "bad_formatting",
    )

    # tool/runtime
    for kind in tool_runtime_kinds:
        assert is_tool_runtime_kind(kind) is True, kind

    # style/communication/effort
    for kind in style_kinds:
        assert is_tool_runtime_kind(kind) is False, kind
def test_routing_generator_returns_none_for_style_kind():
    """`RoutingHintGenerator` returns None for a style kind and a hint for a runtime kind.

    Both calls supply an error_message, so the only thing that differs between
    them is the kind string.
    """
    router = RoutingHintGenerator(RawErrorHintGenerator())

    # Style kind WITH a message -> None (defer to judge), even though the
    # wrapped raw-error generator would have produced a hint.
    deferred = router.generate("verbose_style", {"error_message": "too long"})
    assert deferred is None

    # Tool/runtime kind WITH a message -> the wrapped generator fires.
    served = router.generate("runtime_error", {"error_message": "boom"})
    assert served is not None
    assert "boom" in served
|