Reinforcement Learning
Transformers
English
post-training
distillation
agentic-coding
composer-2.5
cursor
kimi-k2
grpo
dapo
diloco
openenv
trl
verl
research
methodology
Instructions to use Codeseys/composer-replication-framework with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Codeseys/composer-replication-framework with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Codeseys/composer-replication-framework", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| """Tests for error-kind hint routing on the DEFAULT composite (ADR-012 #2). | |
| The default composite is template -> raw-error -> judge. Before ADR-012 the | |
| raw-error layer consumed ANY site carrying an `error_message`, including | |
| style/communication/effort sites — exactly the sites the LLM judge exists to | |
| cover. These tests validate the DEFAULT path (raw-error NOT disabled): a | |
| style/communication site WITH an error_message routes through to the judge, | |
| while tool/runtime sites still use the raw-error layer. | |
| """ | |
| from __future__ import annotations | |
| from composer_replication.hint_generator import ( | |
| RoutingHintGenerator, | |
| RawErrorHintGenerator, | |
| default_composite, | |
| is_tool_runtime_kind, | |
| ) | |
| # --- the headline acceptance: style site reaches judge on the DEFAULT path --- | |
def test_style_site_with_error_message_reaches_judge_on_default_composite():
    """A style site carrying an error_message must be answered by the judge.

    The composite is built with its defaults, so the raw-error layer stays
    enabled. Pre-ADR-012 that layer would have consumed this site and the
    judge would never have been called.
    """
    judge_reply = "Be more concise; you repeated the same explanation twice."
    seen_prompts = []

    def fake_complete(prompt: str) -> str:
        # Record each judge invocation so the call count can be checked below.
        seen_prompts.append(prompt)
        return judge_reply

    composite = default_composite(llm_complete=fake_complete)  # enable_raw_error=True
    context = {"error_message": "The agent restated the plan three times."}
    produced = composite.generate("verbose_communication", context)

    assert produced == judge_reply
    assert len(seen_prompts) == 1, "style site must reach the judge, not the raw-error layer"
def test_effort_site_with_message_routes_to_judge():
    """An effort-style site with an error_message is answered by the judge exactly once."""
    invocations = 0

    def fake_complete(prompt: str) -> str:
        nonlocal invocations
        invocations += 1
        return "Don't pad the answer; one example suffices."

    composite = default_composite(llm_complete=fake_complete)
    context = {"error_message": "padding detected"}
    produced = composite.generate("low_effort_style", context)

    assert produced == "Don't pad the answer; one example suffices."
    assert invocations == 1
| # --- tool/runtime sites still served by raw-error (no regression) ----------- | |
def test_tool_runtime_site_still_served_by_raw_error_no_judge():
    """A runtime-error site must yield a hint containing the raw message, with no judge call."""
    seen_prompts = []

    def fake_complete(prompt: str) -> str:
        seen_prompts.append(prompt)
        return "JUDGE (should not be called)"

    composite = default_composite(llm_complete=fake_complete)

    # An unmapped *runtime* error (no template) should be handled by the
    # raw-error layer rather than falling through to the judge.
    raw_message = "Segfault at 0x0"
    produced = composite.generate("weird_runtime_error", {"error_message": raw_message})

    assert produced is not None
    assert raw_message in produced
    assert len(seen_prompts) == 0, "tool/runtime sites must be served by raw-error, not judge"
def test_template_site_unaffected_by_routing():
    """A templated kind still yields its template hint when no judge is configured."""
    composite = default_composite()  # no judge
    context = {"available_tools": ["read", "write"]}
    produced = composite.generate("tool_not_found", context)

    assert produced is not None
    assert "Available tools" in produced
| # --- the route predicate ---------------------------------------------------- | |
def test_route_predicate_classifies_kinds():
    """is_tool_runtime_kind is True for tool/runtime kinds and False for style kinds."""
    tool_runtime_kinds = (
        "tool_not_found",
        "json_decode",
        "type_error",
        "runtime_error",
        "repeated_failure",
        "weird_runtime_error",
        "some_exception",
        "weird_unmapped_error",
    )
    style_kinds = (
        "verbose_communication",
        "low_effort_style",
        "tone_violation",
        "rambling_explanation",
        "bad_formatting",
    )

    # One table of kind -> expected verdict; tool/runtime kinds are checked
    # first, then style/communication/effort kinds.
    expectations = {
        **dict.fromkeys(tool_runtime_kinds, True),
        **dict.fromkeys(style_kinds, False),
    }
    for kind, expected in expectations.items():
        # The kind is the assertion message so a failure names the offender.
        assert is_tool_runtime_kind(kind) is expected, kind
def test_routing_generator_returns_none_for_style_kind():
    """RoutingHintGenerator returns None for a style kind and delegates for a runtime kind."""
    router = RoutingHintGenerator(RawErrorHintGenerator())

    # Style kind WITH a message: the router returns None (deferring to the
    # judge) even though the wrapped raw-error layer would have produced a hint.
    style_result = router.generate("verbose_style", {"error_message": "too long"})
    assert style_result is None

    # Tool/runtime kind WITH a message: the wrapped generator fires.
    runtime_result = router.generate("runtime_error", {"error_message": "boom"})
    assert runtime_result is not None
    assert "boom" in runtime_result