Spaces:
Running on CPU Upgrade
Keep repetition guard internal (#144)
Browse files
* Clarify repetition guard messaging
The loop breaker is intentional recovery behavior, but the user-facing 'doom loop' phrasing reads like a crash. Rename the visible log and system hint to a repetition guard while preserving the existing detector behavior and historical SFT tag compatibility.
Constraint: Existing trajectories may still contain the old wording, so the tagger must recognize both labels.
Rejected: Remove the event entirely | users would lose visibility into why the agent changed strategy.
Confidence: high
Scope-risk: narrow
Directive: Keep user-facing recovery logs operational and non-alarming; reserve internal jargon for code, not UI events.
Tested: UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/unit/test_doom_loop_polling.py tests/unit/test_sft_tagger.py
* Hide repetition guard from user surfaces
The repetition guard is an internal control-flow intervention, not a status event users need to act on. Remove the CLI/frontend tool_log emissions while keeping the internal corrective prompt and logger warnings.
Constraint: Historical trajectories can still contain the old doom-loop log text, so the SFT tagger test keeps compatibility for existing data.
Rejected: Show a renamed repetition-guard event | the requested behavior is that this remains fully internal and invisible in CLI/frontend surfaces.
Confidence: high
Scope-risk: narrow
Tested: UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/unit/test_doom_loop_polling.py tests/unit/test_sft_tagger.py
* Remove dead repetition tag branch
Review caught that no user-visible tool_log will contain the new repetition-guard text after this PR. Keep historical doom-loop log compatibility only and avoid a dead future-facing branch.
Confidence: high
Scope-risk: narrow
Tested: UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/unit/test_doom_loop_polling.py tests/unit/test_sft_tagger.py
|
@@ -681,15 +681,6 @@ class Handlers:
|
|
| 681 |
session.context_manager.add_message(
|
| 682 |
Message(role="user", content=doom_prompt)
|
| 683 |
)
|
| 684 |
-
await session.send_event(
|
| 685 |
-
Event(
|
| 686 |
-
event_type="tool_log",
|
| 687 |
-
data={
|
| 688 |
-
"tool": "system",
|
| 689 |
-
"log": "Doom loop detected — injecting corrective prompt",
|
| 690 |
-
},
|
| 691 |
-
)
|
| 692 |
-
)
|
| 693 |
|
| 694 |
malformed_tool = _detect_repeated_malformed(session.context_manager.items)
|
| 695 |
if malformed_tool:
|
|
|
|
| 681 |
session.context_manager.add_message(
|
| 682 |
Message(role="user", content=doom_prompt)
|
| 683 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
|
| 685 |
malformed_tool = _detect_repeated_malformed(session.context_manager.items)
|
| 686 |
if malformed_tool:
|
|
@@ -156,9 +156,13 @@ def check_for_doom_loop(messages: list[Message]) -> str | None:
|
|
| 156 |
# Check for identical consecutive calls
|
| 157 |
tool_name = detect_identical_consecutive(signatures, threshold=3)
|
| 158 |
if tool_name:
|
| 159 |
-
logger.warning(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
return (
|
| 161 |
-
f"[SYSTEM:
|
| 162 |
f"arguments multiple times in a row, getting the same result each time. "
|
| 163 |
f"STOP repeating this approach — it is not working. "
|
| 164 |
f"Step back and try a fundamentally different strategy. "
|
|
@@ -170,9 +174,9 @@ def check_for_doom_loop(messages: list[Message]) -> str | None:
|
|
| 170 |
pattern = detect_repeating_sequence(signatures)
|
| 171 |
if pattern:
|
| 172 |
pattern_desc = " → ".join(s.name for s in pattern)
|
| 173 |
-
logger.warning("
|
| 174 |
return (
|
| 175 |
-
f"[SYSTEM:
|
| 176 |
f"[{pattern_desc}]. This pattern has repeated multiple times without progress. "
|
| 177 |
f"STOP this cycle and try a fundamentally different approach. "
|
| 178 |
f"Consider: breaking down the problem differently, using alternative tools, "
|
|
|
|
| 156 |
# Check for identical consecutive calls
|
| 157 |
tool_name = detect_identical_consecutive(signatures, threshold=3)
|
| 158 |
if tool_name:
|
| 159 |
+
logger.warning(
|
| 160 |
+
"Repetition guard activated: %d+ identical consecutive calls to '%s'",
|
| 161 |
+
3,
|
| 162 |
+
tool_name,
|
| 163 |
+
)
|
| 164 |
return (
|
| 165 |
+
f"[SYSTEM: REPETITION GUARD] You have called '{tool_name}' with the same "
|
| 166 |
f"arguments multiple times in a row, getting the same result each time. "
|
| 167 |
f"STOP repeating this approach — it is not working. "
|
| 168 |
f"Step back and try a fundamentally different strategy. "
|
|
|
|
| 174 |
pattern = detect_repeating_sequence(signatures)
|
| 175 |
if pattern:
|
| 176 |
pattern_desc = " → ".join(s.name for s in pattern)
|
| 177 |
+
logger.warning("Repetition guard activated: repeating sequence [%s]", pattern_desc)
|
| 178 |
return (
|
| 179 |
+
f"[SYSTEM: REPETITION GUARD] You are stuck in a repeating cycle of tool calls: "
|
| 180 |
f"[{pattern_desc}]. This pattern has repeated multiple times without progress. "
|
| 181 |
f"STOP this cycle and try a fundamentally different approach. "
|
| 182 |
f"Consider: breaking down the problem differently, using alternative tools, "
|
|
@@ -306,8 +306,10 @@ async def research_handler(
|
|
| 306 |
# ── Doom-loop detection ──
|
| 307 |
doom_prompt = check_for_doom_loop(messages)
|
| 308 |
if doom_prompt:
|
| 309 |
-
logger.warning(
|
| 310 |
-
|
|
|
|
|
|
|
| 311 |
messages.append(Message(role="user", content=doom_prompt))
|
| 312 |
|
| 313 |
# ── Context budget: warn at 75%, hard-stop at 95% ──
|
|
|
|
| 306 |
# ── Doom-loop detection ──
|
| 307 |
doom_prompt = check_for_doom_loop(messages)
|
| 308 |
if doom_prompt:
|
| 309 |
+
logger.warning(
|
| 310 |
+
"Research sub-agent repetition guard activated at iteration %d",
|
| 311 |
+
_iteration,
|
| 312 |
+
)
|
| 313 |
messages.append(Message(role="user", content=doom_prompt))
|
| 314 |
|
| 315 |
# ── Context budget: warn at 75%, hard-stop at 95% ──
|
|
@@ -5,7 +5,7 @@ Reproduces the failure mode in observatory sessions 40fcb414 ($32.59),
|
|
| 5 |
long-running job with `bash sleep 300 && wc -l output` four times in a
|
| 6 |
row. The arguments were byte-identical, but the results moved (27210 →
|
| 7 |
36454 → 45770 → 55138 — actual progress). The detector hashed args only
|
| 8 |
-
and false-fired
|
| 9 |
polling.
|
| 10 |
|
| 11 |
After the fix the signature includes the tool result hash, so identical
|
|
@@ -66,7 +66,7 @@ def test_truly_stuck_polling_with_identical_results_still_fires():
|
|
| 66 |
]
|
| 67 |
prompt = check_for_doom_loop(msgs)
|
| 68 |
assert prompt is not None
|
| 69 |
-
assert "
|
| 70 |
assert "bash" in prompt
|
| 71 |
|
| 72 |
|
|
@@ -80,7 +80,7 @@ def test_identical_calls_with_no_results_yet_still_fires():
|
|
| 80 |
]
|
| 81 |
prompt = check_for_doom_loop(msgs)
|
| 82 |
assert prompt is not None
|
| 83 |
-
assert "
|
| 84 |
assert "write" in prompt
|
| 85 |
|
| 86 |
|
|
|
|
| 5 |
long-running job with `bash sleep 300 && wc -l output` four times in a
|
| 6 |
row. The arguments were byte-identical, but the results moved (27210 →
|
| 7 |
36454 → 45770 → 55138 — actual progress). The detector hashed args only
|
| 8 |
+
and false-fired the repetition guard, which made the agent abandon perfectly valid
|
| 9 |
polling.
|
| 10 |
|
| 11 |
After the fix the signature includes the tool result hash, so identical
|
|
|
|
| 66 |
]
|
| 67 |
prompt = check_for_doom_loop(msgs)
|
| 68 |
assert prompt is not None
|
| 69 |
+
assert "REPETITION GUARD" in prompt
|
| 70 |
assert "bash" in prompt
|
| 71 |
|
| 72 |
|
|
|
|
| 80 |
]
|
| 81 |
prompt = check_for_doom_loop(msgs)
|
| 82 |
assert prompt is not None
|
| 83 |
+
assert "REPETITION GUARD" in prompt
|
| 84 |
assert "write" in prompt
|
| 85 |
|
| 86 |
|
|
@@ -79,7 +79,7 @@ def test_outcome_ongoing():
|
|
| 79 |
|
| 80 |
def test_outcome_doom_loop_and_context():
|
| 81 |
events = [
|
| 82 |
-
_ev("tool_log", {"tool": "system", "log": "Doom loop detected
|
| 83 |
_ev("compacted", {"old_tokens": 100, "new_tokens": 50}),
|
| 84 |
_ev("turn_complete", {"history_size": 10}),
|
| 85 |
]
|
|
|
|
| 79 |
|
| 80 |
def test_outcome_doom_loop_and_context():
|
| 81 |
events = [
|
| 82 |
+
_ev("tool_log", {"tool": "system", "log": "Doom loop detected"}),
|
| 83 |
_ev("compacted", {"old_tokens": 100, "new_tokens": 50}),
|
| 84 |
_ev("turn_complete", {"history_size": 10}),
|
| 85 |
]
|