d πŸ”Ή d πŸ”Ή Aksel Joonas Reedi commited on
Commit
07c5699
Β·
unverified Β·
1 Parent(s): 8e93e94

fix(doom_loop): normalize tool-call args before hashing (#119)

Browse files

The doom-loop detector hashed raw `function.arguments` strings, so
semantically-identical tool calls hashed differently when the LLM emitted
them with different key orderings (`{"a":1,"b":2}` vs `{"b":2,"a":1}`)
or whitespace (`{"a":1}` vs `{"a": 1}`). This silently broke
`detect_identical_consecutive` and `detect_repeating_sequence`: the
agent could be calling the same tool with the same args repeatedly and
the detector would see three distinct signatures and stay quiet.

Issue #61 P1 explicitly calls this out:
> Add semantic-similarity or normalized-task matching for `research`.

Fix: parse-and-redump JSON via `json.dumps(..., sort_keys=True,
separators=(",", ":"))` before hashing. Falls back to the raw string
when the input isn't valid JSON so non-JSON `arguments` strings (rare
edge for some providers) keep the legacy behaviour and never raise.

Tests: 23 new cases in `tests/unit/test_doom_loop.py` covering
`_normalize_args`, `_hash_args`, `extract_recent_tool_signatures`,
`detect_identical_consecutive`, `detect_repeating_sequence`, and the
`check_for_doom_loop` entry point. Includes the headline regression β€”
three reordered-key calls collapsing to one signature β€” plus negative
cases (different values, different array orderings, sub-threshold
counts, broken pattern).

Co-authored-by: d πŸ”Ή <258577966+voidborne-d@users.noreply.github.com>
Co-authored-by: Aksel Joonas Reedi <125026660+akseljoonas@users.noreply.github.com>

agent/core/doom_loop.py CHANGED
@@ -24,9 +24,36 @@ class ToolCallSignature:
24
  result_hash: str | None = None
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def _hash_args(args_str: str) -> str:
28
- """Return a short hash of the JSON arguments string."""
29
- return hashlib.md5(args_str.encode()).hexdigest()[:12]
 
 
 
 
 
30
 
31
 
32
  def extract_recent_tool_signatures(
 
24
  result_hash: str | None = None
25
 
26
 
27
+ def _normalize_args(args_str: str) -> str:
28
+ """Canonicalise a tool-call arguments string before hashing.
29
+
30
+ LLMs can emit semantically-identical JSON for the same call with different
31
+ key orderings (``{"a": 1, "b": 2}`` vs ``{"b": 2, "a": 1}``) or whitespace
32
+ (``{"a":1}`` vs ``{"a": 1}``). Hashing the raw bytes makes the doom-loop
33
+ detector miss those repeats. We parse-and-redump with ``sort_keys=True``
34
+ plus the most compact separators so trivially-different spellings collapse
35
+ to the same canonical form.
36
+
37
+ Falls back to the original string if the input isn't valid JSON (e.g. a
38
+ handful of providers occasionally pass a bare string for ``arguments``);
39
+ that path keeps the legacy behaviour and never raises.
40
+ """
41
+ if not args_str:
42
+ return ""
43
+ try:
44
+ return json.dumps(json.loads(args_str), sort_keys=True, separators=(",", ":"))
45
+ except (json.JSONDecodeError, TypeError, ValueError):
46
+ return args_str
47
+
48
+
49
  def _hash_args(args_str: str) -> str:
50
+ """Return a short hash of the JSON arguments string.
51
+
52
+ The input is normalised via :func:`_normalize_args` first so that
53
+ semantically-identical tool calls produce the same hash regardless of key
54
+ order or whitespace.
55
+ """
56
+ return hashlib.md5(_normalize_args(args_str).encode()).hexdigest()[:12]
57
 
58
 
59
  def extract_recent_tool_signatures(
tests/unit/test_doom_loop.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the doom-loop detector β€” repeated/cycling tool call patterns."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from agent.core.doom_loop import (
6
+ ToolCallSignature,
7
+ _hash_args,
8
+ _normalize_args,
9
+ check_for_doom_loop,
10
+ detect_identical_consecutive,
11
+ detect_repeating_sequence,
12
+ extract_recent_tool_signatures,
13
+ )
14
+
15
+
16
+ # ── Lightweight stand-ins so we don't need the litellm message classes ──
17
+
18
+
19
+ @dataclass
20
+ class _Fn:
21
+ name: str
22
+ arguments: str
23
+
24
+
25
+ @dataclass
26
+ class _ToolCall:
27
+ function: _Fn
28
+
29
+
30
+ @dataclass
31
+ class _Msg:
32
+ role: str
33
+ tool_calls: list | None = None
34
+
35
+
36
+ def _assistant_call(name: str, args: str) -> _Msg:
37
+ return _Msg(role="assistant", tool_calls=[_ToolCall(_Fn(name, args))])
38
+
39
+
40
+ # ── _normalize_args / _hash_args ────────────────────────────────────────
41
+
42
+
43
+ def test_normalize_args_collapses_key_order():
44
+ a = '{"path": "/foo", "query": "bar"}'
45
+ b = '{"query": "bar", "path": "/foo"}'
46
+ assert _normalize_args(a) == _normalize_args(b)
47
+
48
+
49
+ def test_normalize_args_collapses_whitespace():
50
+ a = '{"path": "/foo", "query": "bar"}'
51
+ b = '{"path":"/foo","query":"bar"}'
52
+ assert _normalize_args(a) == _normalize_args(b)
53
+
54
+
55
+ def test_normalize_args_preserves_value_difference():
56
+ a = '{"path": "/foo"}'
57
+ b = '{"path": "/bar"}'
58
+ assert _normalize_args(a) != _normalize_args(b)
59
+
60
+
61
+ def test_normalize_args_preserves_nested_structure():
62
+ a = '{"a": {"x": 1, "y": 2}, "b": [3, 4]}'
63
+ b = '{"b": [3, 4], "a": {"y": 2, "x": 1}}'
64
+ assert _normalize_args(a) == _normalize_args(b)
65
+
66
+
67
+ def test_normalize_args_array_order_is_significant():
68
+ # Lists are positional β€” different orderings should NOT collapse.
69
+ a = '{"items": [1, 2, 3]}'
70
+ b = '{"items": [3, 2, 1]}'
71
+ assert _normalize_args(a) != _normalize_args(b)
72
+
73
+
74
+ def test_normalize_args_falls_back_for_invalid_json():
75
+ # Some providers occasionally pass a bare string; we shouldn't raise.
76
+ assert _normalize_args("not json") == "not json"
77
+ assert _normalize_args("{broken") == "{broken"
78
+
79
+
80
+ def test_normalize_args_handles_empty_string():
81
+ assert _normalize_args("") == ""
82
+
83
+
84
+ def test_hash_args_collapses_semantically_identical_calls():
85
+ # The headline regression: pre-fix these hashed differently and the
86
+ # doom-loop detector silently missed identical-consecutive calls.
87
+ a = '{"path": "/foo", "query": "bar"}'
88
+ b = '{"query": "bar", "path": "/foo"}'
89
+ assert _hash_args(a) == _hash_args(b)
90
+
91
+
92
+ def test_hash_args_still_differs_on_real_argument_change():
93
+ assert _hash_args('{"path": "/a"}') != _hash_args('{"path": "/b"}')
94
+
95
+
96
+ # ── extract_recent_tool_signatures ──────────────────────────────────────
97
+
98
+
99
+ def test_extract_recent_signatures_collapses_reordered_keys():
100
+ """Three calls with reordered keys should produce identical signatures."""
101
+ msgs = [
102
+ _assistant_call("read", '{"path": "/foo", "limit": 100}'),
103
+ _assistant_call("read", '{"limit": 100, "path": "/foo"}'),
104
+ _assistant_call("read", '{"path":"/foo","limit":100}'),
105
+ ]
106
+ sigs = extract_recent_tool_signatures(msgs)
107
+ assert len(sigs) == 3
108
+ assert sigs[0] == sigs[1] == sigs[2]
109
+
110
+
111
+ def test_extract_skips_non_assistant_messages():
112
+ msgs = [
113
+ _Msg(role="user", tool_calls=None),
114
+ _assistant_call("read", '{"path": "/x"}'),
115
+ _Msg(role="tool", tool_calls=None),
116
+ ]
117
+ sigs = extract_recent_tool_signatures(msgs)
118
+ assert len(sigs) == 1
119
+ assert sigs[0].name == "read"
120
+
121
+
122
+ def test_extract_skips_assistant_without_tool_calls():
123
+ msgs = [_Msg(role="assistant", tool_calls=None)]
124
+ assert extract_recent_tool_signatures(msgs) == []
125
+
126
+
127
+ # ── detect_identical_consecutive ────────────────────────────────────────
128
+
129
+
130
+ def _sig(name: str, args: str = "{}") -> ToolCallSignature:
131
+ return ToolCallSignature(name=name, args_hash=_hash_args(args))
132
+
133
+
134
+ def test_identical_consecutive_fires_at_threshold():
135
+ sigs = [_sig("read", '{"p": 1}')] * 3
136
+ assert detect_identical_consecutive(sigs, threshold=3) == "read"
137
+
138
+
139
+ def test_identical_consecutive_stays_silent_below_threshold():
140
+ sigs = [_sig("read", '{"p": 1}')] * 2
141
+ assert detect_identical_consecutive(sigs, threshold=3) is None
142
+
143
+
144
+ def test_identical_consecutive_resets_on_break():
145
+ # A, A, B, A, A β€” never 3 in a row.
146
+ sigs = [
147
+ _sig("read", '{"p": 1}'),
148
+ _sig("read", '{"p": 1}'),
149
+ _sig("read", '{"p": 2}'),
150
+ _sig("read", '{"p": 1}'),
151
+ _sig("read", '{"p": 1}'),
152
+ ]
153
+ assert detect_identical_consecutive(sigs, threshold=3) is None
154
+
155
+
156
+ def test_identical_consecutive_catches_reordered_args_after_normalization():
157
+ """Regression for the bug: same call with shuffled keys must collapse."""
158
+ msgs = [
159
+ _assistant_call("research", '{"task": "find paper", "depth": 3}'),
160
+ _assistant_call("research", '{"depth": 3, "task": "find paper"}'),
161
+ _assistant_call("research", '{"task":"find paper","depth":3}'),
162
+ ]
163
+ sigs = extract_recent_tool_signatures(msgs)
164
+ assert detect_identical_consecutive(sigs, threshold=3) == "research"
165
+
166
+
167
+ # ── detect_repeating_sequence ───────────────────────────────────────────
168
+
169
+
170
+ def test_repeating_sequence_catches_alternating_pair():
171
+ sigs = [_sig("a"), _sig("b")] * 3
172
+ pattern = detect_repeating_sequence(sigs)
173
+ assert pattern is not None
174
+ assert [s.name for s in pattern] == ["a", "b"]
175
+
176
+
177
+ def test_repeating_sequence_misses_when_pattern_breaks():
178
+ sigs = [_sig("a"), _sig("b"), _sig("a"), _sig("c")]
179
+ assert detect_repeating_sequence(sigs) is None
180
+
181
+
182
+ def test_repeating_sequence_normalizes_args_inside_pattern():
183
+ """Cycle [research, read, research, read, ...] survives key reordering."""
184
+ msgs = [
185
+ _assistant_call("research", '{"q": "x", "n": 1}'),
186
+ _assistant_call("read", '{"path": "/a"}'),
187
+ _assistant_call("research", '{"n": 1, "q": "x"}'),
188
+ _assistant_call("read", '{"path":"/a"}'),
189
+ _assistant_call("research", '{"q":"x","n":1}'),
190
+ _assistant_call("read", '{"path": "/a"}'),
191
+ ]
192
+ sigs = extract_recent_tool_signatures(msgs)
193
+ pattern = detect_repeating_sequence(sigs)
194
+ assert pattern is not None
195
+ assert [s.name for s in pattern] == ["research", "read"]
196
+
197
+
198
+ # ── check_for_doom_loop ─────────────────────────────────────────────────
199
+
200
+
201
+ def test_check_for_doom_loop_quiet_below_minimum_signatures():
202
+ msgs = [_assistant_call("read", '{"p": 1}'), _assistant_call("read", '{"p": 1}')]
203
+ assert check_for_doom_loop(msgs) is None
204
+
205
+
206
+ def test_check_for_doom_loop_returns_corrective_prompt_for_identical_run():
207
+ msgs = [_assistant_call("read", '{"p": 1}')] * 3
208
+ out = check_for_doom_loop(msgs)
209
+ assert out is not None
210
+ assert "DOOM LOOP DETECTED" in out
211
+ assert "'read'" in out
212
+
213
+
214
+ def test_check_for_doom_loop_returns_corrective_prompt_for_cycle():
215
+ msgs = []
216
+ for _ in range(3):
217
+ msgs.append(_assistant_call("a", "{}"))
218
+ msgs.append(_assistant_call("b", "{}"))
219
+ out = check_for_doom_loop(msgs)
220
+ assert out is not None
221
+ assert "DOOM LOOP DETECTED" in out
222
+ assert "a β†’ b" in out
223
+
224
+
225
+ def test_check_for_doom_loop_quiet_when_args_meaningfully_differ():
226
+ """Same tool, three different arg values β€” not a loop."""
227
+ msgs = [
228
+ _assistant_call("read", '{"path": "/a.py"}'),
229
+ _assistant_call("read", '{"path": "/b.py"}'),
230
+ _assistant_call("read", '{"path": "/c.py"}'),
231
+ ]
232
+ assert check_for_doom_loop(msgs) is None