ml-intern

Sleeping

d 🔹 and Aksel Joonas Reedi committed on Apr 27

Commit

07c5699

unverified ·

1 Parent(s): 8e93e94

fix(doom_loop): normalize tool-call args before hashing (#119)

The doom-loop detector hashed raw `function.arguments` strings, so
semantically-identical tool calls hashed differently when the LLM emitted
them with different key orderings (`{"a":1,"b":2}` vs `{"b":2,"a":1}`)
or whitespace (`{"a":1}` vs `{"a": 1}`). This silently broke
`detect_identical_consecutive` and `detect_repeating_sequence`: the
agent could be calling the same tool with the same args repeatedly and
the detector would see three distinct signatures and stay quiet.

Issue #61 P1 explicitly calls this out:
> Add semantic-similarity or normalized-task matching for `research`.

Fix: parse-and-redump JSON via `json.dumps(..., sort_keys=True,
separators=(",", ":"))` before hashing. Falls back to the raw string
when the input isn't valid JSON so non-JSON `arguments` strings (rare
edge for some providers) keep the legacy behaviour and never raise.

Tests: 23 new cases in `tests/unit/test_doom_loop.py` covering
`_normalize_args`, `_hash_args`, `extract_recent_tool_signatures`,
`detect_identical_consecutive`, `detect_repeating_sequence`, and the
`check_for_doom_loop` entry point. Includes the headline regression —
three reordered-key calls collapsing to one signature — plus negative
cases (different values, different array orderings, sub-threshold
counts, broken pattern).

Co-authored-by: d 🔹 <258577966+voidborne-d@users.noreply.github.com>
Co-authored-by: Aksel Joonas Reedi <125026660+akseljoonas@users.noreply.github.com>

Files changed (2) hide show

agent/core/doom_loop.py +29 -2
tests/unit/test_doom_loop.py +232 -0

agent/core/doom_loop.py CHANGED Viewed

@@ -24,9 +24,36 @@ class ToolCallSignature:
     result_hash: str | None = None
 def _hash_args(args_str: str) -> str:
-    """Return a short hash of the JSON arguments string."""
-    return hashlib.md5(args_str.encode()).hexdigest()[:12]
 def extract_recent_tool_signatures(

     result_hash: str | None = None
+def _normalize_args(args_str: str) -> str:
+    """Canonicalise a tool-call arguments string before hashing.
+
+    LLMs can emit semantically-identical JSON for the same call with different
+    key orderings (``{"a": 1, "b": 2}`` vs ``{"b": 2, "a": 1}``) or whitespace
+    (``{"a":1}`` vs ``{"a": 1}``). Hashing the raw bytes makes the doom-loop
+    detector miss those repeats, so the payload is parsed and re-serialised
+    with ``sort_keys=True`` and the most compact separators; trivially
+    different spellings then collapse to the same canonical form.
+
+    Args:
+        args_str: Raw ``arguments`` payload of a tool call, normally JSON text.
+
+    Returns:
+        ``""`` for a falsy input, the canonical compact JSON text when the
+        input parses, and the input unchanged when it does not (e.g. a
+        provider passed a bare, non-JSON string). This function never raises
+        for string input.
+    """
+    if not args_str:
+        return ""
+    try:
+        parsed = json.loads(args_str)
+        return json.dumps(parsed, sort_keys=True, separators=(",", ":"))
+    except (TypeError, ValueError, RecursionError):
+        # ``json.JSONDecodeError`` is a ``ValueError`` subclass, so it is
+        # covered here. ``RecursionError`` is raised for pathologically deep
+        # nesting; treating it like any other unparseable payload keeps the
+        # "never raises" guarantee instead of crashing the detector.
+        return args_str
 def _hash_args(args_str: str) -> str:
+    """Return a short, stable fingerprint of a tool call's arguments.
+
+    The input is canonicalised via :func:`_normalize_args` first so that
+    semantically-identical tool calls produce the same hash regardless of key
+    order or whitespace.
+
+    Args:
+        args_str: Raw ``arguments`` payload of a tool call.
+
+    Returns:
+        The first 12 hex characters of the MD5 digest of the canonical form.
+    """
+    canonical = _normalize_args(args_str)
+    # MD5 is only a cheap fingerprint here, not a security primitive; saying
+    # so explicitly keeps this working on FIPS-restricted hashlib builds.
+    digest = hashlib.md5(canonical.encode(), usedforsecurity=False).hexdigest()
+    return digest[:12]
 def extract_recent_tool_signatures(

tests/unit/test_doom_loop.py ADDED Viewed

	@@ -0,0 +1,232 @@

+"""Tests for the doom-loop detector — repeated/cycling tool call patterns."""
+from dataclasses import dataclass
+from agent.core.doom_loop import (
+    ToolCallSignature,
+    _hash_args,
+    _normalize_args,
+    check_for_doom_loop,
+    detect_identical_consecutive,
+    detect_repeating_sequence,
+    extract_recent_tool_signatures,
+)
+# ── Lightweight stand-ins so we don't need the litellm message classes ──
+# Stand-in for the ``function`` part of a tool call: the tool's name plus the
+# raw arguments string that the tests feed to the detector.
+@dataclass
+class _Fn:
+    name: str
+    arguments: str
+# Stand-in for a single tool call; it only wraps the ``function`` payload.
+@dataclass
+class _ToolCall:
+    function: _Fn
+# Stand-in for a chat message: a role plus an optional list of tool calls
+# (``None`` when the message carries no tool calls).
+@dataclass
+class _Msg:
+    role: str
+    tool_calls: list | None = None
+def _assistant_call(name: str, args: str) -> _Msg:
+    """Build an assistant message that carries exactly one tool call."""
+    call = _ToolCall(function=_Fn(name=name, arguments=args))
+    return _Msg(role="assistant", tool_calls=[call])
+# ── _normalize_args / _hash_args ────────────────────────────────────────
+def test_normalize_args_collapses_key_order():
+    """Payloads that differ only in key order share one canonical form."""
+    path_first = '{"path": "/foo", "query": "bar"}'
+    query_first = '{"query": "bar", "path": "/foo"}'
+    expected = _normalize_args(path_first)
+    actual = _normalize_args(query_first)
+    assert expected == actual
+def test_normalize_args_collapses_whitespace():
+    """Spaced and compact spellings of the same JSON normalise identically."""
+    spaced = '{"path": "/foo", "query": "bar"}'
+    compact = '{"path":"/foo","query":"bar"}'
+    expected = _normalize_args(spaced)
+    actual = _normalize_args(compact)
+    assert expected == actual
+def test_normalize_args_preserves_value_difference():
+    """Different argument values must stay distinguishable after normalising."""
+    foo_payload = '{"path": "/foo"}'
+    bar_payload = '{"path": "/bar"}'
+    foo_form = _normalize_args(foo_payload)
+    bar_form = _normalize_args(bar_payload)
+    assert foo_form != bar_form
+def test_normalize_args_preserves_nested_structure():
+    """Key reordering is also collapsed inside nested objects."""
+    original = '{"a": {"x": 1, "y": 2}, "b": [3, 4]}'
+    shuffled = '{"b": [3, 4], "a": {"y": 2, "x": 1}}'
+    expected = _normalize_args(original)
+    actual = _normalize_args(shuffled)
+    assert expected == actual
+def test_normalize_args_array_order_is_significant():
+    """Arrays are positional, so reordered elements must NOT collapse."""
+    ascending = '{"items": [1, 2, 3]}'
+    descending = '{"items": [3, 2, 1]}'
+    ascending_form = _normalize_args(ascending)
+    descending_form = _normalize_args(descending)
+    assert ascending_form != descending_form
+def test_normalize_args_falls_back_for_invalid_json():
+    """Unparseable input comes back unchanged instead of raising."""
+    # Some providers occasionally pass a bare string for the arguments.
+    for raw in ("not json", "{broken"):
+        assert _normalize_args(raw) == raw
+def test_normalize_args_handles_empty_string():
+    """An empty payload normalises to an empty string."""
+    normalised = _normalize_args("")
+    assert normalised == ""
+def test_hash_args_collapses_semantically_identical_calls():
+    """Headline regression: reordered keys must hash to the same value.
+
+    Before the fix these two payloads hashed differently and the doom-loop
+    detector silently missed identical-consecutive calls.
+    """
+    path_first = '{"path": "/foo", "query": "bar"}'
+    query_first = '{"query": "bar", "path": "/foo"}'
+    first_hash = _hash_args(path_first)
+    second_hash = _hash_args(query_first)
+    assert first_hash == second_hash
+def test_hash_args_still_differs_on_real_argument_change():
+    """A genuinely different argument value yields a different hash."""
+    hash_a = _hash_args('{"path": "/a"}')
+    hash_b = _hash_args('{"path": "/b"}')
+    assert hash_a != hash_b
+# ── extract_recent_tool_signatures ──────────────────────────────────────
+def test_extract_recent_signatures_collapses_reordered_keys():
+    """Reordered or re-spaced arguments yield three equal signatures."""
+    payloads = (
+        '{"path": "/foo", "limit": 100}',
+        '{"limit": 100, "path": "/foo"}',
+        '{"path":"/foo","limit":100}',
+    )
+    msgs = [_assistant_call("read", payload) for payload in payloads]
+    sigs = extract_recent_tool_signatures(msgs)
+    assert len(sigs) == 3
+    assert sigs[0] == sigs[1]
+    assert sigs[1] == sigs[2]
+def test_extract_skips_non_assistant_messages():
+    """Only the assistant message contributes a signature."""
+    user_msg = _Msg(role="user", tool_calls=None)
+    assistant_msg = _assistant_call("read", '{"path": "/x"}')
+    tool_msg = _Msg(role="tool", tool_calls=None)
+    sigs = extract_recent_tool_signatures([user_msg, assistant_msg, tool_msg])
+    assert len(sigs) == 1
+    (only_sig,) = sigs
+    assert only_sig.name == "read"
+def test_extract_skips_assistant_without_tool_calls():
+    """An assistant message with no tool calls produces no signatures."""
+    plain_reply = _Msg(role="assistant", tool_calls=None)
+    sigs = extract_recent_tool_signatures([plain_reply])
+    assert sigs == []
+# ── detect_identical_consecutive ────────────────────────────────────────
+def _sig(name: str, args: str = "{}") -> ToolCallSignature:
+    """Build a signature for ``name`` whose hash is derived from ``args``."""
+    digest = _hash_args(args)
+    return ToolCallSignature(name=name, args_hash=digest)
+def test_identical_consecutive_fires_at_threshold():
+    """Three identical signatures in a row trip a threshold of three."""
+    repeated = _sig("read", '{"p": 1}')
+    sigs = [repeated, repeated, repeated]
+    offender = detect_identical_consecutive(sigs, threshold=3)
+    assert offender == "read"
+def test_identical_consecutive_stays_silent_below_threshold():
+    """Two identical signatures are not enough for a threshold of three."""
+    repeated = _sig("read", '{"p": 1}')
+    sigs = [repeated, repeated]
+    offender = detect_identical_consecutive(sigs, threshold=3)
+    assert offender is None
+def test_identical_consecutive_resets_on_break():
+    """A, A, B, A, A never reaches three in a row, so nothing is reported."""
+    payload_a = '{"p": 1}'
+    payload_b = '{"p": 2}'
+    run = (payload_a, payload_a, payload_b, payload_a, payload_a)
+    sigs = [_sig("read", payload) for payload in run]
+    offender = detect_identical_consecutive(sigs, threshold=3)
+    assert offender is None
+def test_identical_consecutive_catches_reordered_args_after_normalization():
+    """Regression: the same call with shuffled keys counts as a repeat."""
+    payloads = (
+        '{"task": "find paper", "depth": 3}',
+        '{"depth": 3, "task": "find paper"}',
+        '{"task":"find paper","depth":3}',
+    )
+    msgs = [_assistant_call("research", payload) for payload in payloads]
+    sigs = extract_recent_tool_signatures(msgs)
+    offender = detect_identical_consecutive(sigs, threshold=3)
+    assert offender == "research"
+# ── detect_repeating_sequence ───────────────────────────────────────────
+def test_repeating_sequence_catches_alternating_pair():
+    """An a, b pair repeated three times is reported as the cycle [a, b]."""
+    pair = [_sig("a"), _sig("b")]
+    sigs = pair * 3
+    pattern = detect_repeating_sequence(sigs)
+    assert pattern is not None
+    names = [sig.name for sig in pattern]
+    assert names == ["a", "b"]
+def test_repeating_sequence_misses_when_pattern_breaks():
+    """a, b, a, c is not a repeating cycle, so nothing is reported."""
+    sigs = [_sig(tool) for tool in ("a", "b", "a", "c")]
+    pattern = detect_repeating_sequence(sigs)
+    assert pattern is None
+def test_repeating_sequence_normalizes_args_inside_pattern():
+    """A research/read cycle is still detected when keys are reordered."""
+    calls = (
+        ("research", '{"q": "x", "n": 1}'),
+        ("read", '{"path": "/a"}'),
+        ("research", '{"n": 1, "q": "x"}'),
+        ("read", '{"path":"/a"}'),
+        ("research", '{"q":"x","n":1}'),
+        ("read", '{"path": "/a"}'),
+    )
+    msgs = [_assistant_call(tool, payload) for tool, payload in calls]
+    sigs = extract_recent_tool_signatures(msgs)
+    pattern = detect_repeating_sequence(sigs)
+    assert pattern is not None
+    names = [sig.name for sig in pattern]
+    assert names == ["research", "read"]
+# ── check_for_doom_loop ─────────────────────────────────────────────────
+def test_check_for_doom_loop_quiet_below_minimum_signatures():
+    """Two identical calls are too few for the entry point to report a loop."""
+    payload = '{"p": 1}'
+    msgs = [_assistant_call("read", payload), _assistant_call("read", payload)]
+    out = check_for_doom_loop(msgs)
+    assert out is None
+def test_check_for_doom_loop_returns_corrective_prompt_for_identical_run():
+    """Three identical calls produce a corrective prompt naming the tool."""
+    call = _assistant_call("read", '{"p": 1}')
+    msgs = [call, call, call]
+    out = check_for_doom_loop(msgs)
+    assert out is not None
+    for fragment in ("DOOM LOOP DETECTED", "'read'"):
+        assert fragment in out
+def test_check_for_doom_loop_returns_corrective_prompt_for_cycle():
+    """An a, b cycle repeated three times produces a prompt showing the cycle."""
+    msgs = [
+        _assistant_call(tool, "{}")
+        for _ in range(3)
+        for tool in ("a", "b")
+    ]
+    out = check_for_doom_loop(msgs)
+    assert out is not None
+    for fragment in ("DOOM LOOP DETECTED", "a → b"):
+        assert fragment in out
+def test_check_for_doom_loop_quiet_when_args_meaningfully_differ():
+    """The same tool with three different argument values is not a loop."""
+    payloads = (
+        '{"path": "/a.py"}',
+        '{"path": "/b.py"}',
+        '{"path": "/c.py"}',
+    )
+    msgs = [_assistant_call("read", payload) for payload in payloads]
+    out = check_for_doom_loop(msgs)
+    assert out is None