File size: 1,614 Bytes
5f43c7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""errors.py — deterministic error signal on a tool_result.

A tool_result is `errored` iff its text matches (case-insensitive, scanned over
the first ~1500 chars) the contract regex alternation. NO model. Pure code.

This sets `ToolCall.errored`. The loop detector (loops.py) consumes it: a real
loop requires the same exact Bash command twice AND at least one errored run.
"""
from __future__ import annotations

import re
from typing import Optional

# TRACE-CONTRACT §5 "Error signal". Each alternative is a diagnostic phrase that
# shows up somewhere inside tool output, so the pattern is deliberately
# unanchored (plain substring search) and case-insensitive.
_ERROR_RE = re.compile(
    "|".join(
        (
            r"command not found",
            r"no such file",
            r"permission denied",
            r"error:",
            r"fatal:",
            r"exit code [1-9]",
            r"cannot",
            r"unrecognized",
            r"syntax error",
            r"refused",
            r"traceback",
        )
    ),
    flags=re.IGNORECASE,
)

# Scan window, in characters. Errors tend to surface at the start of the output;
# capping the scan bounds the cost and keeps a harmless mention further down
# from flipping the flag.
_SCAN_CHARS = 1500


def is_errored(result_text: Optional[str]) -> bool:
    """Report whether a tool_result's text carries the error signal.

    Only the leading `_SCAN_CHARS` characters are examined; a phrase that
    starts inside the window but ends beyond it does not count. `None` and the
    empty string are never errored.
    """
    if result_text:
        # `endpos` makes the search behave as if the text were only
        # `_SCAN_CHARS` long, i.e. the same window a slice would give.
        hit = _ERROR_RE.search(result_text, 0, _SCAN_CHARS)
        return hit is not None
    return False


def annotate_errors(turns) -> None:
    """Stamp `errored` onto each ToolCall of each turn, mutating in place.

    The flag is derived from ToolCall.result_text alone (the field the loader
    populates from the linked tool_result), by way of `is_errored`. Running it
    again over the same turns just reassigns the same values, so it is
    idempotent. Nothing is returned.
    """
    # Walk every turn's `tools` in order, as one flat lazy stream of calls.
    every_call = (call for turn in turns for call in turn.tools)
    for call in every_call:
        call.errored = is_errored(call.result_text)