Spaces:
Running on Zero
Running on Zero
| """errors.py — deterministic error signal on a tool_result. | |
| A tool_result is `errored` iff its text matches (case-insensitive, scanned over | |
| the first ~1500 chars) the contract regex alternation. NO model. Pure code. | |
| This sets `ToolCall.errored`. The loop detector (loops.py) consumes it: a real | |
| loop requires the same exact Bash command twice AND at least one errored run. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| from typing import Optional | |
# Error-signal alternatives from TRACE-CONTRACT §5 "Error signal". Each entry is
# a regex fragment; they are OR-ed together in the order listed. The pattern is
# deliberately unanchored: these are diagnostic phrases that show up somewhere
# inside tool output, so a substring hit is the intended match.
_ERROR_ALTERNATIVES = (
    r"command not found",
    r"no such file",
    r"permission denied",
    r"error:",
    r"fatal:",
    r"exit code [1-9]",
    r"cannot",
    r"unrecognized",
    r"syntax error",
    r"refused",
    r"traceback",
)

# Compiled once at import time; matching ignores case.
_ERROR_RE = re.compile("|".join(_ERROR_ALTERNATIVES), re.IGNORECASE)

# Scan window, in characters. Errors surface early in the output, so looking at
# only the head bounds the cost and keeps a benign mention further down from
# flipping the flag.
_SCAN_CHARS = 1500
def is_errored(result_text: Optional[str]) -> bool:
    """Report whether the head of a tool_result's text matches the error regex.

    Only the first `_SCAN_CHARS` characters are searched. `None` and the empty
    string are never errored.
    """
    # A falsy input collapses to "", which no alternative in the pattern can
    # match, so the result is False without a separate guard.
    head = (result_text or "")[:_SCAN_CHARS]
    return bool(_ERROR_RE.search(head))
def annotate_errors(turns) -> None:
    """Stamp `errored` onto every ToolCall of every turn, mutating in place.

    The only input consulted is each call's `result_text` (populated by the
    loader from the linked tool_result); the only output is the call's
    `errored` attribute. Idempotent: a second pass recomputes the same flags
    from the same text.
    """
    # Walk turns in order and, within each turn, its tools in order.
    every_call = (call for turn in turns for call in turn.tools)
    for call in every_call:
        call.errored = is_errored(call.result_text)