awesome-loop-engineering / examples /runnable /test-repair-loop.sh
cy0307's picture
Sync awesome-loop-engineering
9ec4919 verified
Raw
History Blame Contribute Delete
4.08 kB
#!/usr/bin/env bash
# Minimal runnable Loop Engineering reference: a test-repair loop.
#
# Runs a deterministic check command, and while it fails, hands the evidence
# to an agent CLI for a narrow fix, then re-runs the check. Demonstrates the
# Loop Contract with no dependencies beyond bash, coreutils, and your agent CLI.
#
# Objective : make CHECK_CMD pass
# Trigger : manual bootstrap (run this script)
# Intake : failing check output
# Workspace : current directory (run it inside a branch or worktree)
# Delegation : AGENT_CMD receives the evidence as a prompt
# Verification : CHECK_CMD exit code, judged by the script, not the agent
# State : PROGRESS_FILE survives across iterations and runs
# Budget : MAX_ITERATIONS
# Escalation : exits non-zero with a reason when the budget runs out or evidence stops changing
# Exit : check passes, budget exhausted, or repeated failure
#
# Usage:
# CHECK_CMD="pytest -x" AGENT_CMD="claude -p" ./test-repair-loop.sh
# CHECK_CMD="npm test" AGENT_CMD="codex exec" ./test-repair-loop.sh
#
# Exit codes: 0 check passes, 1 budget exhausted, 2 no new evidence, 3 usage.
set -u

#######################################
# Reject a numeric setting that is not a whole number at or above a minimum.
# Arguments: $1 - setting name (used in the message)
#            $2 - value to validate
#            $3 - smallest accepted value
# Outputs:   a diagnostic on stderr when the value is rejected
# Returns:   0 when valid; otherwise exits the shell with status 3 (usage)
#######################################
require_integer() {
  local name="$1" value="$2" minimum="$3"
  # The second test also fails (status 2) when the number is too large for
  # the shell to compare, so such values are rejected rather than accepted.
  if ! [[ "$value" =~ ^[0-9]+$ ]] || ! [ "$value" -ge "$minimum" ] 2>/dev/null; then
    echo "$name must be an integer >= $minimum (got '$value')" >&2
    exit 3
  fi
}

# Each setting comes from the environment; CHECK_CMD and AGENT_CMD fall back
# to the first and second positional arguments, the rest to fixed defaults.
CHECK_CMD="${CHECK_CMD:-${1:-}}"
AGENT_CMD="${AGENT_CMD:-${2:-}}"
MAX_ITERATIONS="${MAX_ITERATIONS:-5}"
PROGRESS_FILE="${PROGRESS_FILE:-LOOP_PROGRESS.md}"
EVIDENCE_LINES="${EVIDENCE_LINES:-80}"

# Both values are later used in integer contexts (`[ -lt ]` and `tail -n`);
# fail fast with the usage exit code instead of misbehaving mid-run.
require_integer MAX_ITERATIONS "$MAX_ITERATIONS" 0
require_integer EVIDENCE_LINES "$EVIDENCE_LINES" 1
# Both commands are mandatory; without them print usage and exit 3.
if [ -z "$CHECK_CMD" ] || [ -z "$AGENT_CMD" ]; then
  echo "Usage: CHECK_CMD='pytest -x' AGENT_CMD='claude -p' $0" >&2
  echo " or: $0 'pytest -x' 'claude -p'" >&2
  exit 3
fi

# AGENT_CMD is split on whitespace; the prompt is passed as one final argument.
read -r -a agent_argv <<< "$AGENT_CMD"

# A whitespace-only AGENT_CMD passes the -z test above but splits into zero
# words; running the empty array would then execute the prompt text itself as
# the command. Treat it as a usage error instead.
if [ -z "${agent_argv[0]:-}" ]; then
  echo "AGENT_CMD contains no command to run." >&2
  exit 3
fi
failure_digest() {
# Stable fingerprint of the failure so we can tell when evidence stops changing.
if command -v shasum >/dev/null 2>&1; then shasum | cut -d' ' -f1; else cksum | cut -d' ' -f1; fi
}
#######################################
# Append one line to the progress log.
# Globals:   PROGRESS_FILE (read) - path of the file appended to
# Arguments: $1 - text to write, emitted verbatim followed by a newline
#######################################
record() {
  local entry="$1"
  printf '%s\n' "$entry" >>"$PROGRESS_FILE"
}
# A progress file that does not exist yet is seeded with a title and the
# objective; an existing one is kept so state carries over between runs.
if ! [ -f "$PROGRESS_FILE" ]; then
  for entry in "# Loop progress" "" "- Objective: make \`$CHECK_CMD\` pass."; do
    record "$entry"
  done
fi

# Every run appends its own section: a UTC start timestamp, the check command
# and the iteration budget.
run_started="$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
for entry in \
  "" \
  "## Run started $run_started" \
  "" \
  "- Check command: \`$CHECK_CMD\`" \
  "- Budget: $MAX_ITERATIONS iterations"; do
  record "$entry"
done
# Main repair loop.
#
# Each iteration runs CHECK_CMD, exits 0 if it passes, and otherwise hands the
# tail of the failing output to the agent. The script, not the agent, decides
# success from CHECK_CMD's exit status.
#
# Exits: 0 when the check passes, 2 when two consecutive failures produce the
# same evidence digest, 1 when the iteration budget is spent.
previous_digest=""
iteration=0
while [ "$iteration" -lt "$MAX_ITERATIONS" ]; do
  iteration=$((iteration + 1))
  echo "[loop] iteration $iteration/$MAX_ITERATIONS: running check..." >&2

  # Run through `bash -c` so CHECK_CMD may contain pipes, &&, and so on.
  # stderr is merged into the captured output so it is part of the evidence.
  check_output="$(bash -c "$CHECK_CMD" 2>&1)"
  check_status=$?
  if [ "$check_status" -eq 0 ]; then
    echo "[loop] check passed." >&2
    record "- Iteration $iteration: check passed. Loop complete."
    exit 0
  fi

  # Only the last EVIDENCE_LINES lines are kept, both for the prompt and for
  # the digest used to detect a repeated failure.
  evidence="$(printf '%s\n' "$check_output" | tail -n "$EVIDENCE_LINES")"
  digest="$(printf '%s' "$evidence" | failure_digest)"
  if [ "$digest" = "$previous_digest" ]; then
    echo "[loop] same failure twice with no new evidence; escalating to a human." >&2
    record "- Iteration $iteration: identical failure repeated (digest $digest). Escalated."
    exit 2
  fi
  previous_digest="$digest"
  record "- Iteration $iteration: check failed (exit $check_status, digest $digest). Delegating fix."

  # The continuation lines of this string start at column 0 on purpose: any
  # indentation here would become part of the prompt text.
  prompt="You are one iteration inside an automated test-repair loop.
Objective: make this command pass: $CHECK_CMD
Read $PROGRESS_FILE for what previous iterations already tried.
Latest failing output (last $EVIDENCE_LINES lines):
---
$evidence
---
Rules:
- Fix only the cause of this failure. Do not expand scope.
- Do not modify or delete tests to make them pass unless the test itself is provably wrong.
- Do not add dependencies, change CI config, or touch unrelated files.
- After editing, append one line to $PROGRESS_FILE describing what you changed and why.
- The loop re-runs the check itself; an external gate decides completion, not you."

  # The agent's exit status is logged but not acted on; the next check run
  # is what judges whether its change helped.
  "${agent_argv[@]}" "$prompt"
  agent_status=$?
  record "- Iteration $iteration: agent exited with status $agent_status."
done

# Every completed iteration ends with an agent run, so the last fix has not
# been judged yet. Re-run the check once before declaring the budget spent,
# otherwise a successful final fix would still be reported as a failure.
# Skipped when no iteration ran, because then no agent changed anything.
if [ "$iteration" -gt 0 ]; then
  echo "[loop] verifying the fix from iteration $iteration..." >&2
  if bash -c "$CHECK_CMD" >/dev/null 2>&1; then
    echo "[loop] check passed." >&2
    record "- Final verification after iteration $iteration: check passed. Loop complete."
    exit 0
  fi
fi

echo "[loop] budget of $MAX_ITERATIONS iterations exhausted; escalating to a human." >&2
record "- Budget exhausted after $MAX_ITERATIONS iterations. Escalated."
exit 1