File size: 2,665 Bytes
b89e6d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""Throwaway script: exercise the agentic repair loop on a tricky problem.

The intent is chosen to be genuinely hard for a small model: balanced-bracket
checking with all three delimiter types. The check_fn appends a strict test
harness so the sandbox can report a traceback on failure.

Run:
    python scripts/test_repair_loop.py
Expected output: iteration trace showing whether the model self-corrects.
"""
import sys
from pathlib import Path

sys.path.append(str(Path(__file__).resolve().parents[1]))
from src.agent.repair_loop import make_repair_generator, repair_loop
from src.rag.generator import CodeAssistant

# ── test harness appended to every generated function ─────────────────────
_TEST_HARNESS = '''

# --- auto-injected test harness ---
assert is_balanced("") is True,       "empty string should be balanced"
assert is_balanced("()") is True,     "simple parens"
assert is_balanced("([{}])") is True, "nested mix"
assert is_balanced("([)]") is False,  "wrong order"
assert is_balanced("(") is False,     "unclosed"
assert is_balanced("{[}]") is False,  "interleaved wrong"
assert is_balanced("((()))") is True, "deep nesting"
print("all tests passed")
'''

INTENT = (
    "Write a Python function called `is_balanced` that returns True if the input "
    "string has balanced parentheses (), square brackets [], and curly braces {}, "
    "and False otherwise. Use a stack."
)


def check_fn(code: str) -> str:
    """Append the test harness to the generated function."""
    return code + _TEST_HARNESS


def main():
    print("=" * 60)
    print("REPAIR LOOP TEST")
    print("Intent:", INTENT[:80] + "...")
    print("=" * 60)

    print("\n[setup] Loading CodeAssistant (model + index)...")
    assistant = CodeAssistant.from_config(with_index=True)
    generate_fn = make_repair_generator(assistant)

    print("[repair] Starting loop (max_iters=3)...\n")
    trace = repair_loop(INTENT, generate_fn, check_fn, max_iters=3, timeout=10.0)

    print("\n" + "=" * 60)
    print("ITERATION TRACE")
    print("=" * 60)
    for i, (code, error) in enumerate(trace.history, 1):
        print(f"\n--- Iteration {i} ---")
        print("Generated code:")
        print(code)
        if error:
            print(f"\nError:\n  {error}")
        else:
            print("\nResult: PASSED")

    print("\n" + "=" * 60)
    print(f"Outcome: {'SUCCESS' if trace.success else 'BEST EFFORT (did not pass)'}")
    print(f"Iterations used: {trace.iterations}")
    print("=" * 60)
    if trace.success:
        print("\nFinal passing code:")
        print(trace.final_code)


if __name__ == "__main__":
    main()