File size: 2,595 Bytes
03a7eb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
"""
Quick debug test for CodeArena execution pipeline.
Tests reset and step endpoints to ensure they work before RL training.
"""

import requests
import time

def test_reset():
    """Check that the /reset endpoint answers with a task payload.

    Posts ``{"task_id": "easy-1"}`` to ``http://localhost:7860/reset`` and
    prints the task id and the length of the buggy code found in the JSON
    response.

    Returns:
        bool: True when the request succeeds and the response is reported,
        False on any error (the error is printed rather than raised).
    """
    print("🔄 Testing /reset endpoint...")
    try:
        response = requests.post(
            "http://localhost:7860/reset",
            json={"task_id": "easy-1"},
            timeout=10,
        )
        # Treat 4xx/5xx answers as failures instead of parsing an error body.
        response.raise_for_status()
        data = response.json()
        print("✅ Reset successful!")
        print(f"   Task: {data.get('task_id', 'unknown')}")
        print(f"   Buggy code length: {len(data.get('buggy_code', ''))}")
        return True
    except Exception as e:
        # Deliberately broad: this is a smoke test, so any failure
        # (connection, HTTP status, malformed JSON) is reported as a
        # failed check rather than a crash.
        print(f"❌ Reset failed: {e}")
        return False

def test_step():
    """Check that the /step endpoint accepts a proposed fix and scores it.

    Posts a trivial ``add_numbers`` implementation as ``proposed_fix`` to
    ``http://localhost:7860/step`` and prints the reward, the done flag, the
    test results and the per-component reward breakdown from the response.

    Returns:
        bool: True when the request succeeds and the reward is above 0.01,
        False otherwise or on any error (the error is printed, not raised).
    """
    print("\n🚀 Testing /step endpoint...")

    # Simple fix attempt - just try to make it compile
    simple_fix = """
def add_numbers(a, b):
    return a + b
"""

    try:
        response = requests.post(
            "http://localhost:7860/step",
            json={"proposed_fix": simple_fix},
            timeout=15,
        )
        response.raise_for_status()
        data = response.json()

        reward = data.get('reward', 0)
        done = data.get('done', False)
        # A JSON null for "info" would otherwise break the lookups below.
        info = data.get('info') or {}

        print("✅ Step successful!")
        print(f"   Reward: {reward:.3f}")
        print(f"   Done: {done}")
        print(f"   Test results: {info.get('test_results', 'unknown')}")

        reward_comps = info.get('reward_components') or {}
        print("   Reward breakdown:")
        for name, value in reward_comps.items():
            # Only numbers take the fixed-point format; print anything else
            # as-is so an odd component does not fail the whole check.
            if isinstance(value, (int, float)):
                print(f"     {name}: {value:.3f}")
            else:
                print(f"     {name}: {value}")
        return reward > 0.01  # Better than minimum

    except Exception as e:
        # Deliberately broad: any failure in this smoke test is reported
        # as a failed check rather than a crash.
        print(f"❌ Step failed: {e}")
        return False

def main():
    """Run the reset and step checks against the local server.

    First probes ``http://localhost:7860/health``; if the request itself
    fails, prints start-up instructions and returns without running the
    checks. Otherwise runs ``test_reset`` and ``test_step`` (with a one
    second pause in between) and prints an overall verdict.
    """
    print("🧪 CodeArena Execution Pipeline Test")
    print("=" * 50)

    # Check if server is running. Any HTTP answer counts as "running": the
    # status code is not inspected, only transport-level failures are.
    try:
        requests.get("http://localhost:7860/health", timeout=5)
    except requests.RequestException:
        # Narrow catch so Ctrl-C and SystemExit still propagate.
        print("❌ Server not running on localhost:7860")
        print("   Start with: python -m uvicorn server.app:app --port 7860")
        return
    print("✅ Server is running!")

    # Always run both checks, even if the first one fails, so the debug
    # output covers the whole pipeline.
    reset_ok = test_reset()
    time.sleep(1)  # Brief pause
    step_ok = test_step()
    success = reset_ok and step_ok

    print("\n" + "=" * 50)
    if success:
        print("🎉 All tests passed! Execution pipeline is working.")
        print("   Ready for RL training.")
    else:
        print("⚠️  Some tests failed. Check debug output above.")
        print("   Fix issues before running RL training.")

# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()