#!/usr/bin/env python3
# Page header captured with this file: "Spaces: Sleeping"; file size 2,595 bytes; 03a7eb9 (presumably the revision hash).
"""
Quick debug test for CodeArena execution pipeline.
Tests reset and step endpoints to ensure they work before RL training.
"""
import requests
import time
def test_reset():
    """Check that the ``/reset`` endpoint answers with a task payload.

    Posts ``{"task_id": "easy-1"}`` to ``http://localhost:7860/reset`` with a
    10-second timeout, then prints the task id and the length of the buggy
    code found in the JSON response.

    Returns:
        bool: ``True`` if the request succeeded and the response could be
        read; ``False`` otherwise. Failures are printed, not raised.
    """
    print("🔍 Testing /reset endpoint...")
    try:
        response = requests.post(
            "http://localhost:7860/reset",
            json={"task_id": "easy-1"},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
        print("✅ Reset successful!")
        print(f" Task: {data.get('task_id', 'unknown')}")
        print(f" Buggy code length: {len(data.get('buggy_code', ''))}")
        return True
    except Exception as e:
        # Deliberately broad: this is a diagnostic script, so connection
        # errors, HTTP error statuses and malformed payloads are all reported
        # the same way instead of aborting the run.
        print(f"❌ Reset failed: {e}")
        return False
def test_step():
    """Check that the ``/step`` endpoint accepts a fix and returns a reward.

    Posts a minimal ``add_numbers`` implementation as ``proposed_fix`` to
    ``http://localhost:7860/step`` with a 15-second timeout, then prints the
    reward, the ``done`` flag, the test results and the per-component reward
    breakdown found in the JSON response.

    Returns:
        bool: ``True`` if the request succeeded and the reward is greater
        than 0.01; ``False`` otherwise. Failures are printed, not raised.
    """
    print("\n🔍 Testing /step endpoint...")
    # Simple fix attempt - just try to make it compile
    simple_fix = """
def add_numbers(a, b):
    return a + b
"""
    try:
        response = requests.post(
            "http://localhost:7860/step",
            json={"proposed_fix": simple_fix},
            timeout=15,
        )
        response.raise_for_status()
        data = response.json()
        reward = data.get('reward', 0)
        done = data.get('done', False)
        info = data.get('info', {})
        print("✅ Step successful!")
        print(f" Reward: {reward:.3f}")
        print(f" Done: {done}")
        print(f" Test results: {info.get('test_results', 'unknown')}")
        reward_comps = info.get('reward_components', {})
        print(" Reward breakdown:")
        for k, v in reward_comps.items():
            # Components are expected to be numeric; fall back to the plain
            # value so an unexpected type does not fail the whole check.
            shown = f"{v:.3f}" if isinstance(v, (int, float)) else f"{v}"
            print(f"   {k}: {shown}")
        return reward > 0.01  # Better than minimum
    except Exception as e:
        # Deliberately broad: any transport, HTTP or payload problem is
        # reported as a failed step instead of aborting the run.
        print(f"❌ Step failed: {e}")
        return False
def main():
    """Run the reset and step checks against the local server.

    First sends a GET to ``http://localhost:7860/health`` (5-second timeout).
    If that request cannot be completed, prints how to start the server and
    returns without running the checks. Otherwise runs ``test_reset`` and
    ``test_step`` one second apart and prints an overall verdict.
    """
    print("🧪 CodeArena Execution Pipeline Test")
    print("=" * 50)

    # Check if server is running. Only transport-level failures count as
    # "not running"; Ctrl-C and other non-request errors propagate normally.
    try:
        requests.get("http://localhost:7860/health", timeout=5)
    except requests.RequestException:
        print("❌ Server not running on localhost:7860")
        print(" Start with: python -m uvicorn server.app:app --port 7860")
        return
    print("✅ Server is running!")

    # `&=` (not `and`) so the step check still runs when the reset check fails.
    success = True
    success &= test_reset()
    time.sleep(1)  # Brief pause
    success &= test_step()

    print("\n" + "=" * 50)
    if success:
        print("🎉 All tests passed! Execution pipeline is working.")
        print(" Ready for RL training.")
    else:
        print("⚠️ Some tests failed. Check debug output above.")
        print(" Fix issues before running RL training.")
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()