CodeLens / tests /test_api.py
ArshVerma's picture
Implement Maximum Robustness for /reset to bypass body validation errors
8c875ce
Raw
History Blame Contribute Delete
6.16 kB
import pytest
from fastapi.testclient import TestClient
from app import app
from codelens_env.models import TaskId, ActionType, Category, Severity, Verdict
def test_api_health(client):
response = client.get("/health")
assert response.status_code == 200
assert response.json()["status"] == "ok"
assert response.json()["env_ready"] is True
def test_api_workflow(client):
# 1. Reset
reset_resp = client.post("/reset", json={"task_id": "bug_detection", "seed": 1})
assert reset_resp.status_code == 200
data = reset_resp.json()
episode_id = data["episode_id"]
assert "observation" in data["result"]
# 2. Step
action = {
"action_type": "comment",
"body": "Starting review",
}
step_resp = client.post(f"/step/{episode_id}", json=action)
assert step_resp.status_code == 200
assert "observation" in step_resp.json()
# 3. Get Result
result_resp = client.get(f"/result/{episode_id}")
assert result_resp.status_code == 200
assert result_resp.json()["final_score"] >= 0
def test_api_leaderboard(client):
# Submit a score
sub = {
"agent_name": "test_agent",
"task_id": "bug_detection",
"score": 0.95,
"seed": 42
}
resp = client.post("/submit", json=sub)
assert resp.status_code == 200
assert resp.json()["status"] == "submitted"
# Check leaderboard
lb_resp = client.get("/leaderboard")
assert lb_resp.status_code == 200
lb_data = lb_resp.json()
bug_entries = lb_data["bug_detection"]["entries"]
assert len(bug_entries) > 0
assert bug_entries[0]["agent_name"] == "test_agent"
def test_api_invalid_episode(client):
response = client.post("/step/nonexistent-id", json={
"action_type": "comment",
"body": "hello"
})
assert response.status_code == 404
def test_api_health_fields(client):
resp = client.get("/health")
data = resp.json()
assert "active_episodes" in data
assert "auth_enabled" in data
assert "env" in data
def test_api_reset_invalid_task(client):
resp = client.post("/reset", json={"task_id": "invalid_task", "seed": 0})
assert resp.status_code == 200
assert resp.json()["result"]["task_id"] == "bug_detection" # Fallback
def test_api_step_invalid_action_type(client):
reset_resp = client.post("/reset", json={"task_id": "bug_detection", "seed": 0})
episode_id = reset_resp.json()["episode_id"]
resp = client.post(f"/step/{episode_id}", json={"action_type": "not_valid", "body": "x"})
assert resp.status_code == 422
def test_api_result_after_completion(client):
"""Result endpoint should return persisted data for completed episodes."""
reset_resp = client.post("/reset", json={"task_id": "bug_detection", "seed": 0})
episode_id = reset_resp.json()["episode_id"]
# Complete the episode
client.post(f"/step/{episode_id}", json={
"action_type": "approve", "body": "LGTM", "verdict": "lgtm"
})
# Result must be available
result_resp = client.get(f"/result/{episode_id}")
assert result_resp.status_code == 200
assert result_resp.json()["final_score"] >= 0
def test_api_stats_endpoint(client):
resp = client.get("/stats")
assert resp.status_code == 200
assert "total_episodes" in resp.json()
@pytest.mark.parametrize("task_id", ["bug_detection", "security_audit", "architectural_review"])
def test_api_full_workflow_all_tasks(client, task_id):
reset = client.post("/reset", json={"task_id": task_id, "seed": 1})
assert reset.status_code == 200
episode_id = reset.json()["episode_id"]
step = client.post(f"/step/{episode_id}", json={
"action_type": "approve", "body": "LGTM", "verdict": "lgtm"
})
assert step.status_code == 200
assert step.json()["done"] is True
def test_api_state_endpoint(client):
reset = client.post("/reset", json={"task_id": "bug_detection", "seed": 1})
episode_id = reset.json()["episode_id"]
# Test state retrieval
state_resp = client.get(f"/state/{episode_id}")
assert state_resp.status_code == 200
state_data = state_resp.json()
assert "observation" not in state_data # Pydantic model unwrapped
assert state_data["task_id"] == "bug_detection"
assert "max_steps" in state_data
# Test invalid state
invalid_state = client.get("/state/invalid-id")
assert invalid_state.status_code == 404
def test_api_leaderboard_pagination(client):
# Submit 3 entries
for i, score in enumerate([0.9, 0.7, 0.5]):
client.post("/submit", json={
"agent_name": f"agent_{i}", "task_id": "bug_detection",
"score": score, "seed": i
})
# Test limit
resp = client.get("/leaderboard?task_id=bug_detection&limit=2")
assert resp.status_code == 200
data = resp.json()
assert len(data["entries"]) == 2
assert data["total"] >= 3
# Test ordering (best first)
assert data["entries"][0]["score"] >= data["entries"][1]["score"]
def test_api_reset_robustness(client):
# 1. No body at all
resp = client.post("/reset")
assert resp.status_code == 200
assert resp.json()["result"]["task_id"] == "bug_detection"
# 2. Empty JSON body
resp = client.post("/reset", json={})
assert resp.status_code == 200
assert resp.json()["result"]["task_id"] == "bug_detection"
# 3. Invalid JSON (should not trigger 422 now)
resp = client.post("/reset", content="invalid json {", headers={"Content-Type": "application/json"})
assert resp.status_code == 200
assert resp.json()["result"]["task_id"] == "bug_detection"
# 4. Plain text body (unexpected header, should still pass)
resp = client.post("/reset", content="just some text", headers={"Content-Type": "text/plain"})
assert resp.status_code == 200
assert resp.json()["result"]["task_id"] == "bug_detection"
# 5. Query params override
resp = client.post("/reset?task_id=security_audit&seed=100")
assert resp.status_code == 200
data = resp.json()
assert data["result"]["task_id"] == "security_audit"
assert data["result"]["seed"] == 100