# File size: 6,160 Bytes
import pytest
from fastapi.testclient import TestClient
from app import app
from codelens_env.models import TaskId, ActionType, Category, Severity, Verdict
def test_api_health(client):
    """GET /health answers 200 with status "ok" and env_ready set to True."""
    health = client.get("/health")
    assert health.status_code == 200

    # Decode the body once and check both fields on the same payload.
    payload = health.json()
    assert payload["status"] == "ok"
    assert payload["env_ready"] is True
def test_api_workflow(client):
    """Drive one episode through reset, a single comment step, and result lookup."""
    # Reset: open a bug_detection episode and remember its id.
    opened = client.post("/reset", json={"task_id": "bug_detection", "seed": 1})
    assert opened.status_code == 200
    opened_body = opened.json()
    episode_id = opened_body["episode_id"]
    assert "observation" in opened_body["result"]

    # Step: post one comment action against that episode.
    comment_action = {"action_type": "comment", "body": "Starting review"}
    stepped = client.post(f"/step/{episode_id}", json=comment_action)
    assert stepped.status_code == 200
    assert "observation" in stepped.json()

    # Result: the endpoint must answer with a non-negative final score.
    outcome = client.get(f"/result/{episode_id}")
    assert outcome.status_code == 200
    assert outcome.json()["final_score"] >= 0
def test_api_leaderboard(client):
    """Submit one score, then expect it first among the bug_detection entries."""
    submission = dict(
        agent_name="test_agent",
        task_id="bug_detection",
        score=0.95,
        seed=42,
    )
    submitted = client.post("/submit", json=submission)
    assert submitted.status_code == 200
    assert submitted.json()["status"] == "submitted"

    # Without query parameters the leaderboard response is keyed by task id.
    board = client.get("/leaderboard")
    assert board.status_code == 200
    bug_entries = board.json()["bug_detection"]["entries"]
    assert len(bug_entries) > 0
    assert bug_entries[0]["agent_name"] == "test_agent"
def test_api_invalid_episode(client):
    """Stepping an episode id that was never created must answer 404."""
    action = {"action_type": "comment", "body": "hello"}
    reply = client.post("/step/nonexistent-id", json=action)
    assert reply.status_code == 404
def test_api_health_fields(client):
    """The /health payload exposes active_episodes, auth_enabled and env keys."""
    payload = client.get("/health").json()
    for expected_key in ("active_episodes", "auth_enabled", "env"):
        assert expected_key in payload
def test_api_reset_invalid_task(client):
    """An unrecognised task_id is accepted and the result reports bug_detection."""
    reply = client.post("/reset", json={"task_id": "invalid_task", "seed": 0})
    assert reply.status_code == 200

    # The reset falls back to bug_detection instead of rejecting the request.
    reported_task = reply.json()["result"]["task_id"]
    assert reported_task == "bug_detection"
def test_api_step_invalid_action_type(client):
    """A step whose action_type is not recognised must be rejected with 422."""
    opened = client.post("/reset", json={"task_id": "bug_detection", "seed": 0})
    episode_id = opened.json()["episode_id"]

    bad_action = {"action_type": "not_valid", "body": "x"}
    rejected = client.post(f"/step/{episode_id}", json=bad_action)
    assert rejected.status_code == 422
def test_api_result_after_completion(client):
    """Result endpoint should return persisted data for completed episodes.

    The approve step's response is checked as well: without that check the
    test would still pass if the approve action were rejected and the episode
    never finished, so the "completed" path would go unverified.
    """
    reset_resp = client.post("/reset", json={"task_id": "bug_detection", "seed": 0})
    assert reset_resp.status_code == 200
    episode_id = reset_resp.json()["episode_id"]

    # Complete the episode and confirm that it really finished.
    step_resp = client.post(f"/step/{episode_id}", json={
        "action_type": "approve", "body": "LGTM", "verdict": "lgtm"
    })
    assert step_resp.status_code == 200
    assert step_resp.json()["done"] is True

    # Result must be available once the episode is done.
    result_resp = client.get(f"/result/{episode_id}")
    assert result_resp.status_code == 200
    assert result_resp.json()["final_score"] >= 0
def test_api_stats_endpoint(client):
    """GET /stats answers 200 and its payload includes total_episodes."""
    stats = client.get("/stats")
    assert stats.status_code == 200
    stats_body = stats.json()
    assert "total_episodes" in stats_body
@pytest.mark.parametrize("task_id", ["bug_detection", "security_audit", "architectural_review"])
def test_api_full_workflow_all_tasks(client, task_id):
    """For each task id, a reset followed by one approve step ends the episode."""
    opened = client.post("/reset", json={"task_id": task_id, "seed": 1})
    assert opened.status_code == 200
    episode_id = opened.json()["episode_id"]

    approval = {"action_type": "approve", "body": "LGTM", "verdict": "lgtm"}
    finished = client.post(f"/step/{episode_id}", json=approval)
    assert finished.status_code == 200
    assert finished.json()["done"] is True
def test_api_state_endpoint(client):
    """GET /state/<id> returns the episode state; an unknown id answers 404."""
    opened = client.post("/reset", json={"task_id": "bug_detection", "seed": 1})
    episode_id = opened.json()["episode_id"]

    # Known episode: the state payload carries task_id and max_steps and has
    # no "observation" key.
    known = client.get(f"/state/{episode_id}")
    assert known.status_code == 200
    state = known.json()
    assert "observation" not in state
    assert state["task_id"] == "bug_detection"
    assert "max_steps" in state

    # Unknown episode id.
    unknown = client.get("/state/invalid-id")
    assert unknown.status_code == 404
def test_api_leaderboard_pagination(client):
    """With limit=2 the task leaderboard returns two entries, best score first."""
    # Seed the leaderboard with three submissions of decreasing score.
    for i, score in enumerate([0.9, 0.7, 0.5]):
        entry = {
            "agent_name": f"agent_{i}",
            "task_id": "bug_detection",
            "score": score,
            "seed": i,
        }
        client.post("/submit", json=entry)

    # Request a page capped at two entries.
    page = client.get("/leaderboard?task_id=bug_detection&limit=2")
    assert page.status_code == 200
    listing = page.json()
    assert len(listing["entries"]) == 2
    assert listing["total"] >= 3

    # The first returned entry must score at least as high as the second.
    top, runner_up = listing["entries"]
    assert top["score"] >= runner_up["score"]
def test_api_reset_robustness(client):
    """POST /reset tolerates missing or malformed bodies and honours query params."""
    # Each entry holds the keyword arguments for one request that is expected
    # to succeed and report the bug_detection task.
    fallback_requests = (
        {},  # no body at all
        {"json": {}},  # empty JSON body
        {  # malformed JSON (should not trigger 422)
            "content": "invalid json {",
            "headers": {"Content-Type": "application/json"},
        },
        {  # plain text body with an unexpected content type
            "content": "just some text",
            "headers": {"Content-Type": "text/plain"},
        },
    )
    for request_kwargs in fallback_requests:
        reply = client.post("/reset", **request_kwargs)
        assert reply.status_code == 200
        assert reply.json()["result"]["task_id"] == "bug_detection"

    # Query parameters select the task and the seed.
    overridden = client.post("/reset?task_id=security_audit&seed=100")
    assert overridden.status_code == 200
    result = overridden.json()["result"]
    assert result["task_id"] == "security_audit"
    assert result["seed"] == 100
|