Spaces:

ArshVerma
/

CodeLens

Sleeping

App Files Files Community

CodeLens / tests /test_api.py

ArshVerma

Implement Maximum Robustness for /reset to bypass body validation errors

8c875ce 3 months ago

Raw

History Blame Contribute Delete

6.16 kB

	import pytest
	from fastapi.testclient import TestClient
	from app import app
	from codelens_env.models import TaskId, ActionType, Category, Severity, Verdict

	def test_api_health(client):
	    """GET /health answers 200 with status "ok" and env_ready set to True."""
	    health = client.get("/health")
	    assert health.status_code == 200
	    payload = health.json()
	    assert payload["status"] == "ok"
	    assert payload["env_ready"] is True

	def test_api_workflow(client):
	    """Drive one episode through reset, a single comment step, and result lookup."""
	    # Reset: open an episode and keep its id for the following calls.
	    started = client.post("/reset", json={"task_id": "bug_detection", "seed": 1})
	    assert started.status_code == 200
	    started_body = started.json()
	    episode_id = started_body["episode_id"]
	    assert "observation" in started_body["result"]

	    # Step: post one comment action against the episode.
	    comment_action = {"action_type": "comment", "body": "Starting review"}
	    stepped = client.post(f"/step/{episode_id}", json=comment_action)
	    assert stepped.status_code == 200
	    assert "observation" in stepped.json()

	    # Result: the episode's score must be retrievable and non-negative.
	    outcome = client.get(f"/result/{episode_id}")
	    assert outcome.status_code == 200
	    assert outcome.json()["final_score"] >= 0

	def test_api_leaderboard(client):
	    """A submitted score is accepted and shows up first on its task's leaderboard."""
	    # Submit one score for the bug_detection task.
	    submission = {
	        "agent_name": "test_agent",
	        "task_id": "bug_detection",
	        "score": 0.95,
	        "seed": 42,
	    }
	    submitted = client.post("/submit", json=submission)
	    assert submitted.status_code == 200
	    assert submitted.json()["status"] == "submitted"

	    # Fetch the full leaderboard and inspect the bug_detection section.
	    board = client.get("/leaderboard")
	    assert board.status_code == 200
	    entries = board.json()["bug_detection"]["entries"]
	    assert len(entries) > 0
	    top_entry = entries[0]
	    assert top_entry["agent_name"] == "test_agent"

	def test_api_invalid_episode(client):
	    """Stepping an episode id that was never created yields 404."""
	    comment_action = {"action_type": "comment", "body": "hello"}
	    missing = client.post("/step/nonexistent-id", json=comment_action)
	    assert missing.status_code == 404

	def test_api_health_fields(client):
	    """The /health payload carries the active_episodes, auth_enabled and env keys."""
	    payload = client.get("/health").json()
	    for field in ("active_episodes", "auth_enabled", "env"):
	        assert field in payload

	def test_api_reset_invalid_task(client):
	    """An unknown task_id on /reset still returns 200 and reports bug_detection."""
	    bad_request = {"task_id": "invalid_task", "seed": 0}
	    reply = client.post("/reset", json=bad_request)
	    assert reply.status_code == 200
	    # The response is expected to name the fallback task, not the requested one.
	    reported_task = reply.json()["result"]["task_id"]
	    assert reported_task == "bug_detection"

	def test_api_step_invalid_action_type(client):
	    """A step whose action_type is not recognised is rejected with 422."""
	    started = client.post("/reset", json={"task_id": "bug_detection", "seed": 0})
	    episode_id = started.json()["episode_id"]
	    bad_action = {"action_type": "not_valid", "body": "x"}
	    rejected = client.post(f"/step/{episode_id}", json=bad_action)
	    assert rejected.status_code == 422

	def test_api_result_after_completion(client):
	    """Result endpoint should return persisted data for completed episodes."""
	    reset_resp = client.post("/reset", json={"task_id": "bug_detection", "seed": 0})
	    assert reset_resp.status_code == 200
	    episode_id = reset_resp.json()["episode_id"]

	    # Complete the episode and verify it actually finished; otherwise the
	    # result check below would not be exercising the "after completion" path.
	    step_resp = client.post(f"/step/{episode_id}", json={
	        "action_type": "approve", "body": "LGTM", "verdict": "lgtm"
	    })
	    assert step_resp.status_code == 200
	    assert step_resp.json()["done"] is True

	    # Result must be available
	    result_resp = client.get(f"/result/{episode_id}")
	    assert result_resp.status_code == 200
	    assert result_resp.json()["final_score"] >= 0

	def test_api_stats_endpoint(client):
	    """GET /stats answers 200 and its payload includes total_episodes."""
	    stats = client.get("/stats")
	    assert stats.status_code == 200
	    stats_body = stats.json()
	    assert "total_episodes" in stats_body

	@pytest.mark.parametrize("task_id", ["bug_detection", "security_audit", "architectural_review"])
	def test_api_full_workflow_all_tasks(client, task_id):
	    """For each task, reset succeeds and a single approve step ends the episode."""
	    started = client.post("/reset", json={"task_id": task_id, "seed": 1})
	    assert started.status_code == 200
	    episode_id = started.json()["episode_id"]

	    approve_action = {"action_type": "approve", "body": "LGTM", "verdict": "lgtm"}
	    finished = client.post(f"/step/{episode_id}", json=approve_action)
	    assert finished.status_code == 200
	    assert finished.json()["done"] is True

	def test_api_state_endpoint(client):
	    """GET /state/<id> returns the episode state; an unknown id gives 404."""
	    started = client.post("/reset", json={"task_id": "bug_detection", "seed": 1})
	    episode_id = started.json()["episode_id"]

	    # Known episode: the state payload is flat, with no "observation" wrapper.
	    known = client.get(f"/state/{episode_id}")
	    assert known.status_code == 200
	    state = known.json()
	    assert "observation" not in state
	    assert state["task_id"] == "bug_detection"
	    assert "max_steps" in state

	    # Unknown episode id.
	    unknown = client.get("/state/invalid-id")
	    assert unknown.status_code == 404

	def test_api_leaderboard_pagination(client):
	    """The per-task leaderboard honours `limit`, reports a total, and is ordered."""
	    # Submit 3 entries; check each one is accepted so a broken setup fails
	    # here rather than as a misleading entry-count mismatch further down.
	    for i, score in enumerate([0.9, 0.7, 0.5]):
	        submit_resp = client.post("/submit", json={
	            "agent_name": f"agent_{i}", "task_id": "bug_detection",
	            "score": score, "seed": i
	        })
	        assert submit_resp.status_code == 200

	    # Test limit
	    resp = client.get("/leaderboard?task_id=bug_detection&limit=2")
	    assert resp.status_code == 200
	    data = resp.json()
	    assert len(data["entries"]) == 2
	    assert data["total"] >= 3

	    # Test ordering (best first)
	    assert data["entries"][0]["score"] >= data["entries"][1]["score"]

	def test_api_reset_robustness(client):
	    """POST /reset answers 200 for missing or malformed bodies and reads query params."""
	    # Requests that must all succeed and report the bug_detection task.
	    fallback_requests = [
	        # 1. No body at all
	        {},
	        # 2. Empty JSON body
	        {"json": {}},
	        # 3. Invalid JSON (should not trigger 422 now)
	        {"content": "invalid json {", "headers": {"Content-Type": "application/json"}},
	        # 4. Plain text body (unexpected header, should still pass)
	        {"content": "just some text", "headers": {"Content-Type": "text/plain"}},
	    ]
	    for request_kwargs in fallback_requests:
	        reply = client.post("/reset", **request_kwargs)
	        assert reply.status_code == 200
	        assert reply.json()["result"]["task_id"] == "bug_detection"

	    # 5. Query params override
	    overridden = client.post("/reset?task_id=security_audit&seed=100")
	    assert overridden.status_code == 200
	    result = overridden.json()["result"]
	    assert result["task_id"] == "security_audit"
	    assert result["seed"] == 100