| from sqlmodel import Session |
| from codelens_env.database import save_episode, get_episode, submit_leaderboard, get_leaderboard_db, get_stats |
| from codelens_env.models import EpisodeResult, TaskId, ActionRecord, ActionType |
|
|
| def make_result(episode_id="test-ep-1", score=0.85): |
| return EpisodeResult( |
| episode_id=episode_id, |
| task_id=TaskId.BUG_DETECTION, |
| scenario_hash="bug_001", |
| seed=0, |
| final_score=score, |
| steps_taken=3, |
| issues_found=1, |
| issues_total=1, |
| noise_penalties=0, |
| terminated_reason="terminal_action", |
| history=[ActionRecord(action_type=ActionType.APPROVE, body="LGTM")] |
| ) |
|
|
| def test_save_and_get_episode(session): |
| result = make_result() |
| save_episode(session, result) |
| record = get_episode(session, "test-ep-1") |
| assert record is not None |
| assert record.final_score == 0.85 |
| assert record.scenario_hash == "bug_001" |
|
|
| def test_get_nonexistent_episode(session): |
| record = get_episode(session, "does-not-exist") |
| assert record is None |
|
|
| def test_episode_history_serialized(session): |
| result = make_result() |
| save_episode(session, result) |
| record = get_episode(session, result.episode_id) |
| import json |
| history = json.loads(record.history_json) |
| assert len(history) == 1 |
| assert history[0]["action_type"] == "approve" |
|
|
| def test_leaderboard_submit_and_rank(session): |
| rank = submit_leaderboard(session, "agent_a", "bug_detection", 0.9, 0) |
| assert rank == 1 |
| rank2 = submit_leaderboard(session, "agent_b", "bug_detection", 0.7, 1) |
| assert rank2 == 2 |
|
|
| def test_leaderboard_ordering(session): |
| submit_leaderboard(session, "low", "security_audit", 0.3, 0) |
| submit_leaderboard(session, "high", "security_audit", 0.95, 1) |
| submit_leaderboard(session, "mid", "security_audit", 0.6, 2) |
| entries, total = get_leaderboard_db(session, "security_audit") |
| assert total == 3 |
| assert entries[0].agent_name == "high" |
| assert entries[0].score == 0.95 |
|
|
| def test_get_stats_empty(session): |
| stats = get_stats(session) |
| assert stats["total_episodes"] == 0 |
|
|
| def test_get_stats_populated(session): |
| save_episode(session, make_result("ep1", 0.9)) |
| save_episode(session, make_result("ep2", 0.5)) |
| stats = get_stats(session) |
| assert stats["total_episodes"] == 2 |
| assert abs(stats["avg_score"] - 0.7) < 0.001 |
|
|