Spaces:
Sleeping
Sleeping
File size: 5,232 Bytes
d25ab77 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | from __future__ import annotations
from fastapi.testclient import TestClient
import pytest
from models import (
ActionType,
IssueType,
PythonReviewAction,
Severity,
)
from server.app import app
from server.grading import grade_review
from server.python_env_environment import PythonEnvironment
from server.task_bank import load_task_bank
def _snippet_by_id(task_id: str, snippet_id: str):
    """Return the first snippet listed under ``task_id`` with this ``snippet_id``.

    The task bank is loaded on every call. Because ``next`` is used without
    a default, ``StopIteration`` is raised when no snippet matches.
    """
    candidates = load_task_bank()[task_id]
    matching = (entry for entry in candidates if entry.snippet_id == snippet_id)
    return next(matching)
def test_add_comment_requires_fields() -> None:
    """Building an ADD_COMMENT action with no other fields must raise."""
    expect_failure = pytest.raises(Exception)
    with expect_failure:
        PythonReviewAction(action_type=ActionType.ADD_COMMENT)
def test_approve_rejects_extra_fields() -> None:
    """Building an APPROVE action that also carries a comment must raise."""
    invalid_kwargs = {
        "action_type": ActionType.APPROVE,
        "comment": "looks good",
    }
    with pytest.raises(Exception):
        PythonReviewAction(**invalid_kwargs)
def test_easy_grader_rewards_required_issue_and_request_changes() -> None:
    """Grade a style comment followed by REQUEST_CHANGES on the first easy snippet.

    The two actions are converted into ``ReviewComment`` records (numbered
    from step 1) and passed to ``grade_review``; the result must score above
    0.35 and report at least one required issue found.
    """
    from models import ReviewComment

    easy_snippet = load_task_bank()["task_easy"][0]

    style_comment = PythonReviewAction(
        action_type=ActionType.ADD_COMMENT,
        line_number=4,
        issue_type=IssueType.STYLE,
        severity=Severity.LOW,
        comment="Ambiguous variable name l violates PEP8 E741.",
    )
    verdict = PythonReviewAction(action_type=ActionType.REQUEST_CHANGES)
    actions = [style_comment, verdict]

    # Flatten each action into the plain-dict shape that is then validated
    # into a ReviewComment.
    raw_comments = [
        {
            "step_index": position,
            "action_type": act.action_type,
            "line_number": act.line_number,
            "issue_type": act.issue_type,
            "severity": act.severity,
            "comment": act.comment,
        }
        for position, act in enumerate(actions, start=1)
    ]
    review_comments = [ReviewComment.model_validate(raw) for raw in raw_comments]

    outcome = grade_review(
        "task_easy",
        easy_snippet,
        review_comments,
        duplicate_comments=0,
    )

    assert outcome.score > 0.35
    assert outcome.required_found >= 1
def test_hard_grader_rewards_security_metadata() -> None:
    """Grade one critical security comment against the first hard snippet.

    The comment targets line 2 and includes a suggestion; the graded result
    must score above 0.30 and count exactly one true positive.
    """
    from models import ReviewComment

    hard_snippet = load_task_bank()["task_hard"][0]
    security_comment = ReviewComment(
        step_index=1,
        action_type=ActionType.ADD_COMMENT,
        line_number=2,
        issue_type=IssueType.SECURITY,
        severity=Severity.CRITICAL,
        comment="SQL injection risk. This is an OWASP injection issue because the query interpolates user input.",
        suggestion="Use a parameterized query with placeholders instead of string interpolation.",
    )

    outcome = grade_review(
        "task_hard",
        hard_snippet,
        [security_comment],
        duplicate_comments=0,
    )

    assert outcome.score > 0.30
    assert outcome.true_positives == 1
def test_environment_step_updates_metrics() -> None:
    """Commenting on a required gold issue must be reflected in the step result.

    After resetting to ``task_easy``, the test looks up the served snippet,
    takes its first required gold issue, and submits an ADD_COMMENT action
    copied from that issue. The returned observation must carry a reward,
    at least one true positive, and a last history entry with matched ids.
    """
    env = PythonEnvironment()
    first_observation = env.reset(task_id="task_easy").model_copy()

    served_snippet = _snippet_by_id("task_easy", first_observation.snippet_id)
    required_issue = next(gold for gold in served_snippet.gold_issues if gold.required)

    matching_comment = PythonReviewAction(
        action_type=ActionType.ADD_COMMENT,
        line_number=required_issue.line,
        issue_type=required_issue.issue_type,
        severity=required_issue.severity,
        comment=required_issue.description,
    )
    after_step = env.step(matching_comment)

    assert after_step.reward is not None
    assert after_step.metrics.true_positives >= 1
    assert after_step.review_history[-1].matched_issue_ids
def test_environment_terminal_action_sets_done() -> None:
    """A REQUEST_CHANGES action taken right after reset must end the episode.

    The observation returned by ``env.step`` must have ``done`` set to
    ``True`` and a non-negative ``metrics.current_score``.
    """
    env = PythonEnvironment()
    # The reset observation is not inspected here; the call only starts the
    # episode, so its return value is deliberately not bound to a name.
    env.reset(task_id="task_easy")

    result = env.step(PythonReviewAction(action_type=ActionType.REQUEST_CHANGES))

    assert result.done is True
    assert result.metrics.current_score >= 0.0
def test_api_smoke_endpoints() -> None:
    """Smoke-test the HTTP endpoints of ``app`` through a ``TestClient``.

    Requests are issued in a fixed order: POST /reset, POST /step, then
    GET /tasks, /metrics, /health and /schema. Each must return 200 and the
    payload fields checked below.
    """
    http = TestClient(app)

    # Start an episode and confirm the requested task was served.
    reset_reply = http.post("/reset", json={"task_id": "task_easy"})
    assert reset_reply.status_code == 200
    reset_body = reset_reply.json()
    assert reset_body["observation"]["task_id"] == "task_easy"

    # Build a comment that mirrors a required gold issue of the served snippet.
    served_snippet = _snippet_by_id("task_easy", reset_body["observation"]["snippet_id"])
    required_issue = next(gold for gold in served_snippet.gold_issues if gold.required)
    step_payload = {
        "action": {
            "action_type": "ADD_COMMENT",
            "line_number": required_issue.line,
            "issue_type": required_issue.issue_type.value,
            "severity": required_issue.severity.value,
            "comment": required_issue.description,
        }
    }
    step_reply = http.post("/step", json=step_payload)
    assert step_reply.status_code == 200
    assert step_reply.json()["observation"]["metrics"]["true_positives"] >= 1

    tasks_reply = http.get("/tasks")
    assert tasks_reply.status_code == 200
    assert len(tasks_reply.json()["tasks"]) == 3

    metrics_reply = http.get("/metrics")
    assert metrics_reply.status_code == 200
    assert "metrics" in metrics_reply.json()

    health_reply = http.get("/health")
    assert health_reply.status_code == 200
    assert health_reply.json()["status"] == "ok"

    schema_reply = http.get("/schema")
    assert schema_reply.status_code == 200
    assert "action" in schema_reply.json()
|