Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from fastapi.testclient import TestClient | |
| import pytest | |
| from models import ( | |
| ActionType, | |
| IssueType, | |
| PythonReviewAction, | |
| Severity, | |
| ) | |
| from server.app import app | |
| from server.grading import grade_review | |
| from server.python_env_environment import PythonEnvironment | |
| from server.task_bank import load_task_bank | |
def _snippet_by_id(task_id: str, snippet_id: str):
    """Return the first snippet under ``task_id`` whose ``snippet_id`` matches.

    The task bank is reloaded via ``load_task_bank()`` on every call.

    Raises:
        StopIteration: if no snippet of that task carries ``snippet_id``.
    """
    task_snippets = load_task_bank()[task_id]
    matching = (entry for entry in task_snippets if entry.snippet_id == snippet_id)
    return next(matching)
def test_add_comment_requires_fields() -> None:
    """Constructing an ADD_COMMENT action with no other fields must raise."""
    # NOTE(review): ``Exception`` is very broad; narrow it once the concrete
    # validation error raised by PythonReviewAction is confirmed.
    expect_rejection = pytest.raises(Exception)
    with expect_rejection:
        PythonReviewAction(action_type=ActionType.ADD_COMMENT)
def test_approve_rejects_extra_fields() -> None:
    """Constructing an APPROVE action that also carries a comment must raise."""
    # NOTE(review): ``Exception`` is very broad; narrow it once the concrete
    # validation error raised by PythonReviewAction is confirmed.
    expect_rejection = pytest.raises(Exception)
    with expect_rejection:
        PythonReviewAction(action_type=ActionType.APPROVE, comment="looks good")
def test_easy_grader_rewards_required_issue_and_request_changes() -> None:
    """Grade a style comment followed by REQUEST_CHANGES on the first easy snippet.

    The two actions are converted into ``ReviewComment`` records (numbered
    from 1) and passed to ``grade_review``; the result must score above 0.35
    and report at least one required issue found.
    """
    from models import ReviewComment

    first_easy_snippet = load_task_bank()["task_easy"][0]
    style_finding = PythonReviewAction(
        action_type=ActionType.ADD_COMMENT,
        line_number=4,
        issue_type=IssueType.STYLE,
        severity=Severity.LOW,
        comment="Ambiguous variable name l violates PEP8 E741.",
    )
    verdict = PythonReviewAction(action_type=ActionType.REQUEST_CHANGES)

    # Mirror each action into the dict shape ReviewComment validates.
    review_comments = [
        ReviewComment.model_validate(
            {
                "step_index": position,
                "action_type": taken.action_type,
                "line_number": taken.line_number,
                "issue_type": taken.issue_type,
                "severity": taken.severity,
                "comment": taken.comment,
            }
        )
        for position, taken in enumerate([style_finding, verdict], start=1)
    ]

    outcome = grade_review(
        "task_easy",
        first_easy_snippet,
        review_comments,
        duplicate_comments=0,
    )

    assert outcome.score > 0.35
    assert outcome.required_found >= 1
def test_hard_grader_rewards_security_metadata() -> None:
    """Grade a single critical security comment on the first hard snippet.

    The comment targets line 2 and includes a suggestion; the graded result
    must score above 0.30 and count exactly one true positive.
    """
    from models import ReviewComment

    first_hard_snippet = load_task_bank()["task_hard"][0]
    injection_finding = ReviewComment(
        step_index=1,
        action_type=ActionType.ADD_COMMENT,
        line_number=2,
        issue_type=IssueType.SECURITY,
        severity=Severity.CRITICAL,
        comment="SQL injection risk. This is an OWASP injection issue because the query interpolates user input.",
        suggestion="Use a parameterized query with placeholders instead of string interpolation.",
    )

    outcome = grade_review(
        "task_hard",
        first_hard_snippet,
        [injection_finding],
        duplicate_comments=0,
    )

    assert outcome.score > 0.30
    assert outcome.true_positives == 1
def test_environment_step_updates_metrics() -> None:
    """Stepping with a comment copied from a required gold issue updates metrics.

    After resetting on ``task_easy``, the first required gold issue of the
    served snippet is replayed as an ADD_COMMENT action. The resulting
    observation must carry a reward, at least one true positive, and a
    non-empty ``matched_issue_ids`` on the latest history entry.
    """
    environment = PythonEnvironment()
    initial = environment.reset(task_id="task_easy").model_copy()

    served_snippet = _snippet_by_id("task_easy", initial.snippet_id)
    required_issues = (gold for gold in served_snippet.gold_issues if gold.required)
    target = next(required_issues)

    comment_action = PythonReviewAction(
        action_type=ActionType.ADD_COMMENT,
        line_number=target.line,
        issue_type=target.issue_type,
        severity=target.severity,
        comment=target.description,
    )
    after_step = environment.step(comment_action)

    assert after_step.reward is not None
    assert after_step.metrics.true_positives >= 1
    assert after_step.review_history[-1].matched_issue_ids
def test_environment_terminal_action_sets_done() -> None:
    """A REQUEST_CHANGES action taken right after reset must end the episode.

    The observation returned by ``step`` must have ``done`` set to ``True``
    and a non-negative ``metrics.current_score``.
    """
    env = PythonEnvironment()
    # Reset is called only to start the episode; its observation is not needed.
    env.reset(task_id="task_easy")

    result = env.step(PythonReviewAction(action_type=ActionType.REQUEST_CHANGES))

    assert result.done is True
    assert result.metrics.current_score >= 0.0
def test_api_smoke_endpoints() -> None:
    """Smoke-test the HTTP API: /reset, /step, /tasks, /metrics, /health, /schema.

    Requests are issued in that order against a single ``TestClient``. The
    /step payload is built from the first required gold issue of the snippet
    that /reset returned.
    """
    http = TestClient(app)

    # /reset starts an episode on the easy task.
    reset_reply = http.post("/reset", json={"task_id": "task_easy"})
    assert reset_reply.status_code == 200
    reset_body = reset_reply.json()
    assert reset_body["observation"]["task_id"] == "task_easy"

    # Replay a required gold issue of the served snippet through /step.
    served_snippet = _snippet_by_id("task_easy", reset_body["observation"]["snippet_id"])
    target = next(gold for gold in served_snippet.gold_issues if gold.required)
    action_payload = {
        "action_type": "ADD_COMMENT",
        "line_number": target.line,
        "issue_type": target.issue_type.value,
        "severity": target.severity.value,
        "comment": target.description,
    }
    step_reply = http.post("/step", json={"action": action_payload})
    assert step_reply.status_code == 200
    assert step_reply.json()["observation"]["metrics"]["true_positives"] >= 1

    # Read-only endpoints.
    tasks_reply = http.get("/tasks")
    assert tasks_reply.status_code == 200
    assert len(tasks_reply.json()["tasks"]) == 3

    metrics_reply = http.get("/metrics")
    assert metrics_reply.status_code == 200
    assert "metrics" in metrics_reply.json()

    health_reply = http.get("/health")
    assert health_reply.status_code == 200
    assert health_reply.json()["status"] == "ok"

    schema_reply = http.get("/schema")
    assert schema_reply.status_code == 200
    assert "action" in schema_reply.json()