File size: 5,232 Bytes
d25ab77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from __future__ import annotations

from fastapi.testclient import TestClient
import pytest

from models import (
    ActionType,
    IssueType,
    PythonReviewAction,
    Severity,
)
from server.app import app
from server.grading import grade_review
from server.python_env_environment import PythonEnvironment
from server.task_bank import load_task_bank


def _snippet_by_id(task_id: str, snippet_id: str):
    """Return the task-bank item under ``task_id`` whose ``snippet_id`` matches.

    Args:
        task_id: Key used to index the object returned by ``load_task_bank()``.
        snippet_id: Value compared against each item's ``snippet_id`` attribute.

    Returns:
        The first item in ``load_task_bank()[task_id]`` with a matching
        ``snippet_id``.

    Raises:
        LookupError: If no item under ``task_id`` carries ``snippet_id``.
            An unknown ``task_id`` fails earlier, with whatever error the
            subscript on the task bank raises (presumably ``KeyError``).
    """
    for item in load_task_bank()[task_id]:
        if item.snippet_id == snippet_id:
            return item
    # A bare ``next()`` over a generator would surface an uninformative
    # StopIteration here; name the missing snippet instead.
    raise LookupError(f"no snippet {snippet_id!r} found for task {task_id!r}")


def test_add_comment_requires_fields() -> None:
    """Building an ADD_COMMENT action with only ``action_type`` must raise."""
    # NOTE(review): ``Exception`` is broad; narrowing to the model's own
    # validation error would be stricter — confirm which type is raised.
    with pytest.raises(Exception):
        incomplete_fields = {"action_type": ActionType.ADD_COMMENT}
        PythonReviewAction(**incomplete_fields)


def test_approve_rejects_extra_fields() -> None:
    """Building an APPROVE action that also carries a ``comment`` must raise."""
    # NOTE(review): ``Exception`` is broad; narrowing to the model's own
    # validation error would be stricter — confirm which type is raised.
    with pytest.raises(Exception):
        approve_with_comment = {
            "action_type": ActionType.APPROVE,
            "comment": "looks good",
        }
        PythonReviewAction(**approve_with_comment)


def test_easy_grader_rewards_required_issue_and_request_changes() -> None:
    """Grade a style comment plus REQUEST_CHANGES on the first ``task_easy`` snippet.

    The two actions are flattened into plain dicts (numbered from step 1),
    validated into ``ReviewComment`` objects and passed to ``grade_review``.
    The result must score above 0.35 and report at least one required issue
    found.
    """
    first_easy_snippet = load_task_bank()["task_easy"][0]

    style_comment = PythonReviewAction(
        action_type=ActionType.ADD_COMMENT,
        line_number=4,
        issue_type=IssueType.STYLE,
        severity=Severity.LOW,
        comment="Ambiguous variable name l violates PEP8 E741.",
    )
    request_changes = PythonReviewAction(action_type=ActionType.REQUEST_CHANGES)
    actions = [style_comment, request_changes]

    # One raw record per action, in the order the actions were taken.
    raw_comments = [
        {
            "step_index": position,
            "action_type": taken.action_type,
            "line_number": taken.line_number,
            "issue_type": taken.issue_type,
            "severity": taken.severity,
            "comment": taken.comment,
        }
        for position, taken in enumerate(actions, start=1)
    ]
    from models import ReviewComment

    validated_comments = [ReviewComment.model_validate(raw) for raw in raw_comments]
    outcome = grade_review(
        "task_easy",
        first_easy_snippet,
        validated_comments,
        duplicate_comments=0,
    )
    assert outcome.score > 0.35
    assert outcome.required_found >= 1



def test_hard_grader_rewards_security_metadata() -> None:
    """Grade one critical security comment on the first ``task_hard`` snippet.

    The comment targets line 2 and includes a suggestion. The result must
    score above 0.30 and count exactly one true positive.
    """
    first_hard_snippet = load_task_bank()["task_hard"][0]
    from models import ReviewComment

    comment_fields = {
        "step_index": 1,
        "action_type": ActionType.ADD_COMMENT,
        "line_number": 2,
        "issue_type": IssueType.SECURITY,
        "severity": Severity.CRITICAL,
        "comment": "SQL injection risk. This is an OWASP injection issue because the query interpolates user input.",
        "suggestion": "Use a parameterized query with placeholders instead of string interpolation.",
    }
    security_comment = ReviewComment(**comment_fields)

    outcome = grade_review(
        "task_hard",
        first_hard_snippet,
        [security_comment],
        duplicate_comments=0,
    )
    assert outcome.score > 0.30
    assert outcome.true_positives == 1


def test_environment_step_updates_metrics() -> None:
    """Stepping with a comment copied from a required gold issue updates metrics.

    After ``reset`` on ``task_easy``, the served snippet is looked up, its
    first required gold issue is echoed back as an ADD_COMMENT action, and the
    resulting observation must carry a reward, at least one true positive and
    a non-empty ``matched_issue_ids`` on the latest history entry.
    """
    environment = PythonEnvironment()
    initial = environment.reset(task_id="task_easy").model_copy()
    served_snippet = _snippet_by_id("task_easy", initial.snippet_id)
    required_issue = next(gold for gold in served_snippet.gold_issues if gold.required)

    # Echo the gold issue's own fields so the comment should match it.
    matching_comment = PythonReviewAction(
        action_type=ActionType.ADD_COMMENT,
        line_number=required_issue.line,
        issue_type=required_issue.issue_type,
        severity=required_issue.severity,
        comment=required_issue.description,
    )
    after_step = environment.step(matching_comment)

    assert after_step.reward is not None
    assert after_step.metrics.true_positives >= 1
    latest_entry = after_step.review_history[-1]
    assert latest_entry.matched_issue_ids


def test_environment_terminal_action_sets_done() -> None:
    """A REQUEST_CHANGES action right after ``reset`` must end the episode.

    The returned observation must have ``done`` set to ``True`` and a
    non-negative ``metrics.current_score``.
    """
    env = PythonEnvironment()
    # Only the reset side effect is needed; the returned observation is unused.
    env.reset(task_id="task_easy")
    result = env.step(PythonReviewAction(action_type=ActionType.REQUEST_CHANGES))
    assert result.done is True
    assert result.metrics.current_score >= 0.0


def test_api_smoke_endpoints() -> None:
    """Smoke-test the HTTP API: /reset, /step, /tasks, /metrics, /health, /schema.

    Each endpoint must answer 200. The test also checks that /reset echoes the
    requested task, that a /step built from a required gold issue yields at
    least one true positive, that /tasks lists three tasks, and that the
    remaining endpoints return their expected top-level keys.
    """
    http = TestClient(app)

    # Start an episode on the easy task and find out which snippet was served.
    reset_response = http.post("/reset", json={"task_id": "task_easy"})
    assert reset_response.status_code == 200
    reset_observation = reset_response.json()["observation"]
    assert reset_observation["task_id"] == "task_easy"
    served_snippet = _snippet_by_id("task_easy", reset_observation["snippet_id"])
    required_issue = next(gold for gold in served_snippet.gold_issues if gold.required)

    # Submit a comment that mirrors the gold issue; enums go over the wire as values.
    action_body = {
        "action_type": "ADD_COMMENT",
        "line_number": required_issue.line,
        "issue_type": required_issue.issue_type.value,
        "severity": required_issue.severity.value,
        "comment": required_issue.description,
    }
    step_response = http.post("/step", json={"action": action_body})
    assert step_response.status_code == 200
    step_metrics = step_response.json()["observation"]["metrics"]
    assert step_metrics["true_positives"] >= 1

    tasks_response = http.get("/tasks")
    assert tasks_response.status_code == 200
    listed_tasks = tasks_response.json()["tasks"]
    assert len(listed_tasks) == 3

    metrics_response = http.get("/metrics")
    assert metrics_response.status_code == 200
    assert "metrics" in metrics_response.json()

    health_response = http.get("/health")
    assert health_response.status_code == 200
    health_body = health_response.json()
    assert health_body["status"] == "ok"

    schema_response = http.get("/schema")
    assert schema_response.status_code == 200
    assert "action" in schema_response.json()