File size: 5,786 Bytes
703aa57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# tests/test_api.py
"""Integration tests for the FastAPI endpoints."""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), "server"))

import pytest

# These tests require fastapi and httpx. If either import below fails, the
# whole module is skipped rather than erroring at collection time. The import
# error message is kept so the skip reason names what was actually missing
# (a failure while importing server.app would otherwise be misreported).
try:
    from fastapi.testclient import TestClient
    from server.app import app
except ImportError as exc:
    HAS_DEPS = False
    _IMPORT_ERROR = str(exc)
else:
    HAS_DEPS = True
    _IMPORT_ERROR = ""

pytestmark = pytest.mark.skipif(
    not HAS_DEPS,
    reason=f"FastAPI/httpx not installed: {_IMPORT_ERROR}",
)


@pytest.fixture
def client():
    """Yield a ``TestClient`` bound to the application under test.

    The client is entered as a context manager so the application's
    startup/shutdown (lifespan) handlers run around each test and the client
    is closed once the test finishes.
    """
    with TestClient(app) as test_client:
        yield test_client


class TestHealthEndpoint:
    """Checks for the ``/health`` route."""

    def test_health_returns_ok(self, client):
        """GET /health answers 200 with a status of "ok" or "healthy"."""
        response = client.get("/health")
        assert response.status_code == 200
        payload = response.json()
        reported_status = payload.get("status")
        assert reported_status in ("ok", "healthy")


class TestTaskEndpoints:
    """Checks for the task routes under ``/tasks``."""

    def test_list_tasks(self, client):
        """GET /tasks returns exactly three tasks: easy, medium and hard."""
        response = client.get("/tasks")
        assert response.status_code == 200
        task_list = response.json()
        assert len(task_list) == 3
        task_ids = [entry["id"] for entry in task_list]
        for expected_id in ("easy", "medium", "hard"):
            assert expected_id in task_ids

    def test_get_specific_task(self, client):
        """GET /tasks/easy returns the task whose id is "easy"."""
        response = client.get("/tasks/easy")
        assert response.status_code == 200
        task = response.json()
        assert task["id"] == "easy"

    def test_get_nonexistent_task(self, client):
        """An unknown task id yields a 404."""
        response = client.get("/tasks/impossible")
        assert response.status_code == 404


class TestResetEndpoint:
    """Checks for ``POST /reset``."""

    def test_reset_returns_observation(self, client):
        """A reset returns an observation, a session id and ``done`` False."""
        r = client.post("/reset", json={"task_id": "easy"})
        assert r.status_code == 200
        data = r.json()
        assert "observation" in data
        assert "session_id" in data
        assert data["done"] is False

    def test_reset_with_empty_body(self, client):
        """A reset with no task_id in the body is still accepted."""
        r = client.post("/reset", json={})
        assert r.status_code == 200

    def test_reset_returns_bug_report(self, client):
        """The reset observation carries a bug report that has a title."""
        r = client.post("/reset", json={"task_id": "medium"})
        # Check the status first so a failed reset is reported as such rather
        # than as a KeyError while digging into the response body.
        assert r.status_code == 200
        data = r.json()
        obs = data["observation"]
        assert "bug_report" in obs
        assert "title" in obs["bug_report"]


class TestStepEndpoint:
    """Checks for ``POST /step``: investigation actions and final submission."""

    @staticmethod
    def _start_episode(client, task_id):
        """Reset the environment for *task_id* and return the new session id.

        Asserts the reset succeeded so that a broken reset fails here, not as
        a KeyError on the missing ``session_id``.
        """
        r = client.post("/reset", json={"task_id": task_id})
        assert r.status_code == 200
        return r.json()["session_id"]

    @staticmethod
    def _step(client, session_id, action):
        """POST *action* for *session_id*, assert a 200 and return the JSON body."""
        r = client.post("/step", json={
            "session_id": session_id,
            "action": action,
        })
        assert r.status_code == 200
        return r.json()

    def test_investigation_step(self, client):
        """An investigation action does not end the episode."""
        session_id = self._start_episode(client, "easy")

        data = self._step(client, session_id, {"action_type": "read_body"})
        assert data["done"] is False

    def test_submit_step(self, client):
        """A submit action ends the episode with a reward strictly in (0, 1)."""
        session_id = self._start_episode(client, "easy")

        data = self._step(client, session_id, {
            "action_type": "submit",
            "priority": "P0",
            "labels": ["bug"],
            "assigned_team": "backend",
        })
        assert data["done"] is True
        assert 0 < data["reward"] < 1

    def test_full_episode_flow(self, client):
        """Reset, two investigation steps, then a submit that ends the episode."""
        session_id = self._start_episode(client, "hard")

        # Investigate: read body
        data = self._step(client, session_id, {"action_type": "read_body"})
        assert data["done"] is False

        # Investigate: read comments
        data = self._step(client, session_id, {"action_type": "read_comments"})
        assert data["done"] is False

        # Submit triage
        data = self._step(client, session_id, {
            "action_type": "submit",
            "priority": "P0",
            "labels": ["bug", "security"],
            "assigned_team": "security",
            "milestone": "hotfix",
            "reasoning": "Critical security vulnerability in production",
        })
        assert data["done"] is True
        assert 0 < data["reward"] < 1

    def test_backward_compatible_no_session(self, client):
        """Old-style requests without session_id should still work."""
        r = client.post("/reset", json={"task_id": "easy"})
        assert r.status_code == 200

        # Deliberately omits both session_id and action_type to exercise the
        # legacy request shape, so the helpers above are not used here.
        r = client.post("/step", json={
            "action": {
                "priority": "P0",
                "labels": ["bug"],
            },
        })
        assert r.status_code == 200


class TestStateEndpoint:
    """Checks for ``GET /state``."""

    def test_state_returns_data(self, client):
        """After a reset, /state exposes ``current_task`` and ``step_count``."""
        reset = client.post("/reset", json={"task_id": "easy"})
        # Confirm the setup call worked so a failure below is attributable to
        # /state itself and not to a reset that never happened.
        assert reset.status_code == 200

        r = client.get("/state")
        assert r.status_code == 200
        data = r.json()
        assert "current_task" in data
        assert "step_count" in data


class TestLeaderboard:
    """Checks for the leaderboard read and submit routes."""

    def test_get_empty_leaderboard(self, client):
        """GET /leaderboard answers 200 with a JSON list."""
        response = client.get("/leaderboard")
        assert response.status_code == 200
        entries = response.json()
        assert isinstance(entries, list)

    def test_submit_to_leaderboard(self, client):
        """A submission is acknowledged as "submitted" and given a rank."""
        submission = {
            "agent_name": "test-agent",
            "model": "test-model",
            "scores": {"easy": 0.9, "medium": 0.7, "hard": 0.5},
            "avg_score": 0.7,
        }
        response = client.post("/leaderboard/submit", json=submission)
        assert response.status_code == 200
        result = response.json()
        assert result["status"] == "submitted"
        assert "rank" in result