File size: 4,861 Bytes
557930c
 
85b7ac8
 
498f684
85b7ac8
 
 
 
804f70e
85b7ac8
 
804f70e
 
 
 
 
 
 
557930c
 
85b7ac8
 
557930c
85b7ac8
 
557930c
 
 
 
 
 
 
85b7ac8
 
557930c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""Endpoint tests for the FastAPI server."""

from fastapi.testclient import TestClient

from server.app import app

# Single module-level test client bound to the FastAPI app; every test in this
# file issues its requests through it.
# NOTE(review): because the client (and presumably the app's episode state) is
# shared, tests may observe state left behind by earlier tests — confirm.
client = TestClient(app)


def test_root_landing_page():
    """GET / answers 200 with an HTML body that mentions "CI/CD"."""
    landing = client.get("/")
    assert landing.status_code == 200

    content_type = landing.headers.get("content-type", "")
    assert "text/html" in content_type
    assert "CI/CD" in landing.text


def test_health_endpoint():
    """GET /health answers 200 and reports a "healthy" status."""
    health = client.get("/health")
    assert health.status_code == 200
    assert health.json()["status"] == "healthy"


def test_info_returns_all_tasks():
    """GET /info lists at least six tasks and includes both space descriptions."""
    reply = client.get("/info")
    assert reply.status_code == 200

    payload = reply.json()
    assert len(payload.get("tasks", [])) >= 6
    # Both space descriptions must be present as top-level keys.
    for required_key in ("action_space", "observation_space"):
        assert required_key in payload


def test_tasks_endpoint():
    """GET /tasks lists at least six tasks, including two known task ids."""
    reply = client.get("/tasks")
    assert reply.status_code == 200

    payload = reply.json()
    assert len(payload.get("tasks", [])) >= 6

    listed_ids = [entry["id"] for entry in payload["tasks"]]
    for expected_id in ("dockerfile_syntax", "multi_stage_pipeline_matrix"):
        assert expected_id in listed_ids


def test_reset_default():
    """POST /reset with an empty body starts an episode at step 0 with issues."""
    reply = client.post("/reset", json={})
    assert reply.status_code == 200

    payload = reply.json()
    assert "observation" in payload

    observation = payload["observation"]
    assert observation["total_issues"] >= 1
    assert observation["step_number"] == 0


def test_reset_specific_task():
    """POST /reset with an explicit task/scenario echoes that task id back."""
    request_body = {"task_id": "dockerfile_syntax", "scenario_id": "typo_filename"}
    reply = client.post("/reset", json=request_body)
    assert reply.status_code == 200

    observation = reply.json()["observation"]
    assert observation["task_id"] == "dockerfile_syntax"


def test_reset_with_seed():
    """Two resets with the same seed select the same task.

    Both responses are status-checked first so that a rejected reset fails
    with a clear assertion instead of a KeyError while digging into the JSON.
    """
    resp1 = client.post("/reset", json={"seed": 99})
    resp2 = client.post("/reset", json={"seed": 99})
    assert resp1.status_code == 200
    assert resp2.status_code == 200

    first_task = resp1.json()["observation"]["task_id"]
    second_task = resp2.json()["observation"]["task_id"]
    assert first_task == second_task


def test_reset_invalid_task():
    """POST /reset with an unknown task id is rejected with HTTP 400."""
    bad_request = {"task_id": "nonexistent_task"}
    rejected = client.post("/reset", json=bad_request)
    assert rejected.status_code == 400


def test_state_without_reset():
    """GET /state returns the current observation and the episode reward.

    An episode is started explicitly here so the test does not depend on
    some other test having already reset the shared client; it therefore
    passes when run on its own or in a different order. The function name
    is kept unchanged for continuity with existing test selections.
    """
    reset = client.post("/reset", json={})
    assert reset.status_code == 200

    resp = client.get("/state")
    assert resp.status_code == 200
    data = resp.json()
    assert "observation" in data
    assert "episode_reward" in data


def test_step_edit_file():
    """An edit_file step correcting the misspelled filename earns reward."""
    client.post(
        "/reset",
        json={"task_id": "dockerfile_syntax", "scenario_id": "typo_filename"},
    )

    # Replace the misspelled requirements filename in the Dockerfile.
    typo_fix = {
        "file_path": "Dockerfile",
        "old_content": "COPY requirments.txt .",
        "new_content": "COPY requirements.txt .",
    }
    edit_action = {"action_type": "edit_file", "edits": [typo_fix]}
    outcome = client.post("/step", json={"action": edit_action})
    assert outcome.status_code == 200

    body = outcome.json()
    assert body["reward"] > 0
    assert body["info"]["issues_fixed"] >= 1


def test_step_submit():
    """A submit step answers 200 and marks the episode as done."""
    client.post("/reset", json={"task_id": "dockerfile_syntax"})

    submit_action = {"action_type": "submit"}
    outcome = client.post("/step", json={"action": submit_action})
    assert outcome.status_code == 200
    assert outcome.json()["done"] is True


def test_step_request_hint():
    """A request_hint step counts one hint used and returns "Hint" feedback."""
    client.post("/reset", json={"task_id": "dockerfile_syntax"})

    hint_action = {"action_type": "request_hint"}
    outcome = client.post("/step", json={"action": hint_action})
    assert outcome.status_code == 200

    observation = outcome.json()["observation"]
    assert observation["hints_used"] == 1
    # Feedback may be missing or None; treat both as an empty string.
    feedback = observation.get("last_action_feedback") or ""
    assert "Hint" in feedback


def test_grader_endpoint():
    """POST /grader scores a one-step, 1-of-1-issues-fixed trajectory as 1.0."""
    edit_action = {"action_type": "edit_file", "edits": [{"file_path": "Dockerfile"}]}
    only_step = {
        "step": 1,
        "action": edit_action,
        "reward": 0.3,
        "done": True,
        "info": {"issues_fixed": 1, "issues_total": 1},
    }
    grading_request = {"task_id": "dockerfile_syntax", "trajectory": [only_step]}

    graded = client.post("/grader", json=grading_request)
    assert graded.status_code == 200
    assert graded.json()["result"]["score"] == 1.0


def test_grader_empty_trajectory():
    """POST /grader with an empty trajectory answers 200 with a score of 0.0."""
    grading_request = {"task_id": "dockerfile_syntax", "trajectory": []}
    graded = client.post("/grader", json=grading_request)
    assert graded.status_code == 200

    result = graded.json()["result"]
    assert result["score"] == 0.0


def test_full_episode_via_api():
    """Full episode: reset -> edit -> submit -> verify final state.

    Each request in the sequence is status-checked so that a failure is
    reported at the step that caused it rather than at a later assertion.
    The final /state payload must report the episode as done with a
    positive accumulated reward.
    """
    reset = client.post(
        "/reset",
        json={"task_id": "dockerfile_syntax", "scenario_id": "typo_filename"},
    )
    assert reset.status_code == 200

    edit = client.post("/step", json={
        "action": {
            "action_type": "edit_file",
            "edits": [{
                "file_path": "Dockerfile",
                "old_content": "COPY requirments.txt .",
                "new_content": "COPY requirements.txt .",
            }],
        }
    })
    assert edit.status_code == 200

    resp = client.post("/step", json={"action": {"action_type": "submit"}})
    assert resp.status_code == 200
    assert resp.json()["done"] is True

    state = client.get("/state")
    assert state.status_code == 200
    # Parse the body once and reuse it for both checks.
    final_state = state.json()
    assert final_state["done"] is True
    assert final_state["episode_reward"] > 0