File size: 2,909 Bytes
fe21eda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from __future__ import annotations

"""Server smoke test for CodeReviewRuse.

This test imports the FastAPI app through the full repository package path.
That matters because server/app.py uses relative imports such as
``from ..evaluation_lab.models import ...``.
"""

import importlib
import sys
from pathlib import Path
from typing import Any

from fastapi.testclient import TestClient


# Metadata describing the CodeReviewRuse scenario. The test in this module
# forwards a subset of these fields as the ``options`` of its /reset request.
SCENARIO = dict(
    domain="software_testing",
    scenario_id="codereviewruse",
    scenario_name="CodeReviewRuse",
    upstream_track="logomesh",
    category="indirect_injection",
    adapter="openenv",
    assessment_mode="purple_benchmark",
    scenario_family="agentbeats_sprint4",
    benchmark="AgentX-AgentBeats Phase 2 Sprint 4",
    selected_opponent="logomesh",
    source_url="https://agentbeats-lambda.s3.us-east-1.amazonaws.com/codereviewruse/index.html",
)


def _repo_root() -> Path:
    # tests/ -> omnibench_aegis_env/ -> envs/ -> openenv/ -> integrations/ -> repo
    return Path(__file__).resolve().parents[5]


def _load_app() -> Any:
    """Import the server module by its full package path and return its ``app``.

    The repository root is placed at the front of ``sys.path`` (unless it is
    already listed) so that the ``integrations`` package can be imported.

    Returns:
        The ``app`` attribute of the imported server module.

    Raises:
        ImportError: if the server module cannot be imported.
        AttributeError: if the module does not define ``app``.
    """
    repo_dir = str(_repo_root())
    # Guard against duplicate entries when this helper runs more than once.
    if repo_dir not in sys.path:
        sys.path.insert(0, repo_dir)

    server_module = importlib.import_module(
        "integrations.openenv.envs.omnibench_aegis_env.server.app"
    )
    return server_module.app


def _state_envelope(payload: dict[str, Any]) -> dict[str, Any]:
    state = payload.get("state")
    return state if isinstance(state, dict) else payload


def test_server_software_testing_codereviewruse_flow() -> None:
    """Smoke-test one health -> reset -> step -> state round trip.

    Each of GET /health, POST /reset, POST /step and GET /state must answer
    with status 200 and a JSON object. The final state (either the top-level
    object or its nested ``state`` mapping) must expose at least one of the
    expected progress fields.
    """
    http = TestClient(_load_app())

    def _json_object(response: Any) -> dict[str, Any]:
        # Shared check: successful status and a JSON object body.
        assert response.status_code == 200
        body = response.json()
        assert isinstance(body, dict)
        return body

    _json_object(http.get("/health"))

    # Only these scenario fields are forwarded to the server on reset.
    forwarded_fields = (
        "domain",
        "scenario_id",
        "scenario_name",
        "upstream_track",
        "category",
        "assessment_mode",
        "scenario_family",
    )
    reset_request = {
        "seed": 42,
        "options": {field: SCENARIO[field] for field in forwarded_fields},
    }
    _json_object(http.post("/reset", json=reset_request))

    _json_object(http.post("/step", json={"action": "advance", "value": 1}))

    snapshot = _json_object(http.get("/state"))
    unwrapped = _state_envelope(snapshot)
    assert isinstance(unwrapped, dict)

    # The state may be flat or nested under "state"; accept a hit in either.
    progress_fields = ("score", "progress", "done", "success", "step_count", "last_action")
    assert any(
        field in snapshot or field in unwrapped
        for field in progress_fields
    )