File size: 1,949 Bytes
fe21eda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""Client-stub payload test for DevContainerDoom.

This test does not require a running server. It validates the Sprint 4 payload
identity that client-side builders/stubs should preserve before sending the
request into OpenEnv / OmniBench.
"""

from __future__ import annotations

# Identity fields for the DevContainerDoom scenario. build_payload() spreads
# these into the top level of the payload and also copies them under
# "metadata"; the identity test checks both places key-for-key against this
# mapping.
SCENARIO: dict[str, str] = {
    # The identity test asserts that "domain" and "upstream_track" differ.
    "domain": "coding",
    # "scenario_id", "scenario_name", "domain" and "category" are also
    # re-exposed by build_payload() under the nested "scenario" entry.
    "scenario_id": "devcontainerdoom",
    "scenario_name": "DevContainerDoom",
    "upstream_track": "netarena",
    "category": "supply_chain",
    "adapter": "openenv",
    # The next two values are additionally pinned as literals by the identity
    # test, so editing them here alone will fail that test.
    "assessment_mode": "purple_benchmark",
    "scenario_family": "agentbeats_sprint4",
    "benchmark": "AgentX-AgentBeats Phase 2 Sprint 4",
    "selected_opponent": "netarena",
    "source_url": "https://agentbeats-lambda.s3.us-east-1.amazonaws.com/devcontainerdoom/index.html",
}


def build_payload() -> dict:
    """Assemble the client-stub request payload for DevContainerDoom.

    The SCENARIO identity fields appear twice: flattened into the top level
    of the payload and as a separate shallow copy under ``"metadata"``. The
    payload also carries the ``live_check`` / ``strict_sprint4`` flags, a
    one-step action plan, and a condensed ``"scenario"`` summary.

    Returns:
        A new dict on every call; mutating it does not modify SCENARIO.
    """
    # Start from a copy so the identity fields come first, in SCENARIO order.
    payload = dict(SCENARIO)

    payload["live_check"] = False
    payload["strict_sprint4"] = True

    single_step = {"action": "advance", "value": 1}
    payload["action_plan"] = [single_step]

    # Second, independent copy of the identity fields.
    payload["metadata"] = dict(SCENARIO)

    summary = {}
    summary["id"] = SCENARIO["scenario_id"]
    summary["name"] = SCENARIO["scenario_name"]
    summary["domain"] = SCENARIO["domain"]
    summary["category"] = SCENARIO["category"]
    payload["scenario"] = summary

    return payload


def test_client_stub_devcontainerdoom_preserves_sprint4_identity() -> None:
    """Check that every SCENARIO field survives into the built payload.

    Each field must match both at the top level and under ``"metadata"``.
    The assessment mode and scenario family are also pinned to literal
    values, and the domain must differ from the upstream track.
    """
    built = build_payload()

    for field in SCENARIO:
        wanted = SCENARIO[field]
        assert built[field] == wanted
        assert built["metadata"][field] == wanted

    # Literal pins: these fail if SCENARIO itself is edited.
    mode = built["assessment_mode"]
    assert mode == "purple_benchmark"
    family = built["scenario_family"]
    assert family == "agentbeats_sprint4"

    domain, track = built["domain"], built["upstream_track"]
    assert domain != track


def test_client_stub_devcontainerdoom_has_action_plan() -> None:
    """Check the payload has a non-empty action plan starting with advance/1."""
    plan = build_payload()["action_plan"]

    # An empty or missing plan is falsy and fails here before indexing.
    assert plan

    first_step = plan[0]
    assert first_step["action"] == "advance"
    assert first_step["value"] == 1