File size: 8,696 Bytes
e8e01c9
 
 
 
426093b
 
e8e01c9
 
 
 
 
 
 
 
93cd78f
e8e01c9
 
 
 
 
 
93cd78f
e8e01c9
 
 
 
 
 
 
d4716c0
e8e01c9
 
d4716c0
e8e01c9
 
 
d4716c0
e8e01c9
 
 
 
 
 
93cd78f
e8e01c9
 
 
 
 
 
93cd78f
e8e01c9
 
 
 
 
 
 
 
93cd78f
e8e01c9
 
 
 
 
 
 
 
93cd78f
e8e01c9
2733738
 
 
 
 
 
 
93cd78f
2733738
 
 
 
93cd78f
2733738
ca0ff77
 
d4716c0
ca0ff77
 
d4716c0
ca0ff77
 
 
 
c50d37a
ca0ff77
 
 
 
d4716c0
6c48e8f
d4716c0
6c48e8f
 
 
 
 
 
 
d4716c0
 
6c48e8f
 
d4716c0
6c48e8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca0ff77
 
 
d4716c0
ca0ff77
 
 
087e169
 
 
 
 
387f878
 
 
 
 
 
087e169
d54ab68
 
 
 
 
d4716c0
d54ab68
 
 
 
 
 
 
 
 
 
d4716c0
d54ab68
 
 
 
 
 
 
d4716c0
d54ab68
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
"""Web memory-mode selection wired into /session (human) and /spectate (LLM):
default / none / persona[:id] / generate / latest, plus the raised action quota."""
from __future__ import annotations

import proteus.game.scenarios  # noqa: F401
from proteus.web.local import server


def _reg():
    return {}


def test_default_play_quota_is_100():
    """A /session request that omits ``play_turns`` reports a quota of 100."""
    request_body = {"scenario": "template"}
    _status, session_payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())
    assert session_payload["state"]["play_turns"] == 100


def test_persona_memory_attached_to_human_session():
    """``memory="persona:risk_averse"`` on /session reports that persona as attached."""
    request_body = {
        "scenario": "template",
        "seed": 42,
        "play_turns": 3,
        "memory": "persona:risk_averse",
    }
    _status, payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())

    memory_info = payload["memory"]
    assert memory_info["attached"] is True
    assert memory_info["persona"] == "risk_averse"
    assert memory_info["turns"] >= 1


def test_none_forces_no_memory_even_when_scenario_has_a_default():
    """``memory="none"`` must switch memory off even though "default" attaches one."""
    # template attaches a persona memory by default; 'none' must override it.
    shared_fields = {"scenario": "template", "seed": 42, "play_turns": 3}

    # A fresh dict is built per request so the two calls never share a body.
    _status, with_default, _extra = server.handle_request(
        "POST", "/session", {**shared_fields, "memory": "default"}, _reg())
    assert with_default["memory"]["attached"] is True    # scenario default = persona

    _status, forced_off, _extra = server.handle_request(
        "POST", "/session", {**shared_fields, "memory": "none"}, _reg())
    assert forced_off["memory"]["attached"] is False     # forced off


def test_unknown_persona_is_400():
    """An unrecognised persona id on /session yields status 400 with an "error" key."""
    request_body = {"scenario": "template", "memory": "persona:bogus"}
    status, payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())
    assert status == 400
    assert "error" in payload


def test_generate_memory_via_fake_model_on_spectate(tmp_path):
    """``memory="generate"`` on /spectate attaches memory; ``"latest"`` then reuses it.

    Both requests use the ``fake:demo`` model and point ``memory_root`` at
    ``tmp_path``.
    """
    def spectate(memory_mode):
        # Identical request for both calls apart from the memory mode.
        request_body = {
            "scenario": "template", "seed": 42, "play_turns": 2,
            "model": "fake:demo", "memory": memory_mode,
            "memory_root": str(tmp_path),
        }
        return server.handle_request("POST", "/spectate", request_body, _reg())

    status, generated, _extra = spectate("generate")
    assert status == 200
    assert generated["memory"]["attached"] is True
    assert generated["memory"]["turns"] >= 1

    # generate saved a checkpoint under the (tmp) root -> latest can find it.
    status, latest, _extra = spectate("latest")
    assert status == 200
    assert latest["memory"]["attached"] is True


def test_generate_without_model_on_human_is_400():
    """``memory="generate"`` on /session without a model is rejected with a 400.

    The error text is expected to contain "needs a model".
    """
    request_body = {"scenario": "template", "memory": "generate"}
    status, payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())
    assert status == 400
    assert "needs a model" in payload["error"]


def test_response_carries_rendered_memory_block_for_display():
    """The /session response carries the rendered memory text under ``memory.block``."""
    # The web can show the actual memory at the start, so the response includes
    # the rendered block the model is given.
    persona_body = {
        "scenario": "template",
        "seed": 42,
        "play_turns": 3,
        "memory": "persona:risk_averse",
    }
    _status, with_persona, _extra = server.handle_request(
        "POST", "/session", persona_body, _reg())
    rendered = with_persona["memory"]["block"]
    assert rendered
    assert "MEMORY" in rendered

    # 'none' carries no block.
    no_memory_body = {"scenario": "template", "memory": "none"}
    _status, without_memory, _extra = server.handle_request(
        "POST", "/session", no_memory_body, _reg())
    assert without_memory["memory"]["block"] is None


def test_memory_payload_includes_replay_frames_for_template():
    """Default template memory exposes a non-empty list of replay frames.

    The first frame must offer at least the ``turn_idx``, ``action`` and
    ``grid`` keys, with a 64x64 grid containing at least one cell of value 3.
    """
    request_body = {
        "scenario": "template", "seed": 42, "play_turns": 3, "memory": "default",
    }
    _status, payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())

    frames = payload["memory"]["frames"]
    assert isinstance(frames, list)
    assert len(frames) >= 1

    first = frames[0]
    # events key also present since Task 5
    assert set(first) >= {"turn_idx", "action", "grid"}

    board = first["grid"]
    assert len(board) == 64  # 64x64 field
    assert len(board[0]) == 64
    assert any(3 in row for row in board)  # walls present


def test_template_legacy_replay_block_sizes_are_3x3_predator_2x2_focal():
    """Regression: the legacy single-agent prose branch must paint a 3x3 predator
    and a 2x2 focal, not the stale 5x5/3x3 sizes that pre-dated the template
    resize.
    """
    from proteus.game.runtime.memory import MemoryCheckpoint, MemoryTurn, memory_frames

    PREDATOR_IDX = 2  # "B" in template legend
    FOCAL_IDX = 1     # "A" in template legend
    # Anchors are kept well clear of the edges so no clipping masks the block size.
    px, py = 10, 10   # predator anchor
    fx, fy = 30, 30   # focal anchor

    # Use a simple prose frame_ascii (non-grid) so the legacy paint branch fires.
    prose_turn = MemoryTurn(
        turn_idx=1,
        frame_ascii="Open field 64x64. You are A (2x2) centered at (31,31). "
                    "Predator B (3x3) centered at (11,11). Manhattan distance 20.",
        action="right",
        focal_pos=(fx, fy),
        predator_pos=(px, py),
    )
    checkpoint = MemoryCheckpoint(
        model="test", scenario="template", difficulty="easy", seed=1,
        created_at="t", outcome="survived", transparent_prompt="p",
        memory_turns=[prose_turn],
    )
    rendered = memory_frames(
        checkpoint,
        legend={5: ".", 1: "A", 2: "B", 3: "#", 14: "F"},
        grid_size=(64, 64),
    )
    grid = rendered[0]["grid"]

    # Every cell of the 3x3 block at the predator anchor must be predator colour.
    for dy, dx in [(row, col) for row in range(3) for col in range(3)]:
        assert grid[py + dy][px + dx] == PREDATOR_IDX, (
            f"predator cell ({px+dx},{py+dy}) expected {PREDATOR_IDX}, "
            f"got {grid[py+dy][px+dx]}"
        )
    # The cell one step beyond the 3x3 predator block must NOT be predator colour
    # (catches the old 5x5 bug where cells up to anchor+4 were painted).
    assert grid[py][px + 3] != PREDATOR_IDX, "predator block wider than 3 (old 5x5 bug)"
    assert grid[py + 3][px] != PREDATOR_IDX, "predator block taller than 3 (old 5x5 bug)"

    # Every cell of the 2x2 block at the focal anchor must be focal colour.
    for dy, dx in [(row, col) for row in range(2) for col in range(2)]:
        assert grid[fy + dy][fx + dx] == FOCAL_IDX, (
            f"focal cell ({fx+dx},{fy+dy}) expected {FOCAL_IDX}, "
            f"got {grid[fy+dy][fx+dx]}"
        )
    # The cell one step beyond the 2x2 focal block must NOT be focal colour
    # (catches the old 3x3 bug).
    assert grid[fy][fx + 2] != FOCAL_IDX, "focal block wider than 2 (old 3x3 bug)"
    assert grid[fy + 2][fx] != FOCAL_IDX, "focal block taller than 2 (old 3x3 bug)"


def test_memory_frames_empty_when_no_memory():
    """With ``memory="none"`` nothing is attached and the frame list is empty."""
    request_body = {
        "scenario": "template", "seed": 42, "play_turns": 3, "memory": "none",
    }
    _status, payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())

    memory_info = payload["memory"]
    assert memory_info["attached"] is False
    assert memory_info["frames"] == []


def test_index_html_has_replay_controls():
    """GET / serves HTML with the compact replay control bar and no legacy panel ids.

    Each expected or forbidden marker gets its own assertion and message, so a
    failure names the exact id at fault.
    """
    # Only the body is inspected; the other two response slots are discarded.
    _, body, _ = server.handle_request("GET", "/", None, _reg())
    # The body may arrive as bytes or as text; normalise to str before searching.
    html = body.decode() if isinstance(body, (bytes, bytearray)) else body

    # Memory replay now shares the single play board (#grid); the separate
    # #memReplayGrid / #memoryPanel region was removed and replaced by a compact
    # control bar with a "start playing" (memReplayDone) handover button.
    for removed in ("memReplayGrid", "memoryPanel"):
        assert removed not in html, f"index page still contains {removed!r}"

    required_markers = (
        'id="memReplay"',
        "memReplayDone",
        "memReplayPrev",
        "memReplayNext",
        "memReplayPlay",
    )
    for marker in required_markers:
        assert marker in html, f"index page is missing {marker!r}"


def test_policy_memory_attached_to_session():
    """``memory="policy:survival_refuge"`` on /session attaches memory with frames.

    The reported ``source`` must echo the requested policy string.
    """
    request_body = {
        "scenario": "template",
        "seed": 7,
        "play_turns": 3,
        "memory": "policy:survival_refuge",
    }
    _status, payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())

    memory_info = payload["memory"]
    assert memory_info["attached"] is True
    assert memory_info["source"] == "policy:survival_refuge"
    assert memory_info["turns"] >= 1
    assert memory_info["frames"]


def test_unknown_policy_is_400():
    """An unrecognised policy id on /session yields status 400 with an "error" key."""
    request_body = {"scenario": "template", "memory": "policy:bogus"}
    status, payload, _extra = server.handle_request(
        "POST", "/session", request_body, _reg())
    assert status == 400
    assert "error" in payload


def test_spectate_bad_provider_is_400_not_500():
    """A /spectate request for model ``ollama:x`` must come back as a 400 with an error."""
    # ollama with no key -> ValueError now; must surface as a clean 400, not a 500.
    request_body = {
        "scenario": "template",
        "seed": 7,
        "play_turns": 2,
        "model": "ollama:x",
        "memory": "none",
    }
    status, payload, _extra = server.handle_request(
        "POST", "/spectate", request_body, _reg())
    assert status == 400
    assert "error" in payload