Spaces:
Sleeping
Sleeping
| """Web memory-mode selection wired into /session (human) and /spectate (LLM): | |
| default / none / persona[:id] / generate / latest, plus the raised action quota.""" | |
| from __future__ import annotations | |
| import proteus.game.scenarios # noqa: F401 | |
| from proteus.web.local import server | |
def _reg():
    """Return a new, empty dict to pass as the last argument of handle_request."""
    fresh = dict()
    return fresh
def test_default_play_quota_is_100():
    """A /session request that omits play_turns reports a quota of 100."""
    request = {"scenario": "template"}
    _, payload, _ = server.handle_request("POST", "/session", request, _reg())
    quota = payload["state"]["play_turns"]
    assert quota == 100
def test_persona_memory_attached_to_human_session():
    """Requesting 'persona:risk_averse' on /session reports that persona as attached."""
    request = {
        "scenario": "template",
        "seed": 42,
        "play_turns": 3,
        "memory": "persona:risk_averse",
    }
    _, payload, _ = server.handle_request("POST", "/session", request, _reg())
    memory = payload["memory"]
    assert memory["attached"] is True
    assert memory["persona"] == "risk_averse"
    assert memory["turns"] >= 1
def test_none_forces_no_memory_even_when_scenario_has_a_default():
    """memory='none' must report no memory even though 'default' attaches one."""
    common = {"scenario": "template", "seed": 42, "play_turns": 3}

    # template attaches a persona memory by default; 'none' must override it.
    default_request = {**common, "memory": "default"}
    _, dflt, _ = server.handle_request("POST", "/session", default_request, _reg())
    assert dflt["memory"]["attached"] is True  # scenario default = persona

    none_request = {**common, "memory": "none"}
    _, none, _ = server.handle_request("POST", "/session", none_request, _reg())
    assert none["memory"]["attached"] is False  # forced off
def test_unknown_persona_is_400():
    """An unrecognised persona id is answered with status 400 and an 'error' key."""
    request = {"scenario": "template", "memory": "persona:bogus"}
    status, payload, _ = server.handle_request("POST", "/session", request, _reg())
    assert status == 400
    assert "error" in payload
def test_generate_memory_via_fake_model_on_spectate(tmp_path):
    """'generate' attaches a memory on /spectate, and 'latest' then attaches one too.

    Both requests point memory_root at the pytest tmp_path directory.
    """

    def spectate(memory_mode):
        # Same request for both phases; only the memory mode differs.
        request = {
            "scenario": "template",
            "seed": 42,
            "play_turns": 2,
            "model": "fake:demo",
            "memory": memory_mode,
            "memory_root": str(tmp_path),
        }
        return server.handle_request("POST", "/spectate", request, _reg())

    status, payload, _ = spectate("generate")
    assert status == 200
    generated = payload["memory"]
    assert generated["attached"] is True
    assert generated["turns"] >= 1

    # generate saved a checkpoint under the (tmp) root -> latest can find it.
    status, latest, _ = spectate("latest")
    assert status == 200
    assert latest["memory"]["attached"] is True
def test_generate_without_model_on_human_is_400():
    """'generate' on /session without a model yields 400 with a 'needs a model' error."""
    request = {"scenario": "template", "memory": "generate"}
    status, payload, _ = server.handle_request("POST", "/session", request, _reg())
    assert status == 400
    assert "needs a model" in payload["error"]
def test_response_carries_rendered_memory_block_for_display():
    """The response exposes the rendered memory block, or None when memory is 'none'."""
    # The web can show the actual memory at the start, so the response includes
    # the rendered block the model is given.
    persona_request = {
        "scenario": "template",
        "seed": 42,
        "play_turns": 3,
        "memory": "persona:risk_averse",
    }
    _, payload, _ = server.handle_request(
        "POST", "/session", persona_request, _reg())
    rendered = payload["memory"]["block"]
    assert rendered
    assert "MEMORY" in rendered

    # 'none' carries no block.
    none_request = {"scenario": "template", "memory": "none"}
    _, none, _ = server.handle_request("POST", "/session", none_request, _reg())
    assert none["memory"]["block"] is None
def test_memory_payload_includes_replay_frames_for_template():
    """The default template memory carries a non-empty list of replay frames."""
    request = {"scenario": "template", "seed": 42, "play_turns": 3, "memory": "default"}
    _, payload, _ = server.handle_request("POST", "/session", request, _reg())

    frames = payload["memory"]["frames"]
    assert isinstance(frames, list)
    assert len(frames) >= 1

    first = frames[0]
    # events key also present since Task 5
    assert {"turn_idx", "action", "grid"}.issubset(first)

    grid = first["grid"]
    assert len(grid) == 64  # 64x64 field
    assert len(grid[0]) == 64
    assert any(3 in row for row in grid)  # walls present
def test_template_legacy_replay_block_sizes_are_3x3_predator_2x2_focal():
    """Regression guard for the legacy single-agent prose paint branch.

    The replay grid must show a 3x3 predator and a 2x2 focal, not the stale
    5x5/3x3 sizes that pre-dated the template resize.
    """
    from proteus.game.runtime.memory import MemoryCheckpoint, MemoryTurn, memory_frames

    predator_idx = 2  # "B" in template legend
    focal_idx = 1  # "A" in template legend

    # Positions are well clear of the edges so no clipping masks the block size.
    predator_anchor = (10, 10)
    focal_anchor = (30, 30)

    # A prose (non-grid) frame_ascii so the legacy paint branch fires.
    turn = MemoryTurn(
        turn_idx=1,
        frame_ascii="Open field 64x64. You are A (2x2) centered at (31,31). "
                    "Predator B (3x3) centered at (11,11). Manhattan distance 20.",
        action="right",
        focal_pos=focal_anchor,
        predator_pos=predator_anchor,
    )
    ck = MemoryCheckpoint(
        model="test", scenario="template", difficulty="easy", seed=1,
        created_at="t", outcome="survived", transparent_prompt="p",
        memory_turns=[turn],
    )
    legend = {5: ".", 1: "A", 2: "B", 3: "#", 14: "F"}
    grid = memory_frames(ck, legend=legend, grid_size=(64, 64))[0]["grid"]

    # Every cell of the 3x3 block at the predator anchor is predator-coloured.
    px, py = predator_anchor
    predator_cells = [(px + dx, py + dy) for dy in range(3) for dx in range(3)]
    for x, y in predator_cells:
        assert grid[y][x] == predator_idx, (
            f"predator cell ({x},{y}) expected {predator_idx}, "
            f"got {grid[y][x]}"
        )
    # The cell one step beyond the 3x3 predator block must NOT be predator colour
    # (catches the old 5x5 bug where cells up to anchor+4 were painted).
    assert grid[py][px + 3] != predator_idx, "predator block wider than 3 (old 5x5 bug)"
    assert grid[py + 3][px] != predator_idx, "predator block taller than 3 (old 5x5 bug)"

    # Every cell of the 2x2 block at the focal anchor is focal-coloured.
    fx, fy = focal_anchor
    focal_cells = [(fx + dx, fy + dy) for dy in range(2) for dx in range(2)]
    for x, y in focal_cells:
        assert grid[y][x] == focal_idx, (
            f"focal cell ({x},{y}) expected {focal_idx}, "
            f"got {grid[y][x]}"
        )
    # The cell one step beyond the 2x2 focal block must NOT be focal colour
    # (catches the old 3x3 bug).
    assert grid[fy][fx + 2] != focal_idx, "focal block wider than 2 (old 3x3 bug)"
    assert grid[fy + 2][fx] != focal_idx, "focal block taller than 2 (old 3x3 bug)"
def test_memory_frames_empty_when_no_memory():
    """With memory='none' the payload reports no memory and an empty frame list."""
    request = {"scenario": "template", "seed": 42, "play_turns": 3, "memory": "none"}
    _, payload, _ = server.handle_request("POST", "/session", request, _reg())
    memory = payload["memory"]
    assert memory["attached"] is False
    assert memory["frames"] == []
def test_index_html_has_replay_controls():
    """The index page served at GET / exposes the compact memory-replay control bar.

    Memory replay now shares the single play board (#grid); the separate
    #memReplayGrid / #memoryPanel region was removed and replaced by a compact
    control bar with a "start playing" (memReplayDone) handover button.
    """
    # Only the body is inspected, so the other two tuple slots are discarded.
    _, body, _ = server.handle_request("GET", "/", None, _reg())
    html = body.decode() if isinstance(body, (bytes, bytearray)) else body

    # The removed standalone replay region must not come back.
    for removed in ("memReplayGrid", "memoryPanel"):
        assert removed not in html, f"stale replay markup still present: {removed}"

    # One assertion per marker so a failure names the control that is missing.
    required = (
        'id="memReplay"',
        "memReplayDone",
        "memReplayPrev",
        "memReplayNext",
        "memReplayPlay",
    )
    for marker in required:
        assert marker in html, f"missing replay control: {marker}"
def test_policy_memory_attached_to_session():
    """Requesting 'policy:survival_refuge' on /session attaches a memory with frames."""
    request = {
        "scenario": "template",
        "seed": 7,
        "play_turns": 3,
        "memory": "policy:survival_refuge",
    }
    _, payload, _ = server.handle_request("POST", "/session", request, _reg())
    memory = payload["memory"]
    assert memory["attached"] is True
    assert memory["source"] == "policy:survival_refuge"
    assert memory["turns"] >= 1
    assert memory["frames"]
def test_unknown_policy_is_400():
    """An unrecognised policy id is answered with status 400 and an 'error' key."""
    request = {"scenario": "template", "memory": "policy:bogus"}
    status, payload, _ = server.handle_request("POST", "/session", request, _reg())
    assert status == 400
    assert "error" in payload
def test_spectate_bad_provider_is_400_not_500():
    """A /spectate request with model 'ollama:x' must come back as 400 with an 'error' key."""
    # ollama with no key -> ValueError now; must surface as a clean 400, not a 500.
    request = {
        "scenario": "template",
        "seed": 7,
        "play_turns": 2,
        "model": "ollama:x",
        "memory": "none",
    }
    status, payload, _ = server.handle_request("POST", "/spectate", request, _reg())
    assert status == 400
    assert "error" in payload