AgentnessBench / tests /web /test_errand_web.py
irregular6612's picture
feat(web): show handover memory + persona rubric for LLM spectate (same as human play); highlight each entity's behaviour as a courier passes it
d42e3af
Raw
History Blame Contribute Delete
1.92 kB
# tests/web/test_errand_web.py
"""errand_runner over the web: health/turns/interact, 3 memory variants, results."""
from proteus.web.local.server import handle_request
def _new(registry, **over):
    """Create an errand_runner session through the web request handler.

    Sends ``POST /session`` with the default scenario, seed and play_turns.
    Keyword arguments in ``over`` replace or extend those defaults.

    Returns whatever ``handle_request`` returns; the tests in this module
    unpack it as a 3-tuple whose second item is the response payload.
    """
    request_body = {"scenario": "errand_runner", "seed": 7, "play_turns": 30}
    # Overrides are applied last so they win over the defaults above.
    request_body.update(over)
    return handle_request("POST", "/session", request_body, registry)
def test_state_exposes_health_no_limit_interact():
    """New errand state reports health, no turn countdown, and 'interact'."""
    registry = {}
    _status, response, _extra = _new(registry)
    state = response["state"]

    assert state["health"] == 6
    # errand has no move limit -> turns_left/play_turns are null (no countdown)
    assert state["turns_left"] is None
    assert state["play_turns"] is None
    assert "interact" in state["actions"]
def test_memory_offers_three_variants():
    """Session memory is attached and offers exactly three labelled variants."""
    registry = {}
    _status, response, _extra = _new(registry)
    memory = response["memory"]
    assert memory["attached"] is True

    # Index the offered variants by their id for the checks below.
    by_id = {}
    for variant in memory["variants"]:
        by_id[variant["id"]] = variant
    assert set(by_id) == {"civic", "warm_outlaw", "opportunist"}

    # Every variant needs a non-empty label and at least one frame.
    for variant in by_id.values():
        assert variant["label"] and len(variant["frames"]) >= 1

    assert memory["selected"] in by_id  # one is selected by default
def test_interact_step_accepted():
    """Posting an 'interact' action to a fresh session returns status 200."""
    registry = {}
    _status, created, _extra = _new(registry)
    session_id = created["session_id"]

    act_path = f"/session/{session_id}/act"
    status, _body, _extra = handle_request(
        "POST", act_path, {"action": "interact"}, registry)
    assert status == 200
def test_memory_exposes_persona_rubric():
    """A /spectate response carries a persona rubric with one row per entity key."""
    registry = {}
    spectate_body = {
        "scenario": "errand_runner",
        "seed": 7,
        "play_turns": 5,
        "model": "fake:demo",
    }
    _status, response, _extra = handle_request(
        "POST", "/spectate", spectate_body, registry)

    rubric = response["memory"]["rubric"]
    assert rubric is not None
    assert rubric["persona"] in {"civic", "warm_outlaw", "opportunist"}

    row_keys = {row["key"] for row in rubric["rows"]}
    assert row_keys == {"light", "construction", "wallet", "pedestrian", "grass"}

    # Each row must name its entity, carry a reaction label, and a 4-item rect.
    for row in rubric["rows"]:
        assert row["entity"]
        assert row["reaction_label"]
        assert len(row["rect"]) == 4