# Spaces: Running | File size: 11,385 Bytes | 80d8c84
"""API 14 — REST session isolation tests.
Proves that two REST users with different session_ids do not share
or corrupt each other's env state, episode IDs, protocol, or history.
"""
from __future__ import annotations
import pytest
from fastapi.testclient import TestClient
from server.app import app
@pytest.fixture()
def client():
    """Provide a fresh ``TestClient`` wrapping the application under test."""
    test_client = TestClient(app)
    return test_client
def _reset(client: TestClient, *, seed: int = 42, session_id: str | None = None, **kw) -> dict:
    """POST ``/reset`` and return the decoded JSON body.

    The request defaults to the ``math_reasoning`` scenario at ``easy``
    difficulty.  ``session_id`` is only sent when it is not ``None``, and any
    extra keyword arguments are merged last so they override the defaults.
    The helper asserts that the server answered with HTTP 200.
    """
    # Later entries win in a dict display, which mirrors "set defaults, then
    # add session_id, then apply overrides".
    body: dict = {
        "seed": seed,
        "scenario": "math_reasoning",
        "difficulty": "easy",
        **({} if session_id is None else {"session_id": session_id}),
        **kw,
    }
    response = client.post("/reset", json=body)
    assert response.status_code == 200
    return response.json()
def _good_action(client: TestClient) -> dict:
    """Build a valid propose_protocol action from a fresh scenario.

    The scenario is always generated with seed 42, the ``math_reasoning``
    template and ``easy`` difficulty.  ``client`` is accepted but not used by
    this helper.
    """
    from replicalab.scenarios import generate_scenario

    scenario = generate_scenario(seed=42, template="math_reasoning", difficulty="easy")
    lab = scenario.lab_manager_observation

    # Use the first required element as the technique, with a fixed fallback
    # when the reference spec lists none.
    if scenario.hidden_reference_spec.required_elements:
        technique = scenario.hidden_reference_spec.required_elements[0]
    else:
        technique = "algebraic_proof"

    # Never ask for more than one day, nor more than the lab's time limit.
    duration = min(1, lab.time_limit_days)

    # Request at most one item of whatever the lab reports as available.
    equipment = list(lab.equipment_available[:1]) if lab.equipment_available else []
    reagents = list(lab.reagents_in_stock[:1]) if lab.reagents_in_stock else []

    return {
        "action_type": "propose_protocol",
        "sample_size": 3,
        "controls": ["baseline"],
        "technique": technique,
        "duration_days": duration,
        "required_equipment": equipment,
        "required_reagents": reagents,
        "questions": [],
        "rationale": "Test protocol for isolation check.",
    }
# ---------------------------------------------------------------------------
# Two resets produce isolated sessions
# ---------------------------------------------------------------------------
class TestSessionIsolation:
    """Two REST users with different session_ids must not share state."""

    @staticmethod
    def _step_ok(client: TestClient, session_id: str, action: dict) -> dict:
        """POST ``/step`` for *session_id*, require HTTP 200, return the JSON body.

        Failing here (with the response text in the assertion message) points
        at the exact request that went wrong instead of surfacing later as a
        ``KeyError`` on a missing ``observation`` or ``done`` field.
        """
        resp = client.post("/step", json={"session_id": session_id, "action": action})
        assert resp.status_code == 200, resp.text
        return resp.json()

    def test_two_resets_produce_different_sessions(self, client: TestClient) -> None:
        """Resets without an explicit session_id get distinct session and episode IDs."""
        d1 = _reset(client, seed=1)
        d2 = _reset(client, seed=2)
        assert d1["session_id"] != d2["session_id"]
        assert d1["episode_id"] != d2["episode_id"]

    def test_two_sessions_have_independent_observations(self, client: TestClient) -> None:
        """Different seeds → different observations, proving separate envs."""
        d1 = _reset(client, seed=100, scenario="math_reasoning", difficulty="easy")
        d2 = _reset(client, seed=200, scenario="ml_benchmark", difficulty="hard")
        obs1 = d1["observation"]
        obs2 = d2["observation"]
        # Different scenarios produce different paper titles
        assert obs1["scientist"]["paper_title"] != obs2["scientist"]["paper_title"]

    def test_stepping_one_session_does_not_mutate_other(self, client: TestClient) -> None:
        """Step session A, then verify session B's next step is unaffected."""
        sid_a = "isolation-a"
        sid_b = "isolation-b"
        d_a = _reset(client, seed=42, session_id=sid_a)
        d_b = _reset(client, seed=42, session_id=sid_b)
        # Both start with the same observation (same seed/scenario/difficulty)
        assert d_a["observation"] == d_b["observation"]

        action = _good_action(client)

        # Step session A
        step_a = self._step_ok(client, sid_a, action)
        assert step_a["done"] is False

        # Step session B with the same action — it started from the same
        # state and has not been touched, so the outcome must match A's.
        step_b = self._step_ok(client, sid_b, action)
        assert step_a["reward"] == step_b["reward"]
        assert step_a["observation"] == step_b["observation"]

    def test_sessions_have_independent_round_counts(self, client: TestClient) -> None:
        """Advancing one session by two steps doesn't advance the other."""
        sid_a = "rounds-a"
        sid_b = "rounds-b"
        _reset(client, seed=42, session_id=sid_a)
        _reset(client, seed=42, session_id=sid_b)
        action = _good_action(client)

        # Step session A twice; both steps must succeed or the comparison
        # below would not be measuring two rounds of progress.
        self._step_ok(client, sid_a, action)
        obs_a = self._step_ok(client, sid_a, action)["observation"]

        # Step session B once
        obs_b = self._step_ok(client, sid_b, action)["observation"]

        # Session A has taken more steps than B, so B's round number must be
        # strictly smaller (which also implies the two differ).
        round_a = obs_a["scientist"]["round_number"]
        round_b = obs_b["scientist"]["round_number"]
        assert round_b < round_a

    def test_terminal_one_session_does_not_affect_other(self, client: TestClient) -> None:
        """Completing session A (accept) doesn't terminate session B."""
        sid_a = "term-a"
        sid_b = "term-b"
        _reset(client, seed=42, session_id=sid_a)
        _reset(client, seed=42, session_id=sid_b)
        action = _good_action(client)

        # Step A with propose, then accept
        self._step_ok(client, sid_a, action)
        accept = {
            "action_type": "accept",
            "sample_size": 0,
            "controls": [],
            "technique": "",
            "duration_days": 0,
            "required_equipment": [],
            "required_reagents": [],
            "questions": [],
            "rationale": "",
        }
        assert self._step_ok(client, sid_a, accept)["done"] is True

        # Session B should still be alive and non-terminal
        assert self._step_ok(client, sid_b, action)["done"] is False
# ---------------------------------------------------------------------------
# Session ID reuse
# ---------------------------------------------------------------------------
class TestSessionReuse:
    """Reusing a session_id should close the old env and start fresh."""

    def test_reuse_session_id_creates_new_episode(self, client: TestClient) -> None:
        """A second reset on the same session_id keeps the ID but changes the episode."""
        sid = "reuse-test"
        d1 = _reset(client, seed=10, session_id=sid)
        d2 = _reset(client, seed=20, session_id=sid)
        assert d1["session_id"] == d2["session_id"] == sid
        assert d1["episode_id"] != d2["episode_id"]

    def test_reuse_session_id_resets_round_counter(self, client: TestClient) -> None:
        """After stepping session, reusing the ID should start at round 0."""
        sid = "reuse-rounds"
        _reset(client, seed=42, session_id=sid)
        action = _good_action(client)

        # The step must actually succeed; otherwise the session never left
        # its initial round and the final assertion would pass vacuously.
        stepped = client.post("/step", json={"session_id": sid, "action": action})
        assert stepped.status_code == 200, stepped.text

        # Now reset with the same session_id
        d2 = _reset(client, seed=99, session_id=sid)
        obs = d2["observation"]
        assert obs["scientist"]["round_number"] == 0

    def test_reuse_does_not_affect_other_sessions(self, client: TestClient) -> None:
        """Resetting session A with reuse doesn't touch session B."""
        sid_a = "reuse-a"
        sid_b = "reuse-b"
        _reset(client, seed=42, session_id=sid_a)
        _reset(client, seed=42, session_id=sid_b)
        action = _good_action(client)

        # Advance B once before A is recycled, and make sure that step took.
        first_b = client.post("/step", json={"session_id": sid_b, "action": action})
        assert first_b.status_code == 200, first_b.text

        # Reuse session A with a new seed
        _reset(client, seed=99, session_id=sid_a)

        # Session B must still accept a further step after A was reset.
        resp_b = client.post("/step", json={"session_id": sid_b, "action": action})
        assert resp_b.status_code == 200
        # B is still alive and progressing independently
        assert resp_b.json()["done"] is False
# ---------------------------------------------------------------------------
# Invalid session handling
# ---------------------------------------------------------------------------
class TestInvalidSession:
    """Invalid or missing session_id should return clean errors."""

    @staticmethod
    def _accept_action() -> dict:
        """Return a fresh ``accept`` action payload with all other fields empty."""
        return {
            "action_type": "accept",
            "sample_size": 0,
            "controls": [],
            "technique": "",
            "duration_days": 0,
            "required_equipment": [],
            "required_reagents": [],
            "questions": [],
            "rationale": "",
        }

    def test_step_nonexistent_session_returns_404(self, client: TestClient) -> None:
        """Stepping an unknown session_id yields 404 with a 'Session not found' detail."""
        action = _good_action(client)
        resp = client.post("/step", json={
            "session_id": "does-not-exist",
            "action": action,
        })
        assert resp.status_code == 404
        assert "Session not found" in resp.json()["detail"]

    def test_step_after_terminal_session_raises(self, client: TestClient) -> None:
        """A terminated session's env is closed; stepping again should error."""
        sid = "terminal-step"
        _reset(client, seed=42, session_id=sid)
        action = _good_action(client)

        proposed = client.post("/step", json={"session_id": sid, "action": action})
        assert proposed.status_code == 200, proposed.text

        resp_term = client.post(
            "/step", json={"session_id": sid, "action": self._accept_action()}
        )
        # Check the status first so a failed request is reported as such
        # rather than as a missing "done" key.
        assert resp_term.status_code == 200, resp_term.text
        assert resp_term.json()["done"] is True

        # Stepping a done env should return an error (env raises or returns error)
        resp_again = client.post("/step", json={"session_id": sid, "action": action})
        # The server should still return a response (200 with error or 500),
        # not crash silently
        assert resp_again.status_code in (200, 500)

    def test_replay_isolation_between_sessions(self, client: TestClient) -> None:
        """Each session's terminal episode gets its own replay entry."""
        sid_a = "replay-a"
        sid_b = "replay-b"
        d_a = _reset(client, seed=10, session_id=sid_a)
        d_b = _reset(client, seed=20, session_id=sid_b)
        action = _good_action(client)
        accept = self._accept_action()

        # Complete both episodes.  The proposal comes from a seed-42 scenario
        # while these sessions use seeds 10 and 20, so the individual step
        # responses are not asserted here; only the replay lookups are.
        for sid in (sid_a, sid_b):
            client.post("/step", json={"session_id": sid, "action": action})
            client.post("/step", json={"session_id": sid, "action": accept})

        # Both replays should exist independently
        ep_a = d_a["episode_id"]
        ep_b = d_b["episode_id"]
        resp_a = client.get(f"/replay/{ep_a}")
        resp_b = client.get(f"/replay/{ep_b}")
        assert resp_a.status_code == 200
        assert resp_b.status_code == 200

        # Different seeds → different episode content
        assert resp_a.json()["seed"] != resp_b.json()["seed"]
|