Spaces:
Sleeping
Sleeping
File size: 7,280 Bytes
703aa57 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | # tests/test_environment.py
"""Tests for the environment logic in server/environment.py"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), "server"))
import pytest
from model import TriageAction, TriageObservation
from server.environment import BugTriageEnvironment, SessionManager
class TestEnvironmentReset:
def test_reset_returns_observation(self):
env = BugTriageEnvironment()
obs = env.reset(task_id="easy")
assert isinstance(obs, TriageObservation)
assert obs.bug_report is not None
assert obs.done is False
assert obs.task_id == "easy"
def test_reset_different_tasks(self):
env = BugTriageEnvironment()
for task_id in ["easy", "medium", "hard"]:
obs = env.reset(task_id=task_id)
assert obs.task_id == task_id
assert obs.done is False
def test_reset_invalid_task_defaults_to_easy(self):
env = BugTriageEnvironment()
obs = env.reset(task_id="nonexistent")
assert obs.task_id == "easy"
def test_reset_shows_truncated_body(self):
env = BugTriageEnvironment()
obs = env.reset(task_id="easy")
# Body should be truncated (not fully visible) on reset
assert obs.body_visible is False
def test_reset_hides_comments(self):
env = BugTriageEnvironment()
obs = env.reset(task_id="easy")
assert obs.comments_visible is False
def test_reset_clears_previous_state(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
env.step(TriageAction(action_type="submit", priority="P0"))
# Reset should clear everything
obs = env.reset(task_id="medium")
assert obs.done is False
assert obs.task_id == "medium"
assert obs.steps_taken == 0
class TestEnvironmentInvestigation:
def test_read_body_reveals_full_body(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
obs = env.step(TriageAction(action_type="read_body"))
assert obs.body_visible is True
assert obs.done is False
assert obs.steps_taken == 1
def test_read_comments_reveals_comments(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
obs = env.step(TriageAction(action_type="read_comments"))
assert obs.comments_visible is True
assert obs.done is False
def test_check_logs_reveals_logs(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
obs = env.step(TriageAction(action_type="check_logs"))
assert obs.logs_visible is True
assert obs.done is False
def test_duplicate_investigation_gives_feedback(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
env.step(TriageAction(action_type="read_body"))
obs = env.step(TriageAction(action_type="read_body"))
assert "already" in obs.feedback.lower()
def test_step_count_increments(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
obs1 = env.step(TriageAction(action_type="read_body"))
assert obs1.steps_taken == 1
obs2 = env.step(TriageAction(action_type="read_comments"))
assert obs2.steps_taken == 2
class TestEnvironmentSubmission:
def test_submit_returns_done(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
obs = env.step(TriageAction(action_type="submit", priority="P0"))
assert obs.done is True
def test_submit_returns_valid_score(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
obs = env.step(TriageAction(action_type="submit", priority="P0"))
assert 0 < obs.score < 1
assert 0 < obs.reward < 1
def test_investigate_then_submit(self):
env = BugTriageEnvironment()
env.reset(task_id="medium")
env.step(TriageAction(action_type="read_body"))
env.step(TriageAction(action_type="read_comments"))
obs = env.step(TriageAction(
action_type="submit", priority="P0",
labels=["bug"], assigned_team="backend",
))
assert obs.done is True
assert 0 < obs.score < 1
def test_double_submit_stays_done(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
env.step(TriageAction(action_type="submit", priority="P0"))
obs = env.step(TriageAction(action_type="submit", priority="P1"))
assert obs.done is True
assert "already complete" in obs.feedback.lower()
def test_max_steps_forces_submit(self):
env = BugTriageEnvironment()
obs = env.reset(task_id="easy")
max_steps = obs.max_steps
# Use all steps investigating
for _ in range(max_steps - 1):
obs = env.step(TriageAction(action_type="read_body"))
if obs.done:
break
# This should force a submit even if action_type is investigate
if not obs.done:
obs = env.step(TriageAction(
action_type="read_comments", # will be forced to submit
priority="P0",
))
class TestEnvironmentState:
def test_state_tracks_steps(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
env.step(TriageAction(action_type="read_body"))
state = env.get_state()
assert state.step_count == 1
assert "read_body" in state.actions_taken
def test_state_tracks_completed_tasks(self):
env = BugTriageEnvironment()
env.reset(task_id="easy")
env.step(TriageAction(action_type="submit", priority="P0"))
state = env.get_state()
assert "easy" in state.tasks_completed
class TestSessionManager:
def test_create_session(self):
mgr = SessionManager(max_sessions=10, ttl_seconds=60)
session_id, env = mgr.create_session()
assert session_id is not None
assert isinstance(env, BugTriageEnvironment)
assert mgr.active_count == 1
def test_get_session(self):
mgr = SessionManager()
session_id, env = mgr.create_session()
retrieved = mgr.get_session(session_id)
assert retrieved is env
def test_get_missing_session(self):
mgr = SessionManager()
assert mgr.get_session("nonexistent") is None
def test_remove_session(self):
mgr = SessionManager()
session_id, _ = mgr.create_session()
mgr.remove_session(session_id)
assert mgr.get_session(session_id) is None
assert mgr.active_count == 0
def test_max_sessions_enforced(self):
mgr = SessionManager(max_sessions=3, ttl_seconds=60)
for _ in range(5):
mgr.create_session()
assert mgr.active_count <= 3
def test_multiple_sessions_independent(self):
mgr = SessionManager()
sid1, env1 = mgr.create_session()
sid2, env2 = mgr.create_session()
env1.reset(task_id="easy")
env2.reset(task_id="hard")
assert env1.get_state().current_task == "easy"
assert env2.get_state().current_task == "hard"
|