# Page header captured with the file: Spaces: Sleeping; File size: 5,697 Bytes; 78ea1a9
# (the file viewer's line-number gutter, 1-149, is omitted here).
"""Tests for MLOpsEnvironment — core episode flow, state management, and step logic."""
import pytest
from mlops_environment import MLOpsEnvironment, TASK_MAX_STEPS
from models import MLOpsAction
class TestReset:
    """Checks that reset() yields a clean, valid starting state."""

    @pytest.mark.parametrize("task_id", ["easy", "medium", "hard"])
    def test_reset_returns_valid_observation(self, task_id):
        """A fresh reset reports the task, zero steps, and nothing read yet."""
        environment = MLOpsEnvironment(task_id=task_id)
        initial_obs = environment.reset(seed=42)

        # Identity and step-budget fields of the first observation.
        assert initial_obs.task_id == task_id
        assert initial_obs.step_count == 0
        assert initial_obs.max_steps == TASK_MAX_STEPS[task_id]
        assert initial_obs.done is False

        # Six artifacts are offered, and none has been read so far.
        assert len(initial_obs.available_artifacts) == 6
        assert initial_obs.artifacts_read == []

    def test_reset_with_same_seed_is_deterministic(self):
        """Two environments reset with one seed agree on run id and bug type."""
        # Build both environments first, then reset them in the same order.
        first_env, second_env = (
            MLOpsEnvironment(task_id="easy") for _ in range(2)
        )
        first_obs = first_env.reset(seed=123)
        second_obs = second_env.reset(seed=123)

        assert first_obs.run_id == second_obs.run_id
        assert first_env.bug_type == second_env.bug_type

    def test_reset_with_different_seeds_varies(self):
        """Resetting one environment with another seed changes the run id."""
        environment = MLOpsEnvironment(task_id="easy")

        # Capture the first run id before the second reset happens.
        earlier_run_id = environment.reset(seed=1).run_id
        later_obs = environment.reset(seed=999)

        assert later_obs.run_id != earlier_run_id

    def test_reset_clears_previous_episode(self):
        """reset() discards the read-artifact list and the step counter."""
        environment = MLOpsEnvironment(task_id="easy")

        # Play one step so there is episode state to be cleared.
        environment.reset(seed=42)
        environment.step(MLOpsAction(action_type="read_config"))
        assert len(environment._artifacts_read) == 1

        environment.reset(seed=42)
        assert len(environment._artifacts_read) == 0
        assert environment._step_count == 0
class TestStepActions:
    """Each action type should return the expected structure and reward.

    Rewards are floats, so they are compared with ``pytest.approx`` instead
    of ``==``; the expected values themselves are unchanged.
    """

    @pytest.fixture
    def env(self):
        """Provide an "easy" environment that has been reset with seed 42."""
        env = MLOpsEnvironment(task_id="easy")
        env.reset(seed=42)
        return env

    def test_read_config(self, env):
        """read_config gives 0.02, keeps the episode open, and returns content."""
        obs, reward, done, _info = env.step(MLOpsAction(action_type="read_config"))
        assert reward == pytest.approx(0.02)
        assert done is False
        assert "config.yaml" in obs.artifacts_read
        assert "content" in obs.last_action_result

    def test_read_logs(self, env):
        """read_logs gives 0.02 and records train.log as read."""
        obs, reward, _done, _info = env.step(MLOpsAction(action_type="read_logs"))
        assert reward == pytest.approx(0.02)
        assert "train.log" in obs.artifacts_read

    def test_read_logs_with_filter(self, env):
        """A filtered log read gives 0.02 and returns epoch lines or a no-match note."""
        obs, reward, _done, _info = env.step(
            MLOpsAction(action_type="read_logs", log_filter="epoch:1-3")
        )
        assert reward == pytest.approx(0.02)
        content = obs.last_action_result.get("content", "")
        # Either outcome is accepted: matching lines or the empty-result message.
        assert "EPOCH" in content or "No log lines" in content

    def test_check_dataset_stats(self, env):
        """check_dataset_stats gives 0.02 and records dataset_stats.json as read."""
        obs, reward, _done, _info = env.step(
            MLOpsAction(action_type="check_dataset_stats")
        )
        assert reward == pytest.approx(0.02)
        assert "dataset_stats.json" in obs.artifacts_read

    def test_inspect_preprocessing(self, env):
        """inspect_preprocessing gives 0.02 and records preprocessing.py as read."""
        obs, reward, _done, _info = env.step(
            MLOpsAction(action_type="inspect_preprocessing")
        )
        assert reward == pytest.approx(0.02)
        assert "preprocessing.py" in obs.artifacts_read

    def test_read_eval_results(self, env):
        """read_eval_results gives 0.02 and records eval_results.json as read."""
        obs, reward, _done, _info = env.step(
            MLOpsAction(action_type="read_eval_results")
        )
        assert reward == pytest.approx(0.02)
        assert "eval_results.json" in obs.artifacts_read

    def test_run_sanity_check(self, env):
        """A loss_trajectory sanity check gives 0.01 and reports status "ok"."""
        obs, reward, _done, _info = env.step(
            MLOpsAction(
                action_type="run_sanity_check",
                sanity_check_type="loss_trajectory",
            )
        )
        assert reward == pytest.approx(0.01)
        assert obs.last_action_result["status"] == "ok"
        assert "sanity_check" in obs.last_action_result

    def test_query_artifact(self, env):
        """Querying a field of an already-read artifact reports status "ok"."""
        # The config is read first, then one of its fields is queried.
        env.step(MLOpsAction(action_type="read_config"))
        obs, _reward, _done, _info = env.step(
            MLOpsAction(
                action_type="query_artifact",
                artifact_name="config.yaml",
                field_path="model.architecture",
            )
        )
        assert obs.last_action_result["status"] == "ok"

    def test_duplicate_read_penalty(self, env):
        """Reading the same artifact a second time gives -0.02."""
        env.step(MLOpsAction(action_type="read_config"))
        _obs, reward, _done, _info = env.step(MLOpsAction(action_type="read_config"))
        assert reward == pytest.approx(-0.02)

    def test_step_count_increments(self, env):
        """Two steps leave the internal step counter at 2."""
        env.step(MLOpsAction(action_type="read_config"))
        env.step(MLOpsAction(action_type="read_logs"))
        assert env._step_count == 2

    def test_done_after_submit(self, env):
        """submit_diagnosis ends the episode."""
        _obs, _reward, done, _info = env.step(
            MLOpsAction(action_type="submit_diagnosis")
        )
        assert done is True

    def test_step_after_done_returns_done(self, env):
        """Stepping after submission stays done, gives 0.01, and reports a score."""
        env.step(MLOpsAction(action_type="submit_diagnosis"))
        _obs, reward, done, info = env.step(MLOpsAction(action_type="read_config"))
        assert done is True
        assert reward == pytest.approx(0.01)  # clamped minimum
        assert "score" in info
class TestEpisodeBoundaries:
    """Episodes should end on submission and on exhausting the step budget."""

    def test_timeout_at_max_steps(self):
        """Stepping up to the "easy" step budget ends the episode with a score."""
        env = MLOpsEnvironment(task_id="easy")
        env.reset(seed=42)

        # Bind the loop results up front so that an empty step budget fails
        # the assertions below instead of raising NameError.
        done, info = False, {}
        for _ in range(TASK_MAX_STEPS["easy"]):
            _obs, _reward, done, info = env.step(
                MLOpsAction(action_type="read_config")
            )
            if done:
                break

        assert done is True
        assert "score" in info

    def test_submit_ends_episode(self):
        """Submitting on "medium" ends the episode with a score and a breakdown."""
        env = MLOpsEnvironment(task_id="medium")
        env.reset(seed=42)
        env.step(MLOpsAction(action_type="read_logs"))

        _obs, _reward, done, info = env.step(
            MLOpsAction(action_type="submit_diagnosis")
        )
        assert done is True
        assert "score" in info
        assert "breakdown" in info
|