mlops-openenv/tests/test_environment.py
Rockerleo's picture
Upload folder using huggingface_hub
78ea1a9 verified
"""Tests for MLOpsEnvironment — core episode flow, state management, and step logic."""
import pytest
from mlops_environment import MLOpsEnvironment, TASK_MAX_STEPS
from models import MLOpsAction
class TestReset:
    """Verify that reset() hands back a fresh, well-formed starting state."""

    @pytest.mark.parametrize("task_id", ["easy", "medium", "hard"])
    def test_reset_returns_valid_observation(self, task_id):
        """The first observation of each task reports an untouched episode."""
        initial = MLOpsEnvironment(task_id=task_id).reset(seed=42)

        assert initial.task_id == task_id
        assert initial.step_count == 0
        assert initial.max_steps == TASK_MAX_STEPS[task_id]
        assert initial.done is False
        assert len(initial.available_artifacts) == 6
        assert initial.artifacts_read == []

    def test_reset_with_same_seed_is_deterministic(self):
        """Two independent environments seeded alike agree on run id and bug type."""
        left_env = MLOpsEnvironment(task_id="easy")
        right_env = MLOpsEnvironment(task_id="easy")

        left_obs = left_env.reset(seed=123)
        right_obs = right_env.reset(seed=123)

        assert left_obs.run_id == right_obs.run_id
        assert left_env.bug_type == right_env.bug_type

    def test_reset_with_different_seeds_varies(self):
        """Re-seeding the same environment yields a different run id."""
        env = MLOpsEnvironment(task_id="easy")

        # Capture the first run id before the second reset replaces the episode.
        earlier_run_id = env.reset(seed=1).run_id
        later_run_id = env.reset(seed=999).run_id

        assert later_run_id != earlier_run_id

    def test_reset_clears_previous_episode(self):
        """State accumulated by a step is wiped by the next reset()."""
        env = MLOpsEnvironment(task_id="easy")
        env.reset(seed=42)

        # Dirty the episode with a single artifact read.
        env.step(MLOpsAction(action_type="read_config"))
        assert len(env._artifacts_read) == 1

        env.reset(seed=42)
        assert len(env._artifacts_read) == 0
        assert env._step_count == 0
class TestStepActions:
    """Each action type should return expected structure and reward.

    Rewards are floats, so they are compared with ``pytest.approx`` rather
    than ``==``; this keeps the tests stable if the environment ever derives
    a reward arithmetically instead of returning a literal constant.
    """

    @pytest.fixture
    def env(self):
        """Provide an "easy" environment that has already been reset with seed 42."""
        env = MLOpsEnvironment(task_id="easy")
        env.reset(seed=42)
        return env

    def test_read_config(self, env):
        """Reading the config is rewarded and records config.yaml as read."""
        obs, reward, done, _ = env.step(MLOpsAction(action_type="read_config"))
        assert reward == pytest.approx(0.02)
        assert done is False
        assert "config.yaml" in obs.artifacts_read
        assert "content" in obs.last_action_result

    def test_read_logs(self, env):
        """Reading the logs is rewarded and records train.log as read."""
        obs, reward, _, _ = env.step(MLOpsAction(action_type="read_logs"))
        assert reward == pytest.approx(0.02)
        assert "train.log" in obs.artifacts_read

    def test_read_logs_with_filter(self, env):
        """A filtered log read returns either epoch lines or a no-match notice."""
        obs, reward, _, _ = env.step(
            MLOpsAction(action_type="read_logs", log_filter="epoch:1-3")
        )
        assert reward == pytest.approx(0.02)
        content = obs.last_action_result.get("content", "")
        assert "EPOCH" in content or "No log lines" in content

    def test_check_dataset_stats(self, env):
        """Checking dataset stats is rewarded and records dataset_stats.json."""
        obs, reward, _, _ = env.step(MLOpsAction(action_type="check_dataset_stats"))
        assert reward == pytest.approx(0.02)
        assert "dataset_stats.json" in obs.artifacts_read

    def test_inspect_preprocessing(self, env):
        """Inspecting preprocessing is rewarded and records preprocessing.py."""
        obs, reward, _, _ = env.step(MLOpsAction(action_type="inspect_preprocessing"))
        assert reward == pytest.approx(0.02)
        assert "preprocessing.py" in obs.artifacts_read

    def test_read_eval_results(self, env):
        """Reading eval results is rewarded and records eval_results.json."""
        obs, reward, _, _ = env.step(MLOpsAction(action_type="read_eval_results"))
        assert reward == pytest.approx(0.02)
        assert "eval_results.json" in obs.artifacts_read

    def test_run_sanity_check(self, env):
        """A loss-trajectory sanity check succeeds and reports its result."""
        obs, reward, _, _ = env.step(
            MLOpsAction(action_type="run_sanity_check", sanity_check_type="loss_trajectory")
        )
        assert reward == pytest.approx(0.01)
        assert obs.last_action_result["status"] == "ok"
        assert "sanity_check" in obs.last_action_result

    def test_query_artifact(self, env):
        """Querying a field of an artifact that was read first returns status ok."""
        env.step(MLOpsAction(action_type="read_config"))
        obs, _, _, _ = env.step(
            MLOpsAction(
                action_type="query_artifact",
                artifact_name="config.yaml",
                field_path="model.architecture",
            )
        )
        assert obs.last_action_result["status"] == "ok"

    def test_duplicate_read_penalty(self, env):
        """Reading the same artifact a second time yields a negative reward."""
        env.step(MLOpsAction(action_type="read_config"))
        _, reward, _, _ = env.step(MLOpsAction(action_type="read_config"))
        assert reward == pytest.approx(-0.02)

    def test_step_count_increments(self, env):
        """Every step advances the environment's internal step counter by one."""
        env.step(MLOpsAction(action_type="read_config"))
        env.step(MLOpsAction(action_type="read_logs"))
        assert env._step_count == 2

    def test_done_after_submit(self, env):
        """Submitting a diagnosis terminates the episode."""
        _, _, done, _ = env.step(MLOpsAction(action_type="submit_diagnosis"))
        assert done is True

    def test_step_after_done_returns_done(self, env):
        """Stepping a finished episode stays done and still reports a score."""
        env.step(MLOpsAction(action_type="submit_diagnosis"))
        _, reward, done, info = env.step(MLOpsAction(action_type="read_config"))
        assert done is True
        assert reward == pytest.approx(0.01)  # clamped minimum
        assert "score" in info
class TestEpisodeBoundaries:
    """Episode should terminate correctly on submit, timeout, and re-step."""

    def test_timeout_at_max_steps(self):
        """Exhausting the step budget without submitting ends the episode with a score."""
        env = MLOpsEnvironment(task_id="easy")
        env.reset(seed=42)

        # Pre-bind the loop outputs so that an empty step budget fails the
        # assertions below cleanly instead of raising UnboundLocalError.
        done, info = False, {}
        for _ in range(TASK_MAX_STEPS["easy"]):
            _obs, _reward, done, info = env.step(MLOpsAction(action_type="read_config"))
            if done:
                break

        assert done is True
        assert "score" in info

    def test_submit_ends_episode(self):
        """Submitting after one read ends the episode and reports score details."""
        env = MLOpsEnvironment(task_id="medium")
        env.reset(seed=42)
        env.step(MLOpsAction(action_type="read_logs"))

        _obs, _reward, done, info = env.step(MLOpsAction(action_type="submit_diagnosis"))

        assert done is True
        assert "score" in info
        assert "breakdown" in info