Spaces:
Sleeping
Sleeping
| """Tests for the core environment logic (openenv Environment interface).""" | |
| from __future__ import annotations | |
| import pytest | |
| from data_cleaning_env.models import ActionType, CleaningAction, FillStrategy | |
| from data_cleaning_env.server.environment import DataCleaningEnvironment | |
| class TestEnvironmentReset: | |
| def test_reset_returns_observation(self) -> None: | |
| env = DataCleaningEnvironment() | |
| obs = env.reset(task="easy") | |
| assert obs.task == "easy" | |
| assert obs.step == 0 | |
| assert obs.done is False | |
| def test_unknown_task_raises(self) -> None: | |
| env = DataCleaningEnvironment() | |
| with pytest.raises(ValueError, match="Unknown task"): | |
| env.reset(task="invalid") | |
| def test_episode_stores_dirty_snapshot(self) -> None: | |
| env = DataCleaningEnvironment() | |
| obs = env.reset(task="easy") | |
| ep = env._ep | |
| assert "dirty_df" in ep | |
| assert "initial_quality" in ep | |
| class TestEnvironmentStep: | |
| def test_fill_missing_increases_reward(self) -> None: | |
| env = DataCleaningEnvironment() | |
| obs = env.reset(task="easy") | |
| # Find a column with missing values | |
| col = None | |
| for c, issues in obs.column_issues.items(): | |
| if isinstance(issues, dict): | |
| if issues.get("missing_count", 0) > 0: | |
| col = c | |
| break | |
| elif hasattr(issues, "missing_count") and issues.missing_count > 0: | |
| col = c | |
| break | |
| assert col is not None, "Easy task should have missing values" | |
| action = CleaningAction( | |
| action_type=ActionType.fill_missing, | |
| column=col, | |
| strategy=FillStrategy.median, | |
| ) | |
| obs2 = env.step(action) | |
| assert obs2.step == 1 | |
| def test_done_ends_episode(self) -> None: | |
| env = DataCleaningEnvironment() | |
| env.reset(task="easy") | |
| action = CleaningAction(action_type=ActionType.done) | |
| obs2 = env.step(action) | |
| assert obs2.done is True | |
| def test_step_after_done_raises(self) -> None: | |
| env = DataCleaningEnvironment() | |
| env.reset(task="easy") | |
| env.step(CleaningAction(action_type=ActionType.done)) | |
| with pytest.raises(ValueError, match="already done"): | |
| env.step(CleaningAction(action_type=ActionType.done)) | |
| def test_step_without_reset_raises(self) -> None: | |
| env = DataCleaningEnvironment() | |
| with pytest.raises(ValueError, match="No active episode"): | |
| env.step(CleaningAction(action_type=ActionType.done)) | |
| def test_profile_column_is_free(self) -> None: | |
| env = DataCleaningEnvironment() | |
| obs = env.reset(task="easy") | |
| col = obs.columns[0] | |
| action = CleaningAction(action_type=ActionType.profile_column, column=col) | |
| obs2 = env.step(action) | |
| assert obs2.profile_result is not None | |
| assert env._ep["cost_used"] == 0.0 | |
| def test_observation_columns_reflect_current_state(self) -> None: | |
| """After a schema-changing action, obs.columns should reflect | |
| the current DataFrame, not the clean reference.""" | |
| env = DataCleaningEnvironment() | |
| obs = env.reset(task="easy") | |
| col = obs.columns[0] | |
| new_name = col + "_renamed" | |
| action = CleaningAction( | |
| action_type=ActionType.rename_column, | |
| column=col, | |
| new_name=new_name, | |
| ) | |
| obs2 = env.step(action) | |
| assert new_name in obs2.columns | |
| assert col not in obs2.columns | |