File size: 1,928 Bytes
7a14011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Tests for Pydantic models and enum definitions."""

from __future__ import annotations

from data_cleaning_env.models import ActionType, CleaningAction, Observation


class TestActionType:
    """Membership checks for the ``ActionType`` enum."""

    def test_has_16_members(self) -> None:
        """``len(ActionType)`` must equal 16."""
        member_count = len(ActionType)
        assert member_count == 16

    def test_all_expected_actions_present(self) -> None:
        """The set of member ``.value`` strings must match the expected names exactly."""
        found = {member.value for member in ActionType}
        wanted = {
            "fill_missing",
            "drop_duplicates",
            "fix_type",
            "normalize",
            "drop_outliers",
            "fix_schema_violation",
            "done",
            "rename_column",
            "cast_datetime",
            "deduplicate_fuzzy",
            "split_column",
            "merge_columns",
            "fix_format_regex",
            "standardize_categories",
            "undo",
            "profile_column",
        }
        # Equality (not subset) so both missing and unexpected values fail.
        assert found == wanted


class TestCleaningAction:
    """Construction checks for the ``CleaningAction`` model."""

    def test_fill_missing_action(self) -> None:
        """Build a fill_missing action and read back its type and column."""
        params = {
            "action_type": ActionType.fill_missing,
            "column": "age",
            "strategy": "median",
        }
        built = CleaningAction(**params)
        assert built.action_type == ActionType.fill_missing
        assert built.column == "age"

    def test_done_action_minimal(self) -> None:
        """Build a done action from ``action_type`` alone; ``column`` is then None."""
        built = CleaningAction(action_type=ActionType.done)
        assert built.action_type == ActionType.done
        assert built.column is None


class TestObservation:
    """Field-presence checks for the ``Observation`` model."""

    def test_required_fields(self) -> None:
        """Every expected name must appear among ``Observation.model_fields``.

        Only presence is checked; extra fields on the model are allowed, and
        this does not verify whether a field is mandatory or has a default.
        """
        wanted = {
            "task",
            "step",
            "max_steps",
            "columns",
            "column_issues",
            "column_stats",
            "reward",
            "done",
            "sample_rows",
            "action_history",
            "budget_remaining",
            "profile_result",
        }
        declared = set(Observation.model_fields.keys())
        # An empty difference means each wanted name is declared on the model.
        absent = wanted - declared
        assert not absent