File size: 1,367 Bytes
84ca609
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""Data models for the DataSage Cleaning Environment."""

from openenv.core.env_server.types import Action, Observation
from pydantic import Field
from typing import Optional


class CleaningAction(Action):
    """Action for the Cleaning environment - a data cleaning operation."""

    # NOTE(review): the class docstring and every `description` string are kept
    # verbatim; presumably they surface in a generated schema - confirm before
    # rewording any of them.

    # Required (Ellipsis default): the cleaning operation to apply. The
    # description string enumerates the operation names.
    operation: str = Field(
        default=...,
        description="Cleaning operation: fill_null|fix_type|remove_duplicate|standardize|trim|correct_typo",
    )

    # Required (Ellipsis default): name of the column the operation targets.
    column: str = Field(
        default=...,
        description="Target column name",
    )

    # Optional replacement value or rule; defaults to None when omitted.
    value: Optional[str] = Field(
        default=None,
        description="Replacement value or rule (e.g., 'median', 'mode', a specific value)",
    )

    # Extra parameters as a plain dict. `default_factory` builds a fresh empty
    # dict per instance rather than sharing one mutable default.
    params: dict = Field(
        default_factory=dict,
    )


class CleaningObservation(Observation):
    """Observation from the Cleaning environment - data quality state."""

    # Every field declared in this class carries a default, so none of them
    # has to be supplied explicitly. The `description` strings are kept
    # verbatim.

    # Domain label; the description lists hr, sales, pm and it_ops.
    domain: str = Field(
        default="",
        description="Current domain: hr|sales|pm|it_ops",
    )

    # Text rendering of the leading rows of the dataset.
    data_preview: str = Field(
        default="",
        description="First 5 rows as text table",
    )

    # Textual data-quality report.
    dq_report: str = Field(
        default="",
        description="Completeness, consistency, uniqueness breakdown",
    )

    # Aggregate data-quality score; starts at 0.0.
    dq_score: float = Field(
        default=0.0,
        description="Overall DQ score 0-1",
    )

    # Textual per-column summary.
    columns_info: str = Field(
        default="",
        description="Column names, types, null counts",
    )

    # Step counter and step limit; defaults are 0 and 15 respectively.
    step_number: int = Field(
        default=0,
    )
    max_steps: int = Field(
        default=15,
    )