# NOTE(review): the three lines "Spaces:", "Sleeping", "Sleeping" that preceded
# this manifest look like hosting-page status text captured with the file, not
# configuration. They are kept only in this comment - confirm and remove.
#
# Manifest for the data-cleaning reinforcement learning environment: metadata,
# the task list, the HTTP endpoints, the observation/action/reward descriptions
# and the resource requirements.
#
# NOTE(review): the captured copy had lost all indentation. Nesting below was
# rebuilt from key names and list markers; in particular observation_space,
# action_space, reward and infrastructure are assumed to be top-level keys
# (not children of `api`) - verify against the consumer's schema.
---
name: data-cleaning-env
# Quoted so the version stays a string rather than being retyped by a parser.
version: "1.0.0"
# Literal block scalar: line breaks in the text below are preserved.
description: |
  A reinforcement learning environment for data cleaning tasks.
  An AI agent receives messy datasets and must apply the correct
  cleaning operations. Supports 3 tasks of increasing difficulty:
  null removal (easy), date standardization (medium), and
  outlier detection (hard).
author: "Soham Sandeep Kamathi"
email: "2023.soham.kamathi@ves.ac.in"

# Tasks in increasing difficulty; each declares the same 0.0-1.0 score bounds.
tasks:
  - id: 1
    name: remove_nulls
    difficulty: easy
    description: "Remove rows with null values from the dataset"
    min_score: 0.0
    max_score: 1.0
  - id: 2
    name: fix_dates
    difficulty: medium
    description: "Standardise inconsistent date formats to YYYY-MM-DD"
    min_score: 0.0
    max_score: 1.0
  - id: 3
    name: remove_outliers
    difficulty: hard
    description: "Detect and remove statistical outliers using IQR method"
    min_score: 0.0
    max_score: 1.0

# HTTP endpoints exposed by the environment.
api:
  endpoints:
    - path: /reset
      method: POST
      description: "Start a new episode, returns DatasetObservation"
    - path: /step
      method: POST
      description: "Take a cleaning action, returns observation + reward"
    - path: /state
      method: GET
      description: "Get current episode metadata"
    - path: /tasks
      method: GET
      description: "List all available tasks"

observation_space:
  type: object
  description: "DatasetObservation with preview, null_count, date_errors, outlier_count"

action_space:
  type: object
  description: "CleaningAction with action_type and optional column"

# Reward range matches the min_score/max_score bounds declared on every task.
reward:
  type: continuous
  range: [0.0, 1.0]
  description: "Graded score based on cleaning quality"

# Resource requirements declared for running the environment.
infrastructure:
  min_cpu: 2
  min_memory_gb: 4
  requires_gpu: false
  runtime_limit_minutes: 20