# data-cleaning-openenv / openenv.yaml
# Manas281's picture
# Upload 7 files
# d7cc083 verified
---
# Environment manifest: identity and authorship metadata.
name: data-cleaning-env
# Quoted so the version stays a string rather than being type-inferred.
version: "1.0.0"
# Literal block scalar: line breaks in the text below are preserved.
description: |
  A reinforcement learning environment for data cleaning tasks.
  An AI agent receives messy datasets and must apply the correct
  cleaning operations. Supports 3 tasks of increasing difficulty:
  null removal (easy), date standardization (medium), and
  outlier detection (hard).
author: "Soham Sandeep Kamathi"
email: "2023.soham.kamathi@ves.ac.in"
# Task catalogue: one entry per cleaning task, listed from easy to hard.
# Every task declares the same score bounds (min_score 0.0, max_score 1.0).
tasks:
  - id: 1
    name: remove_nulls
    difficulty: easy
    description: "Remove rows with null values from the dataset"
    min_score: 0.0
    max_score: 1.0
  - id: 2
    name: fix_dates
    difficulty: medium
    description: "Standardise inconsistent date formats to YYYY-MM-DD"
    min_score: 0.0
    max_score: 1.0
  - id: 3
    name: remove_outliers
    difficulty: hard
    description: "Detect and remove statistical outliers using IQR method"
    min_score: 0.0
    max_score: 1.0
# HTTP API exposed by the environment: one entry per route, each with its
# path, HTTP method, and a short description of what it returns.
api:
  endpoints:
    - path: /reset
      method: POST
      description: "Start a new episode, returns DatasetObservation"
    - path: /step
      method: POST
      description: "Take a cleaning action, returns observation + reward"
    - path: /state
      method: GET
      description: "Get current episode metadata"
    - path: /tasks
      method: GET
      description: "List all available tasks"
# Shape of what the agent observes.
# NOTE(review): kept as a top-level key; the flattened source does not show
# whether it belongs under `api` - confirm against the consumer's schema.
observation_space:
  type: object
  description: "DatasetObservation with preview, null_count, date_errors, outlier_count"
# Shape of the action the agent submits.
# NOTE(review): kept as a top-level key; the flattened source does not show
# whether it belongs under `api` - confirm against the consumer's schema.
action_space:
  type: object
  description: "CleaningAction with action_type and optional column"
# Reward signal: a continuous value within the [0.0, 1.0] range below, which
# matches the min_score/max_score bounds declared for each task.
# NOTE(review): kept as a top-level key; the flattened source does not show
# whether it belongs under `api` - confirm against the consumer's schema.
reward:
  type: continuous
  range: [0.0, 1.0]
  description: "Graded score based on cleaning quality"
# Resource requirements for running the environment; units follow the key
# names (CPU count, memory in GB, runtime limit in minutes).
infrastructure:
  min_cpu: 2
  min_memory_gb: 4
  requires_gpu: false
  runtime_limit_minutes: 20