File size: 1,111 Bytes
dce68a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c22bf49
dce68a7
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
---
# Manifest header: identifies the environment and how it is served.
spec_version: 1
name: data_cleaning_env
type: space
runtime: fastapi
# "<module path>:<attribute>" locator for the application object.
# NOTE(review): presumably resolved by the fastapi runtime — confirm.
app: server.app:app
port: 7860
# Folded scalar: the lines below are joined with single spaces and the
# trailing newline is stripped, so the value is one long sentence pair.
description: >-
  RL environment for interactive tabular data cleaning and preparation.
  Agent must identify and fix data quality issues including missing values,
  duplicates, wrong dtypes, inconsistent categories, and feature creation.
# Kept quoted so the version is always read as a string.
version: '1.0.0'

# Declares the observation as a dict and names the fields it contains.
observation_space:
  type: dict
  # Folded scalar: parsed as a single line with no trailing newline.
  description: >-
    Contains data_preview, columns, pending_issues, resolved_issues,
    action_history, quality_score, steps_remaining

# Declares an action as a dict; the description names its three fields.
action_space:
  type: dict
  description: 'Action with action_type, column, and params fields'

# Two-element list of floats.
# NOTE(review): presumably [min, max] reward bounds — confirm whether the
# endpoints are inclusive and why they stop short of 0 and 1.
reward_range: [0.01, 0.99]

# Task catalogue: three entries, one each for the easy, medium and hard
# difficulty labels. Descriptions use folded scalars, so each parses as a
# single line with no trailing newline.
tasks:
  - name: basic_cleaning
    description: >-
      Easy: fill missing values in a small dataset (20 rows, 2 issues)
    difficulty: easy
  - name: moderate_cleaning
    description: >-
      Medium: handle missing values, duplicates, and wrong dtypes
      (50 rows, 5 issues in practice)
    difficulty: medium
  - name: full_pipeline
    description: >-
      Hard: full cleaning pipeline with category normalization and
      feature creation (100 rows, 10 issues in practice)
    difficulty: hard