---
# Manifest for the data-quality-env RL environment: package metadata, task
# catalogue, action/observation spaces, reward design and HTTP endpoints.
#
# NOTE(review): the nesting below was reconstructed from the key names after
# the original indentation was lost; verify it against the consumer's schema.

name: data-quality-env
version: "2.0.0"
description: >
  RL environment where an AI agent acts as a data quality auditor.
  Multi-table, adversarial injection, budget-constrained exploration,
  confidence-calibrated Brier grading, and post-audit fix verification loop.
author: ""
license: MIT

tags:
  - openenv
  - data-quality
  - sql
  - rl-environment
  - multi-table
  - adversarial

# Task catalogue, ordered by increasing difficulty. Every task shares the same
# step budget (max_steps: 12); expected_baseline_score drops as tasks get
# harder.
tasks:
  - id: 1
    name: null_and_duplicate_detection
    difficulty: easy
    max_steps: 12
    description: "Find real nulls, disguised nulls (stored as 'N/A'/'NULL'), exact duplicates, and near-duplicates in a customers table."
    expected_baseline_score: 0.82
  - id: 2
    name: schema_violation_repair
    difficulty: medium
    max_steps: 12
    description: "Detect type violations, format violations, range violations, and unparseable values in an orders table."
    expected_baseline_score: 0.61
  - id: 3
    name: silent_data_drift_detection
    difficulty: hard
    max_steps: 12
    description: "Compare two transaction snapshots. Detect mean shifts, new category values, and referential drift — nothing is labelled wrong."
    expected_baseline_score: 0.34
  - id: 4
    name: multi_table_relational_audit
    difficulty: expert
    max_steps: 12
    description: "Audit 3 joined tables (customers, orders, line_items). Find orphaned FKs, temporal violations, and aggregate mismatches using JOIN queries."
    expected_baseline_score: 0.19

# Actions are sent as JSON. `query` and `submit_report` belong to the audit
# phase; `fix_sql` is described as post-audit, after submit_report unlocks the
# fix phase.
action_space:
  type: json
  actions:
    - name: query
      description: "Execute a SELECT query. Costs 1 query credit. Blocked: DROP/DELETE/UPDATE/CREATE."
      fields: {sql: string}
    - name: submit_report
      description: "Submit the structured AuditReport. Triggers grading. Unlocks fix phase."
      fields: {report: AuditReport}
    - name: fix_sql
      description: "Post-audit: submit corrective UPDATE SQL. Earns fix bonus up to +0.25."
      fields: {sql: string}

# Observation fields. Values are written as human-readable type descriptions.
observation_space:
  fields:
    task_id: int
    task_description: string
    tables: "dict[table_name -> dict[col -> dtype]]"
    row_counts: "dict[table_name -> int]"
    step: int
    max_steps: int
    query_credits_remaining: int
    phase: "audit | fix"
    last_query_result: "list[dict] | null (max 50 rows)"
    last_action_error: "string | null"
    last_fix_score: "float | null"

# NOTE(review): 1.25 equals audit_score max (1.0) + fix_bonus max (0.25);
# budget_bonus (+0.10) and the per-query rewards are not obviously covered by
# this upper bound — confirm whether the total is clamped.
# NOTE(review): placed at top level by assumption — confirm it does not belong
# under observation_space.
reward_range: [0.0, 1.25]

reward_design:
  audit_score: "0.0–1.0, Brier-adjusted per finding confidence"
  valid_query_no_signal: "+0.01 for syntactically valid exploratory SQL that returns no obvious issue signal"
  valid_query_finds_issue: "+0.1 for valid SQL that surfaces NULLs, duplicates, or other clear audit evidence"
  budget_bonus: "up to +0.10 for early report submission"
  fix_bonus: "up to +0.25 for correct fix_sql repairs"
  invalid_sql_penalty: 0.0

# HTTP endpoints, written as "<METHOD> <path> <payload shape>".
api:
  reset: "POST /reset {task_id: int, seed: int}"
  step: "POST /step {action: Action}"
  state: "GET /state"
  health: "GET /health"