# Spaces: Running (appears to be a hosting-page status banner captured with the file; not manifest data)
# Manifest metadata: environment identity, summary, authorship and discovery tags.
name: sql-debug-env
# Quoted so the version is always read as a string, never as a number.
version: "0.1.0"
# Folded scalar: the lines below are joined with spaces into one paragraph.
description: >
  A reinforcement learning environment for training AI agents to debug SQL queries.
  Agents receive broken SQL queries against a live SQLite database and must fix them
  through iterative actions: submitting queries, inspecting schemas, and analyzing errors.
  Models a real-world task performed daily by data analysts, engineers, and scientists.
author: md-ayan
license: apache-2.0
tags:
  - openenv
  - sql
  - debugging
  - data-engineering
  - real-world
  - analytics
# Task catalogue: one entry per debugging scenario.
# Each entry carries a stable id, a display name, a difficulty label,
# a per-episode step budget (max_steps) and a one-line description.
tasks:
  - id: easy_syntax_fix
    name: "Top Customers by Revenue — Syntax Error Fix"
    difficulty: easy
    max_steps: 10
    description: "Fix 2 syntax/reference bugs in a customer analytics query"
  - id: medium_logic_fix
    name: "Department Headcount Report — Logic Error Fix"
    difficulty: medium
    max_steps: 20
    description: "Fix JOIN type, WHERE clause placement, and aggregation scope bugs"
  - id: hard_multi_bug
    name: "SaaS Cohort Activation Report — Multi-Bug Fix"
    difficulty: hard
    max_steps: 30
    description: "Fix 5 bugs: correlated subquery, window function, duplicate rows, date logic, CTE scope"
  # NOTE(review): the id is prefixed "hard_" but the difficulty is "expert",
  # and max_steps (12) is lower than the hard task's 30 — confirm both are intended.
  - id: hard_finance_explosion
    name: "Financial Cartesian Explosion Fix"
    difficulty: expert
    max_steps: 12
    description: "Fix fan-trap (cartesian explosion) revenue multiplication via pre-aggregation"
# HTTP API: the base URL plus the path of each endpoint.
# The "tasks" key here is an endpoint path nested under "api"; it is distinct
# from the top-level "tasks" catalogue.
api:
  base_url: "https://md896-sql-debug-env.hf.space"
  reset: "/reset"
  step: "/step"
  state: "/state"
  health: "/health"
  tasks: "/tasks"
# Observation space: the named fields of each observation and their declared types.
# The "*_or_null" type labels mark fields that are declared as nullable.
observation_space:
  type: structured
  fields:
    - name: task_description
      type: string
    - name: original_query
      type: string
    - name: current_query
      type: string_or_null
    - name: last_query_result
      type: object_or_null
    - name: steps_taken
      type: integer
    - name: current_score
      type: float
# Action space: the actions an agent may issue.
# "required_fields" lists the payload fields an action needs; actions without
# that key declare no required fields.
action_space:
  type: structured
  actions:
    - id: submit_query
      description: "Submit a fixed SQL query for evaluation"
      required_fields: [query]
    - id: inspect_schema
      description: "Get database schema (free action)"
    - id: inspect_error
      description: "Get last error details (free action)"
    - id: inspect_sample
      description: "Get 3 sample rows from a table"
      required_fields: [table_name]
    - id: reset_query
      description: "Reset to original broken query (penalty: -0.05)"
# Reward specification: the overall reward range and the per-component ranges.
# NOTE(review): the four positive components can sum to 1.0 (0.6 + 0.2 + 0.1 + 0.1)
# while the overall range is [0.001, 0.999]; presumably the total is clamped —
# confirm against the grader implementation.
reward:
  range: [0.001, 0.999]
  components:
    - name: correctness
      range: [0.0, 0.6]
      description: "Row-level match vs expected output"
    - name: efficiency
      range: [0.0, 0.2]
      description: "Bonus for solving with fewer steps"
    - name: syntax_progress
      range: [0.0, 0.1]
      description: "Valid SQL even if wrong content"
    - name: schema_bonus
      range: [0.0, 0.1]
      description: "Correct table/column references"
    # Declared as a magnitude (non-negative); per its description it is a deduction.
    - name: penalty
      range: [0.0, 0.2]
      description: "Penalty deduction magnitude for bad actions / urgency"
# Runtime limits and hardware sizing for the deployed environment.
runtime:
  max_concurrent_sessions: 64
  episode_timeout_seconds: 300
  # NOTE(review): nesting of machine_requirements under runtime is reconstructed;
  # the flattened source does not show whether it is a child of runtime or a
  # top-level key — confirm against the consumer's schema.
  machine_requirements:
    vcpu: 2
    memory_gb: 8