---
# Environment manifest: identity and descriptive metadata.
name: sql-debug-env
# Quoted so the version is always read as a string, never as a number.
version: "0.1.0"
# Folded scalar: the indented lines below are joined with spaces into one
# paragraph (a single trailing newline is kept).
description: >
  A reinforcement learning environment for training AI agents to debug SQL queries.
  Agents receive broken SQL queries against a live SQLite database and must fix them
  through iterative actions: submitting queries, inspecting schemas, and analyzing errors.
  Models a real-world task performed daily by data analysts, engineers, and scientists.
author: md-ayan
license: apache-2.0
# Free-form labels attached to this environment.
tags:
  - openenv
  - sql
  - debugging
  - data-engineering
  - real-world
  - analytics
# Task catalogue. Each entry carries an id, a display name, a difficulty
# label, a step budget (max_steps), and a short description of the bugs to fix.
tasks:
  - id: easy_syntax_fix
    name: "Top Customers by Revenue — Syntax Error Fix"
    difficulty: easy
    max_steps: 10
    description: "Fix 2 syntax/reference bugs in a customer analytics query"

  - id: medium_logic_fix
    name: "Department Headcount Report — Logic Error Fix"
    difficulty: medium
    max_steps: 20
    description: "Fix JOIN type, WHERE clause placement, and aggregation scope bugs"

  - id: hard_multi_bug
    name: "SaaS Cohort Activation Report — Multi-Bug Fix"
    difficulty: hard
    max_steps: 30
    description: "Fix 5 bugs: correlated subquery, window function, duplicate rows, date logic, CTE scope"

  # NOTE(review): the id is prefixed "hard_" while difficulty is "expert", and
  # the step budget (12) is lower than the hard task's (30) — confirm both are
  # intentional. The id is left unchanged because consumers may reference it.
  - id: hard_finance_explosion
    name: "Financial Cartesian Explosion Fix"
    difficulty: expert
    max_steps: 12
    description: "Fix fan-trap (cartesian explosion) revenue multiplication via pre-aggregation"
# HTTP API location. The endpoint values are paths; presumably they are
# resolved relative to base_url — confirm against the client.
api:
  base_url: "https://md896-sql-debug-env.hf.space"
  reset: "/reset"
  step: "/step"
  state: "/state"
  health: "/health"
  tasks: "/tasks"
# Shape of the observation: a structured record with the named, typed fields
# listed below. Types ending in "_or_null" mark fields that may be null.
observation_space:
  type: structured
  fields:
    - name: task_description
      type: string
    - name: original_query
      type: string
    - name: current_query
      type: string_or_null
    - name: last_query_result
      type: object_or_null
    - name: steps_taken
      type: integer
    - name: current_score
      type: float
# Actions available to the agent. required_fields lists the payload keys an
# action needs; actions without it declare no required fields.
action_space:
  type: structured
  actions:
    - id: submit_query
      description: "Submit a fixed SQL query for evaluation"
      required_fields: [query]
    - id: inspect_schema
      description: "Get database schema (free action)"
    - id: inspect_error
      description: "Get last error details (free action)"
    - id: inspect_sample
      description: "Get 3 sample rows from a table"
      required_fields: [table_name]
    - id: reset_query
      description: "Reset to original broken query (penalty: -0.05)"
# Reward specification: the overall range plus the per-component ranges.
# The positive component maxima sum to 1.0 (0.6 + 0.2 + 0.1 + 0.1), while the
# overall range is [0.001, 0.999]; presumably the total is clamped — confirm
# against the scoring code.
reward:
  range: [0.001, 0.999]
  components:
    - name: correctness
      range: [0.0, 0.6]
      description: "Row-level match vs expected output"
    - name: efficiency
      range: [0.0, 0.2]
      description: "Bonus for solving with fewer steps"
    - name: syntax_progress
      range: [0.0, 0.1]
      description: "Valid SQL even if wrong content"
    - name: schema_bonus
      range: [0.0, 0.1]
      description: "Correct table/column references"
    # Expressed as a magnitude (see description), i.e. a deduction.
    - name: penalty
      range: [0.0, 0.2]
      description: "Penalty deduction magnitude for bad actions / urgency"
# Runtime limits and resource sizing.
# NOTE(review): the original indentation was lost; machine_requirements is
# assumed to nest under runtime — confirm against the consumer's schema and
# move it to the top level if that is what it expects.
runtime:
  max_concurrent_sessions: 64
  episode_timeout_seconds: 300
  machine_requirements:
    vcpu: 2
    memory_gb: 8