File size: 3,108 Bytes
30cf758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d061422
 
 
 
 
 
30cf758
9b71d1b
30cf758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b71d1b
30cf758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Package metadata for the environment manifest.
name: sql-debug-env
# Quoted so the version is always read as a string, even if a future
# value happens to look numeric (e.g. 1.0).
version: '0.1.0'
# Folded scalar (">"): the wrapped lines below are joined with single spaces
# and the value ends with one trailing newline.
description: >
  A reinforcement learning environment for training AI agents to debug SQL
  queries. Agents receive broken SQL queries against a live SQLite database
  and must fix them through iterative actions: submitting queries, inspecting
  schemas, and analyzing errors. Models a real-world task performed daily by
  data analysts, engineers, and scientists.

author: md-ayan
license: apache-2.0

# Free-form labels attached to this environment.
tags:
  - openenv
  - sql
  - debugging
  - data-engineering
  - real-world
  - analytics

# Task catalogue: one entry per debugging scenario. Every entry carries an
# id, a display name, a difficulty label, a step budget (max_steps) and a
# short description of the bugs to fix.
tasks:
  # Easy tier: 2 syntax/reference bugs, 10-step budget.
  - id: easy_syntax_fix
    name: 'Top Customers by Revenue — Syntax Error Fix'
    difficulty: easy
    max_steps: 10
    description: 'Fix 2 syntax/reference bugs in a customer analytics query'

  # Medium tier: logic errors (JOIN type, WHERE placement, aggregation scope).
  - id: medium_logic_fix
    name: 'Department Headcount Report — Logic Error Fix'
    difficulty: medium
    max_steps: 20
    description: 'Fix JOIN type, WHERE clause placement, and aggregation scope bugs'

  # Hard tier: five distinct bugs in one query, 30-step budget.
  - id: hard_multi_bug
    name: 'SaaS Cohort Activation Report — Multi-Bug Fix'
    difficulty: hard
    max_steps: 30
    description: 'Fix 5 bugs: correlated subquery, window function, duplicate rows, date logic, CTE scope'

  # NOTE(review): the id is prefixed "hard_" while difficulty is "expert",
  # and the step budget (12) is lower than the medium and hard tasks —
  # confirm both are intentional before relying on either field.
  - id: hard_finance_explosion
    name: 'Financial Cartesian Explosion Fix'
    difficulty: expert
    max_steps: 12
    description: 'Fix fan-trap (cartesian explosion) revenue multiplication via pre-aggregation'

# HTTP API location. base_url is the deployment root; the remaining keys are
# endpoint paths (presumably resolved against base_url — confirm in the
# client).
api:
  base_url: 'https://md896-sql-debug-env.hf.space'
  reset: '/reset'
  step: '/step'
  state: '/state'
  health: '/health'
  tasks: '/tasks'

# Declares the fields of an observation; each entry gives a field name and
# its declared type.
observation_space:
  type: structured
  fields:
    - name: task_description
      type: string
    - name: original_query
      type: string
    # Nullable: presumably null until the agent has submitted a query —
    # TODO confirm against the server.
    - name: current_query
      type: string_or_null
    # Nullable: presumably null until a query has been executed —
    # TODO confirm against the server.
    - name: last_query_result
      type: object_or_null
    - name: steps_taken
      type: integer
    - name: current_score
      type: float

# Declares the actions an agent may take. An action lists required_fields
# only when it needs arguments; actions without that key presumably take
# none — confirm against the server's request validation.
action_space:
  type: structured
  actions:
    # Needs the candidate SQL text in `query`.
    - id: submit_query
      description: 'Submit a fixed SQL query for evaluation'
      required_fields: [query]
    - id: inspect_schema
      description: 'Get database schema (free action)'
    - id: inspect_error
      description: 'Get last error details (free action)'
    # NOTE(review): unlike the other inspect_* actions this one is not
    # described as free — confirm whether it consumes a step.
    - id: inspect_sample
      description: 'Get 3 sample rows from a table'
      required_fields: [table_name]
    # The description states a -0.05 penalty for using this action.
    - id: reset_query
      description: 'Reset to original broken query (penalty: -0.05)'

# Reward specification: an overall range plus named components, each with
# its own [min, max] range.
# The upper bounds of the four positive components (0.6 + 0.2 + 0.1 + 0.1)
# sum to 1.0; `penalty` is described as a deduction magnitude of up to 0.2.
# NOTE(review): the overall range [0.001, 0.999] is narrower than what the
# components allow, so the total is presumably clamped — confirm in the
# grader.
reward:
  range: [0.001, 0.999]
  components:
    - name: correctness
      range: [0.0, 0.6]
      description: 'Row-level match vs expected output'
    - name: efficiency
      range: [0.0, 0.2]
      description: 'Bonus for solving with fewer steps'
    - name: syntax_progress
      range: [0.0, 0.1]
      description: 'Valid SQL even if wrong content'
    - name: schema_bonus
      range: [0.0, 0.1]
      description: 'Correct table/column references'
    # Listed as a non-negative magnitude; the description says it is deducted.
    - name: penalty
      range: [0.0, 0.2]
      description: 'Penalty deduction magnitude for bad actions / urgency'

# Runtime limits and machine sizing for hosting the environment.
runtime:
  max_concurrent_sessions: 64
  # Unit is seconds per the key name: 300 s = 5 minutes.
  episode_timeout_seconds: 300
  machine_requirements:
    vcpu: 2
    # Unit is gigabytes per the key name.
    memory_gb: 8