Spaces:
Sleeping
Sleeping
File size: 5,486 Bytes
f30d05a b2742eb 3c1b0c7 f30d05a 3c1b0c7 f30d05a 3c1b0c7 f30d05a 3c1b0c7 f30d05a 3c1b0c7 f30d05a 3c1b0c7 f30d05a b2742eb 3c1b0c7 f30d05a b2742eb | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 | name: sql-db-engineer-agent
version: '2.0.0'
description: 'SQL Database Engineer Agent β An OpenEnv environment where AI agents learn to act like senior database engineers. Evolved from SQL Query Debugger (Round 1). Agents manage production databases over 50+ steps: inspecting slow queries, creating indexes, rewriting queries, partitioning tables.'
tags: [openenv, real-world, sql, database, engineering, indexing, reinforcement-learning, long-horizon, world-modeling, self-improvement, wildcard]
hackathon: 'META x PyTorch x SST OpenEnv Hackathon 2026'
domain: 'SQL Database Engineering'
tasks:
# ββ Round 1: SQL Query Debugger (15 tasks β backward compatible) ββ
- id: easy_001
difficulty: easy
description: 'Fix SQL syntax error: missing commas in SELECT clause'
- id: easy_002
difficulty: easy
description: 'Fix SQL syntax error: missing WHERE keyword'
- id: easy_003
difficulty: easy
description: 'Fix SQL syntax error: unclosed string literal'
- id: easy_004
difficulty: easy
description: 'Fix SQL syntax error: ORDER used instead of ORDER BY'
- id: easy_005
difficulty: easy
description: 'Fix SQL syntax error: GROUP used instead of GROUP BY'
- id: medium_001
difficulty: medium
description: 'Fix wrong JOIN type: INNER JOIN excludes users with no orders'
- id: medium_002
difficulty: medium
description: 'Fix wrong JOIN condition causing incorrect product associations'
- id: medium_003
difficulty: medium
description: 'Fix aggregation logic: HAVING clause used incorrectly as WHERE'
- id: medium_004
difficulty: medium
description: 'Fix correlated subquery correlating on wrong column'
- id: medium_005
difficulty: medium
description: 'Fix DISTINCT misuse with COUNT causing invalid query'
- id: hard_001
difficulty: hard
description: 'Detect and fix N+1 query pattern with correlated subqueries'
- id: hard_002
difficulty: hard
description: 'Fix function on indexed column preventing index usage'
- id: hard_003
difficulty: hard
description: 'Fix implicit cartesian product from missing JOIN condition'
- id: hard_004
difficulty: hard
description: 'Fix SELECT * in multi-table JOIN causing over-fetching'
- id: hard_005
difficulty: hard
description: 'Fix window function misuse with missing PARTITION BY and ORDER BY'
# ββ Round 2: DB Engineering Scenarios (15 tasks β long-horizon) ββ
- id: easy_s001
difficulty: easy
description: 'User lookup query taking 2s on 10K users table β add email index'
- id: easy_s002
difficulty: easy
description: 'Order status query scanning 50K orders β composite index needed'
- id: easy_s003
difficulty: easy
description: 'Product search doing full table scan on 20K products'
- id: easy_s004
difficulty: easy
description: 'Session lookup hitting 15K sessions without index'
- id: easy_s005
difficulty: easy
description: 'Log table filter slow on 30K entries β compound index fix'
- id: medium_s001
difficulty: medium
description: 'E-commerce DB: 50K orders + 8K users, two slow queries'
- id: medium_s002
difficulty: medium
description: 'Blog platform: 100K posts + 20K authors, search and lookup slow'
- id: medium_s003
difficulty: medium
description: 'Inventory: 80K products + 200K stock movements, rewrite + index'
- id: medium_s004
difficulty: medium
description: 'Ticketing system: 60K tickets + 5K agents, status queue slow'
- id: medium_s005
difficulty: medium
description: 'Analytics DB: 150K events + 10K users, funnel query slow'
- id: hard_s001
difficulty: hard
description: 'Financial DB: 500K transactions across 4 tables, 3 slow queries'
- id: hard_s002
difficulty: hard
description: 'SaaS platform: 8-table schema, 200K+ records, dashboard 20s+'
- id: hard_s003
difficulty: hard
description: 'Healthcare: 1M patient records, compliance + clinical queries'
- id: hard_s004
difficulty: hard
description: 'Gaming leaderboard: 2M players, 5M matches, ranking degraded'
- id: hard_s005
difficulty: hard
description: 'Logistics: 6 tables, 3M shipments + 10M tracking records'
action_space:
type: discrete
actions:
- identify_error
- propose_fix
- submit_answer
- request_hint
- explain_issue
- optimize_query
- inspect_query
- analyze_indexes
- create_index
- rewrite_query
- add_column
- drop_index
- partition_table
- analyze_statistics
- submit_report
observation_space:
type: dict
fields:
- task_id
- task_description
- current_context
- step_count
- difficulty
- max_steps
- hints_used
- previous_actions
- metadata
reward:
min: 0.001
max: 0.999
type: dense_with_milestones
description: >
Dense reward at every step.
Round 1: partial credit for identification, fixing, explanation quality.
Round 2: step reward + delta reward (DB performance change) +
milestone bonuses at 25%/50%/75% improvement + terminal score.
episode:
max_steps: 50
termination: 'submit_report (Round 2) or submit_answer/optimize_query (Round 1) or max_steps reached'
api:
port: 7860
endpoints:
- GET /health
- POST /reset
- POST /step
- GET /state
- GET /tasks
- POST /grader
- POST /baseline
- GET /progress
|