Spaces:
Sleeping
Sleeping
| name: sql-db-engineer-agent | |
| version: '2.0.0' | |
| description: 'SQL Database Engineer Agent β An OpenEnv environment where AI agents learn to act like senior database engineers. Evolved from SQL Query Debugger (Round 1). Agents manage production databases over 50+ steps: inspecting slow queries, creating indexes, rewriting queries, partitioning tables.' | |
| tags: [openenv, real-world, sql, database, engineering, indexing, reinforcement-learning, long-horizon, world-modeling, self-improvement, wildcard] | |
| hackathon: 'META x PyTorch x SST OpenEnv Hackathon 2026' | |
| domain: 'SQL Database Engineering' | |
| tasks: | |
| # ββ Round 1: SQL Query Debugger (15 tasks β backward compatible) ββ | |
| - id: easy_001 | |
| difficulty: easy | |
| description: 'Fix SQL syntax error: missing commas in SELECT clause' | |
| - id: easy_002 | |
| difficulty: easy | |
| description: 'Fix SQL syntax error: missing WHERE keyword' | |
| - id: easy_003 | |
| difficulty: easy | |
| description: 'Fix SQL syntax error: unclosed string literal' | |
| - id: easy_004 | |
| difficulty: easy | |
| description: 'Fix SQL syntax error: ORDER used instead of ORDER BY' | |
| - id: easy_005 | |
| difficulty: easy | |
| description: 'Fix SQL syntax error: GROUP used instead of GROUP BY' | |
| - id: medium_001 | |
| difficulty: medium | |
| description: 'Fix wrong JOIN type: INNER JOIN excludes users with no orders' | |
| - id: medium_002 | |
| difficulty: medium | |
| description: 'Fix wrong JOIN condition causing incorrect product associations' | |
| - id: medium_003 | |
| difficulty: medium | |
| description: 'Fix aggregation logic: HAVING clause used incorrectly as WHERE' | |
| - id: medium_004 | |
| difficulty: medium | |
| description: 'Fix correlated subquery correlating on wrong column' | |
| - id: medium_005 | |
| difficulty: medium | |
| description: 'Fix DISTINCT misuse with COUNT causing invalid query' | |
| - id: hard_001 | |
| difficulty: hard | |
| description: 'Detect and fix N+1 query pattern with correlated subqueries' | |
| - id: hard_002 | |
| difficulty: hard | |
| description: 'Fix function on indexed column preventing index usage' | |
| - id: hard_003 | |
| difficulty: hard | |
| description: 'Fix implicit cartesian product from missing JOIN condition' | |
| - id: hard_004 | |
| difficulty: hard | |
| description: 'Fix SELECT * in multi-table JOIN causing over-fetching' | |
| - id: hard_005 | |
| difficulty: hard | |
| description: 'Fix window function misuse with missing PARTITION BY and ORDER BY' | |
| # ββ Round 2: DB Engineering Scenarios (15 tasks β long-horizon) ββ | |
| - id: easy_s001 | |
| difficulty: easy | |
| description: 'User lookup query taking 2s on 10K users table β add email index' | |
| - id: easy_s002 | |
| difficulty: easy | |
| description: 'Order status query scanning 50K orders β composite index needed' | |
| - id: easy_s003 | |
| difficulty: easy | |
| description: 'Product search doing full table scan on 20K products' | |
| - id: easy_s004 | |
| difficulty: easy | |
| description: 'Session lookup hitting 15K sessions without index' | |
| - id: easy_s005 | |
| difficulty: easy | |
| description: 'Log table filter slow on 30K entries β compound index fix' | |
| - id: medium_s001 | |
| difficulty: medium | |
| description: 'E-commerce DB: 50K orders + 8K users, two slow queries' | |
| - id: medium_s002 | |
| difficulty: medium | |
| description: 'Blog platform: 100K posts + 20K authors, search and lookup slow' | |
| - id: medium_s003 | |
| difficulty: medium | |
| description: 'Inventory: 80K products + 200K stock movements, rewrite + index' | |
| - id: medium_s004 | |
| difficulty: medium | |
| description: 'Ticketing system: 60K tickets + 5K agents, status queue slow' | |
| - id: medium_s005 | |
| difficulty: medium | |
| description: 'Analytics DB: 150K events + 10K users, funnel query slow' | |
| - id: hard_s001 | |
| difficulty: hard | |
| description: 'Financial DB: 500K transactions across 4 tables, 3 slow queries' | |
| - id: hard_s002 | |
| difficulty: hard | |
| description: 'SaaS platform: 8-table schema, 200K+ records, dashboard 20s+' | |
| - id: hard_s003 | |
| difficulty: hard | |
| description: 'Healthcare: 1M patient records, compliance + clinical queries' | |
| - id: hard_s004 | |
| difficulty: hard | |
| description: 'Gaming leaderboard: 2M players, 5M matches, ranking degraded' | |
| - id: hard_s005 | |
| difficulty: hard | |
| description: 'Logistics: 6 tables, 3M shipments + 10M tracking records' | |
| action_space: | |
| type: discrete | |
| actions: | |
| - identify_error | |
| - propose_fix | |
| - submit_answer | |
| - request_hint | |
| - explain_issue | |
| - optimize_query | |
| - inspect_query | |
| - analyze_indexes | |
| - create_index | |
| - rewrite_query | |
| - add_column | |
| - drop_index | |
| - partition_table | |
| - analyze_statistics | |
| - submit_report | |
| observation_space: | |
| type: dict | |
| fields: | |
| - task_id | |
| - task_description | |
| - current_context | |
| - step_count | |
| - difficulty | |
| - max_steps | |
| - hints_used | |
| - previous_actions | |
| - metadata | |
| reward: | |
| min: 0.001 | |
| max: 0.999 | |
| type: dense_with_milestones | |
| description: > | |
| Dense reward at every step. | |
| Round 1: partial credit for identification, fixing, explanation quality. | |
| Round 2: step reward + delta reward (DB performance change) + | |
| milestone bonuses at 25%/50%/75% improvement + terminal score. | |
| episode: | |
| max_steps: 50 | |
| termination: 'submit_report (Round 2) or submit_answer/optimize_query (Round 1) or max_steps reached' | |
| api: | |
| port: 7860 | |
| endpoints: | |
| - GET /health | |
| - POST /reset | |
| - POST /step | |
| - GET /state | |
| - GET /tasks | |
| - POST /grader | |
| - POST /baseline | |
| - GET /progress | |