Spaces:

laterabhi
/

SQL-Query-Env

Paused

Abhinav Singh

feat(v2): execution-grounded rewards via DuckDB -- the key differentiator

c15d346 26 days ago

2.94 kB

	name: sql-optim-env
	version: "2.0.0"
	description: >
	An OpenEnv-compliant reinforcement learning environment where AI agents
	learn to diagnose and optimize SQL queries. Unlike any other submission,
	optimized queries are ACTUALLY EXECUTED against a DuckDB in-memory
	database with realistic synthetic data (500k orders, 1M events).
	Reward is computed from real execution speedup + result correctness —
	not keyword heuristics. Five tasks from easy anti-patterns to expert
	window function audits.

	tags:
	- openenv
	- sql
	- database
	- performance
	- optimization
	- duckdb
	- execution-grounded
	- llm-agent

	language: python
	framework: openenv

	observation_space:
	type: object
	fields:
	task_id: string
	task_name: string
	task_description: string
	sql_query: string
	schema_info: string
	dialect: string
	difficulty: string
	step_count: integer
	max_steps: integer
	issues_found_so_far: array
	last_execution: object

	action_space:
	type: object
	fields:
	suggestions: array
	optimized_query: string
	summary: string
	estimated_improvement: string
	approved: boolean

	reward:
	type: float
	min: 0.0
	max: 1.0
	description: >
	Execution-grounded composite score:
	Real Speedup (35%) — actual DuckDB timing ratio,
	Result Correctness (20%) — both queries return identical data,
	Issue Detection (25%) — keyword match vs ground truth,
	Approval Correctness (8%), Summary Quality (7%), Severity Labels (5%).

	tasks:
	- id: task_1_basic_antipatterns
	name: "Basic SQL Anti-pattern Detection"
	difficulty: easy
	max_steps: 3
	description: "SELECT *, CAST on filter column, YEAR() function — 3 classic anti-patterns on 500k rows"

	- id: task_2_correlated_subqueries
	name: "N+1 Correlated Subquery Elimination"
	difficulty: medium
	max_steps: 4
	description: "3 correlated subqueries causing ~10M row reads — rewrite to single aggregation JOIN"

	- id: task_3_wildcard_scan
	name: "Wildcard LIKE & Projection Optimization"
	difficulty: medium-hard
	max_steps: 4
	description: "Leading-wildcard LIKE on 1M events, SELECT *, pre-filter push-down"

	- id: task_4_implicit_join
	name: "Implicit Cross Join & Scalar Subquery Elimination"
	difficulty: hard
	max_steps: 5
	description: "Comma-syntax join risk + 2 correlated global aggregations — rewrite with CTE"

	- id: task_5_window_functions
	name: "Window Function & Full-Scan Audit"
	difficulty: expert
	max_steps: 5
	description: "5 window functions over 1M unfiltered rows including a global RANK() sort"

	endpoints:
	reset: POST /reset
	step: POST /step
	state: GET /state
	tasks: GET /tasks
	grader: POST /grader
	baseline: POST /baseline
	execute: POST /execute
	leaderboard: GET /leaderboard

	deployment:
	platform: huggingface-spaces
	port: 7860
	dockerfile: Dockerfile