Spaces:
Sleeping
Sleeping
Commit ·
3898ed7
1
Parent(s): 3e61438
fix: db_schema typo fix and final submission
Browse files
- .github/agents/Enter.agent.md +10 -0
- Dockerfile +12 -0
- inference.py +1 -1
- models.py +11 -28
- server/requirements.txt +1 -1
- server/sql_query_debugger_environment.py +27 -27
.github/agents/Enter.agent.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
name: Enter
|
| 3 |
+
description: Describe what this custom agent does and when to use it.
|
| 4 |
+
argument-hint: The inputs this agent expects, e.g., "a task to implement" or "a question to answer".
|
| 5 |
+
# tools: ['vscode', 'execute', 'read', 'agent', 'edit', 'search', 'web', 'todo'] # specify the tools this agent can use. If not set, all enabled tools are allowed.
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
<!-- Tip: Use /create-agent in chat to generate content with agent assistance -->
|
| 9 |
+
|
| 10 |
+
Define what this custom agent does, including its behavior, capabilities, and any specific instructions for its operation.
|
Dockerfile
CHANGED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY server/requirements.txt .
|
| 6 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
+
|
| 8 |
+
COPY . .
|
| 9 |
+
|
| 10 |
+
EXPOSE 8000
|
| 11 |
+
|
| 12 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
inference.py
CHANGED
|
@@ -164,4 +164,4 @@ async def main() -> None:
|
|
| 164 |
print(f"[DEBUG] Average score across all tasks: {avg:.3f}", flush=True)
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
| 167 |
-
asyncio.run(main())
|
|
|
|
| 164 |
print(f"[DEBUG] Average score across all tasks: {avg:.3f}", flush=True)
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
| 167 |
+
asyncio.run(main())
|
models.py
CHANGED
|
@@ -1,38 +1,21 @@
|
|
| 1 |
from openenv.core.env_server.types import Action, Observation
|
| 2 |
from pydantic import Field
|
| 3 |
from typing import Optional
|
| 4 |
-
|
| 5 |
|
| 6 |
class SqlQueryDebuggerAction(Action):
|
| 7 |
"""What the agent does — submits a fixed SQL query."""
|
| 8 |
-
|
| 9 |
fixed_query: str = Field(..., description="The corrected SQL query")
|
| 10 |
|
| 11 |
-
|
| 12 |
class SqlQueryDebuggerObservation(Observation):
|
| 13 |
"""What the agent sees each step."""
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
)
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
)
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
)
|
| 24 |
-
sample_rows: str = Field(
|
| 25 |
-
default="", description="Sample data from the tables as JSON string"
|
| 26 |
-
)
|
| 27 |
-
expected_output_hint: str = Field(
|
| 28 |
-
default="", description="Natural language hint of what correct output looks like"
|
| 29 |
-
)
|
| 30 |
-
task_id: str = Field(
|
| 31 |
-
default="", description="Which task: syntax_fix, logic_bug, multi_table"
|
| 32 |
-
)
|
| 33 |
-
attempts_remaining: int = Field(
|
| 34 |
-
default=5, description="How many fix attempts left"
|
| 35 |
-
)
|
| 36 |
-
last_result: Optional[str] = Field(
|
| 37 |
-
default=None, description="Result rows from agent's last query attempt"
|
| 38 |
-
)
|
|
|
|
| 1 |
from openenv.core.env_server.types import Action, Observation
|
| 2 |
from pydantic import Field
|
| 3 |
from typing import Optional
|
| 4 |
+
from pydantic import ConfigDict
|
| 5 |
|
| 6 |
class SqlQueryDebuggerAction(Action):
|
| 7 |
"""What the agent does — submits a fixed SQL query."""
|
|
|
|
| 8 |
fixed_query: str = Field(..., description="The corrected SQL query")
|
| 9 |
|
|
|
|
| 10 |
class SqlQueryDebuggerObservation(Observation):
|
| 11 |
"""What the agent sees each step."""
|
| 12 |
+
model_config = ConfigDict(populate_by_name=True)
|
| 13 |
+
|
| 14 |
+
broken_query: str = Field(default="", description="The SQL query containing errors")
|
| 15 |
+
db_schema: str = Field(default="", description="CREATE TABLE statements for the database")
|
| 16 |
+
error_message: str = Field(default="", description="Error from running the broken query")
|
| 17 |
+
sample_rows: str = Field(default="", description="Sample data from the tables as JSON string")
|
| 18 |
+
expected_output_hint: str = Field(default="", description="Natural language hint of what correct output looks like")
|
| 19 |
+
task_id: str = Field(default="", description="Which task: syntax_fix, logic_bug, multi_table")
|
| 20 |
+
attempts_remaining: int = Field(default=5, description="How many fix attempts left")
|
| 21 |
+
last_result: Optional[str] = Field(default=None, description="Result rows from agent's last query attempt")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
server/requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
openenv
|
| 2 |
fastapi>=0.115.0
|
| 3 |
uvicorn>=0.24.0
|
| 4 |
pydantic>=2.0.0
|
|
|
|
| 1 |
+
openenv-core>=0.2.0
|
| 2 |
fastapi>=0.115.0
|
| 3 |
uvicorn>=0.24.0
|
| 4 |
pydantic>=2.0.0
|
server/sql_query_debugger_environment.py
CHANGED
|
@@ -16,7 +16,7 @@ SCENARIOS = [
|
|
| 16 |
{
|
| 17 |
"id": "easy_1",
|
| 18 |
"task_id": "syntax_fix",
|
| 19 |
-
"
|
| 20 |
"setup": [
|
| 21 |
"INSERT INTO employees VALUES (1,'Alice',75000,'Engineering');",
|
| 22 |
"INSERT INTO employees VALUES (2,'Bob',50000,'Marketing');",
|
|
@@ -31,7 +31,7 @@ SCENARIOS = [
|
|
| 31 |
{
|
| 32 |
"id": "easy_2",
|
| 33 |
"task_id": "syntax_fix",
|
| 34 |
-
"
|
| 35 |
"setup": [
|
| 36 |
"INSERT INTO products VALUES (1,'Laptop',999.99,10);",
|
| 37 |
"INSERT INTO products VALUES (2,'Mouse',29.99,50);",
|
|
@@ -46,7 +46,7 @@ SCENARIOS = [
|
|
| 46 |
{
|
| 47 |
"id": "easy_3",
|
| 48 |
"task_id": "syntax_fix",
|
| 49 |
-
"
|
| 50 |
"setup": [
|
| 51 |
"INSERT INTO students VALUES (1,'Dan',85,'Math');",
|
| 52 |
"INSERT INTO students VALUES (2,'Eve',92,'Science');",
|
|
@@ -61,7 +61,7 @@ SCENARIOS = [
|
|
| 61 |
{
|
| 62 |
"id": "easy_4",
|
| 63 |
"task_id": "syntax_fix",
|
| 64 |
-
"
|
| 65 |
"setup": [
|
| 66 |
"INSERT INTO orders VALUES (1,'Alice',250.0,'shipped');",
|
| 67 |
"INSERT INTO orders VALUES (2,'Bob',89.0,'pending');",
|
|
@@ -76,7 +76,7 @@ SCENARIOS = [
|
|
| 76 |
{
|
| 77 |
"id": "easy_5",
|
| 78 |
"task_id": "syntax_fix",
|
| 79 |
-
"
|
| 80 |
"setup": [
|
| 81 |
"INSERT INTO inventory VALUES (1,'Bolts',500,'A');",
|
| 82 |
"INSERT INTO inventory VALUES (2,'Nuts',300,'B');",
|
|
@@ -91,7 +91,7 @@ SCENARIOS = [
|
|
| 91 |
{
|
| 92 |
"id": "easy_6",
|
| 93 |
"task_id": "syntax_fix",
|
| 94 |
-
"
|
| 95 |
"setup": [
|
| 96 |
"INSERT INTO users VALUES (1,'alice',28,'Delhi');",
|
| 97 |
"INSERT INTO users VALUES (2,'bob',35,'Mumbai');",
|
|
@@ -106,7 +106,7 @@ SCENARIOS = [
|
|
| 106 |
{
|
| 107 |
"id": "easy_7",
|
| 108 |
"task_id": "syntax_fix",
|
| 109 |
-
"
|
| 110 |
"setup": [
|
| 111 |
"INSERT INTO sales VALUES (1,'Tom',15000,'North');",
|
| 112 |
"INSERT INTO sales VALUES (2,'Sue',22000,'South');",
|
|
@@ -123,7 +123,7 @@ SCENARIOS = [
|
|
| 123 |
{
|
| 124 |
"id": "medium_1",
|
| 125 |
"task_id": "logic_bug",
|
| 126 |
-
"
|
| 127 |
"setup": [
|
| 128 |
"INSERT INTO employees VALUES (1,'Alice',75000,'Engineering');",
|
| 129 |
"INSERT INTO employees VALUES (2,'Bob',50000,'Marketing');",
|
|
@@ -139,7 +139,7 @@ SCENARIOS = [
|
|
| 139 |
{
|
| 140 |
"id": "medium_2",
|
| 141 |
"task_id": "logic_bug",
|
| 142 |
-
"
|
| 143 |
"setup": [
|
| 144 |
"INSERT INTO orders VALUES (1,'Alice',250.0,'shipped');",
|
| 145 |
"INSERT INTO orders VALUES (2,'Bob',89.0,'pending');",
|
|
@@ -155,7 +155,7 @@ SCENARIOS = [
|
|
| 155 |
{
|
| 156 |
"id": "medium_3",
|
| 157 |
"task_id": "logic_bug",
|
| 158 |
-
"
|
| 159 |
"setup": [
|
| 160 |
"INSERT INTO products VALUES (1,'Laptop',999.99,'Electronics');",
|
| 161 |
"INSERT INTO products VALUES (2,'Shirt',29.99,'Clothing');",
|
|
@@ -171,7 +171,7 @@ SCENARIOS = [
|
|
| 171 |
{
|
| 172 |
"id": "medium_4",
|
| 173 |
"task_id": "logic_bug",
|
| 174 |
-
"
|
| 175 |
"setup": [
|
| 176 |
"INSERT INTO students VALUES (1,'Alice',85,1);",
|
| 177 |
"INSERT INTO students VALUES (2,'Bob',45,0);",
|
|
@@ -187,7 +187,7 @@ SCENARIOS = [
|
|
| 187 |
{
|
| 188 |
"id": "medium_5",
|
| 189 |
"task_id": "logic_bug",
|
| 190 |
-
"
|
| 191 |
"setup": [
|
| 192 |
"INSERT INTO employees VALUES (1,'Alice',75000,'Engineering');",
|
| 193 |
"INSERT INTO employees VALUES (2,'Bob',50000,'Marketing');",
|
|
@@ -202,7 +202,7 @@ SCENARIOS = [
|
|
| 202 |
{
|
| 203 |
"id": "medium_6",
|
| 204 |
"task_id": "logic_bug",
|
| 205 |
-
"
|
| 206 |
"setup": [
|
| 207 |
"INSERT INTO sales VALUES (1,'Tom',15000,1);",
|
| 208 |
"INSERT INTO sales VALUES (2,'Sue',22000,1);",
|
|
@@ -220,7 +220,7 @@ SCENARIOS = [
|
|
| 220 |
{
|
| 221 |
"id": "hard_1",
|
| 222 |
"task_id": "multi_table",
|
| 223 |
-
"
|
| 224 |
"CREATE TABLE employees (id INTEGER, name TEXT, dept_id INTEGER, salary REAL);"
|
| 225 |
"CREATE TABLE departments (id INTEGER, dept_name TEXT, budget REAL);"
|
| 226 |
),
|
|
@@ -240,7 +240,7 @@ SCENARIOS = [
|
|
| 240 |
{
|
| 241 |
"id": "hard_2",
|
| 242 |
"task_id": "multi_table",
|
| 243 |
-
"
|
| 244 |
"CREATE TABLE orders (id INTEGER, customer_id INTEGER, amount REAL);"
|
| 245 |
"CREATE TABLE customers (id INTEGER, name TEXT, city TEXT);"
|
| 246 |
),
|
|
@@ -260,7 +260,7 @@ SCENARIOS = [
|
|
| 260 |
{
|
| 261 |
"id": "hard_3",
|
| 262 |
"task_id": "multi_table",
|
| 263 |
-
"
|
| 264 |
"CREATE TABLE employees (id INTEGER, name TEXT, dept_id INTEGER, salary REAL);"
|
| 265 |
"CREATE TABLE departments (id INTEGER, dept_name TEXT, budget REAL);"
|
| 266 |
),
|
|
@@ -281,7 +281,7 @@ SCENARIOS = [
|
|
| 281 |
{
|
| 282 |
"id": "hard_4",
|
| 283 |
"task_id": "multi_table",
|
| 284 |
-
"
|
| 285 |
"CREATE TABLE products (id INTEGER, name TEXT, category_id INTEGER, price REAL);"
|
| 286 |
"CREATE TABLE categories (id INTEGER, cat_name TEXT);"
|
| 287 |
"CREATE TABLE order_items (id INTEGER, product_id INTEGER, qty INTEGER);"
|
|
@@ -305,7 +305,7 @@ SCENARIOS = [
|
|
| 305 |
{
|
| 306 |
"id": "hard_5",
|
| 307 |
"task_id": "multi_table",
|
| 308 |
-
"
|
| 309 |
"CREATE TABLE employees (id INTEGER, name TEXT, manager_id INTEGER, salary REAL);"
|
| 310 |
),
|
| 311 |
"setup": [
|
|
@@ -323,7 +323,7 @@ SCENARIOS = [
|
|
| 323 |
{
|
| 324 |
"id": "hard_6",
|
| 325 |
"task_id": "multi_table",
|
| 326 |
-
"
|
| 327 |
"CREATE TABLE orders (id INTEGER, customer_id INTEGER, amount REAL, status TEXT);"
|
| 328 |
"CREATE TABLE customers (id INTEGER, name TEXT, tier TEXT);"
|
| 329 |
),
|
|
@@ -345,7 +345,7 @@ SCENARIOS = [
|
|
| 345 |
{
|
| 346 |
"id": "hard_7",
|
| 347 |
"task_id": "multi_table",
|
| 348 |
-
"
|
| 349 |
"CREATE TABLE employees (id INTEGER, name TEXT, dept_id INTEGER, salary REAL);"
|
| 350 |
"CREATE TABLE departments (id INTEGER, dept_name TEXT, budget REAL);"
|
| 351 |
),
|
|
@@ -389,11 +389,11 @@ def compute_f1(predicted_rows, expected_rows):
|
|
| 389 |
return 2 * precision * recall / (precision + recall)
|
| 390 |
|
| 391 |
|
| 392 |
-
def run_query_safe(
|
| 393 |
try:
|
| 394 |
conn = sqlite3.connect(":memory:")
|
| 395 |
cur = conn.cursor()
|
| 396 |
-
for stmt in
|
| 397 |
stmt = stmt.strip()
|
| 398 |
if stmt:
|
| 399 |
cur.execute(stmt)
|
|
@@ -430,15 +430,15 @@ class SqlQueryDebuggerEnvironment(Environment):
|
|
| 430 |
_CURRENT_SCENARIO = random.choice(pool)
|
| 431 |
|
| 432 |
sample_rows, _ = run_query_safe(
|
| 433 |
-
_CURRENT_SCENARIO["
|
| 434 |
_CURRENT_SCENARIO["setup"],
|
| 435 |
-
"SELECT * FROM " + _CURRENT_SCENARIO["
|
| 436 |
.split("CREATE TABLE ")[1].split(" ")[0] + " LIMIT 3;"
|
| 437 |
)
|
| 438 |
|
| 439 |
return SqlQueryDebuggerObservation(
|
| 440 |
broken_query = _CURRENT_SCENARIO["broken_query"],
|
| 441 |
-
|
| 442 |
error_message = _CURRENT_SCENARIO["error_message"],
|
| 443 |
sample_rows = json.dumps(sample_rows),
|
| 444 |
expected_output_hint = _CURRENT_SCENARIO["expected_output_hint"],
|
|
@@ -456,7 +456,7 @@ class SqlQueryDebuggerEnvironment(Environment):
|
|
| 456 |
attempts_left = self.MAX_STEPS - _CURRENT_STEP
|
| 457 |
|
| 458 |
rows, error = run_query_safe(
|
| 459 |
-
_CURRENT_SCENARIO["
|
| 460 |
_CURRENT_SCENARIO["setup"],
|
| 461 |
action.fixed_query,
|
| 462 |
)
|
|
@@ -468,7 +468,7 @@ class SqlQueryDebuggerEnvironment(Environment):
|
|
| 468 |
|
| 469 |
return SqlQueryDebuggerObservation(
|
| 470 |
broken_query = _CURRENT_SCENARIO["broken_query"],
|
| 471 |
-
|
| 472 |
error_message = error,
|
| 473 |
sample_rows = json.dumps(_CURRENT_SCENARIO["setup"]),
|
| 474 |
expected_output_hint = _CURRENT_SCENARIO["expected_output_hint"],
|
|
|
|
| 16 |
{
|
| 17 |
"id": "easy_1",
|
| 18 |
"task_id": "syntax_fix",
|
| 19 |
+
"db_schema": "CREATE TABLE employees (id INTEGER, name TEXT, salary REAL, dept TEXT);",
|
| 20 |
"setup": [
|
| 21 |
"INSERT INTO employees VALUES (1,'Alice',75000,'Engineering');",
|
| 22 |
"INSERT INTO employees VALUES (2,'Bob',50000,'Marketing');",
|
|
|
|
| 31 |
{
|
| 32 |
"id": "easy_2",
|
| 33 |
"task_id": "syntax_fix",
|
| 34 |
+
"db_schema": "CREATE TABLE products (id INTEGER, name TEXT, price REAL, stock INTEGER);",
|
| 35 |
"setup": [
|
| 36 |
"INSERT INTO products VALUES (1,'Laptop',999.99,10);",
|
| 37 |
"INSERT INTO products VALUES (2,'Mouse',29.99,50);",
|
|
|
|
| 46 |
{
|
| 47 |
"id": "easy_3",
|
| 48 |
"task_id": "syntax_fix",
|
| 49 |
+
"db_schema": "CREATE TABLE students (id INTEGER, name TEXT, grade INTEGER, subject TEXT);",
|
| 50 |
"setup": [
|
| 51 |
"INSERT INTO students VALUES (1,'Dan',85,'Math');",
|
| 52 |
"INSERT INTO students VALUES (2,'Eve',92,'Science');",
|
|
|
|
| 61 |
{
|
| 62 |
"id": "easy_4",
|
| 63 |
"task_id": "syntax_fix",
|
| 64 |
+
"db_schema": "CREATE TABLE orders (id INTEGER, customer TEXT, amount REAL, status TEXT);",
|
| 65 |
"setup": [
|
| 66 |
"INSERT INTO orders VALUES (1,'Alice',250.0,'shipped');",
|
| 67 |
"INSERT INTO orders VALUES (2,'Bob',89.0,'pending');",
|
|
|
|
| 76 |
{
|
| 77 |
"id": "easy_5",
|
| 78 |
"task_id": "syntax_fix",
|
| 79 |
+
"db_schema": "CREATE TABLE inventory (id INTEGER, item TEXT, qty INTEGER, warehouse TEXT);",
|
| 80 |
"setup": [
|
| 81 |
"INSERT INTO inventory VALUES (1,'Bolts',500,'A');",
|
| 82 |
"INSERT INTO inventory VALUES (2,'Nuts',300,'B');",
|
|
|
|
| 91 |
{
|
| 92 |
"id": "easy_6",
|
| 93 |
"task_id": "syntax_fix",
|
| 94 |
+
"db_schema": "CREATE TABLE users (id INTEGER, username TEXT, age INTEGER, city TEXT);",
|
| 95 |
"setup": [
|
| 96 |
"INSERT INTO users VALUES (1,'alice',28,'Delhi');",
|
| 97 |
"INSERT INTO users VALUES (2,'bob',35,'Mumbai');",
|
|
|
|
| 106 |
{
|
| 107 |
"id": "easy_7",
|
| 108 |
"task_id": "syntax_fix",
|
| 109 |
+
"db_schema": "CREATE TABLE sales (id INTEGER, rep TEXT, amount REAL, region TEXT);",
|
| 110 |
"setup": [
|
| 111 |
"INSERT INTO sales VALUES (1,'Tom',15000,'North');",
|
| 112 |
"INSERT INTO sales VALUES (2,'Sue',22000,'South');",
|
|
|
|
| 123 |
{
|
| 124 |
"id": "medium_1",
|
| 125 |
"task_id": "logic_bug",
|
| 126 |
+
"db_schema": "CREATE TABLE employees (id INTEGER, name TEXT, salary REAL, dept TEXT);",
|
| 127 |
"setup": [
|
| 128 |
"INSERT INTO employees VALUES (1,'Alice',75000,'Engineering');",
|
| 129 |
"INSERT INTO employees VALUES (2,'Bob',50000,'Marketing');",
|
|
|
|
| 139 |
{
|
| 140 |
"id": "medium_2",
|
| 141 |
"task_id": "logic_bug",
|
| 142 |
+
"db_schema": "CREATE TABLE orders (id INTEGER, customer TEXT, amount REAL, status TEXT);",
|
| 143 |
"setup": [
|
| 144 |
"INSERT INTO orders VALUES (1,'Alice',250.0,'shipped');",
|
| 145 |
"INSERT INTO orders VALUES (2,'Bob',89.0,'pending');",
|
|
|
|
| 155 |
{
|
| 156 |
"id": "medium_3",
|
| 157 |
"task_id": "logic_bug",
|
| 158 |
+
"db_schema": "CREATE TABLE products (id INTEGER, name TEXT, price REAL, category TEXT);",
|
| 159 |
"setup": [
|
| 160 |
"INSERT INTO products VALUES (1,'Laptop',999.99,'Electronics');",
|
| 161 |
"INSERT INTO products VALUES (2,'Shirt',29.99,'Clothing');",
|
|
|
|
| 171 |
{
|
| 172 |
"id": "medium_4",
|
| 173 |
"task_id": "logic_bug",
|
| 174 |
+
"db_schema": "CREATE TABLE students (id INTEGER, name TEXT, score INTEGER, passed INTEGER);",
|
| 175 |
"setup": [
|
| 176 |
"INSERT INTO students VALUES (1,'Alice',85,1);",
|
| 177 |
"INSERT INTO students VALUES (2,'Bob',45,0);",
|
|
|
|
| 187 |
{
|
| 188 |
"id": "medium_5",
|
| 189 |
"task_id": "logic_bug",
|
| 190 |
+
"db_schema": "CREATE TABLE employees (id INTEGER, name TEXT, salary REAL, dept TEXT);",
|
| 191 |
"setup": [
|
| 192 |
"INSERT INTO employees VALUES (1,'Alice',75000,'Engineering');",
|
| 193 |
"INSERT INTO employees VALUES (2,'Bob',50000,'Marketing');",
|
|
|
|
| 202 |
{
|
| 203 |
"id": "medium_6",
|
| 204 |
"task_id": "logic_bug",
|
| 205 |
+
"db_schema": "CREATE TABLE sales (id INTEGER, rep TEXT, amount REAL, month INTEGER);",
|
| 206 |
"setup": [
|
| 207 |
"INSERT INTO sales VALUES (1,'Tom',15000,1);",
|
| 208 |
"INSERT INTO sales VALUES (2,'Sue',22000,1);",
|
|
|
|
| 220 |
{
|
| 221 |
"id": "hard_1",
|
| 222 |
"task_id": "multi_table",
|
| 223 |
+
"db_schema": (
|
| 224 |
"CREATE TABLE employees (id INTEGER, name TEXT, dept_id INTEGER, salary REAL);"
|
| 225 |
"CREATE TABLE departments (id INTEGER, dept_name TEXT, budget REAL);"
|
| 226 |
),
|
|
|
|
| 240 |
{
|
| 241 |
"id": "hard_2",
|
| 242 |
"task_id": "multi_table",
|
| 243 |
+
"db_schema": (
|
| 244 |
"CREATE TABLE orders (id INTEGER, customer_id INTEGER, amount REAL);"
|
| 245 |
"CREATE TABLE customers (id INTEGER, name TEXT, city TEXT);"
|
| 246 |
),
|
|
|
|
| 260 |
{
|
| 261 |
"id": "hard_3",
|
| 262 |
"task_id": "multi_table",
|
| 263 |
+
"db_schema": (
|
| 264 |
"CREATE TABLE employees (id INTEGER, name TEXT, dept_id INTEGER, salary REAL);"
|
| 265 |
"CREATE TABLE departments (id INTEGER, dept_name TEXT, budget REAL);"
|
| 266 |
),
|
|
|
|
| 281 |
{
|
| 282 |
"id": "hard_4",
|
| 283 |
"task_id": "multi_table",
|
| 284 |
+
"db_schema": (
|
| 285 |
"CREATE TABLE products (id INTEGER, name TEXT, category_id INTEGER, price REAL);"
|
| 286 |
"CREATE TABLE categories (id INTEGER, cat_name TEXT);"
|
| 287 |
"CREATE TABLE order_items (id INTEGER, product_id INTEGER, qty INTEGER);"
|
|
|
|
| 305 |
{
|
| 306 |
"id": "hard_5",
|
| 307 |
"task_id": "multi_table",
|
| 308 |
+
"db_schema": (
|
| 309 |
"CREATE TABLE employees (id INTEGER, name TEXT, manager_id INTEGER, salary REAL);"
|
| 310 |
),
|
| 311 |
"setup": [
|
|
|
|
| 323 |
{
|
| 324 |
"id": "hard_6",
|
| 325 |
"task_id": "multi_table",
|
| 326 |
+
"db_schema": (
|
| 327 |
"CREATE TABLE orders (id INTEGER, customer_id INTEGER, amount REAL, status TEXT);"
|
| 328 |
"CREATE TABLE customers (id INTEGER, name TEXT, tier TEXT);"
|
| 329 |
),
|
|
|
|
| 345 |
{
|
| 346 |
"id": "hard_7",
|
| 347 |
"task_id": "multi_table",
|
| 348 |
+
"db_schema": (
|
| 349 |
"CREATE TABLE employees (id INTEGER, name TEXT, dept_id INTEGER, salary REAL);"
|
| 350 |
"CREATE TABLE departments (id INTEGER, dept_name TEXT, budget REAL);"
|
| 351 |
),
|
|
|
|
| 389 |
return 2 * precision * recall / (precision + recall)
|
| 390 |
|
| 391 |
|
| 392 |
+
def run_query_safe(db_schema, setup_stmts, query):
|
| 393 |
try:
|
| 394 |
conn = sqlite3.connect(":memory:")
|
| 395 |
cur = conn.cursor()
|
| 396 |
+
for stmt in db_schema.split(";"):
|
| 397 |
stmt = stmt.strip()
|
| 398 |
if stmt:
|
| 399 |
cur.execute(stmt)
|
|
|
|
| 430 |
_CURRENT_SCENARIO = random.choice(pool)
|
| 431 |
|
| 432 |
sample_rows, _ = run_query_safe(
|
| 433 |
+
_CURRENT_SCENARIO["db_schema"],
|
| 434 |
_CURRENT_SCENARIO["setup"],
|
| 435 |
+
"SELECT * FROM " + _CURRENT_SCENARIO["db_schema"]
|
| 436 |
.split("CREATE TABLE ")[1].split(" ")[0] + " LIMIT 3;"
|
| 437 |
)
|
| 438 |
|
| 439 |
return SqlQueryDebuggerObservation(
|
| 440 |
broken_query = _CURRENT_SCENARIO["broken_query"],
|
| 441 |
+
db_schema = _CURRENT_SCENARIO["db_schema"],
|
| 442 |
error_message = _CURRENT_SCENARIO["error_message"],
|
| 443 |
sample_rows = json.dumps(sample_rows),
|
| 444 |
expected_output_hint = _CURRENT_SCENARIO["expected_output_hint"],
|
|
|
|
| 456 |
attempts_left = self.MAX_STEPS - _CURRENT_STEP
|
| 457 |
|
| 458 |
rows, error = run_query_safe(
|
| 459 |
+
_CURRENT_SCENARIO["db_schema"],
|
| 460 |
_CURRENT_SCENARIO["setup"],
|
| 461 |
action.fixed_query,
|
| 462 |
)
|
|
|
|
| 468 |
|
| 469 |
return SqlQueryDebuggerObservation(
|
| 470 |
broken_query = _CURRENT_SCENARIO["broken_query"],
|
| 471 |
+
db_schema = _CURRENT_SCENARIO["db_schema"],
|
| 472 |
error_message = error,
|
| 473 |
sample_rows = json.dumps(_CURRENT_SCENARIO["setup"]),
|
| 474 |
expected_output_hint = _CURRENT_SCENARIO["expected_output_hint"],
|