sql-db-engineer-agent / dataset /medium_cases.json
junaid0600's picture
Initial commit
f812d5b
Raw
History Blame Contribute Delete
6.48 kB
[
{
"id": "medium_001",
"category": "logic",
"description": "Fix the wrong JOIN type: INNER JOIN excludes users with no orders. Should be LEFT JOIN to include all users.",
"buggy_query": "SELECT u.id, u.name, COUNT(o.id) as order_count FROM users u INNER JOIN orders o ON u.id = o.user_id GROUP BY u.id, u.name",
"fixed_query": "SELECT u.id, u.name, COUNT(o.id) as order_count FROM users u LEFT JOIN orders o ON u.id = o.user_id GROUP BY u.id, u.name",
"error_message": "No error thrown — query runs but returns wrong results (missing users with 0 orders)",
"database_schema": {
"users": ["id INT PRIMARY KEY", "name VARCHAR(100)", "email VARCHAR(100)"],
"orders": ["id INT PRIMARY KEY", "user_id INT REFERENCES users(id)", "total DECIMAL", "status VARCHAR(20)"]
},
"expected_output": [
{"id": 1, "name": "Alice", "order_count": 3},
{"id": 2, "name": "Bob", "order_count": 0},
{"id": 3, "name": "Carol", "order_count": 1}
],
"actual_buggy_output": [
{"id": 1, "name": "Alice", "order_count": 3},
{"id": 3, "name": "Carol", "order_count": 1}
],
"error_type": "logic",
"error_location": "JOIN type",
"fix_description": "Change INNER JOIN to LEFT JOIN so users with zero orders are included",
"estimated_fix_steps": 3,
"partial_credit_hints": {
"identifies_missing_rows": 0.15,
"identifies_join_type_issue": 0.2,
"correct_join_fix": 0.4,
"explanation": 0.15,
"confidence": 0.1
}
},
{
"id": "medium_002",
"category": "logic",
"description": "Fix the wrong JOIN condition: order_items is joined to products on oi.id instead of oi.product_id, so order rows are matched to the wrong products.",
"buggy_query": "SELECT o.id, o.total, p.name FROM orders o JOIN order_items oi ON o.id = oi.order_id JOIN products p ON oi.id = p.id",
"fixed_query": "SELECT o.id, o.total, p.name FROM orders o JOIN order_items oi ON o.id = oi.order_id JOIN products p ON oi.product_id = p.id",
"error_message": "No error thrown — query runs but returns incorrect product associations",
"database_schema": {
"orders": ["id INT PRIMARY KEY", "user_id INT", "total DECIMAL"],
"order_items": ["id INT PRIMARY KEY", "order_id INT", "product_id INT", "quantity INT"],
"products": ["id INT PRIMARY KEY", "name VARCHAR(100)", "price DECIMAL"]
},
"expected_output": [
{"id": 101, "total": 250.00, "name": "Laptop"},
{"id": 102, "total": 89.99, "name": "Mouse"}
],
"error_type": "logic",
"error_location": "Second JOIN condition — oi.id should be oi.product_id",
"fix_description": "Change ON oi.id = p.id to ON oi.product_id = p.id",
"estimated_fix_steps": 3,
"partial_credit_hints": {
"identifies_wrong_column": 0.2,
"identifies_join_condition": 0.2,
"correct_fix": 0.4,
"explanation": 0.15,
"confidence": 0.05
}
},
{
"id": "medium_003",
"category": "logic",
"description": "Fix the aggregation logic: the aggregate condition AVG(salary) > 70000 is placed in WHERE, which is evaluated before GROUP BY aggregation; it must be moved to a HAVING clause.",
"buggy_query": "SELECT department, AVG(salary) as avg_salary FROM employees WHERE AVG(salary) > 70000 GROUP BY department",
"fixed_query": "SELECT department, AVG(salary) as avg_salary FROM employees GROUP BY department HAVING AVG(salary) > 70000",
"error_message": "ERROR: aggregate functions are not allowed in WHERE",
"database_schema": {
"employees": ["id INT PRIMARY KEY", "name VARCHAR(100)", "department VARCHAR(50)", "salary DECIMAL"]
},
"expected_output": [
{"department": "Engineering", "avg_salary": 95000.00},
{"department": "Data Science", "avg_salary": 88000.00}
],
"error_type": "logic",
"error_location": "WHERE clause — aggregate function used in WHERE instead of HAVING",
"fix_description": "Move AVG(salary) > 70000 from WHERE to HAVING clause, placed after GROUP BY",
"estimated_fix_steps": 3,
"partial_credit_hints": {
"identifies_where_vs_having": 0.2,
"identifies_aggregate_misuse": 0.2,
"correct_fix": 0.4,
"explanation": 0.15,
"confidence": 0.05
}
},
{
"id": "medium_004",
"category": "logic",
"description": "Fix the subquery logic: correlated subquery compares wrong column, returning incorrect filtered set.",
"buggy_query": "SELECT id, name, salary FROM employees e WHERE salary > (SELECT AVG(salary) FROM employees WHERE department = e.id)",
"fixed_query": "SELECT id, name, salary FROM employees e WHERE salary > (SELECT AVG(salary) FROM employees WHERE department = e.department)",
"error_message": "No error thrown — returns incorrect rows because subquery correlates on wrong column",
"database_schema": {
"employees": ["id INT PRIMARY KEY", "name VARCHAR(100)", "department VARCHAR(50)", "salary DECIMAL"]
},
"expected_output": [
{"id": 3, "name": "Carol", "salary": 110000},
{"id": 7, "name": "Dave", "salary": 98000}
],
"error_type": "logic",
"error_location": "Correlated subquery WHERE clause — e.id should be e.department",
"fix_description": "Change WHERE department = e.id to WHERE department = e.department",
"estimated_fix_steps": 4,
"partial_credit_hints": {
"identifies_correlated_subquery": 0.15,
"identifies_wrong_correlation": 0.2,
"correct_fix": 0.45,
"explanation": 0.15,
"confidence": 0.05
}
},
{
"id": "medium_005",
"category": "logic",
"description": "Fix the DISTINCT misuse: COUNT(DISTINCT *) is invalid because DISTINCT cannot be combined with a wildcard; count a specific column instead.",
"buggy_query": "SELECT COUNT(DISTINCT *) FROM orders WHERE status = 'completed'",
"fixed_query": "SELECT COUNT(DISTINCT id) FROM orders WHERE status = 'completed'",
"error_message": "ERROR: COUNT(DISTINCT) with multiple columns requires explicit column list",
"database_schema": {
"orders": ["id INT PRIMARY KEY", "user_id INT", "total DECIMAL", "status VARCHAR(20)"]
},
"expected_output": [{"count": 47}],
"error_type": "logic",
"error_location": "COUNT(DISTINCT *) — wildcard not valid with DISTINCT",
"fix_description": "Replace COUNT(DISTINCT *) with COUNT(DISTINCT id) to count unique order IDs",
"estimated_fix_steps": 2,
"partial_credit_hints": {
"identifies_distinct_misuse": 0.25,
"correct_fix": 0.5,
"explanation": 0.15,
"confidence": 0.1
}
}
]