[ { "id": "medium_001", "category": "logic", "description": "Fix the wrong JOIN type: INNER JOIN excludes users with no orders. Should be LEFT JOIN to include all users.", "buggy_query": "SELECT u.id, u.name, COUNT(o.id) as order_count FROM users u INNER JOIN orders o ON u.id = o.user_id GROUP BY u.id, u.name", "fixed_query": "SELECT u.id, u.name, COUNT(o.id) as order_count FROM users u LEFT JOIN orders o ON u.id = o.user_id GROUP BY u.id, u.name", "error_message": "No error thrown — query runs but returns wrong results (missing users with 0 orders)", "database_schema": { "users": ["id INT PRIMARY KEY", "name VARCHAR(100)", "email VARCHAR(100)"], "orders": ["id INT PRIMARY KEY", "user_id INT REFERENCES users(id)", "total DECIMAL", "status VARCHAR(20)"] }, "expected_output": [ {"id": 1, "name": "Alice", "order_count": 3}, {"id": 2, "name": "Bob", "order_count": 0}, {"id": 3, "name": "Carol", "order_count": 1} ], "actual_buggy_output": [ {"id": 1, "name": "Alice", "order_count": 3}, {"id": 3, "name": "Carol", "order_count": 1} ], "error_type": "logic", "error_location": "JOIN type", "fix_description": "Change INNER JOIN to LEFT JOIN so users with zero orders are included", "estimated_fix_steps": 3, "partial_credit_hints": { "identifies_missing_rows": 0.15, "identifies_join_type_issue": 0.2, "correct_join_fix": 0.4, "explanation": 0.15, "confidence": 0.1 } }, { "id": "medium_002", "category": "logic", "description": "Fix the wrong JOIN condition: joining on the wrong column returns incorrect product associations.", "buggy_query": "SELECT o.id, o.total, p.name FROM orders o JOIN order_items oi ON o.id = oi.order_id JOIN products p ON oi.id = p.id", "fixed_query": "SELECT o.id, o.total, p.name FROM orders o JOIN order_items oi ON o.id = oi.order_id JOIN products p ON oi.product_id = p.id", "error_message": "No error thrown — query runs but returns incorrect product associations", "database_schema": { "orders": ["id INT PRIMARY KEY", "user_id INT", "total DECIMAL"], 
"order_items": ["id INT PRIMARY KEY", "order_id INT", "product_id INT", "quantity INT"], "products": ["id INT PRIMARY KEY", "name VARCHAR(100)", "price DECIMAL"] }, "expected_output": [ {"id": 101, "total": 250.00, "name": "Laptop"}, {"id": 102, "total": 89.99, "name": "Mouse"} ], "error_type": "logic", "error_location": "Second JOIN condition — oi.id should be oi.product_id", "fix_description": "Change ON oi.id = p.id to ON oi.product_id = p.id", "estimated_fix_steps": 3, "partial_credit_hints": { "identifies_wrong_column": 0.2, "identifies_join_condition": 0.2, "correct_fix": 0.4, "explanation": 0.15, "confidence": 0.05 } }, { "id": "medium_003", "category": "logic", "description": "Fix the aggregation logic: the aggregate condition AVG(salary) > 70000 is written in WHERE, which is evaluated before GROUP BY; it belongs in a HAVING clause.", "buggy_query": "SELECT department, AVG(salary) as avg_salary FROM employees WHERE AVG(salary) > 70000 GROUP BY department", "fixed_query": "SELECT department, AVG(salary) as avg_salary FROM employees GROUP BY department HAVING AVG(salary) > 70000", "error_message": "ERROR: aggregate functions are not allowed in WHERE", "database_schema": { "employees": ["id INT PRIMARY KEY", "name VARCHAR(100)", "department VARCHAR(50)", "salary DECIMAL"] }, "expected_output": [ {"department": "Engineering", "avg_salary": 95000.00}, {"department": "Data Science","avg_salary": 88000.00} ], "error_type": "logic", "error_location": "WHERE clause — aggregate function used in WHERE instead of HAVING", "fix_description": "Move AVG(salary) > 70000 from WHERE to HAVING clause, placed after GROUP BY", "estimated_fix_steps": 3, "partial_credit_hints": { "identifies_where_vs_having": 0.2, "identifies_aggregate_misuse": 0.2, "correct_fix": 0.4, "explanation": 0.15, "confidence": 0.05 } }, { "id": "medium_004", "category": "logic", "description": "Fix the subquery logic: correlated subquery compares wrong column, returning incorrect filtered set.", 
"buggy_query": "SELECT id, name, salary FROM employees e WHERE salary > (SELECT AVG(salary) FROM employees WHERE department = e.id)", "fixed_query": "SELECT id, name, salary FROM employees e WHERE salary > (SELECT AVG(salary) FROM employees WHERE department = e.department)", "error_message": "No error thrown — returns incorrect rows because subquery correlates on wrong column", "database_schema": { "employees": ["id INT PRIMARY KEY", "name VARCHAR(100)", "department VARCHAR(50)", "salary DECIMAL"] }, "expected_output": [ {"id": 3, "name": "Carol", "salary": 110000}, {"id": 7, "name": "Dave", "salary": 98000} ], "error_type": "logic", "error_location": "Correlated subquery WHERE clause — e.id should be e.department", "fix_description": "Change WHERE department = e.id to WHERE department = e.department", "estimated_fix_steps": 4, "partial_credit_hints": { "identifies_correlated_subquery": 0.15, "identifies_wrong_correlation": 0.2, "correct_fix": 0.45, "explanation": 0.15, "confidence": 0.05 } }, { "id": "medium_005", "category": "logic", "description": "Fix the DISTINCT misuse: COUNT(DISTINCT *) is not valid SQL; DISTINCT must be applied to an explicit column.", "buggy_query": "SELECT COUNT(DISTINCT *) FROM orders WHERE status = 'completed'", "fixed_query": "SELECT COUNT(DISTINCT id) FROM orders WHERE status = 'completed'", "error_message": "ERROR: COUNT(DISTINCT) with multiple columns requires explicit column list", "database_schema": { "orders": ["id INT PRIMARY KEY", "user_id INT", "total DECIMAL", "status VARCHAR(20)"] }, "expected_output": [{"count": 47}], "error_type": "logic", "error_location": "COUNT(DISTINCT *) — wildcard not valid with DISTINCT", "fix_description": "Replace COUNT(DISTINCT *) with COUNT(DISTINCT id) to count unique order IDs", "estimated_fix_steps": 2, "partial_credit_hints": { "identifies_distinct_misuse": 0.25, "correct_fix": 0.5, "explanation": 0.15, "confidence": 0.1 } } ]