sql_tutor_env / server /challenges.py
snigenigmatic's picture
Upload folder using huggingface_hub
0683cf4 verified
"""
A bank of SQL challenges. Each challenge has:
- schema_sql: DDL + seed data to create an in-memory SQLite DB
- schema_description: human-readable description of tables
- task_description: what the correct query must return
- broken_query: a query with a deliberate bug
- correct_query: the canonical correct query
- hints: progressive hints (easiest to most specific)
"""
CHALLENGES = [
{
"id": "wrong_aggregate",
"schema_sql": """
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
customer_id INTEGER,
amount REAL,
status TEXT
);
INSERT INTO orders VALUES
(1, 1, 120.50, 'completed'),
(2, 1, 45.00, 'completed'),
(3, 2, 200.00, 'pending'),
(4, 2, 80.00, 'completed'),
(5, 3, 60.00, 'completed');
""",
"schema_description": (
"Table `orders`: id (int), customer_id (int), amount (real), status (text)."
),
"task_description": (
"Find each customer's total spending on completed orders. "
"Return customer_id and total_amount, ordered by customer_id."
),
"broken_query": (
"SELECT customer_id, amount AS total_amount "
"FROM orders "
"WHERE status = 'completed' "
"ORDER BY customer_id;"
),
"correct_query": (
"SELECT customer_id, SUM(amount) AS total_amount "
"FROM orders "
"WHERE status = 'completed' "
"GROUP BY customer_id "
"ORDER BY customer_id;"
),
"hints": [
"The query is missing an aggregation - each customer can have multiple orders.",
"You need to use SUM() to add up amounts, and GROUP BY to group per customer.",
"Add `SUM(amount) AS total_amount` and `GROUP BY customer_id` to your query.",
],
},
{
"id": "wrong_join",
"schema_sql": """
CREATE TABLE employees (
id INTEGER PRIMARY KEY,
name TEXT,
department_id INTEGER
);
CREATE TABLE departments (
id INTEGER PRIMARY KEY,
name TEXT
);
INSERT INTO departments VALUES (1, 'Engineering'), (2, 'Marketing'), (3, 'HR');
INSERT INTO employees VALUES
(1, 'Alice', 1),
(2, 'Bob', 2),
(3, 'Carol', 1),
(4, 'Dave', NULL);
""",
"schema_description": (
"Table `employees`: id, name, department_id. "
"Table `departments`: id, name."
),
"task_description": (
"List all employees and their department name. "
"Include employees with no department (show NULL for their department). "
"Return employee name and department name."
),
"broken_query": (
"SELECT e.name, d.name AS department "
"FROM employees e "
"INNER JOIN departments d ON e.department_id = d.id;"
),
"correct_query": (
"SELECT e.name, d.name AS department "
"FROM employees e "
"LEFT JOIN departments d ON e.department_id = d.id;"
),
"hints": [
"The result is missing some employees. Check whether all rows from `employees` appear.",
"INNER JOIN only returns rows with a match in both tables. Dave has no department.",
"Change INNER JOIN to LEFT JOIN so employees without a department are still included.",
],
},
{
"id": "off_by_one_filter",
"schema_sql": """
CREATE TABLE products (
id INTEGER PRIMARY KEY,
name TEXT,
price REAL,
stock INTEGER
);
INSERT INTO products VALUES
(1, 'Widget A', 9.99, 50),
(2, 'Widget B', 14.99, 0),
(3, 'Gadget X', 49.99, 10),
(4, 'Gadget Y', 99.99, 0),
(5, 'Gizmo Z', 4.99, 200);
""",
"schema_description": (
"Table `products`: id, name, price (real), stock (int)."
),
"task_description": (
"Find all products that are currently out of stock (stock = 0). "
"Return their name and price."
),
"broken_query": (
"SELECT name, price FROM products WHERE stock < 0;"
),
"correct_query": (
"SELECT name, price FROM products WHERE stock = 0;"
),
"hints": [
"The query returns no rows, but some products have zero stock.",
"Check the WHERE condition - you want products where stock equals zero, not less than zero.",
"Change `stock < 0` to `stock = 0`.",
],
},
{
"id": "missing_having",
"schema_sql": """
CREATE TABLE sales (
id INTEGER PRIMARY KEY,
region TEXT,
salesperson TEXT,
revenue REAL
);
INSERT INTO sales VALUES
(1, 'North', 'Alice', 5000),
(2, 'North', 'Bob', 3000),
(3, 'South', 'Carol', 8000),
(4, 'South', 'Dave', 2000),
(5, 'East', 'Eve', 1500),
(6, 'North', 'Alice', 4000),
(7, 'South', 'Carol', 7000);
""",
"schema_description": (
"Table `sales`: id, region (text), salesperson (text), revenue (real)."
),
"task_description": (
"Find regions whose total revenue exceeds 10000. "
"Return region and total_revenue."
),
"broken_query": (
"SELECT region, SUM(revenue) AS total_revenue "
"FROM sales "
"WHERE SUM(revenue) > 10000 "
"GROUP BY region;"
),
"correct_query": (
"SELECT region, SUM(revenue) AS total_revenue "
"FROM sales "
"GROUP BY region "
"HAVING SUM(revenue) > 10000;"
),
"hints": [
"You cannot use aggregate functions like SUM() inside a WHERE clause.",
"To filter on aggregated values, use HAVING instead of WHERE.",
"Move the condition to a HAVING clause after GROUP BY: `HAVING SUM(revenue) > 10000`.",
],
},
{
"id": "wrong_order_limit",
"schema_sql": """
CREATE TABLE students (
id INTEGER PRIMARY KEY,
name TEXT,
score INTEGER
);
INSERT INTO students VALUES
(1, 'Alice', 88),
(2, 'Bob', 72),
(3, 'Carol', 95),
(4, 'Dave', 60),
(5, 'Eve', 91);
""",
"schema_description": (
"Table `students`: id, name, score (int)."
),
"task_description": (
"Find the top 3 students by score. Return name and score, highest score first."
),
"broken_query": (
"SELECT name, score FROM students ORDER BY score ASC LIMIT 3;"
),
"correct_query": (
"SELECT name, score FROM students ORDER BY score DESC LIMIT 3;"
),
"hints": [
"The query returns the lowest scores, not the highest.",
"Check the ORDER BY direction - ASC sorts smallest first.",
"Change `ORDER BY score ASC` to `ORDER BY score DESC` to get the top scores.",
],
},
]