sql-env / data /tasks /join_aggregate.json
UtkarshSatav's picture
Upload folder using huggingface_hub
08b82d0 verified
{
"task_name": "join_aggregate",
"difficulty": "medium",
"description": "JOIN queries with GROUP BY, HAVING, and aggregate functions",
"max_steps_per_question": 4,
"questions": [
{
"id": "med_1",
"question": "What is the average order total for each customer who has placed at least one order? Show customer name and average total (rounded to 2 decimal places), sorted by average total from highest to lowest.",
"ground_truth_sql": "SELECT c.name, ROUND(AVG(o.total_amount), 2) as avg_total FROM customers c JOIN orders o ON c.id = o.customer_id GROUP BY c.id ORDER BY avg_total DESC",
"expected_columns": ["name", "avg_total"],
"expected_row_count": 18,
"expected_rows": [
["Swati Tiwari", 3798.0],
["Vikram Singh", 3648.0],
["Pooja Mishra", 3499.0],
["Kavita Joshi", 3248.5],
["Rahul Kumar", 2898.5],
["Divya Saxena", 2598.5],
["Priya Patel", 2548.5],
["Karan Malhotra", 2499.0],
["Aarav Sharma", 1748.5],
["Meera Iyer", 1724.0],
["Suresh Menon", 1648.5],
["Arjun Nair", 1599.0],
["Rohan Das", 1098.5],
["Amit Pandey", 1023.5],
["Ritu Chopra", 999.0],
["Ananya Reddy", 898.0],
["Nikhil Bhat", 849.0],
["Deepak Verma", 848.0]
],
"order_matters": true
},
{
"id": "med_2",
"question": "Which products have a total ordered quantity greater than 2? Show the product name and total quantity ordered, sorted by total quantity from highest to lowest, with ties broken by product name in ascending order.",
"ground_truth_sql": "SELECT p.name, SUM(oi.quantity) as total_qty FROM products p JOIN order_items oi ON p.id = oi.product_id GROUP BY p.id HAVING total_qty > 2 ORDER BY total_qty DESC, p.name ASC",
"expected_columns": ["name", "total_qty"],
"expected_row_count": 8,
"expected_rows": [
["Wireless Headphones", 7],
["Ceramic Mug Set", 6],
["Smartphone Case", 5],
["Cotton T-Shirt", 4],
["Desk Lamp", 4],
["Python Programming", 4],
["Running Shoes", 4],
["Cooking Recipes", 3]
],
"order_matters": true
},
{
"id": "med_3",
"question": "List all customers who have never placed an order. Show their name and email, sorted by name.",
"ground_truth_sql": "SELECT c.name, c.email FROM customers c WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.id) ORDER BY c.name",
"expected_columns": ["name", "email"],
"expected_row_count": 2,
"expected_rows": [
["Nisha Agarwal", "nisha@example.com"],
["Sneha Gupta", "sneha@example.com"]
],
"order_matters": true
},
{
"id": "med_4",
"question": "What is the total revenue per product category? Calculate revenue as quantity times unit_price from order_items. Show category and revenue (rounded to 2 decimal places), sorted by revenue from highest to lowest.",
"ground_truth_sql": "SELECT p.category, ROUND(SUM(oi.quantity * oi.unit_price), 2) as revenue FROM products p JOIN order_items oi ON p.id = oi.product_id GROUP BY p.category ORDER BY revenue DESC",
"expected_columns": ["category", "revenue"],
"expected_row_count": 4,
"expected_rows": [
["Electronics", 28384.0],
["Clothing", 20889.0],
["Home", 10339.0],
["Books", 4989.0]
],
"order_matters": true
},
{
"id": "med_5",
"question": "Who are the top 3 customers by total spending? Show customer name and total amount spent across all orders, sorted by total spent from highest to lowest.",
"ground_truth_sql": "SELECT c.name, SUM(o.total_amount) as total_spent FROM customers c JOIN orders o ON c.id = o.customer_id GROUP BY c.id ORDER BY total_spent DESC LIMIT 3",
"expected_columns": ["name", "total_spent"],
"expected_row_count": 3,
"expected_rows": [
["Vikram Singh", 7296.0],
["Kavita Joshi", 6497.0],
["Rahul Kumar", 5797.0]
],
"order_matters": true
}
]
}