Spaces:
Sleeping
Sleeping
| { | |
| "task_name": "advanced_analytics", | |
| "difficulty": "hard", | |
| "description": "Subqueries, CTEs, window functions, and complex multi-table analytics", | |
| "max_steps_per_question": 5, | |
| "questions": [ | |
| { | |
| "id": "hard_1", | |
| "question": "Find all customers whose total spending across all orders exceeds the average total spending per customer. Show customer name and total spent, sorted by total spent from highest to lowest.", | |
| "ground_truth_sql": "SELECT c.name, SUM(o.total_amount) as total_spent FROM customers c JOIN orders o ON c.id = o.customer_id GROUP BY c.id HAVING total_spent > (SELECT AVG(total_spent) FROM (SELECT SUM(total_amount) as total_spent FROM orders GROUP BY customer_id)) ORDER BY total_spent DESC", | |
| "expected_columns": ["name", "total_spent"], | |
| "expected_row_count": 9, | |
| "expected_rows": [ | |
| ["Vikram Singh", 7296.0], | |
| ["Kavita Joshi", 6497.0], | |
| ["Rahul Kumar", 5797.0], | |
| ["Divya Saxena", 5197.0], | |
| ["Priya Patel", 5097.0], | |
| ["Swati Tiwari", 3798.0], | |
| ["Pooja Mishra", 3499.0], | |
| ["Aarav Sharma", 3497.0], | |
| ["Meera Iyer", 3448.0] | |
| ], | |
| "order_matters": true | |
| }, | |
| { | |
| "id": "hard_2", | |
| "question": "Rank all products by their total revenue (quantity * unit_price from order_items) within each product category. Show category, product name, revenue, and the rank within the category. Sort by category alphabetically, then by rank.", | |
| "ground_truth_sql": "SELECT p.category, p.name, SUM(oi.quantity * oi.unit_price) as revenue, RANK() OVER (PARTITION BY p.category ORDER BY SUM(oi.quantity * oi.unit_price) DESC) as category_rank FROM products p JOIN order_items oi ON p.id = oi.product_id GROUP BY p.id ORDER BY p.category, category_rank", | |
| "expected_columns": ["category", "name", "revenue", "category_rank"], | |
| "expected_row_count": 15, | |
| "expected_rows": [ | |
| ["Books", "Python Programming", 1796.0, 1], | |
| ["Books", "Data Science Handbook", 1398.0, 2], | |
| ["Books", "Cooking Recipes", 1197.0, 3], | |
| ["Books", "Mystery Novel", 598.0, 4], | |
| ["Clothing", "Running Shoes", 11996.0, 1], | |
| ["Clothing", "Winter Jacket", 3499.0, 2], | |
| ["Clothing", "Denim Jeans", 2998.0, 3], | |
| ["Clothing", "Cotton T-Shirt", 2396.0, 4], | |
| ["Electronics", "Wireless Headphones", 17493.0, 1], | |
| ["Electronics", "Bluetooth Speaker", 7998.0, 2], | |
| ["Electronics", "Smartphone Case", 2495.0, 3], | |
| ["Electronics", "USB-C Cable", 398.0, 4], | |
| ["Home", "Desk Lamp", 5196.0, 1], | |
| ["Home", "Ceramic Mug Set", 4794.0, 2], | |
| ["Home", "Plant Pot", 349.0, 3] | |
| ], | |
| "order_matters": true | |
| }, | |
| { | |
| "id": "hard_3", | |
| "question": "Calculate the month-over-month growth in order count for 2024. Show the month (as YYYY-MM), the number of orders that month, and the change from the previous month (NULL for the first month). Sort by month.", | |
| "ground_truth_sql": "SELECT strftime('%Y-%m', order_date) as month, COUNT(*) as order_count, COUNT(*) - LAG(COUNT(*)) OVER (ORDER BY strftime('%Y-%m', order_date)) as growth FROM orders GROUP BY month ORDER BY month", | |
| "expected_columns": ["month", "order_count", "growth"], | |
| "expected_row_count": 6, | |
| "expected_rows": [ | |
| ["2024-01", 6, null], | |
| ["2024-02", 6, 0], | |
| ["2024-03", 7, 1], | |
| ["2024-04", 4, -3], | |
| ["2024-05", 4, 0], | |
| ["2024-06", 3, -1] | |
| ], | |
| "order_matters": true | |
| }, | |
| { | |
| "id": "hard_4", | |
| "question": "Find all customers who have purchased products from at least 3 different product categories. Show the customer name and the number of distinct categories they bought from, sorted by category count descending then name ascending.", | |
| "ground_truth_sql": "SELECT c.name, COUNT(DISTINCT p.category) as category_count FROM customers c JOIN orders o ON c.id = o.customer_id JOIN order_items oi ON o.id = oi.order_id JOIN products p ON oi.product_id = p.id GROUP BY c.id HAVING category_count >= 3 ORDER BY category_count DESC, c.name ASC", | |
| "expected_columns": ["name", "category_count"], | |
| "expected_row_count": 5, | |
| "expected_rows": [ | |
| ["Rahul Kumar", 4], | |
| ["Ananya Reddy", 3], | |
| ["Priya Patel", 3], | |
| ["Ritu Chopra", 3], | |
| ["Rohan Das", 3] | |
| ], | |
| "order_matters": true | |
| }, | |
| { | |
| "id": "hard_5", | |
| "question": "For each product that has at least 2 reviews, show its category, product name, and average review rating (rounded to 2 decimal places). Sort by category alphabetically, then by average rating descending.", | |
| "ground_truth_sql": "SELECT p.category, p.name, ROUND(AVG(r.rating), 2) as avg_rating FROM products p JOIN reviews r ON p.id = r.product_id GROUP BY p.id HAVING COUNT(r.id) >= 2 ORDER BY p.category, avg_rating DESC", | |
| "expected_columns": ["category", "name", "avg_rating"], | |
| "expected_row_count": 8, | |
| "expected_rows": [ | |
| ["Books", "Python Programming", 4.5], | |
| ["Clothing", "Running Shoes", 4.67], | |
| ["Clothing", "Cotton T-Shirt", 3.5], | |
| ["Electronics", "Wireless Headphones", 4.67], | |
| ["Electronics", "Bluetooth Speaker", 4.5], | |
| ["Electronics", "Smartphone Case", 3.5], | |
| ["Home", "Ceramic Mug Set", 4.67], | |
| ["Home", "Desk Lamp", 4.5] | |
| ], | |
| "order_matters": true | |
| } | |
| ] | |
| } | |