Spaces:
Running
Running
| """ | |
| nl2sql-bench/server/tasks/hard.py | |
| =================================== | |
| Task 3 — Analytics & Window (difficulty: hard) | |
| Questions require CTEs, window functions (RANK, ROW_NUMBER, running totals), | |
| or non-trivial subqueries. Even strong frontier models often need 3–5 steps. | |
| """ | |
| from __future__ import annotations | |
| from .base import BaseTask, TaskExample, register | |
| class AnalyticsWindowTask(BaseTask): | |
| name = "analytics-window" | |
| difficulty = "hard" | |
| examples = [ | |
| TaskExample( | |
| question=( | |
| "Rank customers by their total spending on delivered orders " | |
| "using DENSE_RANK (rank 1 = highest spender). " | |
| "Return columns: customer_name, total_spent, spending_rank. " | |
| "Round total_spent to 2 decimal places. " | |
| "Sort by spending_rank ascending." | |
| ), | |
| sql=( | |
| "SELECT customer_name, total_spent, spending_rank " | |
| "FROM ( " | |
| " SELECT c.name AS customer_name, " | |
| " ROUND(SUM(o.total_amount), 2) AS total_spent, " | |
| " DENSE_RANK() OVER (ORDER BY SUM(o.total_amount) DESC) AS spending_rank " | |
| " FROM customers c " | |
| " JOIN orders o ON o.customer_id = c.id " | |
| " WHERE o.status = 'delivered' " | |
| " GROUP BY c.id, c.name " | |
| ") sub " | |
| "ORDER BY spending_rank ASC" | |
| ), | |
| notes="Window function DENSE_RANK inside a subquery wrapping a GROUP BY.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "For each product that has been reviewed, show its name, its own " | |
| "average rating, and the average rating of all products in its category. " | |
| "Return columns: product_name, product_avg_rating, category_avg_rating. " | |
| "Round both averages to 2 decimal places. " | |
| "Sort by product_avg_rating descending." | |
| ), | |
| sql=( | |
| "SELECT p.name AS product_name, " | |
| " ROUND(AVG(r.rating), 2) AS product_avg_rating, " | |
| " ROUND(AVG(AVG(r.rating)) OVER (PARTITION BY p.category_id), 2) " | |
| " AS category_avg_rating " | |
| "FROM products p " | |
| "JOIN reviews r ON r.product_id = p.id " | |
| "GROUP BY p.id, p.name, p.category_id " | |
| "ORDER BY product_avg_rating DESC" | |
| ), | |
| notes="AVG of AVG via window PARTITION BY — requires nested aggregate understanding.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "Find all customers whose most recent order has status 'cancelled'. " | |
| "Use a CTE with ROW_NUMBER to identify the latest order per customer. " | |
| "Return columns: customer_name, last_order_status, last_order_date. " | |
| "Sort by customer_name ascending." | |
| ), | |
| sql=( | |
| "WITH ranked_orders AS ( " | |
| " SELECT customer_id, status, created_at, " | |
| " ROW_NUMBER() OVER (PARTITION BY customer_id " | |
| " ORDER BY created_at DESC) AS rn " | |
| " FROM orders " | |
| ") " | |
| "SELECT c.name AS customer_name, " | |
| " ro.status AS last_order_status, " | |
| " ro.created_at AS last_order_date " | |
| "FROM customers c " | |
| "JOIN ranked_orders ro ON ro.customer_id = c.id " | |
| "WHERE ro.rn = 1 " | |
| " AND ro.status = 'cancelled' " | |
| "ORDER BY customer_name ASC" | |
| ), | |
| notes="CTE + ROW_NUMBER window partitioned by customer_id.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "Show the monthly revenue from delivered orders and its running total, " | |
| "for all months in 2024. " | |
| "Return columns: month (format YYYY-MM), monthly_revenue, running_total. " | |
| "Round both revenue columns to 2 decimal places. " | |
| "Sort by month ascending." | |
| ), | |
| sql=( | |
| "WITH monthly AS ( " | |
| " SELECT strftime('%Y-%m', created_at) AS month, " | |
| " ROUND(SUM(total_amount), 2) AS monthly_revenue " | |
| " FROM orders " | |
| " WHERE status = 'delivered' " | |
| " AND created_at >= '2024-01-01' " | |
| " AND created_at < '2025-01-01' " | |
| " GROUP BY strftime('%Y-%m', created_at) " | |
| ") " | |
| "SELECT month, " | |
| " monthly_revenue, " | |
| " ROUND(SUM(monthly_revenue) OVER (ORDER BY month), 2) AS running_total " | |
| "FROM monthly " | |
| "ORDER BY month ASC" | |
| ), | |
| notes="CTE + cumulative SUM window ordered by month string.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "Find products whose average rating is strictly above the average " | |
| "rating of all products in their category. " | |
| "Return columns: product_name, category_name, " | |
| "product_avg_rating, category_avg_rating. " | |
| "Round both averages to 2 decimal places. " | |
| "Sort by product_avg_rating descending, then product_name ascending." | |
| ), | |
| sql=( | |
| "WITH product_ratings AS ( " | |
| " SELECT p.id AS product_id, p.name AS product_name, " | |
| " p.category_id, c.name AS category_name, " | |
| " ROUND(AVG(r.rating), 2) AS product_avg_rating " | |
| " FROM products p " | |
| " JOIN reviews r ON r.product_id = p.id " | |
| " JOIN categories c ON c.id = p.category_id " | |
| " GROUP BY p.id, p.name, p.category_id, c.name " | |
| "), " | |
| "category_ratings AS ( " | |
| " SELECT category_id, " | |
| " ROUND(AVG(product_avg_rating), 2) AS category_avg_rating " | |
| " FROM product_ratings " | |
| " GROUP BY category_id " | |
| ") " | |
| "SELECT pr.product_name, pr.category_name, " | |
| " pr.product_avg_rating, cr.category_avg_rating " | |
| "FROM product_ratings pr " | |
| "JOIN category_ratings cr ON cr.category_id = pr.category_id " | |
| "WHERE pr.product_avg_rating > cr.category_avg_rating " | |
| "ORDER BY pr.product_avg_rating DESC, pr.product_name ASC" | |
| ), | |
| notes="Two CTEs, correlated comparison between product and category averages.", | |
| ), | |
| ] | |
| def description(self) -> str: | |
| return ( | |
| "Advanced analytics queries using CTEs, window functions " | |
| "(DENSE_RANK, ROW_NUMBER, running SUM), and nested subqueries. " | |
| "Tests multi-step reasoning and SQLite-specific syntax." | |
| ) | |