File size: 4,596 Bytes
a39d8ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""
nl2sql-bench/server/tasks/medium.py
=====================================
Task 2 — Join & Aggregation  (difficulty: medium)

Questions require at least one JOIN and GROUP BY / HAVING.
Expect most frontier models to succeed in 2–3 steps.
"""

from __future__ import annotations

from .base import BaseTask, TaskExample, register


@register
class JoinAggregationTask(BaseTask):
    name = "join-aggregation"
    difficulty = "medium"

    examples = [
        TaskExample(
            question=(
                "How many orders has each customer placed? "
                "Return columns: customer_name, order_count. "
                "Include customers with zero orders. "
                "Sort by order_count descending, then customer_name ascending."
            ),
            sql=(
                "SELECT c.name AS customer_name, COUNT(o.id) AS order_count "
                "FROM customers c "
                "LEFT JOIN orders o ON c.id = o.customer_id "
                "GROUP BY c.id, c.name "
                "ORDER BY order_count DESC, customer_name ASC"
            ),
            notes="LEFT JOIN to include zero-order customers, COUNT aggregate.",
        ),
        TaskExample(
            question=(
                "What is the average product rating per category? "
                "Only include categories that have at least one review. "
                "Return columns: category_name, avg_rating. "
                "Round avg_rating to 2 decimal places. "
                "Sort by avg_rating descending."
            ),
            sql=(
                "SELECT c.name AS category_name, "
                "       ROUND(AVG(r.rating), 2) AS avg_rating "
                "FROM categories c "
                "JOIN products p ON p.category_id = c.id "
                "JOIN reviews r ON r.product_id = p.id "
                "GROUP BY c.id, c.name "
                "ORDER BY avg_rating DESC"
            ),
            notes="Two JOINs, AVG aggregate, ROUND function.",
        ),
        TaskExample(
            question=(
                "Which categories have more than 5 products in stock "
                "(i.e., stock_quantity > 0)? "
                "Return columns: category_name, in_stock_count. "
                "Sort by in_stock_count descending."
            ),
            sql=(
                "SELECT c.name AS category_name, "
                "       COUNT(p.id) AS in_stock_count "
                "FROM categories c "
                "JOIN products p ON p.category_id = c.id "
                "WHERE p.stock_quantity > 0 "
                "GROUP BY c.id, c.name "
                "HAVING COUNT(p.id) > 5 "
                "ORDER BY in_stock_count DESC"
            ),
            notes="WHERE before GROUP BY, HAVING filter on aggregate.",
        ),
        TaskExample(
            question=(
                "Which customers have spent more than $500 total on delivered orders? "
                "Return columns: customer_name, total_spent. "
                "Round total_spent to 2 decimal places. "
                "Sort by total_spent descending."
            ),
            sql=(
                "SELECT c.name AS customer_name, "
                "       ROUND(SUM(o.total_amount), 2) AS total_spent "
                "FROM customers c "
                "JOIN orders o ON o.customer_id = c.id "
                "WHERE o.status = 'delivered' "
                "GROUP BY c.id, c.name "
                "HAVING SUM(o.total_amount) > 500 "
                "ORDER BY total_spent DESC"
            ),
            notes="SUM aggregate, HAVING on SUM, status filter.",
        ),
        TaskExample(
            question=(
                "Show the total quantity sold for each product. "
                "Only include products that appear in at least one order item. "
                "Return columns: product_name, total_quantity_sold. "
                "Sort by total_quantity_sold descending."
            ),
            sql=(
                "SELECT p.name AS product_name, "
                "       SUM(oi.quantity) AS total_quantity_sold "
                "FROM products p "
                "JOIN order_items oi ON oi.product_id = p.id "
                "GROUP BY p.id, p.name "
                "ORDER BY total_quantity_sold DESC"
            ),
            notes="JOIN on order_items, SUM aggregate.",
        ),
    ]

    def description(self) -> str:
        return (
            "Multi-table JOIN queries with GROUP BY, HAVING, and aggregation "
            "functions (COUNT, SUM, AVG, ROUND). Tests relational reasoning."
        )