File size: 4,846 Bytes
6518b31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
from typing import Optional, List, Dict, Any
from .base import BaseTask

class FinanceExplosionTask(BaseTask):
    @property
    def task_id(self) -> str:
        return "hard_finance_explosion"

    @property
    def name(self) -> str:
        return "Financial Cartesian Explosion Fix"

    @property
    def expected_output(self) -> List[Dict[str, Any]]:
        return [
            {"name": "Alice", "total_orders": 300.0, "total_payments": 300.0},
            {"name": "Bob", "total_orders": 50.0, "total_payments": 50.0}
        ]

    @property
    def difficulty(self) -> str:
        return "expert"

    @property
    def description(self) -> str:
        return (
            "A financial dashboard is reporting massive revenue discrepancies. "
            "The query calculates the total order amount and total payment amount for each user. "
            "However, due to a 'Cartesian Explosion' (Fan Trap) in the JOINs, users with multiple orders "
            "and payments are having their totals multiplied exponentially. "
            "Rewrite the query using Common Table Expressions (CTEs) or Subqueries to aggregate "
            "orders and payments separately *before* joining them to the users table."
        )

    @property
    def expected_output_description(self) -> str:
        return "A table with 'name', 'total_orders', and 'total_payments'. The totals must accurately reflect the sum of orders and payments without multiplication from joins."

    @property
    def schema_sql(self) -> str:
        return """
        CREATE TABLE users (
            user_id INTEGER PRIMARY KEY,
            name TEXT
        );
        CREATE TABLE orders (
            order_id INTEGER PRIMARY KEY,
            user_id INTEGER,
            order_amount DECIMAL(10,2)
        );
        CREATE TABLE payments (
            payment_id INTEGER PRIMARY KEY,
            user_id INTEGER,
            payment_amount DECIMAL(10,2)
        );
        """

    @property
    def seed_data_sql(self) -> str:
        return """
        INSERT INTO users (user_id, name) VALUES (1, 'Alice');
        INSERT INTO users (user_id, name) VALUES (2, 'Bob');
        
        -- Alice has 3 orders (Total: 300)
        INSERT INTO orders (order_id, user_id, order_amount) VALUES (101, 1, 100.00);
        INSERT INTO orders (order_id, user_id, order_amount) VALUES (102, 1, 100.00);
        INSERT INTO orders (order_id, user_id, order_amount) VALUES (103, 1, 100.00);
        
        -- Alice has 3 payments (Total: 300)
        INSERT INTO payments (payment_id, user_id, payment_amount) VALUES (201, 1, 100.00);
        INSERT INTO payments (payment_id, user_id, payment_amount) VALUES (202, 1, 100.00);
        INSERT INTO payments (payment_id, user_id, payment_amount) VALUES (203, 1, 100.00);

        -- Bob has 1 order and 1 payment
        INSERT INTO orders (order_id, user_id, order_amount) VALUES (104, 2, 50.00);
        INSERT INTO payments (payment_id, user_id, payment_amount) VALUES (204, 2, 50.00);
        """

    @property
    def broken_query(self) -> str:
        return """
        SELECT 
            u.name, 
            SUM(o.order_amount) as total_orders, 
            SUM(p.payment_amount) as total_payments
        FROM users u
        LEFT JOIN orders o ON u.user_id = o.user_id
        LEFT JOIN payments p ON u.user_id = p.user_id
        GROUP BY u.name
        ORDER BY u.name;
        """

    @property
    def max_steps(self) -> int:
        return 12

    @property
    def hint(self) -> str:
        return "Aggregate the 'orders' table by user_id in one CTE, and the 'payments' table in another CTE. Then join those aggregated CTEs to the users table."

    def grade(self, rows: Optional[List[Dict[str, Any]]]) -> float:
        if not rows:
            return 0.0

        try:
            # Expected exact answers based on seed data
            expected = {
                "Alice": {"total_orders": 300.0, "total_payments": 300.0},
                "Bob": {"total_orders": 50.0, "total_payments": 50.0}
            }
            
            if len(rows) != 2:
                return 0.1

            score = 0.5
            correct_users = 0

            for row in rows:
                name = row.get("name")
                if name in expected:
                    o_amt = float(row.get("total_orders", 0) or 0)
                    p_amt = float(row.get("total_payments", 0) or 0)
                    
                    if o_amt == expected[name]["total_orders"] and p_amt == expected[name]["total_payments"]:
                        correct_users += 1

            if correct_users == 2:
                return 1.0  # Perfect fix!
            elif correct_users == 1:
                return 0.7  # Partial logic fix

            return score

        except Exception:
            return 0.0