File size: 3,548 Bytes
aca1396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19b4563
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import re
import sqlite3
from pathlib import Path

import pandas as pd

from tasks.base_task import BaseTask

DB_PATH = Path(__file__).resolve().parent.parent / "datasets" / "store_data.db"


class CustomerLoyaltyRevenueTask(BaseTask):
    """Hard task: find the highest-revenue customer loyalty tier using cross-source data.

    The agent must query the customer_profiles table from the SQLite database,
    join it with the sales DataFrame on customer_id, and compute revenue by tier.
    The database is accessible via sqlite3.connect(db_path) in the sandbox.
    """

    @property
    def task_id(self) -> int:
        return 5

    @property
    def difficulty(self) -> str:
        return "hard"

    @property
    def description(self) -> str:
        return (
            "Using the customer profiles database (connect with sqlite3.connect(db_path)), "
            "which customer loyalty tier generates the highest total revenue? "
            "What percentage of total revenue does it represent? "
            "Round percentage to 2 decimal places. "
            "Submit your answer in the format: "
            "'Top tier: <name>, Revenue: $X.XX, Percentage: X.XX%'"
        )

    def _compute(self) -> tuple:
        """Compute the top loyalty tier and its revenue share.

        Returns:
            A tuple of (top_tier, tier_revenue, percentage).
        """
        conn = sqlite3.connect(DB_PATH)
        profiles = pd.read_sql("SELECT customer_id, loyalty_tier FROM customer_profiles", conn)
        conn.close()
        merged = self.df.merge(profiles, on="customer_id", how="left")
        tier_rev = merged.groupby("loyalty_tier")["total_price"].sum()
        total = merged["total_price"].sum()
        top = tier_rev.idxmax()
        rev = tier_rev[top]
        pct = rev / total * 100
        return top, rev, pct

    def expected_answer(self) -> str:
        """Compute the expected formatted answer.

        Returns:
            Formatted string like 'Top tier: Bronze, Revenue: $97210.91, Percentage: 39.28%'.
        """
        top, rev, pct = self._compute()
        return f"Top tier: {top}, Revenue: ${rev:.2f}, Percentage: {round(pct, 2)}%"

    def grade(self, answer: str) -> float:
        """Grade with partial credit for each of the three fields.

        Scoring:
            - 0.33 for correct tier name (case-insensitive)
            - 0.33 for revenue within ±0.5% of expected
            - 0.34 for percentage within ±0.1 of expected

        Args:
            answer: The agent's submitted answer string.

        Returns:
            A score between 0.0 and 1.0.
        """
        top, expected_rev, expected_pct = self._compute()
        score = 0.0

        tier_match = re.search(r"Top tier:\s*([^,]+)", answer, re.IGNORECASE)
        if tier_match and tier_match.group(1).strip().lower() == top.lower():
            score += 0.33

        rev_match = re.search(r"Revenue:\s*\$?([\d.]+)", answer, re.IGNORECASE)
        if rev_match:
            try:
                submitted = float(rev_match.group(1))
                if abs(submitted - expected_rev) <= expected_rev * 0.005:
                    score += 0.33
            except ValueError:
                pass

        pct_match = re.search(r"Percentage:\s*([\d.]+)%?", answer, re.IGNORECASE)
        if pct_match:
            try:
                if abs(float(pct_match.group(1)) - expected_pct) <= 0.1:
                    score += 0.34
            except ValueError:
                pass

        return max(0.05, min(0.95, score))