File size: 4,074 Bytes
aca1396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19b4563
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import re
import sqlite3
from pathlib import Path

import pandas as pd

from tasks.base_task import BaseTask

DB_PATH = Path(__file__).resolve().parent.parent / "datasets" / "store_data.db"


class SupplierProfitabilityTask(BaseTask):
    """Hard task: find the most profitable supplier using cross-source data.

    The agent must query the product_catalog table from the SQLite database,
    join it with the sales DataFrame on product_name, compute per-order profit
    and margin, then aggregate by supplier.
    The database is accessible via sqlite3.connect(db_path) in the sandbox.
    """

    @property
    def task_id(self) -> int:
        return 6

    @property
    def difficulty(self) -> str:
        return "hard"

    @property
    def description(self) -> str:
        return (
            "Using the product catalog database (connect with sqlite3.connect(db_path)), "
            "which supplier has the highest total profit from orders? "
            "(profit per order = (unit_price - cost_price) * quantity) "
            "What is their total profit and average profit margin? "
            "(margin % = (unit_price - cost_price) / unit_price * 100, "
            "averaged across all their orders) "
            "Round total profit to 2 decimal places and avg margin to 2 decimal places. "
            "Submit your answer in the format: "
            "'Supplier: <name>, Total profit: $X.XX, Avg margin: X.XX%'"
        )

    def _compute(self) -> tuple:
        """Compute the top supplier by profit and their average margin.

        Returns:
            A tuple of (supplier_name, total_profit, avg_margin_pct).
        """
        conn = sqlite3.connect(DB_PATH)
        catalog = pd.read_sql("SELECT product_name, supplier, cost_price FROM product_catalog", conn)
        conn.close()
        merged = self.df.merge(catalog, on="product_name", how="left")
        merged["profit"] = (merged["unit_price"] - merged["cost_price"]) * merged["quantity"]
        merged["margin"] = (merged["unit_price"] - merged["cost_price"]) / merged["unit_price"] * 100
        sup_profit = merged.groupby("supplier")["profit"].sum()
        sup_margin = merged.groupby("supplier")["margin"].mean()
        top = sup_profit.idxmax()
        return top, sup_profit[top], sup_margin[top]

    def expected_answer(self) -> str:
        """Compute the expected formatted answer.

        Returns:
            Formatted string like 'Supplier: FashionWorld, Total profit: $38292.08, Avg margin: 52.08%'.
        """
        top, profit, margin = self._compute()
        return f"Supplier: {top}, Total profit: ${profit:.2f}, Avg margin: {round(margin, 2)}%"

    def grade(self, answer: str) -> float:
        """Grade with partial credit for each of the three fields.

        Scoring:
            - 0.33 for correct supplier name (case-insensitive)
            - 0.34 for total profit within ±0.5% of expected
            - 0.33 for avg margin within ±0.1 of expected

        Args:
            answer: The agent's submitted answer string.

        Returns:
            A score between 0.0 and 1.0.
        """
        top, expected_profit, expected_margin = self._compute()
        score = 0.0

        sup_match = re.search(r"Supplier:\s*([^,]+)", answer, re.IGNORECASE)
        if sup_match and sup_match.group(1).strip().lower() == top.lower():
            score += 0.33

        profit_match = re.search(r"Total profit:\s*\$?([\d.]+)", answer, re.IGNORECASE)
        if profit_match:
            try:
                submitted = float(profit_match.group(1))
                if abs(submitted - expected_profit) <= expected_profit * 0.005:
                    score += 0.34
            except ValueError:
                pass

        margin_match = re.search(r"Avg margin:\s*([\d.]+)%?", answer, re.IGNORECASE)
        if margin_match:
            try:
                if abs(float(margin_match.group(1)) - expected_margin) <= 0.1:
                    score += 0.33
            except ValueError:
                pass

        return max(0.05, min(0.95, score))