# Exploration of an Agent Specialized in Data Analysis

In [None]:
%pip install numpy pandas tabulate matplotlib seaborn openai smolagents

In [None]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the API key used by OpenAIServerModel
# further down — confirm against the local .env file.
load_dotenv()

True

In [None]:
from smolagents import CodeAgent, OpenAIServerModel, Tool
import requests
from urllib.parse import urljoin
import base64
import tempfile


class GetAttachmentTool(Tool):
    """Tool that fetches the file attached to the current evaluation task.

    The attachment is looked up at ``<agent_evaluation_api>/files/<task_id>``
    and can be handed back as the URL itself, a base64 data URL, the path of
    a local temporary copy, or the decoded text content.
    """

    name = "get_attachment"
    description = """Retrieves attachment for current task in the requested format (URL, DATA_URL, LOCAL_FILE_PATH or TEXT)."""
    inputs = {
        "fmt": {
            "type": "string",
            "description": "Retrieves the attachment associated with the agent task in the specified format. Options are: URL, DATA_URL, LOCAL_FILE_PATH, TEXT. URL returns the URL of the file, DATA_URL returns a base64 encoded data URL, LOCAL_FILE_PATH returns a local file path to the downloaded file, and TEXT returns the content of the file as text.",
            "nullable": True,
            "default": "URL",
        }
    }
    output_type = "string"

    def __init__(
        self,
        agent_evaluation_api: str | None = None,
        task_id: str | None = None,
        **kwargs,
    ):
        """Create the tool.

        Args:
            agent_evaluation_api: Base URL of the evaluation API. Falls back
                to the course scoring space when ``None``.
            task_id: Identifier of the task whose attachment is served.
                May be set or changed later via ``attachment_for``.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        self.agent_evaluation_api = (
            agent_evaluation_api
            if agent_evaluation_api is not None
            else "https://agents-course-unit4-scoring.hf.space/"
        )
        self.task_id = task_id
        super().__init__(**kwargs)

    def attachment_for(self, task_id: str | None):
        """Point the tool at another task (``None`` means no attachment)."""
        self.task_id = task_id

    def forward(self, fmt: str = "URL") -> str:
        """Return the current task's attachment in the requested format.

        Args:
            fmt: One of ``URL``, ``DATA_URL``, ``LOCAL_FILE_PATH`` or ``TEXT``
                (case-insensitive). ``None`` is treated as ``URL`` because
                the input is declared nullable.

        Returns:
            An empty string when no task id is set; otherwise the URL, the
            data URL, the temporary file path, or the response text.

        Raises:
            ValueError: If ``fmt`` is not a supported format, or ``TEXT`` is
                requested for a non-``text/*`` content type.
            requests.HTTPError: If the download returns an error status.
        """
        # The input schema marks fmt as nullable, so None can reach this
        # method; fall back to the documented default instead of crashing.
        fmt = "URL" if fmt is None else fmt.upper()

        # Explicit validation: an assert would be stripped under ``python -O``
        # and would surface as AssertionError rather than ValueError.
        if fmt not in ("URL", "DATA_URL", "LOCAL_FILE_PATH", "TEXT"):
            raise ValueError(
                f"Unsupported format: {fmt}. Supported formats are URL, DATA_URL, LOCAL_FILE_PATH, and TEXT."
            )

        if not self.task_id:
            return ""

        file_url = urljoin(self.agent_evaluation_api, f"files/{self.task_id}")
        if fmt == "URL":
            return file_url

        response = requests.get(
            file_url,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
            },
            # requests waits indefinitely without a timeout; bound the wait
            # so an unresponsive server cannot stall the agent run.
            timeout=30,
        )
        response.raise_for_status()
        mime = response.headers.get("content-type", "text/plain")

        if fmt == "TEXT":
            if not mime.startswith("text/"):
                raise ValueError(
                    f"Content of file type {mime} cannot be retrieved as TEXT."
                )
            return response.text

        if fmt == "DATA_URL":
            return f"data:{mime};base64,{base64.b64encode(response.content).decode('utf-8')}"

        # Only LOCAL_FILE_PATH remains. delete=False keeps the file on disk
        # after the handle is closed so the returned path stays usable.
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_file.write(response.content)
        return tmp_file.name


from smolagents import AgentLogger
from rich.console import Console

# Model driving the agent.
model = OpenAIServerModel(model_id="gpt-4.1")
# record=True makes the console keep everything written to it so that
# capture_step_log can export it as text later.
console = Console(record=True)
# Route the agent's log output through the recording console.
logger = AgentLogger(level=2, console=console)
# Exported console text; capture_step_log appends one entry per invocation.
steps: list[str] = []


def capture_step_log(agent) -> None:
    """Move the console output recorded so far into ``steps``.

    The argument is accepted only to satisfy the callback signature and is
    not used.
    NOTE(review): smolagents may pass the step object rather than the agent
    as the single positional argument — confirm before relying on its name.
    """
    # clear=True empties the record buffer, so each entry holds only the
    # text written since the previous export.
    recorded_text = console.export_text(clear=True)
    steps.append(recorded_text)


# Build the data-analysis agent: one attachment tool, a fixed set of extra
# importable packages, and the recording logger/callback defined above.
agent = CodeAgent(
    name="data_analyst",
    description="Data analyst with advanced skills in statistic, handling tabular data and related Python packages.",
    tools=[
        # The task id is hard-coded for this exploration run.
        GetAttachmentTool(task_id="7bd855d8-463d-4ed5-93ca-5fe35145f733"),
    ],
    # Same analysis packages as the %pip install cell (minus openai/smolagents).
    additional_authorized_imports=[
        "numpy",
        "pandas",
        "tabulate",
        "matplotlib",
        "seaborn",
    ],
    model=model,
    logger=logger,
    # Collects the console output into `steps` each time the callback fires.
    step_callbacks=[capture_step_log],
)

In [35]:
# Run the agent on the task question; the cell output below is the value
# returned by agent.run.
agent.run(
    "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places."
)

'$89706.00'

In [38]:
# Replay the console text captured by capture_step_log, in order.
for s in steps:
    print(s)

╭──────────────────────────────────────────── New run - data_analyst ─────────────────────────────────────────────╮
│                                                                                                                 │
│ The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales │
│ that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.       │
│                                                                                                                 │
╰─ OpenAIServerModel - gpt-4.1 ───────────────────────────────────────────────────────────────────────────────────╯
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Step 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Output message of the LLM: ────────────────────────────────────────────────────────────────────────────────────────
Thought: I need to first retrieve and review the contents of the attache