Artsem Radzevich
committed on
Commit
·
1786917
1
Parent(s):
a4c3cf0
Additional fixes to work with files and excel files.
Browse files- agent_config.py +40 -2
- requirements.txt +3 -1
- tools/excel_analysis_tool.py +56 -60
agent_config.py
CHANGED
|
@@ -2,6 +2,10 @@ from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpage
|
|
| 2 |
from tools.wiki_tool import WikiTool
|
| 3 |
from tools.excel_analysis_tool import ExcelAnalysisTool
|
| 4 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
@tool
|
| 7 |
def check_answer(answer: str) -> str:
|
|
@@ -29,6 +33,40 @@ def reverse_sentence_tool(reverse_sentence: str) -> str:
|
|
| 29 |
|
| 30 |
return " ".join(correct_words)
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
def create_agent():
|
| 33 |
"""
|
| 34 |
Creates and configures the CodeAgent with the necessary model and tools.
|
|
@@ -60,11 +98,11 @@ def create_agent():
|
|
| 60 |
# Create and return the agent
|
| 61 |
agent = CodeAgent(
|
| 62 |
model=model,
|
| 63 |
-
tools=[wiki_tool, web_search_tool, excel_analysis_tool, visitWebpageTool, python_interpreter_tool, check_answer, reverse_sentence_tool],
|
| 64 |
add_base_tools=True,
|
| 65 |
max_steps=8,
|
| 66 |
verbosity_level=2,
|
| 67 |
-
additional_authorized_imports=['pandas','
|
| 68 |
)
|
| 69 |
return agent
|
| 70 |
|
|
|
|
| 2 |
from tools.wiki_tool import WikiTool
|
| 3 |
from tools.excel_analysis_tool import ExcelAnalysisTool
|
| 4 |
import os
|
| 5 |
+
import requests
|
| 6 |
+
import re
|
| 7 |
+
import tempfile
|
| 8 |
+
from pathlib import Path
|
| 9 |
|
| 10 |
@tool
|
| 11 |
def check_answer(answer: str) -> str:
|
|
|
|
| 33 |
|
| 34 |
return " ".join(correct_words)
|
| 35 |
|
| 36 |
+
@tool
def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
    """
    Download the file attached to a task, if there is one, and return its local path.

    Sends GET {base_api_url}/files/{task_id}.
    On HTTP 200 the response body is saved inside a "gaia_files" folder in the
    system temp directory and the path of the saved file is returned.
    On HTTP 404 nothing is saved and None is returned (the task has no file).
    Any other 4xx/5xx status raises requests.exceptions.HTTPError so the caller
    can log / handle it.

    Args:
        base_api_url: Base URL of the API; a trailing slash is ignored.
        task_id: Identifier of the task whose attached file should be fetched.
    """
    url = f"{base_api_url.rstrip('/')}/files/{task_id}"
    resp = requests.get(url, timeout=30)
    if resp.status_code == 404:
        return None  # no file for this task
    resp.raise_for_status()  # raises HTTPError on any other 4xx/5xx

    # Prefer the server-provided name (keeps the original extension); fall back
    # to the task id. Both quoted and unquoted filename= values are accepted.
    cdisp = resp.headers.get("content-disposition", "")
    filename = task_id
    m = re.search(r'filename="?([^";]+)"?', cdisp)
    if m:
        filename = m.group(1).strip()

    # The name comes from a remote header / caller input: keep only its last
    # path component so "../" or absolute paths cannot escape the temp dir.
    safe_name = re.split(r"[\\/]", filename)[-1]
    if safe_name in ("", ".", ".."):
        safe_name = re.split(r"[\\/]", task_id)[-1] or "download"
    if safe_name in (".", ".."):
        safe_name = "download"

    tmp_dir = Path(tempfile.gettempdir()) / "gaia_files"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    file_path = tmp_dir / safe_name
    file_path.write_bytes(resp.content)
    return str(file_path)
|
| 69 |
+
|
| 70 |
def create_agent():
|
| 71 |
"""
|
| 72 |
Creates and configures the CodeAgent with the necessary model and tools.
|
|
|
|
| 98 |
# Create and return the agent
|
| 99 |
agent = CodeAgent(
|
| 100 |
model=model,
|
| 101 |
+
tools=[wiki_tool, web_search_tool, excel_analysis_tool, visitWebpageTool, python_interpreter_tool, check_answer, reverse_sentence_tool, download_file_if_any],
|
| 102 |
add_base_tools=True,
|
| 103 |
max_steps=8,
|
| 104 |
verbosity_level=2,
|
| 105 |
+
additional_authorized_imports=['numpy','subprocess', 're', 'pandas', 'requests', 'json', 'os', 'pathlib', 'tempfile'],
|
| 106 |
)
|
| 107 |
return agent
|
| 108 |
|
requirements.txt
CHANGED
|
@@ -4,4 +4,6 @@ wikipedia
|
|
| 4 |
pandas
|
| 5 |
openpyxl
|
| 6 |
huggingface_hub
|
| 7 |
-
smolagents[litellm]
|
|
|
|
|
|
|
|
|
| 4 |
pandas
|
| 5 |
openpyxl
|
| 6 |
huggingface_hub
|
| 7 |
+
smolagents[litellm]
|
| 8 |
+
tabulate
|
| 9 |
+
requests
|
tools/excel_analysis_tool.py
CHANGED
|
@@ -1,67 +1,63 @@
|
|
| 1 |
from smolagents.tools import Tool
|
| 2 |
import pandas as pd
|
|
|
|
|
|
|
| 3 |
from huggingface_hub import hf_hub_download
|
| 4 |
|
| 5 |
class ExcelAnalysisTool(Tool):
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
return "Error: 'filename' is required."
|
| 41 |
|
| 42 |
try:
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
return "Error: The specified file was not found."
|
| 64 |
-
except KeyError as e:
|
| 65 |
-
return f"Error: Missing expected column in the Excel file: {str(e)}"
|
| 66 |
-
except Exception as e:
|
| 67 |
-
return f"An unexpected error occurred: {str(e)}"
|
|
|
|
| 1 |
from smolagents.tools import Tool
|
| 2 |
import pandas as pd
|
| 3 |
+
from tabulate import tabulate
|
| 4 |
+
from typing import Union, Optional
|
| 5 |
from huggingface_hub import hf_hub_download
|
| 6 |
|
| 7 |
class ExcelAnalysisTool(Tool):
    """Render an Excel worksheet as Markdown text."""

    name = "excel_to_text"
    description = (
        "Read an Excel file and return a Markdown table of the requested sheet. "
        "Accepts either the sheet name or the zero-based index."
    )

    inputs = {
        "excel_path": {
            "type": "string",
            "description": "Path to the Excel file (.xlsx / .xls).",
        },
        "sheet_name": {
            "type": "string",
            "description": (
                "Worksheet name or zero‑based index *as a string* (optional; default first sheet)."
            ),
            "nullable": True,
        },
    }

    output_type = "string"

    def forward(
        self,
        excel_path: str,
        sheet_name: Optional[str] = None,
    ) -> str:
        """Load *excel_path* and return the requested sheet as a Markdown table.

        Args:
            excel_path: Path to the workbook; ``~`` is expanded and the path
                is resolved before use.
            sheet_name: Worksheet name, or a zero-based index given as a
                digit string. ``None`` or ``""`` selects the first sheet.

        Returns:
            The sheet rendered as a Markdown table, or a message starting
            with ``"Error"`` when the file is missing or cannot be read.
        """
        # Imported here because the module-level imports do not provide
        # pathlib; without this the path check below raises NameError.
        from pathlib import Path

        path = Path(excel_path).expanduser().resolve()
        if not path.exists():
            return f"Error: Excel file not found at {path}"

        try:
            # Interpret sheet identifier -----------------------------------
            sheet: Union[str, int]
            if sheet_name is None or sheet_name == "":
                sheet = 0  # first sheet
            else:
                # A numeric string (e.g. "1") is treated as a zero-based index
                sheet = int(sheet_name) if sheet_name.isdigit() else sheet_name

            # Load worksheet ----------------------------------------------
            df = pd.read_excel(path, sheet_name=sheet)

            # Render to Markdown; fall back to tabulate if needed ---------
            if hasattr(df, "to_markdown"):
                return df.to_markdown(index=False)

            return tabulate(df, headers="keys", tablefmt="github", showindex=False)

        except Exception as exc:  # broad catch keeps the agent chat‑friendly
            return f"Error reading Excel file: {exc}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|