Artsem Radzevich committed on
Commit
1786917
·
1 Parent(s): a4c3cf0

Additional fixes for working with downloaded files and Excel files.

Browse files
Files changed (3) hide show
  1. agent_config.py +40 -2
  2. requirements.txt +3 -1
  3. tools/excel_analysis_tool.py +56 -60
agent_config.py CHANGED
@@ -2,6 +2,10 @@ from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, VisitWebpage
2
  from tools.wiki_tool import WikiTool
3
  from tools.excel_analysis_tool import ExcelAnalysisTool
4
  import os
 
 
 
 
5
 
6
  @tool
7
  def check_answer(answer: str) -> str:
@@ -29,6 +33,40 @@ def reverse_sentence_tool(reverse_sentence: str) -> str:
29
 
30
  return " ".join(correct_words)
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def create_agent():
33
  """
34
  Creates and configures the CodeAgent with the necessary model and tools.
@@ -60,11 +98,11 @@ def create_agent():
60
  # Create and return the agent
61
  agent = CodeAgent(
62
  model=model,
63
- tools=[wiki_tool, web_search_tool, excel_analysis_tool, visitWebpageTool, python_interpreter_tool, check_answer, reverse_sentence_tool],
64
  add_base_tools=True,
65
  max_steps=8,
66
  verbosity_level=2,
67
- additional_authorized_imports=['pandas','numpy','csv','subprocess']
68
  )
69
  return agent
70
 
 
2
  from tools.wiki_tool import WikiTool
3
  from tools.excel_analysis_tool import ExcelAnalysisTool
4
  import os
5
+ import requests
6
+ import re
7
+ import tempfile
8
+ from pathlib import Path
9
 
10
  @tool
11
  def check_answer(answer: str) -> str:
 
33
 
34
  return " ".join(correct_words)
35
 
36
@tool
def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
    """
    Download the file attached to a task, if the API has one.

    Sends ``GET {base_api_url}/files/{task_id}``. A 200 response is saved in the
    ``gaia_files`` folder of the system temp directory and its local path is
    returned; a 404 response means the task has no attached file.

    Args:
        base_api_url: Base URL of the API that serves ``/files/{task_id}``.
        task_id: Identifier of the task whose attached file should be fetched.

    Returns:
        The local path of the saved file, or None when the server answers 404.

    Raises:
        requests.exceptions.HTTPError: For any 4xx/5xx status other than 404.
    """
    # Tolerate a trailing slash on the base URL so the request path never
    # contains "//files".
    url = f"{base_api_url.rstrip('/')}/files/{task_id}"

    resp = requests.get(url, timeout=30)
    if resp.status_code == 404:
        return None  # no file
    # Non-404 error statuses (403, 500, ...) propagate to the caller as HTTPError.
    resp.raise_for_status()

    # Prefer the server-provided name (keeps the original extension); accept
    # both quoted and unquoted `filename=` forms. The RFC 5987 `filename*=`
    # form is deliberately not matched here.
    cdisp = resp.headers.get("content-disposition", "")
    found = re.search(r'filename\s*=\s*"?([^";]+)"?', cdisp, flags=re.IGNORECASE)
    raw_name = found.group(1).strip() if found else task_id

    # The name comes from an HTTP header (or from an agent-supplied task id), so
    # reduce it to its final path component: a value such as "../../x" must not
    # escape the download directory.
    unusable = ("", ".", "..")
    filename = Path(raw_name.replace("\\", "/")).name
    if filename in unusable:
        filename = Path(task_id.replace("\\", "/")).name
    if filename in unusable:
        filename = "downloaded_file"

    tmp_dir = Path(tempfile.gettempdir()) / "gaia_files"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    file_path = tmp_dir / filename
    file_path.write_bytes(resp.content)
    return str(file_path)
69
+
70
  def create_agent():
71
  """
72
  Creates and configures the CodeAgent with the necessary model and tools.
 
98
  # Create and return the agent
99
  agent = CodeAgent(
100
  model=model,
101
+ tools=[wiki_tool, web_search_tool, excel_analysis_tool, visitWebpageTool, python_interpreter_tool, check_answer, reverse_sentence_tool, download_file_if_any],
102
  add_base_tools=True,
103
  max_steps=8,
104
  verbosity_level=2,
105
+ additional_authorized_imports=['numpy','subprocess', 're', 'pandas', 'requests', 'json', 'os', 'pathlib', 'tempfile'],
106
  )
107
  return agent
108
 
requirements.txt CHANGED
@@ -4,4 +4,6 @@ wikipedia
4
  pandas
5
  openpyxl
6
  huggingface_hub
7
- smolagents[litellm]
 
 
 
4
  pandas
5
  openpyxl
6
  huggingface_hub
7
+ smolagents[litellm]
8
+ tabulate
9
+ requests
tools/excel_analysis_tool.py CHANGED
@@ -1,67 +1,63 @@
1
  from smolagents.tools import Tool
2
  import pandas as pd
 
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  class ExcelAnalysisTool(Tool):
6
- def __init__(self):
7
- self.name = "ExcelAnalysisTool"
8
- self.description = (
9
- "Loads an Excel file from the GAIA dataset on Hugging Face and calculates "
10
- "the total sales for items labeled as 'food', excluding drinks. "
11
- "Provide input as a string with the filename, e.g., 'sales_data.xlsx'."
12
- )
13
- self.inputs = {
14
- "filename": {
15
- "type": "string",
16
- "description": "The name of the Excel file to process (e.g., 'sales_data.xlsx').",
17
- "nullable": True # Сделать filename необязательным
18
- }
19
- }
20
- self.output_type = "string"
21
- self.repo_id = "gaia-benchmark/GAIA"
22
- super().__init__(
23
- name=self.name,
24
- description=self.description,
25
- inputs=self.inputs,
26
- output_type=self.output_type
27
- )
28
-
29
- def forward(self, filename: str = "") -> str:
30
- """
31
- Loads and processes the Excel file.
32
-
33
- Args:
34
- filename (str): The name of the Excel file (e.g., 'sales_data.xlsx').
35
-
36
- Returns:
37
- str: Total food sales in USD, or an error message.
38
- """
39
- if not filename:
40
- return "Error: 'filename' is required."
41
 
42
  try:
43
- # Download the file from Hugging Face Hub
44
- file_path = hf_hub_download(
45
- repo_id=self.repo_id,
46
- filename=filename,
47
- repo_type="dataset"
48
- )
49
-
50
- # Load the Excel file into a DataFrame
51
- df = pd.read_excel(file_path)
52
-
53
- # Filter rows: category == 'food' and item != 'drinks'
54
- food_sales = df[
55
- (df['category'].str.lower() == 'food') &
56
- (df['item'].str.lower() != 'drinks')
57
- ]
58
-
59
- total_sales = food_sales['sales'].sum()
60
- return f"Total sales for food items: ${total_sales:.2f}"
61
-
62
- except FileNotFoundError:
63
- return "Error: The specified file was not found."
64
- except KeyError as e:
65
- return f"Error: Missing expected column in the Excel file: {str(e)}"
66
- except Exception as e:
67
- return f"An unexpected error occurred: {str(e)}"
 
1
  from smolagents.tools import Tool
2
  import pandas as pd
3
+ from tabulate import tabulate
4
+ from typing import Union, Optional
5
  from huggingface_hub import hf_hub_download
6
 
7
  class ExcelAnalysisTool(Tool):
8
+ """Render an Excel worksheet as Markdown text."""
9
+
10
+ name = "excel_to_text"
11
+ description = (
12
+ "Read an Excel file and return a Markdown table of the requested sheet. "
13
+ "Accepts either the sheet name or the zero-based index."
14
+ )
15
+
16
+ inputs = {
17
+ "excel_path": {
18
+ "type": "string",
19
+ "description": "Path to the Excel file (.xlsx / .xls).",
20
+ },
21
+ "sheet_name": {
22
+ "type": "string",
23
+ "description": (
24
+ "Worksheet name or zero‑based index *as a string* (optional; default first sheet)."
25
+ ),
26
+ "nullable": True,
27
+ },
28
+ }
29
+
30
+ output_type = "string"
31
+
32
+ def forward(
33
+ self,
34
+ excel_path: str,
35
+ sheet_name: Optional[str] = None,
36
+ ) -> str:
37
+ """Load *excel_path* and return the sheet as a Markdown table."""
38
+
39
+ path = pathlib.Path(excel_path).expanduser().resolve()
40
+ if not path.exists():
41
+ return f"Error: Excel file not found at {path}"
 
42
 
43
  try:
44
+ # Interpret sheet identifier -----------------------------------
45
+ sheet: Union[str, int]
46
+ if sheet_name is None or sheet_name == "":
47
+ sheet = 0 # first sheet
48
+ else:
49
+ # If the user passed a numeric string (e.g. "1"), cast to int
50
+ sheet = int(sheet_name) if sheet_name.isdigit() else sheet_name
51
+
52
+ # Load worksheet ----------------------------------------------
53
+ df = pd.read_excel(path, sheet_name=sheet)
54
+
55
+ # Render to Markdown; fall back to tabulate if needed ---------
56
+ if hasattr(pd.DataFrame, "to_markdown"):
57
+ return df.to_markdown(index=False)
58
+ from tabulate import tabulate # pragma: no cover – fallback path
59
+
60
+ return tabulate(df, headers="keys", tablefmt="github", showindex=False)
61
+
62
+ except Exception as exc: # broad catch keeps the agent chat‑friendly
63
+ return f"Error reading Excel file: {exc}"