cpatino10 committed on
Commit
435a561
·
verified ·
1 Parent(s): 69ea7bd

Update tools.py

Browse files

added logic to handle different file types

Files changed (1) hide show
  1. tools.py +30 -10
tools.py CHANGED
@@ -1,5 +1,7 @@
1
  from smolagents import tool, DuckDuckGoSearchTool, VisitWebpageTool
2
  import os
 
 
3
 
4
  # Instantiate built-in tools
5
  search_tool = DuckDuckGoSearchTool()
@@ -7,22 +9,40 @@ visit_webpage = VisitWebpageTool()
7
 
8
  # Custom tool for GAIA files
9
  @tool
10
- def hanlde_file(file_path: str) -> str:
11
  """
12
- A tool that reads the content of a file provided in a GAIA task.
13
- Supports .txt files.
14
-
15
  Args:
16
- file_path: The local path to the file to be read.
17
  """
18
  if not os.path.exists(file_path):
19
- return f"Error: FIle {file_path} not found"
20
-
 
 
21
  try:
22
- with open(file_path, 'r', encoding='utf-8') as f:
23
- return f.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  except Exception as e:
25
- return f"Error reading file: {str(e)}"
26
 
27
  # all tools in a list ready for export
28
  all_tools = [search_tool, visit_webpage, hanlde_file]
 
1
  from smolagents import tool, DuckDuckGoSearchTool, VisitWebpageTool
2
  import os
3
+ import pandas as pd
4
+ from pypdf import PdfReader
5
 
6
  # Instantiate built-in tools
7
  search_tool = DuckDuckGoSearchTool()
 
9
 
10
  # Custom tool for GAIA files
11
  @tool
12
def handle_file(file_path: str) -> str:
    """
    Extract readable content from a local file (PDF, Excel, CSV, or plain text).

    Spreadsheets (.xlsx, .xls, .csv) are summarised as a markdown table of the
    first 10 rows plus the full shape. PDFs are returned as extracted text
    capped at 10,000 characters. Any other extension is read as UTF-8 text.
    Failures are reported as an error string rather than raised, so the caller
    always receives a string.

    Args:
        file_path: The local path to the file.

    Returns:
        The extracted content, or a message starting with "Error" on failure.
    """
    if not os.path.exists(file_path):
        return f"Error: File {file_path} not found."

    ext = os.path.splitext(file_path)[1].lower()

    try:
        # Handle tabular files (Excel and CSV)
        if ext in ('.xlsx', '.xls', '.csv'):
            df = pd.read_csv(file_path) if ext == '.csv' else pd.read_excel(file_path)
            # Only the head and the shape are returned, to save tokens.
            return f"Dataframe Summary:\n{df.head(10).to_markdown()}\n\nFull shape: {df.shape}"

        # Handle PDFs
        if ext == '.pdf':
            reader = PdfReader(file_path)
            # Defensive: treat a page with no extractable text (e.g. a scanned
            # image) as empty so one such page cannot fail the whole document.
            text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)
            return text[:10000]  # Cap text to avoid context window issues

        # Handle text files (fallback for every other extension)
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()

    except Exception as e:
        return f"Error processing {ext} file: {str(e)}"
46
 
47
  # all tools in a list ready for export
48
  all_tools = [search_tool, visit_webpage, hanlde_file]