Final_Assignment_Template

Sleeping

App Files Files Community

cpatino10 committed on Jan 21

Commit

435a561

verified ·

1 Parent(s): 69ea7bd

Update tools.py

Browse files

added logic to handle different file types

Files changed (1) hide show

tools.py +30 -10

tools.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from smolagents import tool, DuckDuckGoSearchTool, VisitWebpageTool
 import os
 # Instantiate built-in tools
 search_tool = DuckDuckGoSearchTool()
@@ -7,22 +9,40 @@ visit_webpage = VisitWebpageTool()
 # Custom tool for GAIA files
 @tool
-def hanlde_file(file_path: str) -> str:
     """
-    A tool that reads the content of a file provided in a GAIA task.
-    Supports .txt files.
     Args:
-        file_path: The local path to the file to be read.
     """
     if not os.path.exists(file_path):
-        return f"Error: FIle {file_path} not found"
     try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            return f.read()
     except Exception as e:
-        return f"Error reading file: {str(e)}"
 # all tools in a list ready for export
 all_tools = [search_tool, visit_webpage, hanlde_file]

 from smolagents import tool, DuckDuckGoSearchTool, VisitWebpageTool
 import os
+import pandas as pd
+from pypdf import PdfReader
 # Instantiate built-in tools
 search_tool = DuckDuckGoSearchTool()
 # Custom tool for GAIA files
 @tool
+def handle_file(file_path: str) -> str:
     """
+    This tool extracts content from different file types (PDF, Excel, CSV, TXT).
     Args:
+        file_path: The local path to the file.
     """
     if not os.path.exists(file_path):
+        return f"Error: File {file_path} not found."
+    ext = os.path.splitext(file_path)[1].lower()
     try:
+        # Handle tabular files (Excel and CSV)
+        if ext in ['.xlsx', '.xls', '.csv']:
+            df = pd.read_csv(file_path) if ext == '.csv' else pd.read_excel(file_path)
+            # Return only the first 10 rows as markdown plus the full shape to save tokens
+            return f"Dataframe Summary:\n{df.head(10).to_markdown()}\n\nFull shape: {df.shape}"
+        # Handle PDFs
+        elif ext == '.pdf':
+            reader = PdfReader(file_path)
+            text = ""
+            for page in reader.pages:
+                text += page.extract_text() + "\n"
+            return text[:10000] # Cap text to avoid context window issues
+        # Handle Text files
+        else:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                return f.read()
     except Exception as e:
+        return f"Error processing {ext} file: {str(e)}"
 # All tools in a list ready for export. Every entry must name an object
 # defined in this module; the file tool is `handle_file`.
 all_tools = [search_tool, visit_webpage, handle_file]