GaiaAgent_Final_Assignment

Sleeping

App Files Files Community

Francesco-A committed 29 days ago

Commit

856f7b8

verified ·

1 Parent(s): 021cedf

Upload 2 files

Browse files

Files changed (2) hide show

tools/files_to_dict.py +62 -0
tools/files_to_text.py +74 -0

tools/files_to_dict.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from smolagents import tool
+import pandas as pd
+import pymupdf
+@tool
+def csv_to_dict(csv_file_path: str) -> str:
+    """
+    Reads a CSV file from the given path and returns:
+        - the data as a list of dictionaries,
+        - the list of column names,
+        - a basic descriptive summary of numeric columns.
+    Args:
+        csv_file_path (str): Path to the CSV file.
+    Returns:
+        str: A dictionary-like structure containing:
+             "data", "columns", and "describe".
+    """
+    try:
+        df = pd.read_csv(csv_file_path)
+        output = {
+            "columns" : df.columns.tolist(),
+            "describe": df.describe(include="all",percentiles=[.5]).to_dict(),
+            "data"    : df.to_dict(orient="records")
+        }
+        return output
+    except FileNotFoundError:
+        return f"Error: The file at '{csv_file_path}' was not found."
+    except Exception as e:
+        return f"An error occurred: {e}"
+@tool
+def excel_to_dict(xlsx_file_path: str) -> str:
+    """
+    Reads an Excel (xlsx) file from the given path and returns:
+        - the data as a list of dictionaries,
+        - the list of column names,
+        - a basic descriptive summary of numeric columns.
+    Args:
+        xlsx_file_path (str): Path to the Excel file.
+    Returns:
+        str: A dictionary-like structure containing:
+             "data", "columns", and "describe".
+    """
+    try:
+        df = pd.read_excel(xlsx_file_path)
+        output = {
+            "columns" : df.columns.tolist(),
+            "describe": df.describe(include="all",percentiles=[.5]).to_dict(),
+            "data"    : df.to_dict(orient="records")
+        }
+        return output
+    except FileNotFoundError:
+        return f"Error: The file at '{xlsx_file_path}' was not found."
+    except Exception as e:
+        return f"An error occurred: {e}"

tools/files_to_text.py ADDED Viewed

	@@ -0,0 +1,74 @@

+@tool
+def image_to_text(image_path: str) -> str:
+    """
+    Extract text from an image using pytesseract (if available).
+    Args:
+        image_path: Path to the image file
+    Returns:
+        Extracted text or error message
+    """
+    try:
+      import pytesseract
+      from PIL import Image
+      # Open the image using PIL
+      img = Image.open(image_path)
+      # Use pytesseract to extract text from the image
+      extracted_text = pytesseract.image_to_string(img)
+      return f"Extracted text from image: {extracted_text}"
+    except ImportError:
+        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
+    except Exception as e:
+        return f"Error extracting text from image: {str(e)}"
+@tool
+def pdf_to_text(pdf_file_path: str) -> str:
+    """
+    Reads a PDF file from the given path and returns its content as text.
+    Args:
+        pdf_file_path (str): The path to the PDF file.
+    Returns:
+        str: The text content of the PDF.
+    """
+    try:
+      doc = pymupdf.open(pdf_file_path)
+      text = ""
+      for page in doc:
+          text += page.get_text("text")
+          text += "\n"
+      return text
+    except FileNotFoundError:
+        return f"Error: The file at '{pdf_file_path}' was not found."
+    except Exception as e:
+        return f"An error occurred: {e}"
+@tool
+def text_file_to_string(path: str) -> str:
+    """
+    Reads any plain text file and returns its content as a string.
+    Args:
+        path (str): The path to the text file.
+    Works for:
+    - .txt
+    - .md
+    - .json / .jsonl
+    - .html
+    - .csv (as raw text)
+    - any UTF-8 or ASCII compatible text file
+    If the file contains binary data, the returned string may be partially decoded.
+    """
+    try:
+      with open(path, "r", encoding="utf-8", errors="ignore") as f:
+        content = f.read()
+      return content
+    except FileNotFoundError:
+      return f"Error: The file at '{path}' was not found."
+    except Exception as e:
+      return f"An error occurred: {e}"