Spaces:

Isics
/

agents_gaia

Runtime error

File size: 3,859 Bytes

32844c7

import json

import pandas as pd
import pypdf
import yaml
from smolagents import CodeAgent, Model, tool

from config import authorized_libraries


@tool
def read_yaml(path: str) -> str:
    """
    Reads a YAML file and returns its parsed contents.

    Args:
        path (str): path to YAML file.

    Returns:
        str: declared return type. The value actually handed back is the object
        produced by the YAML parser for the document (for example a dictionary
        for a top-level mapping); it is not serialized to text.

    Example:
        >>> result = read_yaml("path/to/file.yaml")
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        # NOTE(security): FullLoader is not recommended for untrusted input by
        # the PyYAML documentation. If the files given to this tool can come
        # from untrusted sources, consider switching to yaml.safe_load.
        return yaml.load(f, Loader=yaml.FullLoader)


@tool
def read_json(path: str) -> str:
    """
    Reads a JSON file and returns its parsed contents.

    Args:
        path (str): path to JSON file.

    Returns:
        str: declared return type. The value actually handed back is the object
        produced by json.load (for example a dictionary for a top-level JSON
        object); it is not serialized to text.

    Example:
        >>> result = read_json("path/to/file.json")
    """
    # JSON text is UTF-8 by specification; decode explicitly so the result
    # does not depend on the platform's locale default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


@tool
def read_txt(path: str) -> str:
    """
    Reads a txt file and returns the contents as a string.

    Args:
        path (str): path to a text file.

    Returns:
        str: contents of the text file.

    Example:
        >>> result = read_txt("path/to/textfile.txt")
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


@tool
def read_csv(path: str) -> str:
    """
    Loads a CSV file and renders the whole table as markdown.
    Handy for getting a look at the structure and data of a comma-separated file.

    Args:
        path (str): path to the CSV file (e.g., 'data.csv').

    Returns:
        str: The CSV content as a markdown table, or an "Error reading CSV: ..."
        message if loading or rendering fails.
    """
    try:
        markdown_table = pd.read_csv(path).to_markdown(index=False)
    except Exception as err:
        # Failures are reported back as text instead of being raised.
        return f"Error reading CSV: {err!s}"
    return markdown_table


@tool
def read_excel(path: str) -> str:
    """
    Loads an Excel workbook with the openpyxl engine and renders the sheet that
    pandas reads by default (the first one) as a markdown table.

    Args:
        path (str): path to the .xlsx file.

    Returns:
        str: The sheet content as a markdown table, or an "Error reading Excel: ..."
        message if loading or rendering fails.
    """
    try:
        sheet = pd.read_excel(path, engine='openpyxl')
        rendered = sheet.to_markdown(index=False)
    except Exception as err:
        # Failures are reported back as text instead of being raised.
        return f"Error reading Excel: {err!s}"
    return rendered


@tool
def read_pdf(path: str) -> str:
    """
    Pulls the text out of the pages of a PDF file.

    Args:
        path (str): path to the PDF file.

    Returns:
        str: The extracted text, with a "--- Page N ---" header line before each
        page that yielded text (pages with no text are skipped), or an
        "Error reading PDF: ..." message if reading fails.
    """
    try:
        pages = pypdf.PdfReader(path).pages
        # Page numbers in the headers are 1-based; empty extractions are dropped.
        sections = [
            f"--- Page {number} ---\n{page_text}"
            for number, page in enumerate(pages, start=1)
            if (page_text := page.extract_text())
        ]
        return "\n".join(sections)
    except Exception as err:
        # Failures are reported back as text instead of being raised.
        return f"Error reading PDF: {err!s}"


@tool
def inspect_csv(path: str) -> str:
    """
    Summarizes a CSV file: its column names, its total row count and its first 5 rows.
    Call this to learn the data structure before writing code that processes the full file.

    Args:
        path (str): path to the CSV file.

    Returns:
        str: The summary text, or an "Error inspecting CSV: ..." message if
        loading or rendering fails.
    """
    try:
        frame = pd.read_csv(path)
        summary_parts = [
            f"Columns: {list(frame.columns)}\n",
            f"Total Rows: {len(frame)}\n\n",
            "First 5 rows:\n",
            frame.head(5).to_markdown(index=False),
        ]
        return "".join(summary_parts)
    except Exception as err:
        # Failures are reported back as text instead of being raised.
        return f"Error inspecting CSV: {err!s}"

def create_file_reader(model: Model) -> CodeAgent:
    """
    Builds the "files_manager" agent: a CodeAgent equipped with the file-reading tools
    defined in this module.

    Args:
        model (Model): model instance passed to the CodeAgent.

    Returns:
        CodeAgent: agent configured with the reader tools, the base tools enabled,
        the authorized imports taken from ``config``, verbosity_level=0 and max_steps=8.
    """
    reader_tools = [
        read_yaml,
        read_json,
        read_txt,
        read_csv,
        read_pdf,
        inspect_csv,
        read_excel,
    ]
    files_manager = CodeAgent(
        name="files_manager",
        description="Reads a file and returns the contents as a string, multiple formats accepted.",
        model=model,
        tools=reader_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        verbosity_level=0,
        max_steps=8,
    )
    return files_manager