File size: 3,859 Bytes
32844c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import json

import pandas as pd
import pypdf
import yaml
from smolagents import CodeAgent, Model, tool

from config import authorized_libraries


@tool
def read_yaml(path: str) -> str:
    """
    Reads a YAML file and returns its parsed contents rendered as a string.

    The document is parsed with PyYAML and the resulting Python object
    (typically a dictionary) is converted with str() so that the value
    really is the string this tool declares as its return type.

    Args:
        path (str): path to YAML file.

    Returns:
        str: string representation of the parsed YAML contents.

    Raises:
        OSError: if the file cannot be opened.

    Example:
        >>> result = read_yaml("path/to/file.yaml")
    """
    # Decode explicitly as UTF-8 instead of depending on the platform locale.
    with open(path, 'r', encoding='utf-8') as f:
        # NOTE(security): FullLoader is kept for backward compatibility, but it
        # resolves more tags than SafeLoader. If `path` may point at untrusted
        # files, prefer yaml.safe_load here.
        parsed = yaml.load(f, Loader=yaml.FullLoader)
    # The signature promises a string; previously the raw parsed object
    # (dict/list/scalar) leaked out to the caller.
    return str(parsed)


@tool
def read_json(path: str) -> str:
    """
    Reads a JSON file and returns its contents as a JSON-formatted string.

    The file is parsed first, so malformed JSON is rejected, and the parsed
    value is then serialized back to text so that the result really is the
    string this tool declares as its return type.

    Args:
        path (str): path to JSON file.

    Returns:
        str: contents of the JSON file, re-serialized with 2-space indentation.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.

    Example:
        >>> result = read_json("path/to/file.json")
    """
    # JSON text is UTF-8; do not depend on the platform locale for decoding.
    with open(path, 'r', encoding='utf-8') as f:
        parsed = json.load(f)
    # The signature promises a string; previously the parsed dict/list leaked
    # out. ensure_ascii=False keeps non-ASCII characters readable.
    return json.dumps(parsed, ensure_ascii=False, indent=2)


@tool
def read_txt(path: str) -> str:
    """
    Reads a txt file and returns the contents as a string.

    Args:
        path (str): path to a text file.

    Returns:
        str: contents of the text file.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.

    Example:
        >>> result = read_txt("path/to/textfile.txt")
    """
    # Decode explicitly as UTF-8 so the result does not vary with the
    # platform's default locale encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


@tool
def read_csv(path: str) -> str:
    """
    Loads a CSV file with pandas and renders the whole table as markdown.
    Handy for getting a quick look at the layout and values of a
    comma-separated file.

    Args:
        path (str): location of the CSV file (e.g., 'data.csv').

    Returns:
        str: the table as a markdown string without the index column, or a
            message beginning with "Error reading CSV" when anything fails.
    """
    try:
        # Loading and rendering both stay inside the guard so that any failure
        # is handed back as text instead of being raised.
        table = pd.read_csv(path)
        rendered = table.to_markdown(index=False)
    except Exception as exc:
        return f"Error reading CSV: {exc}"
    return rendered


@tool
def read_excel(path: str) -> str:
    """
    Loads the first sheet of an Excel workbook and renders it as a markdown table.

    Args:
        path (str): location of the .xlsx file.

    Returns:
        str: the first sheet as a markdown string without the index column,
            or a message beginning with "Error reading Excel" when anything fails.
    """
    try:
        # The openpyxl engine is requested explicitly; parsing and rendering
        # both stay inside the guard so failures come back as text.
        return pd.read_excel(path, engine='openpyxl').to_markdown(index=False)
    except Exception as exc:
        return f"Error reading Excel: {exc}"


@tool
def read_pdf(path: str) -> str:
    """
    Pulls the text out of every page of a PDF file.

    Args:
        path (str): location of the PDF file.

    Returns:
        str: the extracted text, one section per page, each introduced by a
            "--- Page N ---" header line (N starts at 1). Pages that yield no
            text are left out. On failure, a message beginning with
            "Error reading PDF" is returned instead.
    """
    try:
        pages = pypdf.PdfReader(path).pages
        # Page numbers are 1-based and follow the page's position in the
        # document, so skipped (empty) pages leave gaps in the numbering.
        sections = [
            f"--- Page {number} ---\n{extracted}"
            for number, extracted in enumerate(
                (page.extract_text() for page in pages), start=1
            )
            if extracted
        ]
        return "\n".join(sections)
    except Exception as exc:
        return f"Error reading PDF: {exc}"


@tool
def inspect_csv(path: str) -> str:
    """
    Summarizes a CSV file: its column names, total row count and first 5 rows.
    Call this to learn the data structure before writing code that processes the full file.

    Args:
        path (str): location of the CSV file.

    Returns:
        str: a text summary with the column list, the row count and the first
            5 rows as a markdown table, or a message beginning with
            "Error inspecting CSV" when anything fails.
    """
    try:
        frame = pd.read_csv(path)
        # One entry per output line; the empty entry yields the blank line
        # that separates the row count from the preview table.
        summary_lines = [
            f"Columns: {list(frame.columns)}",
            f"Total Rows: {len(frame)}",
            "",
            "First 5 rows:",
            frame.head(5).to_markdown(index=False),
        ]
        return "\n".join(summary_lines)
    except Exception as exc:
        return f"Error inspecting CSV: {exc}"

def create_file_reader(model: Model) -> CodeAgent:
    """
    Builds the "files_manager" CodeAgent equipped with this module's file-reading tools.

    Args:
        model (Model): the model instance handed to the agent.

    Returns:
        CodeAgent: an agent configured with the YAML, JSON, text, CSV, PDF and
            Excel reader tools, base tools enabled, the project's authorized
            imports, verbosity level 0 and a limit of 8 steps.
    """
    file_tools = [
        read_yaml,
        read_json,
        read_txt,
        read_csv,
        read_pdf,
        inspect_csv,
        read_excel,
    ]
    agent = CodeAgent(
        model=model,
        tools=file_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="files_manager",
        description="Reads a file and returns the contents as a string, multiple formats accepted.",
        verbosity_level=0,
        max_steps=8,
    )
    return agent