Spaces:
Runtime error
Runtime error
File size: 3,859 Bytes
32844c7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | import json
import pandas as pd
import pypdf
import yaml
from smolagents import CodeAgent, Model, tool
from config import authorized_libraries
@tool
def read_yaml(path: str) -> str:
    """
    Reads a YAML file and returns its parsed contents.

    The document is parsed with PyYAML, so the value handed back is the parsed
    Python object (for a mapping document, a dictionary) and not the raw text.

    Args:
        path (str): path to YAML file.

    Returns:
        str: parsed contents of the YAML file.

    Example:
        >>> result = read_yaml("path/to/file.yaml")
    """
    # NOTE(review): the signature advertises `str`, but the parsed object is
    # returned unchanged. The annotation is kept as-is because the @tool
    # decorator presumably derives the tool schema from it -- confirm before
    # changing either side.
    # NOTE(security): FullLoader can build more than plain scalars/lists/dicts.
    # If files may come from an untrusted source, consider yaml.safe_load.
    # Binary mode lets PyYAML detect the encoding (UTF-8 / UTF-16 via BOM)
    # instead of relying on the platform's locale default.
    with open(path, "rb") as stream:
        return yaml.load(stream, Loader=yaml.FullLoader)
@tool
def read_json(path: str) -> str:
    """
    Reads a JSON file and returns its parsed contents.

    The document is decoded with the standard json module, so the value handed
    back is the parsed Python object (for a JSON object, a dictionary) and not
    the raw text.

    Args:
        path (str): path to JSON file.

    Returns:
        str: parsed contents of the JSON file.

    Example:
        >>> result = read_json("path/to/file.json")
    """
    # NOTE(review): the signature advertises `str`, but the parsed object is
    # returned unchanged. The annotation is kept as-is because the @tool
    # decorator presumably derives the tool schema from it -- confirm before
    # changing either side.
    # Binary mode lets json detect UTF-8 / UTF-16 / UTF-32 (including a BOM)
    # itself instead of decoding with the platform's locale default.
    with open(path, "rb") as stream:
        return json.load(stream)
@tool
def read_txt(path: str) -> str:
    """
    Reads a plain text file and returns its whole content as a string.

    Args:
        path (str): path to a text file.

    Returns:
        str: contents of the text file.

    Example:
        >>> result = read_txt("path/to/textfile.txt")
    """
    # The file is decoded with the platform default encoding, exactly as
    # before; only the docstring example was corrected (it named read_yaml).
    with open(path, "r") as handle:
        content = handle.read()
    return content
@tool
def read_csv(path: str) -> str:
    """
    Loads a CSV file and renders every row as a markdown table.
    Handy for getting a look at both the layout and the data of a
    comma-separated file.

    Args:
        path (str): path to the CSV file (e.g., 'data.csv').

    Returns:
        str: the CSV rendered as a markdown table, or a message starting with "Error reading CSV" if loading or rendering failed.
    """
    try:
        table = pd.read_csv(path).to_markdown(index=False)
    except Exception as exc:
        # Failures are reported as text rather than raised.
        return f"Error reading CSV: {exc!s}"
    return table
@tool
def read_excel(path: str) -> str:
    """
    Loads an Excel workbook with the openpyxl engine and renders the sheet
    pandas reads by default (the first one) as a markdown table.

    Args:
        path (str): path to the .xlsx file.

    Returns:
        str: the sheet rendered as a markdown table, or a message starting with "Error reading Excel" if loading or rendering failed.
    """
    try:
        sheet = pd.read_excel(path, engine='openpyxl')
        rendered = sheet.to_markdown(index=False)
    except Exception as exc:
        # Failures are reported as text rather than raised.
        return f"Error reading Excel: {exc!s}"
    return rendered
@tool
def read_pdf(path: str) -> str:
    """
    Pulls the text out of every page of a PDF file.

    Each page that yields text is prefixed with a "--- Page N ---" header
    (N counted from 1); pages that yield no text are left out.

    Args:
        path (str): path to the PDF file.

    Returns:
        str: the extracted page texts joined by newlines, or a message starting with "Error reading PDF" if extraction failed.
    """
    try:
        document = pypdf.PdfReader(path)
        sections = [
            f"--- Page {number} ---\n{body}"
            for number, page in enumerate(document.pages, start=1)
            # Skip pages whose extraction comes back empty.
            if (body := page.extract_text())
        ]
        return "\n".join(sections)
    except Exception as exc:
        # Failures are reported as text rather than raised.
        return f"Error reading PDF: {exc!s}"
@tool
def inspect_csv(path: str) -> str:
    """
    Summarises a CSV file: its column names, its total row count and its first 5 rows.
    Call this to learn the data structure before writing code that processes the full file.

    Args:
        path (str): path to the CSV file.

    Returns:
        str: the summary text with the first rows as a markdown table, or a message starting with "Error inspecting CSV" if loading or rendering failed.
    """
    try:
        frame = pd.read_csv(path)
        summary_parts = [
            f"Columns: {list(frame.columns)}\n",
            f"Total Rows: {len(frame)}\n\n",
            "First 5 rows:\n",
            frame.head(5).to_markdown(index=False),
        ]
        return "".join(summary_parts)
    except Exception as exc:
        # Failures are reported as text rather than raised.
        return f"Error inspecting CSV: {exc!s}"
def create_file_reader(model: Model) -> CodeAgent:
    """Build the ``files_manager`` CodeAgent wired to this module's reader tools.

    Args:
        model: the model instance handed to the agent.

    Returns:
        A CodeAgent constructed with the YAML, JSON, text, CSV, PDF and Excel
        reader tools plus the CSV inspector, base tools enabled, and the
        imports listed in ``authorized_libraries`` allowed.
    """
    # Same tools, same order as passed to the agent before.
    reader_tools = [
        read_yaml,
        read_json,
        read_txt,
        read_csv,
        read_pdf,
        inspect_csv,
        read_excel,
    ]
    agent = CodeAgent(
        model=model,
        tools=reader_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="files_manager",
        description="Reads a file and returns the contents as a string, multiple formats accepted.",
        verbosity_level=0,
        max_steps=8,
    )
    return agent
|