Spaces:

Isics
/

agents_gaia

Runtime error

App Files Files Community

agents_gaia / agents /file_reader.py

Isics

initial commit

32844c7 3 months ago

raw

history blame contribute delete

3.86 kB

	import json

	import pandas as pd
	import pypdf
	import yaml
	from smolagents import CodeAgent, Model, tool

	from config import authorized_libraries


	@tool
	def read_yaml(path: str) -> str:
	    """
	    Reads a YAML file and returns its parsed contents.

	    Args:
	        path (str): path to YAML file.

	    Returns:
	        The parsed YAML document (a dictionary for mapping documents). The
	        object is returned as parsed and is not serialized to text, despite
	        the `str` annotation.

	    Example:
	        >>> result = read_yaml("path/to/file.yaml")
	    """
	    # Binary mode lets PyYAML choose the encoding itself (BOM check, UTF-8
	    # otherwise) instead of inheriting the platform's default text encoding.
	    with open(path, 'rb') as f:
	        # NOTE(review): FullLoader can construct more than plain data types. If
	        # these files may come from untrusted sources, consider yaml.safe_load;
	        # confirm no input relies on FullLoader-only tags before switching.
	        return yaml.load(f, Loader=yaml.FullLoader)


	@tool
	def read_json(path: str) -> str:
	    """
	    Reads a JSON file and returns its parsed contents.

	    Args:
	        path (str): path to JSON file.

	    Returns:
	        The parsed JSON document (a dictionary for JSON objects). The object
	        is returned as parsed and is not serialized to text, despite the
	        `str` annotation.

	    Example:
	        >>> result = read_json("path/to/file.json")
	    """
	    # json.load accepts a binary file and detects UTF-8/16/32 (BOM included),
	    # so decoding no longer depends on the platform's default text encoding.
	    with open(path, 'rb') as f:
	        return json.load(f)


	@tool
	def read_txt(path: str) -> str:
	    """
	    Reads a txt file and returns the contents as a string.

	    Args:
	        path (str): path to a text file.

	    Returns:
	        str: contents of the text file, decoded as UTF-8. A leading byte
	        order mark is dropped and bytes that are not valid UTF-8 are
	        replaced with U+FFFD instead of raising.

	    Example:
	        >>> result = read_txt("path/to/textfile.txt")
	    """
	    # Explicit codec: the default text encoding is platform dependent.
	    # errors='replace' keeps the tool usable on files with stray bytes.
	    with open(path, 'r', encoding='utf-8-sig', errors='replace') as f:
	        return f.read()


	@tool
	def read_csv(path: str) -> str:
	    """
	    Loads a CSV file and renders its whole content as a markdown table.
	    Handy for getting a look at the structure and data of comma-separated files.

	    Args:
	        path (str): path to the CSV file (e.g., 'data.csv').

	    Returns:
	        str: The content of the CSV as a markdown string, or a message
	        starting with "Error reading CSV" when loading or rendering fails.
	    """
	    try:
	        # Rendering stays inside the try so conversion failures are reported too.
	        return pd.read_csv(path).to_markdown(index=False)
	    except Exception as exc:
	        return f"Error reading CSV: {exc!s}"


	@tool
	def read_excel(path: str) -> str:
	    """
	    Loads the first sheet of an Excel file and renders it as a markdown table.

	    Args:
	        path (str): path to the .xlsx file.

	    Returns:
	        str: The content of the first sheet as a markdown string, or a message
	        starting with "Error reading Excel" when loading or rendering fails.
	    """
	    try:
	        # No sheet is named, so pandas falls back to its default (the first one).
	        sheet = pd.read_excel(path, engine='openpyxl')
	        markdown = sheet.to_markdown(index=False)
	    except Exception as exc:
	        return f"Error reading Excel: {exc!s}"
	    return markdown


	@tool
	def read_pdf(path: str) -> str:
	    """
	    Pulls the text out of a PDF file, page by page.

	    Args:
	        path (str): path to the PDF file.

	    Returns:
	        str: The raw text extracted from the PDF pages. Each page that yields
	        text is introduced by a "--- Page N ---" header (N counted from 1) and
	        pages without text are left out. On failure, a message starting with
	        "Error reading PDF" is returned instead.
	    """
	    try:
	        document = pypdf.PdfReader(path)
	        # Lazily pair each 1-based page number with that page's extracted text.
	        numbered_texts = (
	            (number, page.extract_text())
	            for number, page in enumerate(document.pages, start=1)
	        )
	        # The generators are consumed by join(), i.e. still inside the try.
	        return "\n".join(
	            f"--- Page {number} ---\n{text}"
	            for number, text in numbered_texts
	            if text
	        )
	    except Exception as exc:
	        return f"Error reading PDF: {exc!s}"


	@tool
	def inspect_csv(path: str) -> str:
	    """
	    Shows the columns, the total row count and the first 5 rows of a CSV file.
	    Use this to understand the data structure before writing code to process the full file.

	    Args:
	        path (str): path to the CSV file.

	    Returns:
	        str: The column list, the row count and a markdown table of the first
	        5 rows, or a message starting with "Error inspecting CSV" on failure.
	    """
	    try:
	        frame = pd.read_csv(path)
	        # Pieces are listed in output order; each one carries its own newlines.
	        parts = [
	            f"Columns: {list(frame.columns)}\n",
	            f"Total Rows: {len(frame)}\n\n",
	            "First 5 rows:\n",
	            frame.head(5).to_markdown(index=False),
	        ]
	        return "".join(parts)
	    except Exception as exc:
	        return f"Error inspecting CSV: {exc!s}"

	def create_file_reader(model: Model) -> CodeAgent:
	    """
	    Builds the "files_manager" agent that bundles all file-reading tools.

	    Args:
	        model (Model): model handed to the CodeAgent.

	    Returns:
	        CodeAgent: agent set up with the reader tools, base tools enabled,
	        `authorized_libraries` as additional imports, verbosity level 0 and a
	        limit of 8 steps.
	    """
	    # Same tools, same order as they are registered with the agent.
	    reader_tools = [
	        read_yaml,
	        read_json,
	        read_txt,
	        read_csv,
	        read_pdf,
	        inspect_csv,
	        read_excel,
	    ]
	    agent = CodeAgent(
	        model=model,
	        tools=reader_tools,
	        add_base_tools=True,
	        additional_authorized_imports=authorized_libraries,
	        name="files_manager",
	        description="Reads a file and returns the contents as a string, multiple formats accepted.",
	        verbosity_level=0,
	        max_steps=8,
	    )
	    return agent