"""File-reading tools and the factory for the agent that exposes them."""

import json

import pandas as pd
import pypdf
import yaml
from smolagents import CodeAgent, Model, tool

from config import authorized_libraries


@tool
def read_yaml(path: str) -> str:
    """
    Reads a YAML file and returns its parsed contents (typically a dictionary).
    Errors while opening or parsing the file propagate to the caller.

    Args:
        path (str): path to YAML file.

    Returns:
        str: contents of YAML file.

    Example:
        >>> result = read_yaml("path/to/file.yaml")
    """
    # NOTE(review): the annotation says ``str`` but the object produced by
    # ``yaml.load`` is returned as-is. Both are left unchanged so existing
    # callers and the generated tool schema are unaffected -- confirm which
    # contract is intended.
    # NOTE(security): ``yaml.safe_load`` is the recommended loader when the
    # file may come from an untrusted source; FullLoader is kept here to
    # preserve current behaviour.
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.load(f, Loader=yaml.FullLoader)


@tool
def read_json(path: str) -> str:
    """
    Reads a JSON file and returns its parsed contents (typically a dictionary).
    Errors while opening or parsing the file propagate to the caller.

    Args:
        path (str): path to JSON file.

    Returns:
        str: contents of JSON file.

    Example:
        >>> result = read_json("path/to/file.json")
    """
    # NOTE(review): the annotation says ``str`` but the object produced by
    # ``json.load`` is returned as-is. Both are left unchanged so existing
    # callers and the generated tool schema are unaffected -- confirm which
    # contract is intended.
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


@tool
def read_txt(path: str) -> str:
    """
    Reads a txt file and returns the contents as a string.
    Errors while opening or decoding the file propagate to the caller.

    Args:
        path (str): path to a text file.

    Returns:
        str: contents of the text file.

    Example:
        >>> result = read_txt("path/to/file.txt")
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


@tool
def read_csv(path: str) -> str:
    """
    Reads a CSV file and returns its content formatted as a markdown table.
    Useful for understanding the structure and data of comma-separated files.

    Args:
        path (str): path to the CSV file (e.g., 'data.csv').

    Returns:
        str: The content of the CSV as a markdown string.
    """
    # Failures are reported as a message string instead of being raised, so
    # the calling agent always receives text back.
    try:
        df = pd.read_csv(path)
        return df.to_markdown(index=False)
    except Exception as e:
        return f"Error reading CSV: {e}"


@tool
def read_excel(path: str) -> str:
    """
    Reads the first sheet of an Excel file and returns its content as a markdown table.

    Args:
        path (str): path to the .xlsx file.

    Returns:
        str: The content of the first sheet as a markdown string.
    """
    # Failures are reported as a message string instead of being raised.
    try:
        df = pd.read_excel(path, engine="openpyxl")
        return df.to_markdown(index=False)
    except Exception as e:
        return f"Error reading Excel: {e}"


@tool
def read_pdf(path: str) -> str:
    """
    Extracts text from a PDF file.

    Args:
        path (str): path to the PDF file.

    Returns:
        str: The raw text content extracted from the PDF pages.
    """
    # Failures are reported as a message string instead of being raised.
    try:
        reader = pypdf.PdfReader(path)
        text_content = []
        # Page numbers in the output are 1-based.
        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            # Pages that yield no text are left out of the output entirely.
            if text:
                text_content.append(f"--- Page {page_number} ---\n{text}")
        return "\n".join(text_content)
    except Exception as e:
        return f"Error reading PDF: {e}"


@tool
def inspect_csv(path: str) -> str:
    """
    Reads the first 5 rows and the columns of a CSV file.
    Use this to understand the data structure before writing code to process the full file.

    Args:
        path (str): path to the CSV file.

    Returns:
        str: The column names, the total row count and the first 5 rows as a markdown table.
    """
    # Failures are reported as a message string instead of being raised.
    try:
        # The whole file is loaded because the total row count is reported.
        df = pd.read_csv(path)
        preview = df.head(5).to_markdown(index=False)
        return (
            f"Columns: {list(df.columns)}\n"
            f"Total Rows: {len(df)}\n\n"
            "First 5 rows:\n"
            f"{preview}"
        )
    except Exception as e:
        return f"Error inspecting CSV: {e}"


def create_file_reader(model: Model) -> CodeAgent:
    """
    Builds the ``files_manager`` CodeAgent equipped with the file-reading tools.

    Args:
        model (Model): model instance handed to the agent.

    Returns:
        CodeAgent: agent configured with the reader tools defined in this
        module, the base tools, and the imports listed in
        ``authorized_libraries``.
    """
    return CodeAgent(
        model=model,
        tools=[
            read_yaml,
            read_json,
            read_txt,
            read_csv,
            read_pdf,
            inspect_csv,
            read_excel,
        ],
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="files_manager",
        description="Reads a file and returns the contents as a string, multiple formats accepted.",
        verbosity_level=0,
        max_steps=8,
    )