Spaces:
Runtime error
Runtime error
| import json | |
| import pandas as pd | |
| import pypdf | |
| import yaml | |
| from smolagents import CodeAgent, Model, tool | |
| from config import authorized_libraries | |
def read_yaml(path: str) -> str:
    """
    Reads a YAML file and returns its parsed contents.

    Args:
        path (str): path to YAML file.

    Returns:
        The parsed YAML document as produced by ``yaml.load`` (for example a
        dictionary when the top level of the file is a mapping).

    Example:
        >>> result = read_yaml("path/to/file.yaml")
    """
    # NOTE(review): the signature is annotated ``-> str`` but the parsed object
    # is returned unchanged; the annotation is kept so the interface callers
    # see does not change. Confirm which of the two is intended.
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        # SECURITY: FullLoader accepts a wider set of YAML tags than the safe
        # loader. If files can come from an untrusted source, consider
        # switching to yaml.safe_load.
        return yaml.load(f, Loader=yaml.FullLoader)
def read_json(path: str) -> str:
    """
    Reads a JSON file and returns its parsed contents.

    Args:
        path (str): path to JSON file.

    Returns:
        The parsed JSON value as produced by ``json.load`` (for example a
        dictionary when the top level of the file is an object).

    Example:
        >>> result = read_json("path/to/file.json")
    """
    # NOTE(review): the signature is annotated ``-> str`` but the parsed object
    # is returned unchanged; the annotation is kept so the interface callers
    # see does not change. Confirm which of the two is intended.
    # JSON text is UTF-8 by specification, so decode explicitly instead of
    # relying on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def read_txt(path: str) -> str:
    """
    Reads a txt file and returns the contents as a string.

    Args:
        path (str): path to a text file.

    Returns:
        str: contents of the text file.

    Example:
        >>> result = read_txt("path/to/textfile.txt")
    """
    # Decode explicitly as UTF-8 so the same file yields the same text on
    # every platform instead of depending on the locale's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
def read_csv(path: str) -> str:
    """
    Loads a CSV file and renders all of its rows as a markdown table.
    Handy for getting a quick look at the layout and values of a
    comma-separated file.

    Args:
        path (str): path to the CSV file (e.g., 'data.csv').

    Returns:
        str: the CSV content as a markdown string, or a message beginning
        with "Error reading CSV: " when loading or rendering fails.
    """
    try:
        # The index is left out so only the file's own columns are rendered.
        table = pd.read_csv(path).to_markdown(index=False)
    except Exception as exc:
        # Failures are reported as text rather than raised to the caller.
        return f"Error reading CSV: {exc!s}"
    return table
def read_excel(path: str) -> str:
    """
    Loads the first sheet of an Excel workbook and renders it as a markdown table.

    Args:
        path (str): path to the .xlsx file.

    Returns:
        str: the first sheet as a markdown string, or a message beginning
        with "Error reading Excel: " when loading or rendering fails.
    """
    try:
        # No sheet is named, so pandas reads its default (first) sheet.
        sheet = pd.read_excel(path, engine='openpyxl')
        rendered = sheet.to_markdown(index=False)
    except Exception as exc:
        # Failures are reported as text rather than raised to the caller.
        return f"Error reading Excel: {exc!s}"
    return rendered
def read_pdf(path: str) -> str:
    """
    Pulls the text out of every page of a PDF file.

    Args:
        path (str): path to the PDF file.

    Returns:
        str: the extracted text, each non-empty page introduced by a
        "--- Page N ---" header line, or a message beginning with
        "Error reading PDF: " when the file cannot be processed.
    """
    try:
        sections = []
        # Pages are numbered from 1 in the headers.
        for number, page in enumerate(pypdf.PdfReader(path).pages, start=1):
            extracted = page.extract_text()
            if not extracted:
                # Pages that yield no text are left out of the output.
                continue
            sections.append(f"--- Page {number} ---\n{extracted}")
        return "\n".join(sections)
    except Exception as exc:
        # Failures are reported as text rather than raised to the caller.
        return f"Error reading PDF: {exc!s}"
def inspect_csv(path: str) -> str:
    """
    Summarizes a CSV file: its column names, total row count and first 5 rows.
    Call this to learn the data layout before writing code that processes the whole file.

    Args:
        path (str): path to the CSV file.

    Returns:
        str: the summary text, or a message beginning with
        "Error inspecting CSV: " when the file cannot be read or rendered.
    """
    try:
        frame = pd.read_csv(path)
        header = f"Columns: {list(frame.columns)}\nTotal Rows: {len(frame)}\n\n"
        # Only the leading rows are rendered to keep the preview short.
        preview = frame.head(5).to_markdown(index=False)
        return header + "First 5 rows:\n" + preview
    except Exception as exc:
        # Failures are reported as text rather than raised to the caller.
        return f"Error inspecting CSV: {exc!s}"
def create_file_reader(model: Model) -> CodeAgent:
    """
    Builds the "files_manager" agent that exposes this module's file readers.

    Args:
        model (Model): the model the agent is constructed with.

    Returns:
        CodeAgent: an agent configured with the reader functions as tools,
        base tools enabled, the imports listed in ``authorized_libraries``
        authorized, and at most 8 steps.
    """
    # NOTE(review): `tool` is imported at the top of the file but no @tool
    # decorator is visible on the reader functions here - confirm they are
    # registered in the form CodeAgent expects.
    reader_tools = [
        read_yaml,
        read_json,
        read_txt,
        read_csv,
        read_pdf,
        inspect_csv,
        read_excel,
    ]
    return CodeAgent(
        model=model,
        tools=reader_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="files_manager",
        description="Reads a file and returns the contents as a string, multiple formats accepted.",
        verbosity_level=0,
        max_steps=8,
    )