Francesco-A committed on
Commit
856f7b8
·
verified ·
1 Parent(s): 021cedf

Upload 2 files

Browse files
Files changed (2) hide show
  1. tools/files_to_dict.py +62 -0
  2. tools/files_to_text.py +74 -0
tools/files_to_dict.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+ import pandas as pd
3
+ import pymupdf
4
+
5
+ @tool
6
def csv_to_dict(csv_file_path: str) -> str:
    """
    Reads a CSV file from the given path and returns its rows, its column
    names and a descriptive summary.

    Args:
        csv_file_path (str): Path to the CSV file.

    Returns:
        str: On success, a dictionary-like structure containing
            "columns" (the list of column names),
            "describe" (summary statistics for every column, numeric or not,
            with the median as the only percentile) and
            "data" (the rows as a list of dictionaries).
            On failure, an error message.
    """
    try:
        df = pd.read_csv(csv_file_path)

        # NOTE(review): the success path returns a dict although the signature
        # is annotated "-> str"; left unchanged so existing callers keep working.
        return {
            "columns": df.columns.tolist(),
            # include="all" summarizes non-numeric columns as well.
            "describe": df.describe(include="all", percentiles=[0.5]).to_dict(),
            "data": df.to_dict(orient="records"),
        }
    except FileNotFoundError:
        return f"Error: The file at '{csv_file_path}' was not found."
    except Exception as e:
        # Tool boundary: report any other failure as a message instead of raising.
        return f"An error occurred: {e}"
34
+
35
+ @tool
36
def excel_to_dict(xlsx_file_path: str) -> str:
    """
    Reads an Excel (xlsx) file from the given path and returns its rows, its
    column names and a descriptive summary.

    Args:
        xlsx_file_path (str): Path to the Excel file.

    Returns:
        str: On success, a dictionary-like structure containing
            "columns" (the list of column names),
            "describe" (summary statistics for every column, numeric or not,
            with the median as the only percentile) and
            "data" (the rows as a list of dictionaries).
            On failure, an error message.
    """
    try:
        df = pd.read_excel(xlsx_file_path)

        # NOTE(review): the success path returns a dict although the signature
        # is annotated "-> str"; left unchanged so existing callers keep working.
        return {
            "columns": df.columns.tolist(),
            # include="all" summarizes non-numeric columns as well.
            "describe": df.describe(include="all", percentiles=[0.5]).to_dict(),
            "data": df.to_dict(orient="records"),
        }
    except FileNotFoundError:
        return f"Error: The file at '{xlsx_file_path}' was not found."
    except Exception as e:
        # Tool boundary: report any other failure as a message instead of raising.
        return f"An error occurred: {e}"
tools/files_to_text.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @tool
2
def image_to_text(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    try:
        # Imported lazily so a missing OCR stack is reported as a message
        # rather than breaking the import of this module.
        import pytesseract
        from PIL import Image

        # Open the image in a context manager so the underlying file handle
        # is released as soon as OCR has finished (or failed).
        with Image.open(image_path) as img:
            extracted_text = pytesseract.image_to_string(img)

        return f"Extracted text from image: {extracted_text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        # Covers unreadable/missing images and Tesseract runtime failures.
        return f"Error extracting text from image: {str(e)}"
27
+
28
+ @tool
29
def pdf_to_text(pdf_file_path: str) -> str:
    """
    Reads a PDF file from the given path and returns its content as text.

    Args:
        pdf_file_path (str): The path to the PDF file.

    Returns:
        str: The text of every page, each page followed by a newline,
            or an error message if the file could not be read.
    """
    try:
        # Imported lazily so the function does not depend on a module-level
        # import and a missing PyMuPDF install is reported as a message.
        import pymupdf

        # The context manager closes the document even if extraction fails.
        with pymupdf.open(pdf_file_path) as doc:
            return "".join(page.get_text("text") + "\n" for page in doc)
    except ImportError:
        return "Error: pymupdf is not installed. Please install it with 'pip install pymupdf'."
    except FileNotFoundError:
        return f"Error: The file at '{pdf_file_path}' was not found."
    except Exception as e:
        # Tool boundary: report any other failure as a message instead of raising.
        return f"An error occurred: {e}"
48
+
49
+ @tool
50
def text_file_to_string(path: str) -> str:
    """
    Reads any plain text file and returns its content as a string.

    Works for .txt, .md, .json / .jsonl, .html, .csv (as raw text) and any
    other UTF-8 or ASCII compatible text file. Bytes that are not valid UTF-8
    are silently dropped, so a file containing binary data is only partially
    decoded.

    Args:
        path (str): The path to the text file.

    Returns:
        str: The decoded file content, or an error message if the file
            could not be read.
    """
    try:
        # errors="ignore" keeps the read from failing on undecodable bytes.
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    except FileNotFoundError:
        return f"Error: The file at '{path}' was not found."
    except Exception as e:
        # Tool boundary: report any other failure as a message instead of raising.
        return f"An error occurred: {e}"