Francesco-A's picture
fix(tools): import @tool in download_file.py and files_to_text.py
998967a
from smolagents import tool
@tool
def image_to_text(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Exceptions raised while importing the OCR dependencies or processing
    the image are caught and reported in the returned string.

    Args:
        image_path: Path to the image file

    Returns:
        The extracted text prefixed with "Extracted text from image: ", or
        a message starting with "Error" if a dependency is missing or the
        image cannot be processed.
    """
    try:
        # Imported inside the function so a missing dependency is turned
        # into an error message by the ImportError handler below.
        import pytesseract
        from PIL import Image

        # The context manager releases the image's file handle even when
        # the OCR step raises.
        with Image.open(image_path) as img:
            extracted_text = pytesseract.image_to_string(img)
        return f"Extracted text from image: {extracted_text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        return f"Error extracting text from image: {e}"
@tool
def pdf_to_text(pdf_file_path: str) -> str:
    """
    Reads a PDF file from the given path and returns its content as text.

    Exceptions are caught and reported in the returned string.

    Args:
        pdf_file_path (str): The path to the PDF file.

    Returns:
        str: The text of every page, each page followed by a newline, or an
        error message if the file is missing or cannot be read.
    """
    try:
        import os

        # Check explicitly so a missing file always yields the dedicated
        # message, whatever exception type the PDF library would raise.
        if not os.path.exists(pdf_file_path):
            return f"Error: The file at '{pdf_file_path}' was not found."

        import pymupdf

        # The context manager closes the document once its text is read,
        # including when a page fails to extract.
        with pymupdf.open(pdf_file_path) as doc:
            return "".join(page.get_text("text") + "\n" for page in doc)
    except FileNotFoundError:
        # Still reachable if the file disappears between check and open.
        return f"Error: The file at '{pdf_file_path}' was not found."
    except Exception as e:
        return f"An error occurred: {e}"
@tool
def text_file_to_string(path: str) -> str:
    """
    Reads any plain text file and returns its content as a string.

    Works for:
    - .txt
    - .md
    - .json / .jsonl
    - .html
    - .csv (as raw text)
    - any UTF-8 or ASCII compatible text file

    The file is decoded as UTF-8. A leading byte order mark is dropped and
    undecodable bytes are skipped, so if the file contains binary data the
    returned string may be partially decoded.

    Args:
        path (str): The path to the text file.

    Returns:
        str: The file content, or an error message if the file is missing
        or cannot be read.
    """
    try:
        # "utf-8-sig" reads plain UTF-8/ASCII unchanged and strips a leading
        # BOM, which would otherwise show up as "\ufeff" at the start of
        # the returned text.
        with open(path, "r", encoding="utf-8-sig", errors="ignore") as f:
            return f.read()
    except FileNotFoundError:
        return f"Error: The file at '{path}' was not found."
    except Exception as e:
        return f"An error occurred: {e}"