|
|
from smolagents import tool |
|
|
|
|
|
@tool
def image_to_text(image_path: str) -> str:
    """
    Run OCR on an image with pytesseract and return the recognised text.

    Problems (missing dependencies, unreadable image, OCR failure) are
    reported through the returned string instead of being raised.

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    # NOTE: the @tool decorator reads this docstring, so the Args section
    # must keep describing every parameter of the signature.

    # Optional dependencies are imported lazily so the module still loads
    # when OCR support is not installed.
    try:
        import pytesseract
        from PIL import Image
    except ImportError as exc:
        # Only blame Pillow when it is the module that actually failed to
        # import; every other case keeps the original pytesseract message.
        if (getattr(exc, "name", None) or "").startswith("PIL"):
            return "Error: Pillow is not installed. Please install it with 'pip install pillow'."
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."

    try:
        # The context manager closes the underlying file handle once OCR is
        # done; previously the opened image was never closed.
        with Image.open(image_path) as img:
            extracted_text = pytesseract.image_to_string(img)
    except Exception as exc:
        # Tool boundary: hand the failure back as text rather than raising.
        return f"Error extracting text from image: {exc}"

    return f"Extracted text from image: {extracted_text}"
|
|
|
|
|
@tool
def pdf_to_text(pdf_file_path: str) -> str:
    """
    Reads a PDF file from the given path and returns its content as text.

    The text of every page is concatenated in page order, with a newline
    appended after each page. Failures are returned as a message string.

    Args:
        pdf_file_path (str): The path to the PDF file.

    Returns:
        str: The text content of the PDF, or an error message.
    """
    # NOTE: the @tool decorator reads this docstring, so the Args section
    # must keep describing every parameter of the signature.
    try:
        import os

        # PyMuPDF documents its own FileNotFoundError as a RuntimeError
        # subclass, so the builtin handler below would not see a missing
        # path; check explicitly so the "not found" message is reachable.
        if not os.path.exists(pdf_file_path):
            return f"Error: The file at '{pdf_file_path}' was not found."

        import pymupdf

        # The context manager closes the document even if a page fails to
        # extract; previously the document was never closed.
        with pymupdf.open(pdf_file_path) as doc:
            return "".join(page.get_text("text") + "\n" for page in doc)
    except FileNotFoundError:
        # Kept for the case where the file disappears between the check
        # and the open call.
        return f"Error: The file at '{pdf_file_path}' was not found."
    except Exception as e:
        # Tool boundary: hand the failure back as text rather than raising.
        return f"An error occurred: {e}"
|
|
|
|
|
@tool
def text_file_to_string(path: str) -> str:
    """
    Loads a plain text file and returns everything in it as a single string.

    Args:
        path (str): The path to the text file.

    Works for:
        - .txt
        - .md
        - .json / .jsonl
        - .html
        - .csv (as raw text)
        - any UTF-8 or ASCII compatible text file

    Bytes that are not valid UTF-8 are dropped while decoding, so a file with
    binary data comes back only partially decoded.
    """
    try:
        # errors="ignore" skips undecodable bytes instead of raising.
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            return handle.read()
    except FileNotFoundError:
        return f"Error: The file at '{path}' was not found."
    except Exception as err:
        # Any other failure is reported as text rather than raised.
        return f"An error occurred: {err}"
|
|
|