from smolagents import tool


@tool
def image_to_text(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    # NOTE: the docstring above is consumed by the @tool decorator, so its
    # wording is kept stable; implementation notes live in comments instead.
    try:
        # Imported lazily so that a missing OCR dependency is reported as a
        # tool result string instead of breaking import of this module.
        import pytesseract
        from PIL import Image

        # The context manager closes the underlying file handle even if the
        # OCR call raises.
        with Image.open(image_path) as img:
            extracted_text = pytesseract.image_to_string(img)
        return f"Extracted text from image: {extracted_text}"
    except ImportError as e:
        # Report the dependency that is actually missing: the original
        # message blamed pytesseract even when Pillow was the absent package.
        missing_root = (e.name or "").split(".")[0]
        if missing_root == "PIL":
            return "Error: Pillow is not installed. Please install it with 'pip install Pillow'."
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        # Best-effort tool: every other failure (unreadable image, missing
        # Tesseract binary, ...) is returned as text rather than raised.
        return f"Error extracting text from image: {e}"


@tool
def pdf_to_text(pdf_file_path: str) -> str:
    """
    Reads a PDF file from the given path and returns its content as text.

    Args:
        pdf_file_path (str): The path to the PDF file.

    Returns:
        str: The text content of the PDF.
    """
    try:
        # Imported lazily so the module can be loaded without these packages.
        import os

        import pymupdf

        # Check for the file explicitly: PyMuPDF signals a missing file with
        # its own exception type, which the built-in FileNotFoundError
        # handler below would not match (NOTE(review): confirm against the
        # installed PyMuPDF version).
        if not os.path.exists(pdf_file_path):
            return f"Error: The file at '{pdf_file_path}' was not found."

        # The context manager closes the document once extraction finishes.
        with pymupdf.open(pdf_file_path) as doc:
            # One newline is appended after each page's text, as before.
            return "".join(page.get_text("text") + "\n" for page in doc)
    except ImportError:
        # Mirrors the dependency message style used by image_to_text.
        return "Error: pymupdf is not installed. Please install it with 'pip install pymupdf'."
    except FileNotFoundError:
        return f"Error: The file at '{pdf_file_path}' was not found."
    except Exception as e:
        # Any other failure (corrupt PDF, permission error, ...) is returned
        # as text rather than raised.
        return f"An error occurred: {e}"


@tool
def text_file_to_string(path: str) -> str:
    """
    Reads any plain text file and returns its content as a string.

    Args:
        path (str): The path to the text file.

    Works for:
    - .txt
    - .md
    - .json / .jsonl
    - .html
    - .csv (as raw text)
    - any UTF-8 or ASCII compatible text file

    If the file contains binary data, the returned string may be partially decoded.
    """
    try:
        # errors="ignore" drops bytes that are not valid UTF-8 instead of
        # raising, which is why binary input yields a partially decoded string.
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    except FileNotFoundError:
        return f"Error: The file at '{path}' was not found."
    except Exception as e:
        # Remaining failures (permissions, path is a directory, ...) are
        # returned as text rather than raised.
        return f"An error occurred: {e}"