GaiaAgent_Final_Assignment

Running

App Files Files Community

GaiaAgent_Final_Assignment / tools /files_to_text.py

Francesco-A

fix(tools): import @tool in download_file.py and files_to_text.py

998967a 14 days ago

raw

history blame contribute delete

2.2 kB

	from smolagents import tool

	@tool
	def image_to_text(image_path: str) -> str:
	    """
	    Extract text from an image using pytesseract OCR (if available).

	    Failures are returned as error strings instead of being raised, so
	    the caller always receives a string.

	    Args:
	        image_path: Path to the image file

	    Returns:
	        Extracted text or error message
	    """
	    try:
	        # Imported lazily so that a missing OCR dependency is reported as
	        # an error message by the ImportError handler below.
	        import pytesseract
	        from PIL import Image

	        # The context manager closes the underlying file handle as soon as
	        # OCR is finished, even if pytesseract raises.
	        with Image.open(image_path) as img:
	            extracted_text = pytesseract.image_to_string(img)

	        return f"Extracted text from image: {extracted_text}"
	    except ImportError:
	        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
	    except Exception as e:
	        return f"Error extracting text from image: {e}"

	@tool
	def pdf_to_text(pdf_file_path: str) -> str:
	    """
	    Reads a PDF file from the given path and returns its content as text.

	    Text is extracted page by page with PyMuPDF and the text of every
	    page is followed by a newline. Failures are returned as error
	    strings instead of being raised.

	    Args:
	        pdf_file_path (str): The path to the PDF file.

	    Returns:
	        str: The text content of the PDF, or an error message.
	    """
	    try:
	        import os

	        # PyMuPDF signals a missing file with its own FileNotFoundError,
	        # which derives from RuntimeError rather than the builtin, so the
	        # builtin handler below would not see it. Check the path up front
	        # to keep the dedicated "not found" message reachable.
	        if not os.path.exists(pdf_file_path):
	            return f"Error: The file at '{pdf_file_path}' was not found."

	        import pymupdf

	        # The context manager closes the document (and its file handle)
	        # once the text has been collected, even if a page fails.
	        with pymupdf.open(pdf_file_path) as doc:
	            return "".join(page.get_text("text") + "\n" for page in doc)
	    except FileNotFoundError:
	        return f"Error: The file at '{pdf_file_path}' was not found."
	    except Exception as e:
	        return f"An error occurred: {e}"

	@tool
	def text_file_to_string(path: str) -> str:
	    """
	    Load a plain text file and hand back everything in it as one string.

	    Suitable for .txt, .md, .json / .jsonl, .html, .csv (as raw text) and
	    any other UTF-8 or ASCII compatible text file. The file is decoded as
	    UTF-8 and bytes that cannot be decoded are dropped, so binary content
	    may come back only partially decoded.

	    Args:
	        path (str): The path to the text file.

	    Returns:
	        str: The file content, or an error message if reading failed.
	    """
	    try:
	        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
	            return handle.read()
	    except FileNotFoundError:
	        return f"Error: The file at '{path}' was not found."
	    except Exception as err:
	        return f"An error occurred: {err}"