Agent_Course_Final_Assignment

Sleeping

Create tools.py

9b2bab8 verified 11 months ago

1.73 kB

	import base64
	from langchain_core.messages import AnyMessage, HumanMessage, AIMessage


	def extract_text(img_path: str) -> str:
	    """
	    Extract text from an image file using a multimodal model.

	    The file is read from disk, base64-encoded into a ``data:`` URL and sent
	    to ``vision_llm`` together with a prompt asking for the text only.

	    Args:
	        img_path: Path of the image file to read.

	    Returns:
	        The model's reply with surrounding whitespace stripped, or an empty
	        string if any step fails (the error is printed, never raised).
	    """
	    import mimetypes

	    try:
	        # Read image and encode as base64
	        with open(img_path, "rb") as image_file:
	            image_bytes = image_file.read()
	        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

	        # Label the payload with the file's real image type; fall back to
	        # PNG when the extension is unknown or not an image type.
	        mime_type, _ = mimetypes.guess_type(img_path)
	        if mime_type is None or not mime_type.startswith("image/"):
	            mime_type = "image/png"

	        # Prepare the prompt including the base64 image data
	        message = [
	            HumanMessage(
	                content=[
	                    {
	                        "type": "text",
	                        "text": (
	                            "Extract all the text from this image. "
	                            "Return only the extracted text, no explanations."
	                        ),
	                    },
	                    {
	                        "type": "image_url",
	                        "image_url": {
	                            "url": f"data:{mime_type};base64,{image_base64}"
	                        },
	                    },
	                ]
	            )
	        ]

	        # Call the vision-capable model.
	        # NOTE(review): vision_llm is not defined in the visible part of
	        # this file - confirm it is created/imported before this is called.
	        response = vision_llm.invoke(message)

	        # Message content may be a plain string or a list of parts (strings
	        # or dicts carrying a "text" entry); flatten the latter to text
	        # instead of failing on string concatenation.
	        content = response.content
	        if not isinstance(content, str):
	            pieces = []
	            for part in content:
	                if isinstance(part, str):
	                    pieces.append(part)
	                elif isinstance(part, dict) and isinstance(part.get("text"), str):
	                    pieces.append(part["text"])
	            content = "".join(pieces)

	        return content.strip()
	    except Exception as e:
	        # Deliberate best-effort: callers get "" rather than an exception.
	        error_msg = f"Error extracting text: {str(e)}"
	        print(error_msg)
	        return ""


	def web_search(query: str):
	    """Performs a web search using SerpAPI.

	    Args:
	        query: The search query string, sent as the ``q`` parameter.

	    Returns:
	        The ``organic_results`` entry of the response dictionary, or an
	        empty list when the response has no such entry.
	    """
	    import os

	    # Take the key from the environment so a real secret never has to be
	    # committed; the original placeholder stays as the fallback value.
	    api_key = os.environ.get("SERPAPI_API_KEY", "your_serpapi_key")

	    # NOTE(review): GoogleSearch is not imported in the visible part of this
	    # file - presumably the SerpAPI client class; confirm the import exists.
	    search = GoogleSearch({
	        "q": query,
	        "num": 5,
	        "api_key": api_key,
	    })

	    # A response without organic results would raise KeyError with direct
	    # indexing; return an empty list instead.
	    return search.get_dict().get("organic_results", [])