Spaces:
Sleeping
Sleeping
import base64
import os

import pandas as pd
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain_community.tools import DuckDuckGoSearchRun, TavilySearchResults
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
#LLMs
# Shared Gemini chat model instance; every tool below routes its
# multimodal prompts through this single client.
google_llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-lite')
| #IMAGE_TOOLS | |
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: A local image file path (string).

    Returns:
        A single string with the text extracted from the image, or ""
        if reading the file or calling the model fails.
    """
    try:
        # Load the image and base64-encode it so it can be inlined
        # as a data URL in the message payload.
        with open(img_path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode("utf-8")

        # One multimodal human message: the OCR instruction followed
        # by the inline image.
        instruction_part = {
            "type": "text",
            "text": (
                "Extract all the text from this image. "
                "Return only the extracted text, no explanations."
            ),
        }
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded}"},
        }
        prompt = [HumanMessage(content=[instruction_part, image_part])]

        # Ask the vision-capable model for the OCR result.
        reply = google_llm.invoke(prompt)
        return (reply.content + "\n\n").strip()
    except Exception as e:
        # Best-effort tool: log the failure and return an empty string
        # rather than raising.
        print(f"Error extracting text: {str(e)}")
        return ""
def describe_image(img_path: str) -> str:
    """
    Takes an image file path and returns a detailed description of the image.

    Args:
        img_path (str): Local file path to the image.

    Returns:
        str: A detailed description of the image content, or "" if reading
        the file or calling the model fails.
    """
    try:
        # Read image and encode as base64 so it can be inlined as a data URL.
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Prompt: the description instruction plus the inline base64 image.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Provide a detailed description from this image. "
                            "Return descriptive text only."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model.
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Bug fix: previously printed "Error extracting text" (copy-pasted
        # from extract_text); report the actual operation instead.
        error_msg = f"Error describing image: {str(e)}"
        print(error_msg)
        return ""
| #AUDIO_TOOLS | |
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe audio from a file using a multimodal model.

    Args:
        audio_path: A local audio file path (string). The audio format is
            inferred from the file extension (e.g. ".mp3" -> "mp3");
            extension-less paths fall back to "wav", matching the old behavior.

    Returns:
        A single string containing the transcribed text, or "" if reading
        the file or calling the model fails.
    """
    try:
        # Read audio and encode as base64 for the inline payload.
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode()

        # Generalization: the format was previously hard-coded to "wav",
        # which mislabeled mp3/flac/etc. inputs. Infer it from the file
        # extension and keep "wav" as the fallback.
        ext = os.path.splitext(audio_path)[1].lstrip(".").lower()
        audio_format = ext or "wav"

        # Prompt: the transcription instruction plus the inline base64 audio.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Transcribe the following audio input:"
                        ),
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": audio_base64,
                            "format": audio_format,
                        },
                    },
                ]
            )
        ]

        # Call the audio-capable model (old comment wrongly said "vision").
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: log the failure and return an empty string.
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""
| #WEB_SEARCH_TOOL | |
| def web_search(query: str) -> str: | |
| """Perform a web search and return the top 5 results.""" | |
| #search_tool = DuckDuckGoSearchRun() | |
| search_tool = TavilySearchResults(searxch_depth='basic') | |
| result = search_tool.invoke(query) | |
| return result | |
| #FILE_PARSE_TOOL | |
def read_file(file_path: str) -> str:
    """
    Reads a text based file and returns its content as a string.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The content of the file. Tabular formats (.csv, .xlsx) are
        rendered via pandas ``to_string()``.

    Raises:
        ValueError: If the file extension is not one of .txt, .py, .csv, .xlsx.
    """
    # Case-insensitive extension check so e.g. "DATA.CSV" is accepted too.
    lowered = file_path.lower()
    if lowered.endswith(('.txt', '.py')):
        # Plain-text formats are returned verbatim (merged duplicate branches).
        with open(file_path, 'r') as file:
            return file.read()
    elif lowered.endswith('.csv'):
        return pd.read_csv(file_path).to_string()
    elif lowered.endswith('.xlsx'):
        return pd.read_excel(file_path).to_string()
    else:
        # Bug fix: the old message omitted .py even though it is supported.
        raise ValueError(
            "Unsupported file format. Only .txt, .py, .csv, and .xlsx are supported."
        )