Spaces:

DakshChaudhary
/

Agent_GAIA_Benchmark

Sleeping

Agent_GAIA_Benchmark / agent_tools /ImageReaderTool.py

Refactored + Added prompts + Added Tools (Calculator, FileDownloader, ImageReader, Pandas, WebSearch) + NebiusAI inference added

1b34f03 8 months ago

raw

history blame contribute delete

2.06 kB

	import os
	import base64
	from llama_index.core.tools import FunctionTool
	from llama_index.readers.file.image import ImageReader
	from agent_models.models import get_vision_model_client
	from agent_prompts.SystemPrompt import vision_model_system_prompt

	def get_image_description(image_path: str) -> str:
	    """
	    Analyzes a local image and returns a text description. This tool is used to "see" what is in an image file.

	    The file is read from disk, base64-encoded into a ``data:`` URL and sent,
	    together with ``vision_model_system_prompt``, to the chat-completions
	    client returned by ``get_vision_model_client()``.

	    Args:
	        image_path (str): The local file path of the image to analyze.

	    Returns:
	        str: The message content of the first choice in the model response,
	        or a string starting with "Error analyzing image:" if any step
	        (reading the file, building the request, calling the API) raised.
	    """
	    # Function-scope import keeps this block self-contained (stdlib only).
	    import mimetypes

	    try:
	        print(f"Analyzing image at path: {image_path}")

	        # Derive the MIME type from the file extension so that JPEG/GIF/WebP
	        # files are not mislabeled as PNG in the data URL. Fall back to the
	        # previous hard-coded "image/png" when the extension is unknown or
	        # does not map to an image type.
	        guessed_type, _ = mimetypes.guess_type(image_path)
	        if guessed_type and guessed_type.startswith("image/"):
	            mime_type = guessed_type
	        else:
	            mime_type = "image/png"

	        # Read and encode the image
	        with open(image_path, "rb") as img_file:
	            b64_image = base64.b64encode(img_file.read()).decode("utf-8")
	        b64_url = f"data:{mime_type};base64,{b64_image}"

	        # Get Nebius client
	        client = get_vision_model_client()

	        # Call Nebius API
	        response = client.chat.completions.create(
	            model="Qwen/Qwen2-VL-72B-Instruct",
	            messages=[
	                {
	                    "role": "system",
	                    "content": vision_model_system_prompt,
	                },
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "text", "text": "Here is an image."},
	                        {"type": "image_url", "image_url": {"url": b64_url}},
	                    ],
	                },
	            ],
	        )

	        description = response.choices[0].message.content
	        print(f"Vision model response: {description}")
	        return description

	    except Exception as e:
	        # Tool boundary: report the failure as text instead of raising, so the
	        # caller always receives a string.
	        return f"Error analyzing image: {e}"

	# Wrapper function to create the tool for our agent
	def get_image_interpreter_tool() -> FunctionTool:
	    """
	    Build the ``image_interpreter`` tool for the agent.

	    Returns:
	        FunctionTool: A tool created via ``FunctionTool.from_defaults`` that
	        wraps ``get_image_description`` under the name "image_interpreter".
	    """
	    # Adjacent literals are concatenated into one description string.
	    tool_description = (
	        "A tool to analyze an image from a local file path and return a detailed text description. "
	        "Use this to 'see' what is in an image file that has already been downloaded."
	    )
	    tool_settings = {
	        "fn": get_image_description,
	        "name": "image_interpreter",
	        "description": tool_description,
	    }
	    return FunctionTool.from_defaults(**tool_settings)