unit4_test

Sleeping

unit4_test / tools /image_inspector_tool.py

Vladyslav Khaitov

Remove extra empty line in tools description

1f15ff8 6 months ago

4.66 kB

	import base64
	import mimetypes
	import os

	from dotenv import load_dotenv
	# from smolagents import tool, ChatMessage

	# from ..rate_limit_models import ExponentialBackoffOpenAIServerModel

	load_dotenv(override=True)


	def encode_image(image_path):
	    """Return the contents of a file as a base64 text string.

	    Args:
	        image_path: Path of the file to read; it is opened in binary mode.

	    Returns:
	        The base64 encoding of the file's bytes, decoded to ``str`` as UTF-8.
	    """
	    with open(image_path, mode="rb") as handle:
	        raw_bytes = handle.read()
	    encoded = base64.b64encode(raw_bytes)
	    return encoded.decode("utf-8")


	# @tool
	# def image_inspector_tool(image_path: str, question: str | None = None) -> str:
	# """A tool that can answer questions about attached images.
	#
	# Args:
	# image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
	# question: The question to answer.
	# """
	# add_note = False
	# if not question:
	# add_note = True
	# question = "Please write a detailed caption for this image."
	# if not isinstance(image_path, str):
	# raise Exception("You should provide at least `image_path` string argument to this tool!")
	#
	# GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
	# GEMINI_MODEL = os.getenv("LLM_MODEL")
	# model = ExponentialBackoffOpenAIServerModel(GEMINI_MODEL,
	# api_base='https://generativelanguage.googleapis.com/v1beta/openai/',
	# api_key=GEMINI_API_KEY,
	# max_tokens=8096 * 2)
	#
	# mime_type, _ = mimetypes.guess_type(image_path)
	# base64_image = encode_image(image_path)
	#
	# response = model.generate(
	# messages=[
	# ChatMessage(role="user", content=[
	# {
	# "type": "text",
	# "text": question,
	# },
	# {
	# "type": "image_url",
	# "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
	# },
	# ])
	# ]
	# )
	# try:
	# output = response.content
	# except Exception:
	# raise Exception(f"Response format unexpected: {response}")
	#
	# if add_note:
	# output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
	#
	# return output


	from smolagents import Tool
	from smolagents.models import Model, MessageRole, ChatMessage


	class ImageInspectorTool(Tool):
	    """Tool that asks a model a question about a local image file.

	    The image is read from disk, base64-encoded into a ``data:`` URL and sent
	    to the injected model together with the question as a single user message.
	    """

	    name = "inspect_image"
	    description = """A tool that can answer questions about attached images. Use this tool when you need to analyze or describe images.
	This tool handles various image formats and can provide detailed descriptions or answer specific questions about image content.
	"""

	    inputs = {
	        "image_path": {
	            "description": "The path to the image on which to answer the question. This should be a local path to downloaded image.",
	            "type": "string",
	        },
	        "question": {
	            "description": "[Optional]: The question to answer about the image. If not provided, will generate a detailed caption.",
	            "type": "string",
	            "nullable": True,
	        },
	    }
	    output_type = "string"

	    def __init__(self, model: Model):
	        """Store the model used to answer questions about images.

	        Args:
	            model: Object that ``forward`` calls with a list of messages; the
	                ``content`` attribute of its return value is used as the answer.
	        """
	        super().__init__()
	        self.model = model

	    def forward(self, image_path: str, question: str | None = None) -> str:
	        """Answer ``question`` about the image at ``image_path``.

	        Args:
	            image_path: Local path of the image file to inspect.
	            question: Question to ask about the image. When empty or ``None``,
	                a detailed caption is requested instead and the returned text
	                is prefixed with a note saying so.

	        Returns:
	            The model's answer converted to ``str``.

	        Raises:
	            Exception: If ``image_path`` is not a string, or if calling the
	                model or reading ``content`` from its response fails (the
	                original error is chained as ``__cause__``).

	        Errors raised by ``encode_image`` while reading the file propagate
	        unchanged.
	        """
	        if not isinstance(image_path, str):
	            raise Exception("You should provide at least `image_path` string argument to this tool!")

	        # An empty/missing question switches the tool into captioning mode.
	        add_note = not question
	        if add_note:
	            question = "Please write a detailed caption for this image."

	        mime_type, _ = mimetypes.guess_type(image_path)
	        if mime_type is None:
	            # guess_type returns None for unknown/missing extensions; without a
	            # fallback the URL would contain the literal text "None".
	            mime_type = "application/octet-stream"
	        base64_image = encode_image(image_path)

	        messages = [
	            ChatMessage(
	                role="user",
	                content=[
	                    {
	                        "type": "text",
	                        "text": question,
	                    },
	                    {
	                        "type": "image_url",
	                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
	                    },
	                ],
	            )
	        ]

	        try:
	            output = self.model(messages).content
	        except Exception as e:
	            # Chain the cause so the underlying traceback is not lost.
	            raise Exception("Response format unexpected: " + str(e)) from e

	        # Content may be a list of dicts rather than plain text; always hand
	        # back a string.
	        output = str(output)

	        if add_note:
	            output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"

	        return output