# unit4_test/tools/image_inspector_tool.py
# Vladyslav Khaitov
# Remove extra empty line in tools description
# 1f15ff8
import base64
import mimetypes
import os
from dotenv import load_dotenv
# from smolagents import tool, ChatMessage
# from ..rate_limit_models import ExponentialBackoffOpenAIServerModel
load_dotenv(override=True)
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
# @tool
# def image_inspector_tool(image_path: str, question: str | None = None) -> str:
# """A tool that can answer questions about attached images.
#
# Args:
# image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
# question: The question to answer.
# """
# add_note = False
# if not question:
# add_note = True
# question = "Please write a detailed caption for this image."
# if not isinstance(image_path, str):
# raise Exception("You should provide at least `image_path` string argument to this tool!")
#
# GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
# GEMINI_MODEL = os.getenv("LLM_MODEL")
# model = ExponentialBackoffOpenAIServerModel(GEMINI_MODEL,
# api_base='https://generativelanguage.googleapis.com/v1beta/openai/',
# api_key=GEMINI_API_KEY,
# max_tokens=8096 * 2)
#
# mime_type, _ = mimetypes.guess_type(image_path)
# base64_image = encode_image(image_path)
#
# response = model.generate(
# messages=[
# ChatMessage(role="user", content=[
# {
# "type": "text",
# "text": question,
# },
# {
# "type": "image_url",
# "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
# },
# ])
# ]
# )
# try:
# output = response.content
# except Exception:
# raise Exception(f"Response format unexpected: {response}")
#
# if add_note:
# output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
#
# return output
from smolagents import Tool
from smolagents.models import Model, MessageRole, ChatMessage
class ImageInspectorTool(Tool):
    """Tool that sends a local image plus a question to a chat model.

    The image file is read from disk, base64-encoded and embedded in a
    ``data:`` URL inside a single user message. The model passed to the
    constructor is called with that message and its ``content`` is returned
    as a string.
    """

    name = "inspect_image"
    description = (
        "A tool that can answer questions about attached images. Use this tool when you need to analyze or describe images.\n"
        "This tool handles various image formats and can provide detailed descriptions or answer specific questions about image content.\n"
    )
    inputs = {
        "image_path": {
            "description": "The path to the image on which to answer the question. This should be a local path to downloaded image.",
            "type": "string",
        },
        "question": {
            "description": "[Optional]: The question to answer about the image. If not provided, will generate a detailed caption.",
            "type": "string",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, model: Model):
        """Store the model that will be called to answer image questions.

        Args:
            model: Callable model; it is invoked with a list of messages and
                the ``content`` attribute of its result is used as the answer.
        """
        super().__init__()
        self.model = model

    @staticmethod
    def _content_to_text(content: list) -> str:
        """Collapse list-shaped message content into plain text.

        String ``"text"`` fields of dict items are joined with newlines. If
        no such field is found, the ``str()`` of the whole list is returned
        so that no information is silently dropped.
        """
        text_parts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "\n".join(text_parts) if text_parts else str(content)

    def forward(self, image_path: str, question: str | None = None) -> str:
        """Answer *question* about the image stored at *image_path*.

        Args:
            image_path: Local path of the image file to inspect.
            question: Question to ask about the image. When empty or ``None``
                a detailed caption is requested instead, and the returned text
                is prefixed with a note saying so.

        Returns:
            The model's answer as a string.

        Raises:
            TypeError: If ``image_path`` is not a string.
            OSError: If the image file cannot be read.
            Exception: If calling the model or reading its ``content`` fails;
                the original error is attached as ``__cause__``.
        """
        if not isinstance(image_path, str):
            raise TypeError("You should provide at least `image_path` string argument to this tool!")

        add_note = not question
        if add_note:
            question = "Please write a detailed caption for this image."

        mime_type, _ = mimetypes.guess_type(image_path)
        if mime_type is None:
            # guess_type returns None for unknown or missing extensions, which
            # would otherwise yield a malformed "data:None;base64,..." URL.
            # NOTE(review): "image/jpeg" is a best-effort default — confirm the
            # target model accepts it for files without a recognised extension.
            mime_type = "image/jpeg"
        base64_image = encode_image(image_path)

        messages = [
            ChatMessage(
                role=MessageRole.USER,
                content=[
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ],
            )
        ]

        try:
            output = self.model(messages).content
        except Exception as e:
            # Keep the original message format but chain the cause so the
            # underlying traceback is not lost.
            raise Exception("Response format unexpected: " + str(e)) from e

        if isinstance(output, list):
            # Content may be a list of dicts; return its text rather than the
            # Python repr of the list.
            output = self._content_to_text(output)

        if add_note:
            output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
        return str(output)