import base64
import mimetypes
import os

from dotenv import load_dotenv
# from smolagents import tool, ChatMessage
# from ..rate_limit_models import ExponentialBackoffOpenAIServerModel

load_dotenv(override=True)


def encode_image(image_path: str) -> str:
    """Return the contents of the file at *image_path* as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


# Earlier function-style version of this tool, left commented out.
#
# @tool
# def image_inspector_tool(image_path: str, question: str | None = None) -> str:
#     """A tool that can answer questions about attached images.
#
#     Args:
#         image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
#         question: The question to answer.
#     """
#     add_note = False
#     if not question:
#         add_note = True
#         question = "Please write a detailed caption for this image."
#     if not isinstance(image_path, str):
#         raise Exception("You should provide at least `image_path` string argument to this tool!")
#
#     GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
#     GEMINI_MODEL = os.getenv("LLM_MODEL")
#     model = ExponentialBackoffOpenAIServerModel(GEMINI_MODEL,
#                                                 api_base='https://generativelanguage.googleapis.com/v1beta/openai/',
#                                                 api_key=GEMINI_API_KEY,
#                                                 max_tokens=8096 * 2)
#
#     mime_type, _ = mimetypes.guess_type(image_path)
#     base64_image = encode_image(image_path)
#
#     response = model.generate(
#         messages=[
#             ChatMessage(role="user", content=[
#                 {
#                     "type": "text",
#                     "text": question,
#                 },
#                 {
#                     "type": "image_url",
#                     "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
#                 },
#             ])
#         ]
#     )
#     try:
#         output = response.content
#     except Exception:
#         raise Exception(f"Response format unexpected: {response}")
#
#     if add_note:
#         output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
#
#     return output


# NOTE(review): these imports stay below load_dotenv(), matching the original
# statement order of the module.
from smolagents import Tool
from smolagents.models import Model, MessageRole, ChatMessage


class ImageInspectorTool(Tool):
    """Tool that sends a local image plus a question to a chat model.

    The image is embedded in the request as a base64 ``data:`` URL next to the
    question text, and the model's reply is returned as a string.
    """

    name = "inspect_image"
    description = (
        "A tool that can answer questions about attached images. \n"
        "Use this tool when you need to analyze or describe images. "
        "This tool handles various image formats and can provide detailed "
        "descriptions or answer specific questions about image content. "
    )
    inputs = {
        "image_path": {
            "description": "The path to the image on which to answer the question. This should be a local path to downloaded image.",
            "type": "string",
        },
        "question": {
            "description": "[Optional]: The question to answer about the image. If not provided, will generate a detailed caption.",
            "type": "string",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, model: Model):
        """Store the model that will be called with the image message.

        Args:
            model: Callable model; it is invoked with a list of
                ``ChatMessage`` objects and its result's ``content`` is read.
        """
        super().__init__()
        self.model = model

    def forward(self, image_path: str, question: str | None = None) -> str:
        """Ask the model about the image at *image_path*.

        Args:
            image_path: Local path of the image file to send.
            question: Question to ask about the image. When missing or empty,
                a detailed caption is requested instead and the answer is
                prefixed with a note saying so.

        Returns:
            The model's answer as a string.

        Raises:
            Exception: If ``image_path`` is not a string, or if calling the
                model or reading its ``content`` fails.
            OSError: If the image file cannot be read.
        """
        if not isinstance(image_path, str):
            raise Exception("You should provide at least `image_path` string argument to this tool!")

        add_note = False
        if not question:
            add_note = True
            question = "Please write a detailed caption for this image."

        # guess_type() yields None for unknown or missing extensions; without
        # a fallback the URL below would start with the invalid "data:None;".
        mime_type = mimetypes.guess_type(image_path)[0] or "application/octet-stream"
        base64_image = encode_image(image_path)

        messages = [
            ChatMessage(
                role="user",
                content=[
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ]
            )
        ]

        try:
            output = self.model(messages).content
            if isinstance(output, list):
                # Handle case where content is a list of dicts
                output = str(output)
        except Exception as e:
            # Chain explicitly so the underlying failure stays in the traceback.
            raise Exception(f"Response format unexpected: {e}") from e

        if add_note:
            output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"

        return str(output)