import base64 from python.helpers.print_style import PrintStyle from python.helpers.tool import Tool, Response from python.helpers import runtime, files, images from mimetypes import guess_type from python.helpers import history # image optimization and token estimation for context window MAX_PIXELS = 768_000 QUALITY = 75 TOKENS_ESTIMATE = 1500 class VisionLoad(Tool): async def execute(self, paths: list[str] = [], **kwargs) -> Response: self.images_dict = {} template: list[dict[str, str]] = [] # type: ignore for path in paths: if not await runtime.call_development_function(files.exists, str(path)): continue if path not in self.images_dict: mime_type, _ = guess_type(str(path)) if mime_type and mime_type.startswith("image/"): try: # Read binary file file_content = await runtime.call_development_function( files.read_file_base64, str(path) ) file_content = base64.b64decode(file_content) # Compress and convert to JPEG compressed = images.compress_image( file_content, max_pixels=MAX_PIXELS, quality=QUALITY ) # Encode as base64 file_content_b64 = base64.b64encode(compressed).decode("utf-8") # DEBUG: Save compressed image # await runtime.call_development_function( # files.write_file_base64, str(path), file_content_b64 # ) # Construct the data URL (always JPEG after compression) self.images_dict[path] = file_content_b64 except Exception as e: self.images_dict[path] = None PrintStyle().error(f"Error processing image {path}: {e}") self.agent.context.log.log("warning", f"Error processing image {path}: {e}") return Response(message="dummy", break_loop=False) async def after_execution(self, response: Response, **kwargs): # build image data messages for LLMs, or error message content = [] if self.images_dict: for path, image in self.images_dict.items(): if image: content.append( { "type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}"}, } ) else: content.append( { "type": "text", "text": "Error processing image " + path, } ) # append as raw message content for LLMs with vision tokens estimate msg = history.RawMessage(raw_content=content, preview="") self.agent.hist_add_message( False, content=msg, tokens=TOKENS_ESTIMATE * len(content) ) else: self.agent.hist_add_tool_result(self.name, "No images processed") # print and log short version message = ( "No images processed" if not self.images_dict else f"{len(self.images_dict)} images processed" ) PrintStyle( font_color="#1B4F72", background_color="white", padding=True, bold=True ).print(f"{self.agent.agent_name}: Response from tool '{self.name}'") PrintStyle(font_color="#85C1E9").print(message) self.log.update(result=message)