selfevolveagent / evoagentx /tools /image_tools /openai_image_tools /image_analysis_openai.py

Upload 2846 files

5374a2d verified 26 days ago

3.8 kB

	from typing import Dict, Optional, List
	from ...tool import Tool
	from ...storage_handler import FileStorageHandler, LocalStorageHandler
	from .openai_utils import create_openai_client


	class OpenAIImageAnalysisTool(Tool):
	    """Answer a text prompt about one image using the OpenAI Responses API.

	    The image is supplied either as an HTTP(S) URL or as a path that is read
	    through the configured storage handler and inlined as a base64 data URL.
	    Calling the tool returns ``{"content": <text>}`` on success or
	    ``{"error": <message>}`` on failure; errors derived from ``Exception``
	    are reported in the returned dict rather than raised.
	    """

	    name: str = "openai_image_analysis"
	    description: str = "Simple image analysis via OpenAI Responses API (input_text + input_image)."

	    inputs: Dict[str, Dict[str, str]] = {
	        "prompt": {"type": "string", "description": "User question/instruction. Required."},
	        "image_url": {"type": "string", "description": "HTTP(S) image URL. Optional if image_path provided."},
	        "image_path": {"type": "string", "description": "Local image path; converted to data URL internally."},
	        "model": {"type": "string", "description": "OpenAI model for responses.create (e.g., gpt-4o-mini, gpt-4.1, gpt-5). Optional."},
	    }
	    required: Optional[List[str]] = ["prompt"]

	    def __init__(self, api_key: str, organization_id: Optional[str] = None, model: str = "gpt-4o-mini",
	                 storage_handler: Optional[FileStorageHandler] = None):
	        """Store credentials, the default model and the storage backend.

	        Args:
	            api_key: Key passed to ``create_openai_client`` on every call.
	            organization_id: Optional organization passed alongside the key.
	            model: Model used when a call does not name one.
	            storage_handler: Backend used to read ``image_path`` inputs; a
	                ``LocalStorageHandler()`` is created when omitted.
	        """
	        super().__init__()
	        self.api_key = api_key
	        self.organization_id = organization_id
	        self.model = model
	        self.storage_handler = storage_handler or LocalStorageHandler()

	    def __call__(
	        self,
	        prompt: str,
	        image_url: Optional[str] = None,
	        image_path: Optional[str] = None,
	        model: Optional[str] = None,
	    ):
	        """Send ``prompt`` plus one image to the model and return its text.

	        Args:
	            prompt: Question or instruction about the image.
	            image_url: Image URL; takes precedence over ``image_path``.
	            image_path: Path read via the storage handler when no URL is given.
	            model: Per-call model override; falls back to ``self.model``.

	        Returns:
	            ``{"content": str}`` on success, otherwise ``{"error": str}``.
	        """
	        try:
	            # Fail fast instead of sending an input_image part whose URL is
	            # None and waiting for the API to reject the request.
	            if not image_url and not image_path:
	                return {"error": "Either image_url or image_path must be provided."}

	            client = create_openai_client(self.api_key, self.organization_id)
	            actual_model = model or self.model

	            # Resolve image source: prefer URL, else local path to data URL
	            final_image_url = image_url
	            if not final_image_url:
	                import base64
	                import mimetypes

	                # Fall back to PNG when the extension gives no MIME hint.
	                mime, _ = mimetypes.guess_type(image_path)
	                mime = mime or "image/png"

	                # Read the raw bytes through the storage handler; this
	                # bypasses the high-level read() method that processes images.
	                try:
	                    # Translate user path to system path first
	                    system_path = self.storage_handler.translate_in(image_path)
	                    content = self.storage_handler._read_raw(system_path)
	                except Exception as e:
	                    return {"error": f"Could not read image {image_path}: {e}"}

	                b64 = base64.b64encode(content).decode("utf-8")
	                final_image_url = f"data:{mime};base64,{b64}"

	            response = client.responses.create(
	                model=actual_model,
	                input=[
	                    {
	                        "role": "user",
	                        "content": [
	                            {"type": "input_text", "text": prompt},
	                            {"type": "input_image", "image_url": final_image_url},
	                        ],
	                    }
	                ],
	            )

	            return {"content": self._extract_text(response)}
	        except Exception as e:
	            # Tool boundary: report the failure to the caller as data.
	            return {"error": f"OpenAI image analysis failed: {e}"}

	    @staticmethod
	    def _extract_text(response) -> str:
	        """Return the text of ``response``, or ``""`` when none can be found.

	        Prefers the unified ``output_text`` attribute. When it is absent, the
	        text is assembled from ``response.output`` (message items holding
	        ``output_text`` parts) or, failing that, from the first entry of a
	        legacy ``choices`` list. Items may be SDK objects or plain dicts.
	        """
	        text = getattr(response, "output_text", None)
	        if text is not None:
	            return text or ""

	        def field(obj, key, default=None):
	            # Uniform access for dict payloads and attribute-style objects.
	            if isinstance(obj, dict):
	                return obj.get(key, default)
	            return getattr(obj, key, default)

	        # Best effort only: an unexpected response shape yields empty content.
	        try:
	            items = field(response, "output")
	            if not items:
	                # Legacy choices are alternatives, so only the first is used.
	                choices = field(response, "choices")
	                items = choices[:1] if isinstance(choices, list) else []
	            if not isinstance(items, list):
	                return ""

	            pieces = []
	            for item in items:
	                message = field(item, "message")
	                holder = message if message is not None else item
	                content = field(holder, "content")
	                if isinstance(content, str):
	                    pieces.append(content)
	                elif isinstance(content, list):
	                    for part in content:
	                        # Skip non-text parts such as refusals.
	                        if field(part, "type", "output_text") != "output_text":
	                            continue
	                        part_text = field(part, "text")
	                        if isinstance(part_text, str):
	                            pieces.append(part_text)
	            return "".join(pieces)
	        except Exception:
	            return ""