iLOVE2D's picture
Upload 2846 files
5374a2d verified
from typing import Dict, Optional, List
from ...tool import Tool
from ...storage_handler import FileStorageHandler, LocalStorageHandler
from .openai_utils import create_openai_client
class OpenAIImageAnalysisTool(Tool):
    """Ask an OpenAI model a question about a single image.

    The image is given either as an HTTP(S) URL or as a path that is read
    through the configured storage handler and embedded in the request as a
    base64 ``data:`` URL. The request is sent with ``client.responses.create``
    using one ``input_text`` part and one ``input_image`` part.

    Calling the tool never raises for operational failures; it returns either
    ``{"content": <text>}`` or ``{"error": <message>}``.
    """

    name: str = "openai_image_analysis"
    description: str = "Simple image analysis via OpenAI Responses API (input_text + input_image)."
    inputs: Dict[str, Dict[str, str]] = {
        "prompt": {"type": "string", "description": "User question/instruction. Required."},
        "image_url": {"type": "string", "description": "HTTP(S) image URL. Optional if image_path provided."},
        "image_path": {"type": "string", "description": "Local image path; converted to data URL internally."},
        "model": {"type": "string", "description": "OpenAI model for responses.create (e.g., gpt-4o-mini, gpt-4.1, gpt-5). Optional."},
    }
    required: Optional[List[str]] = ["prompt"]

    def __init__(self, api_key: str, organization_id: Optional[str] = None, model: str = "gpt-4o-mini",
                 storage_handler: Optional[FileStorageHandler] = None):
        """Store credentials, the default model and the storage handler.

        Args:
            api_key: Key passed to ``create_openai_client``.
            organization_id: Optional organization passed to
                ``create_openai_client``.
            model: Model used when a call does not specify one.
            storage_handler: Handler used to read ``image_path`` inputs;
                a ``LocalStorageHandler()`` is created when omitted.
        """
        super().__init__()
        self.api_key = api_key
        self.organization_id = organization_id
        self.model = model
        self.storage_handler = storage_handler or LocalStorageHandler()

    def _read_image_bytes(self, image_path: str):
        """Return the raw content of ``image_path`` via the storage handler."""
        # Translate the user-facing path to the handler's system path first.
        system_path = self.storage_handler.translate_in(image_path)
        # _read_raw is used deliberately: per the original author's note, the
        # high-level read() post-processes images, and the raw bytes are
        # needed here for base64 encoding.
        return self.storage_handler._read_raw(system_path)

    @staticmethod
    def _to_data_url(image_path: str, content) -> str:
        """Encode ``content`` as a base64 ``data:`` URL.

        The MIME type is guessed from the file name of ``image_path`` and
        falls back to ``image/png`` when it cannot be determined.
        """
        import base64
        import mimetypes

        mime, _ = mimetypes.guess_type(image_path)
        mime = mime or "image/png"
        b64 = base64.b64encode(content).decode("utf-8")
        return f"data:{mime};base64,{b64}"

    @staticmethod
    def _extract_text(response) -> str:
        """Pull the generated text out of ``response``.

        Prefers the unified ``output_text`` attribute. When that is missing
        or empty, concatenates every string ``text`` found in
        ``response.output[*].content[*]``, and finally falls back to
        ``response.choices[0].message.content``. Both attribute-style objects
        and plain dicts are accepted. Returns ``""`` when nothing is found.
        """
        text = getattr(response, "output_text", None)
        if isinstance(text, str) and text:
            return text

        def field(obj, key):
            # Uniform access for SDK objects and plain dict payloads.
            if isinstance(obj, dict):
                return obj.get(key)
            return getattr(obj, key, None)

        pieces = []
        output = field(response, "output")
        if isinstance(output, (list, tuple)):
            for item in output:
                content = field(item, "content")
                if isinstance(content, str):
                    pieces.append(content)
                    continue
                if not isinstance(content, (list, tuple)):
                    # Items without a content list carry no text parts.
                    continue
                for part in content:
                    part_text = field(part, "text")
                    if isinstance(part_text, str):
                        pieces.append(part_text)
        if pieces:
            return "".join(pieces)

        choices = field(response, "choices")
        if isinstance(choices, (list, tuple)) and choices:
            message = field(choices[0], "message")
            content = field(message, "content") if message is not None else None
            if isinstance(content, str):
                return content
        return ""

    def __call__(
        self,
        prompt: str,
        image_url: str = None,
        image_path: str = None,
        model: str = None,
    ) -> Dict[str, str]:
        """Send ``prompt`` plus one image to the model and return its answer.

        Args:
            prompt: User question or instruction.
            image_url: Image URL; takes precedence over ``image_path``.
            image_path: Path read through the storage handler and converted
                to a data URL when ``image_url`` is not given.
            model: Model override; defaults to the instance's ``model``.

        Returns:
            ``{"content": text}`` on success (``text`` may be empty), or
            ``{"error": message}`` when no image was supplied, the image
            could not be read, or the request failed.
        """
        try:
            # Fail fast instead of sending a request with a null image_url.
            if not image_url and not image_path:
                return {"error": "Either image_url or image_path must be provided"}

            # Resolve image source: prefer URL, else local path to data URL.
            final_image_url = image_url
            if not final_image_url:
                try:
                    content = self._read_image_bytes(image_path)
                except Exception as e:
                    return {"error": f"Could not read image {image_path}: {str(e)}"}
                final_image_url = self._to_data_url(image_path, content)

            client = create_openai_client(self.api_key, self.organization_id)
            actual_model = model if model else self.model

            response = client.responses.create(
                model=actual_model,
                input=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "input_text", "text": prompt},
                            {"type": "input_image", "image_url": final_image_url},
                        ],
                    }
                ],
            )
            return {"content": self._extract_text(response)}
        except Exception as e:
            # Tool boundary: report failures as data rather than raising.
            return {"error": f"OpenAI image analysis failed: {e}"}