iLOVE2D
/

selfevolveagent

Model card Files Files and versions

selfevolveagent / evoagentx /tools /image_tools /openrouter_image_tools /image_analysis.py

iLOVE2D's picture

Upload 2846 files

5374a2d verified 19 days ago

history blame contribute delete

4.42 kB

	import base64
	import mimetypes
	import os
	from typing import Dict, Optional, List

	import requests

	from ...tool import Tool
	from ...storage_handler import FileStorageHandler, LocalStorageHandler


	class ImageAnalysisTool(Tool):
	    """Send a prompt, plus at most one image or PDF, to a chat model on OpenRouter.

	    The attachment is chosen from the first non-empty argument among
	    ``image_url``, ``image_path`` and ``pdf_path`` (in that order); the
	    others are ignored. Local files are read through ``storage_handler``
	    and embedded in the request as base64 data URLs.

	    Every failure is reported as a dict with an ``"error"`` key rather than
	    as a raised exception.
	    """

	    name: str = "image_analysis"
	    description: str = (
	        "Analyze and understand images and PDF documents using a multimodal LLM (via OpenRouter). "
	        "Supports image URLs, local image files, and local PDF files."
	    )

	    inputs: Dict[str, Dict[str, str]] = {
	        "prompt": {"type": "string", "description": "Question or instruction for image/PDF analysis."},
	        "image_url": {"type": "string", "description": "URL of the image (optional)."},
	        "image_path": {"type": "string", "description": "Local image file path (optional)."},
	        "pdf_path": {"type": "string", "description": "Local PDF file path (optional)."},
	    }
	    required: Optional[List[str]] = ["prompt"]

	    def __init__(
	        self,
	        api_key,
	        model="openai/gpt-4o",
	        storage_handler: Optional[FileStorageHandler] = None,
	        timeout: float = 60.0,
	    ):
	        """Store the credentials and collaborators used by ``__call__``.

	        Args:
	            api_key: Token sent as the ``Authorization: Bearer`` header.
	            model: Model identifier placed in the request payload.
	            storage_handler: Object whose ``read(path)`` returns a dict with
	                ``"success"`` and ``"content"`` keys; defaults to a new
	                ``LocalStorageHandler``.
	            timeout: Seconds passed to ``requests.post`` as its ``timeout``
	                so a stalled connection cannot block the caller forever.
	        """
	        super().__init__()
	        self.api_key = api_key
	        self.model = model
	        self.storage_handler = storage_handler or LocalStorageHandler()
	        self.timeout = timeout

	    def _read_base64(self, path: str, label: str):
	        """Read ``path`` through the storage handler and base64-encode it.

	        Args:
	            path: Location handed to ``storage_handler.read``.
	            label: Word used in error messages (``"image"`` or ``"PDF"``).

	        Returns:
	            ``(encoded_text, None)`` on success, or ``(None, error_dict)``
	            when the read fails or raises.
	        """
	        try:
	            result = self.storage_handler.read(path)
	            if not result["success"]:
	                reason = result.get('error', 'Unknown error')
	                return None, {"error": f"Failed to read {label}: {reason}"}

	            content = result["content"]
	            if not isinstance(content, bytes):
	                # NOTE(review): non-bytes content is stringified before
	                # encoding, as the original code did; confirm the storage
	                # handler returns raw bytes for binary files.
	                content = str(content).encode("utf-8")

	            return base64.b64encode(content).decode("utf-8"), None
	        except Exception as e:
	            return None, {"error": f"Failed to read {label}: {e}"}

	    def __call__(
	        self,
	        prompt: str,
	        image_url: Optional[str] = None,
	        image_path: Optional[str] = None,
	        pdf_path: Optional[str] = None,
	    ):
	        """Run the analysis request and return the model's answer.

	        Args:
	            prompt: Question or instruction sent as the text part.
	            image_url: URL of an image to attach.
	            image_path: Path of an image to read via the storage handler.
	            pdf_path: Path of a PDF to read via the storage handler.

	        Returns:
	            ``{"content": ..., "usage": ...}`` on success. On failure a dict
	            with an ``"error"`` message, plus ``"raw"`` (the response text)
	            when a response body was received.
	        """
	        parts = [{"type": "text", "text": prompt}]

	        if image_url:
	            parts.append({
	                "type": "image_url",
	                "image_url": {"url": image_url}
	            })
	        elif image_path:
	            encoded, failure = self._read_base64(image_path, "image")
	            if failure is not None:
	                return failure
	            # Label the data URL with the file's real image type; fall back
	            # to JPEG when the extension is unknown or not an image.
	            mime_type = mimetypes.guess_type(image_path)[0]
	            if not mime_type or not mime_type.startswith("image/"):
	                mime_type = "image/jpeg"
	            parts.append({
	                "type": "image_url",
	                "image_url": {"url": f"data:{mime_type};base64,{encoded}"}
	            })
	        elif pdf_path:
	            encoded, failure = self._read_base64(pdf_path, "PDF")
	            if failure is not None:
	                return failure
	            parts.append({
	                "type": "file",
	                "file": {
	                    # basename also handles platform-specific separators.
	                    "filename": os.path.basename(pdf_path),
	                    "file_data": f"data:application/pdf;base64,{encoded}",
	                }
	            })

	        payload = {
	            "model": self.model,
	            "messages": [{"role": "user", "content": parts}],
	        }
	        url = "https://openrouter.ai/api/v1/chat/completions"
	        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

	        try:
	            response = requests.post(url, headers=headers, json=payload, timeout=self.timeout)
	        except requests.RequestException as e:
	            # Keep the tool's contract: report failures, do not raise.
	            return {"error": f"OpenRouter request failed: {e}"}

	        try:
	            data = response.json()

	            # Surface an API-level error instead of returning empty content.
	            api_error = data.get("error")
	            if api_error:
	                if isinstance(api_error, dict):
	                    api_error = api_error.get("message", api_error)
	                return {"error": f"OpenRouter API error: {api_error}", "raw": response.text}

	            return {
	                "content": data.get("choices", [{}])[0].get("message", {}).get("content", ""),
	                "usage": data.get("usage", {})
	            }
	        except Exception as e:
	            return {"error": f"Failed to parse OpenRouter response: {e}", "raw": response.text}

	# NOTE: ImageAnalysisToolkit was moved to toolkit.py, where the toolkits are consolidated.