# water3 / agent /tools /image_gen_tool.py
# onewayto's picture
# Upload 187 files
# 070daf8 verified
"""
Image Generation Tool - Generate images using AI
"""
import base64
import logging
import os
import tempfile
from datetime import datetime
from typing import Any, Dict
from urllib.parse import quote

import httpx
logger = logging.getLogger(__name__)
# Tool specification for the ``generate_image`` tool: its name, a
# human-readable description, and a JSON-Schema style description of the
# arguments accepted by ``generate_image_handler``.
IMAGE_GEN_TOOL_SPEC = {
    "name": "generate_image",
    "description": (
        "Generate images using AI based on text prompts. "
        "Use this to create illustrations, diagrams, "
        "artwork, or visual content. "
        "Supports various styles and can save images to disk."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            # Free-text description of the desired picture.
            "prompt": {
                "type": "string",
                "description": "Detailed description of the image to generate",
            },
            # Name under which the generated image is written to disk.
            "filename": {
                "type": "string",
                "description": "Output filename for the image (e.g., 'image.png')",
            },
            # Requested dimensions, formatted as "<width>x<height>".
            "size": {
                "type": "string",
                "enum": [
                    "256x256",
                    "512x512",
                    "1024x1024",
                ],
                "description": "Image size (default: 512x512)",
            },
        },
        "required": ["prompt", "filename"],
    },
}
# Fallbacks used when the caller omits (or passes unusable) optional arguments.
_DEFAULT_IMAGE_FILENAME = "generated_image.png"
_DEFAULT_IMAGE_SIZE = "512x512"
# File extensions accepted as-is; anything else gets ".png" appended.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")


def _sanitize_image_filename(filename: Any) -> str:
    """Return a bare image file name derived from *filename*.

    Directory components are removed so the name can never point outside the
    directories the handler writes to. Non-string or empty values fall back
    to the default name, and ".png" is appended when the name does not
    already end in a known image extension (compared case-insensitively).
    """
    if not isinstance(filename, str):
        return _DEFAULT_IMAGE_FILENAME
    # Normalise backslashes first so Windows-style paths are stripped on
    # POSIX hosts as well.
    name = os.path.basename(filename.strip().replace("\\", "/"))
    if name in ("", ".", ".."):
        return _DEFAULT_IMAGE_FILENAME
    if not name.lower().endswith(_IMAGE_EXTENSIONS):
        name += ".png"
    return name


def _parse_image_size(size: Any) -> tuple[int, int]:
    """Parse a ``"<width>x<height>"`` value into two positive integers.

    ``None`` selects the default size. Raises ``ValueError`` with a
    user-facing message when the value is malformed or non-positive.
    """
    if size is None:
        size = _DEFAULT_IMAGE_SIZE
    message = (
        f"Invalid size {size!r}; expected WIDTHxHEIGHT with positive "
        f"integers (e.g. '{_DEFAULT_IMAGE_SIZE}')"
    )
    parts = str(size).lower().split("x")
    if len(parts) != 2:
        raise ValueError(message)
    try:
        width, height = int(parts[0]), int(parts[1])
    except ValueError:
        raise ValueError(message) from None
    if width <= 0 or height <= 0:
        raise ValueError(message)
    return width, height


def _build_image_url(prompt: str, width: int, height: int) -> str:
    """Build the Pollinations image URL for *prompt* at the given dimensions."""
    # The prompt is a single path segment: encode everything, including "/",
    # "?", "#", "&" and "%", so it cannot alter the URL structure.
    encoded_prompt = quote(prompt, safe="")
    return (
        f"https://image.pollinations.ai/prompt/{encoded_prompt}"
        f"?width={width}&height={height}&nologo=true"
    )


async def generate_image_handler(
    arguments: Dict[str, Any],
) -> "tuple[str, bool] | tuple[str, bool, Dict[str, Any]]":
    """Handler for generating images using Pollinations AI (free, no API key needed).

    Args:
        arguments: Tool-call arguments. Reads ``prompt`` (required, non-blank
            string), ``filename`` (optional, defaults to
            ``generated_image.png``) and ``size`` (optional
            ``"<width>x<height>"``, defaults to ``512x512``).

    Returns:
        On success a 3-tuple ``(message, True, file_data)`` where
        ``file_data`` is a dict describing the generated file, including its
        base64-encoded content. On failure a 2-tuple ``(error_message,
        False)``. Exceptions are caught and reported through the failure
        tuple rather than propagated.

    Side effects:
        Writes the image to ``<tempdir>/kimi_agent_outputs/<filename>`` and
        to ``<filename>`` in the current working directory.
    """
    try:
        prompt = arguments.get("prompt")
        if not isinstance(prompt, str) or not prompt.strip():
            return "Error: Image prompt is required", False

        # Validate cheap inputs before doing any network or disk work.
        try:
            width, height = _parse_image_size(arguments.get("size"))
        except ValueError as size_error:
            return f"Error: {size_error}", False

        filename = _sanitize_image_filename(
            arguments.get("filename", _DEFAULT_IMAGE_FILENAME)
        )

        logger.info("Generating image with prompt: %s...", prompt[:100])

        # Use Pollinations AI for image generation (free, no API key)
        image_url = _build_image_url(prompt, width, height)

        # Download the image
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(image_url)
            response.raise_for_status()
            image_data = response.content

        if not image_data:
            # An empty body would otherwise be saved and reported as success.
            return "❌ Error generating image: empty response from image service", False

        # Save image
        output_dir = os.path.join(tempfile.gettempdir(), "kimi_agent_outputs")
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, filename)
        with open(output_path, "wb") as f:
            f.write(image_data)

        # Also save to current working directory
        with open(filename, "wb") as f:
            f.write(image_data)

        file_size = len(image_data)
        # Take one timestamp so the id and created_at fields agree.
        now = datetime.now()

        # Create file data for file_generated event
        file_content_b64 = base64.b64encode(image_data).decode()
        file_data = {
            "id": f"file_{now.strftime('%Y%m%d%H%M%S')}_{filename}",
            "name": filename,
            "type": "image",
            "content": file_content_b64,
            "language": None,
            "originalContent": None,
            "modifiedContent": file_content_b64,
            "size": file_size,
            "created_at": now.isoformat(),
        }

        prompt_preview = f"{prompt[:100]}{'...' if len(prompt) > 100 else ''}"
        return (
            f"✅ Image generated successfully!\n"
            f"🖼️ Filename: {filename}\n"
            f"📐 Size: {width}x{height}\n"
            f"💾 File size: {file_size / 1024:.2f} KB\n"
            f"📁 Saved to: {output_path}\n"
            f"🎨 Prompt: {prompt_preview}",
            True,
            file_data,  # Return file data for event
        )
    except Exception as e:
        # Boundary of the tool call: report the failure to the caller
        # instead of raising, but keep the traceback in the log.
        logger.exception("Image generation error: %s", e)
        return f"❌ Error generating image: {e}", False