# Image_agent / app.py
# Naveen671's picture
# Update app.py
# 58dfed3 verified
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
# Prompt-building helper: turns a user description into a style-tagged
# text-to-image prompt. It does not render an image itself.
@tool
def generate_anime_image(description: str, style: str = "anime") -> str:
    """A tool that builds an anime-style text-to-image prompt from a user description. It does NOT render an image: it returns an enriched prompt string that must be passed to an image generation tool.

    Args:
        description: Detailed description of the anime image to generate
        style: Style modifier (default: "anime", options: "anime", "manga", "kawaii", "realistic_anime"), matched case-insensitively with unknown values falling back to "anime"
    """
    # The docstring above is the tool description the agent reads, so it
    # states plainly that only a prompt string is produced.
    style_keywords = {
        "anime": "anime style, high quality, detailed, vibrant colors, japanese animation style, 2d animation",
        "manga": "manga style, black and white, detailed lineart, dramatic shading, monochrome",
        "kawaii": "kawaii style, cute, adorable, pastel colors, chibi proportions, moe",
        "realistic_anime": "realistic anime style, detailed face, expressive eyes, high resolution, semi-realistic",
    }

    # Accept "Manga", " kawaii " and similar spellings instead of silently
    # dropping them to the default; a non-string style also means default.
    style_key = style.strip().lower() if isinstance(style, str) else "anime"
    style_prompt = style_keywords.get(style_key, style_keywords["anime"])

    subject = str(description).strip()
    quality_tags = "masterpiece, best quality, detailed artwork"

    # Skip an empty subject so the prompt never starts with a stray ", ".
    segments = [subject, style_prompt, quality_tags]
    return ", ".join(segment for segment in segments if segment)
@tool
def analyze_image_description(user_input: str) -> str:
    """A tool that reviews a raw image description and suggests details to add for better anime image generation. It returns suggestions only and does not rewrite the description.

    Args:
        user_input: Raw user description of desired image
    """
    try:
        report_lines = [
            f"Analyzing description: '{user_input}'",
            "Suggested enhancements for anime generation:",
        ]

        # Lower-case once; every keyword check below is case-insensitive.
        lowered = user_input.lower()

        if "character" in lowered or "person" in lowered:
            report_lines.append(
                "- Consider adding details about hair color, eye color, clothing style"
            )
        if "background" not in lowered:
            report_lines.append(
                "- Consider adding background setting (school, forest, city, etc.)"
            )
        if "color" not in lowered:
            report_lines.append("- Consider specifying color palette preferences")

        # Every line, including the last, ends with a newline.
        return "\n".join(report_lines) + "\n"
    except Exception as e:
        # Kept broad on purpose: the tool's contract is to hand the agent an
        # error string (e.g. for non-string input) rather than raise.
        return f"Error analyzing description: {str(e)}"
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # The docstring wording is left untouched: it is the tool description
    # handed to the agent at runtime.
    try:
        # Resolve the zone name and format "now" in that zone in one go.
        zone_now = datetime.datetime.now(pytz.timezone(timezone))
        formatted = zone_now.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as exc:
        # Any failure (typically an unknown zone name) is reported as text.
        return f"Error fetching time for timezone '{timezone}': {str(exc)}"
    return f"The current local time in {timezone} is: {formatted}"
@tool
def enhance_anime_prompt(base_description: str, character_details: str = "", background: str = "", mood: str = "") -> str:
    """A tool that creates a comprehensive anime image prompt by combining various elements.
    Args:
        base_description: Basic description of the desired image
        character_details: Optional character-specific details (hair, eyes, clothing, etc.)
        background: Optional background setting description
        mood: Optional mood or atmosphere description
    """
    # The docstring wording is left untouched: it is the tool description
    # handed to the agent at runtime.
    try:
        # Optional pieces in output order, each with the text that introduces it.
        optional_fragments = (
            (character_details, ", {}"),
            (background, ", background: {}"),
            (mood, ", mood: {}"),
        )

        prompt = base_description
        for value, template in optional_fragments:
            if value:  # empty/omitted pieces contribute nothing
                prompt += template.format(value)

        # Fixed quality and style modifiers always close the prompt.
        prompt += ", anime style, high quality, detailed artwork, vibrant colors, professional illustration"
        return f"Enhanced anime prompt: {prompt}"
    except Exception as exc:
        return f"Error enhancing prompt: {str(exc)}"
# Tool instance registered first in the agent's tool list below
# (implementation lives in tools/final_answer.py).
final_answer = FinalAnswerTool()

# Model configuration: Qwen coder model, chosen for code generation and
# reasoning. Temperature is set slightly high for more creative image
# descriptions.
# NOTE(review): max_tokens=2096 may have been meant as 2048 - confirm.
model = HfApiModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,
    temperature=0.7,
    custom_role_conversions=None,
)
# Import image generation tool from Hub
def _load_image_generation_tool():
    """Return the first Hub image tool that loads, or None when every attempt fails."""
    # (repo id, message on success, message on failure), tried in order.
    attempts = (
        (
            "agents-course/text-to-image",
            "Successfully loaded image generation tool: {}",
            "Warning: Could not load image generation tool: {}",
        ),
        (
            "huggingface/text-to-image",
            "Loaded alternative image tool: {}",
            "Alternative tool also failed: {}",
        ),
    )
    for repo_id, loaded_message, failed_message in attempts:
        try:
            # NOTE(review): trust_remote_code=True opts in to running tool
            # code fetched from the Hub repo - confirm both repos are trusted.
            candidate = load_tool(repo_id, trust_remote_code=True)
            print(loaded_message.format(candidate.name))
            return candidate
        except Exception as err:
            # A failed attempt is only reported; the next candidate is tried.
            print(failed_message.format(err))
    return None


image_generation_tool = _load_image_generation_tool()
# Load prompt templates.
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default locale encoding (which would garble non-ASCII prompt text on
# systems whose default is not UTF-8).
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# Tools that are always registered with the agent.
tools_list = [
    final_answer,
    generate_anime_image,
    analyze_image_description,
    enhance_anime_prompt,
    get_current_time_in_timezone,
]

# Register the Hub image generation tool when it loaded; it is the only tool
# here that can actually produce an image.
if image_generation_tool:
    tools_list.append(image_generation_tool)
    print(f"Image generation tool '{image_generation_tool.name}' added to tools list")
    print(f"Tool description: {image_generation_tool.description}")
else:
    print("Warning: No image generation tool available - creating fallback tool")

    # Placeholder registered so the agent still has a tool with this name.
    # It must not claim success: no image backend is wired in, so it tells
    # the agent (and through it the user) that nothing was generated.
    @tool
    def create_anime_image(prompt: str) -> str:
        """Placeholder used when no image generation tool could be loaded. It does NOT create an image and only reports that image generation is unavailable.

        Args:
            prompt: Text description of the anime image to generate
        """
        return (
            "⚠️ Image generation is currently unavailable: no image generation "
            "tool could be loaded, so no image was created.\n\n"
            f"📝 **Requested prompt:** {prompt}"
        )

    tools_list.append(create_anime_image)
    print("Added fallback image generation tool: create_anime_image")
# Assemble the agent from the model, the tool list and the prompt templates
# prepared above.
agent = CodeAgent(
    name="AnimeImageGenerator",
    description="An AI agent specialized in generating anime-style images based on user descriptions",
    model=model,
    tools=tools_list,
    prompt_templates=prompt_templates,
    # Step budget raised for more complex image generation workflows.
    max_steps=8,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
)
# Launch the Gradio interface
if __name__ == "__main__":
    print("🎨 Starting Anime Image Generator...")

    print("\n📋 Available tools:")
    # The loop variable is deliberately not named `tool`: at module scope
    # that would rebind the `tool` decorator imported from smolagents.
    for position, registered_tool in enumerate(tools_list, 1):
        tool_name = getattr(registered_tool, "name", str(registered_tool))
        print(f" {position}. {tool_name}")

    print("\n🤖 Agent Configuration:")
    print(f" - Model: {model.model_id}")
    # Read the limit from the agent so this line cannot drift from the value
    # passed to CodeAgent.
    print(f" - Max Steps: {agent.max_steps}")
    print(f" - Tools Count: {len(tools_list)}")

    # Real image generation exists only when the Hub tool loaded. Searching
    # str(tool) for "image" inspected object reprs, not tool names, so its
    # result did not reflect actual capability.
    has_image_gen = image_generation_tool is not None
    print(f" - Image Generation: {'✅ Available' if has_image_gen else '❌ Not Available'}")

    print("\n🚀 Launching Gradio interface...")
    try:
        GradioUI(agent).launch()
    except Exception as e:
        # Top-level boundary: report the failure instead of a raw traceback.
        print(f"❌ Error launching Gradio: {e}")
        print("Try running with: python app.py")