# Page header captured with this file: author "vasili01";
# commit message "Update app.py"; revision d026c7e (verified).
from smolagents import CodeAgent, HfApiModel, tool
import yaml
import requests
import base64
import io
import os
import hashlib
from datetime import datetime
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
# Create a custom image generation tool that returns a string instead of AgentImage
@tool
def text_to_image_generator(prompt: str) -> str:
    """Generate an image from text and return the result as a string with image details.

    Candidate Hugging Face Inference API models are tried in order and the
    first image returned is saved under ``generated_images/``. When no model
    yields an image, a textual concept description is returned instead.

    Args:
        prompt: A detailed description of the image to generate

    Returns:
        A human-readable report: saved-file details on success, a concept
        description when every API attempt fails, or an error message.
    """
    try:
        print(f"🎨 Starting image generation for: {prompt}")

        # Reject blank prompts up front instead of spending API calls on them.
        if not prompt or not prompt.strip():
            error_msg = "❌ Image generation failed: prompt must not be empty"
            print(error_msg)
            return error_msg

        # Create output directory if it doesn't exist
        output_dir = "generated_images"
        os.makedirs(output_dir, exist_ok=True)

        # Method 1: Try Hugging Face Inference API (models tried in this order)
        api_urls = [
            "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1",
            "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5",
            "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0",
        ]

        # HUGGINGFACE_TOKEN keeps priority for backward compatibility; HF_TOKEN
        # is accepted as a fallback because it is the name the Hugging Face
        # tooling itself uses.
        hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
        headers = {}
        if hf_token:
            headers['Authorization'] = f'Bearer {hf_token}'
            print("🔐 Using Hugging Face authentication token")
        else:
            print("⚠️ No HF token found, using public access")

        # The payload does not depend on the model, so build it once.
        payload = {
            "inputs": prompt,
            "parameters": {
                "num_inference_steps": 30,
                "guidance_scale": 7.5,
                "width": 512,
                "height": 512,
            },
        }

        # Status codes with a dedicated "try the next model" message.
        retry_messages = {
            503: "⏳ Model is loading, trying next option...",
            429: "⚠️ Rate limit exceeded, trying next option...",
            401: "🔐 Authentication required, trying next option...",
        }

        # File extension per image MIME type; anything else keeps the
        # historical ".png" name.
        extensions = {
            "image/png": "png",
            "image/jpeg": "jpg",
            "image/webp": "webp",
            "image/gif": "gif",
        }

        for api_url in api_urls:
            try:
                print(f"🔄 Trying API: {api_url}")
                response = requests.post(
                    api_url,
                    json=payload,
                    headers=headers,
                    timeout=60,  # image generation can be slow
                )
                print(f"📡 API Response: {response.status_code}")

                if response.status_code != 200:
                    known_message = retry_messages.get(response.status_code)
                    if known_message:
                        print(known_message)
                    else:
                        print(f"❌ API Error: {response.status_code}")
                        if response.text:
                            print(f"Error details: {response.text[:200]}...")
                    continue

                # A 200 is not proof of an image: skip empty bodies and bodies
                # explicitly labelled as something else (e.g. a JSON message).
                # A missing Content-Type is still accepted, as before.
                content_type = (
                    response.headers.get("Content-Type", "")
                    .split(";")[0]
                    .strip()
                    .lower()
                )
                if not response.content or (
                    content_type and not content_type.startswith("image/")
                ):
                    print(
                        f"❌ API returned non-image content "
                        f"({content_type or 'empty body'}), trying next option..."
                    )
                    continue

                print("✅ Image generated successfully!")

                # Unique-ish filename: second-resolution timestamp + prompt hash.
                now = datetime.now()
                timestamp = now.strftime("%Y%m%d_%H%M%S")
                prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:8]
                extension = extensions.get(content_type, "png")
                image_filename = (
                    f"{output_dir}/generated_image_{timestamp}_{prompt_hash}.{extension}"
                )

                # Save the image
                with open(image_filename, 'wb') as f:
                    f.write(response.content)
                image_size = len(response.content)
                print(f"💾 Image saved as: {image_filename} (Size: {image_size} bytes)")

                model_name = api_url.rsplit('/', 1)[-1]
                generated_at = now.strftime('%Y-%m-%d %H:%M:%S')
                return "\n".join([
                    "✅ IMAGE GENERATED SUCCESSFULLY!",
                    f"📝 Prompt: {prompt}",
                    f"🖼️ File: {image_filename}",
                    f"📦 Size: {image_size} bytes",
                    f"🌐 API: {model_name}",
                    f"⏰ Generated: {generated_at}",
                    "The image has been generated and saved successfully. "
                    f"You can view it at: {os.path.abspath(image_filename)}",
                ])
            except requests.exceptions.Timeout:
                print("⏰ Request timeout, trying next option...")
            except requests.exceptions.ConnectionError:
                print("🔗 Connection error, trying next option...")
            except requests.exceptions.RequestException as e:
                print(f"🔗 Request error: {e}")
            except Exception as e:
                # Best effort: any other failure (e.g. writing the file) moves
                # on to the next model rather than aborting the whole tool call.
                print(f"🚫 Unexpected error: {e}")

        # Method 2: If all APIs fail, create a detailed description
        print("🎭 All APIs failed, generating detailed description...")
        description = "\n".join([
            "🎨 IMAGE CONCEPT GENERATED",
            f"📝 Original Prompt: \"{prompt}\"",
            "🖼️ Detailed Visual Description:",
            f"This image would feature {prompt.lower()}, rendered in high quality with:",
            "🎯 Main Elements:",
            "- Primary subject matter as described in the prompt",
            "- Professional composition and framing",
            "- Balanced lighting and shadows",
            "- Rich, vibrant colors",
            "🎨 Style Characteristics:",
            "- Digital art quality",
            "- Realistic rendering",
            "- High resolution output",
            "- Professional photography aesthetic",
            "📐 Technical Specs:",
            "- Dimensions: 512x512 pixels (standard)",
            "- Format: PNG with transparency support",
            "- Quality: Professional grade",
            "- Style: Photorealistic",
            "💡 Note: This is a detailed concept description. To generate actual images, please:",
            "1. Set up a Hugging Face token: export HUGGINGFACE_TOKEN=your_token_here",
            "2. Ensure stable internet connection",
            "3. Try again with specific, detailed prompts",
            "⚠️ Status: Image generation APIs unavailable - concept description provided instead.",
        ])
        print("📋 Description generated successfully!")
        return description
    except Exception as e:
        # The tool always answers with a string, never an exception.
        error_msg = f"❌ Image generation failed: {str(e)}"
        print(error_msg)
        return error_msg
def initialize_agent():
    """Build the image-generation ``CodeAgent``.

    Creates the final-answer tool and the ``HfApiModel``, loads custom prompt
    templates from ``prompts.yaml`` when that file is present and parseable,
    and wires everything together with ``text_to_image_generator``.

    Returns:
        The configured ``CodeAgent`` instance.

    Raises:
        Exception: Any error raised while building the tool, model or agent
            is printed and then re-raised unchanged.
    """
    try:
        print("🚀 Initializing Image Generation Agent...")

        # Initialize final answer tool
        final_answer = FinalAnswerTool()

        # Create model with optimized settings
        model = HfApiModel(
            max_tokens=1024,
            temperature=0.7,
            model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
            custom_role_conversions=None,
        )

        # ``None`` makes CodeAgent fall back to its own packaged templates.
        # The earlier hand-written fallback used the keys "system"/"user",
        # which are not the template keys smolagents looks up (e.g.
        # "system_prompt"), so the "default prompts" path could not work.
        prompt_templates = None
        try:
            with open("prompts.yaml", 'r', encoding='utf-8') as stream:
                loaded_templates = yaml.safe_load(stream)
        except FileNotFoundError:
            print("⚠️ prompts.yaml not found, using default prompts")
        except yaml.YAMLError as e:
            print(f"⚠️ Error parsing prompts.yaml: {e}, using default prompts")
        else:
            # An empty file parses to None and a malformed one may parse to a
            # list or scalar; only a non-empty mapping is usable as templates.
            if isinstance(loaded_templates, dict) and loaded_templates:
                prompt_templates = loaded_templates
                print("✅ Loaded custom prompts from prompts.yaml")
            else:
                print("⚠️ prompts.yaml is empty or not a mapping, using default prompts")

        # Create agent with simplified tool set
        agent = CodeAgent(
            model=model,
            tools=[final_answer, text_to_image_generator],
            max_steps=3,
            verbosity_level=1,
            grammar=None,
            planning_interval=None,
            name="StringBasedImageAgent",
            description="AI agent that generates images and returns detailed text descriptions",
            prompt_templates=prompt_templates,
        )
        print("✅ Agent initialized successfully")
        print("🎯 Available tools: FinalAnswer, TextToImageGenerator")
        return agent
    except Exception as e:
        # Report here, but let the caller decide how to recover.
        print(f"❌ Agent initialization failed: {e}")
        raise
def launch_agent():
    """Start the agent, preferring the Gradio UI and falling back to the CLI.

    The agent is built first. If creating or launching the Gradio UI raises,
    troubleshooting hints are printed and the command-line loop is started
    with the same agent. Any other failure (including agent initialization)
    is reported on stdout and swallowed.
    """
    try:
        image_agent = initialize_agent()
        print("🌐 Starting Gradio interface...")
        try:
            # Constructing and launching are guarded together so that either
            # failure triggers the CLI fallback.
            GradioUI(image_agent).launch(share=False, debug=True)
        except Exception as gradio_error:
            print(f"❌ Gradio launch failed: {gradio_error}")
            troubleshooting = (
                "💡 Possible solutions:",
                "1. Install Gradio: pip install gradio",
                "2. Check GradioUI import path",
                "3. Ensure all dependencies are installed",
            )
            for hint in troubleshooting:
                print(hint)
            # Fallback: simple command line interface
            print("\n🔄 Falling back to command line interface...")
            run_cli_interface(image_agent)
    except Exception as launch_error:
        print(f"❌ Launch failed: {launch_error}")
        print("💡 Please check your dependencies and configuration")
def run_cli_interface(agent):
    """Run a minimal interactive prompt loop as a fallback user interface.

    Reads prompts from standard input and passes each non-empty one to
    ``agent.run``, printing the result. The loop ends on ``quit``/``exit``/``q``
    (case-insensitive), on Ctrl+C, or when standard input is exhausted.

    Args:
        agent: Object exposing ``run(prompt)``; its return value is printed.
    """
    separator = "=" * 50
    print("\n" + separator)
    print("🎨 IMAGE GENERATION AGENT - CLI MODE")
    print(separator)
    print("Type 'quit' to exit")

    exit_commands = {'quit', 'exit', 'q'}
    while True:
        try:
            user_input = input("\n🎯 Enter your image prompt: ").strip()
            if user_input.lower() in exit_commands:
                print("👋 Goodbye!")
                break
            if not user_input:
                print("⚠️ Please enter a valid prompt")
                continue
            print(f"\n🔄 Processing: {user_input}")
            result = agent.run(user_input)
            print(f"\n📋 Result:\n{result}")
        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is closed or exhausted (piped input, no
            # TTY). Left to the generic handler below, input() would raise
            # again on every iteration and the loop would never end.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            # Keep the session alive when a single request fails.
            print(f"❌ Error: {e}")
if __name__ == "__main__":
    # Script entry point: show the banner and setup hints, then launch.
    divider = "=" * 50
    startup_lines = (
        divider,
        "🎨 STRING-BASED IMAGE GENERATION AGENT",
        divider,
        "💡 Setup tips:",
        "1. Set HF token: export HUGGINGFACE_TOKEN=your_token",
        "2. Install dependencies: pip install gradio pyyaml requests",
        "3. Ensure tools/ directory exists with required modules",
        divider,
    )
    for startup_line in startup_lines:
        print(startup_line)
    launch_agent()