IMAGE_GENERATION

Runtime error

File size: 10,922 Bytes

from smolagents import CodeAgent, HfApiModel, tool
import yaml
import requests
import base64
import io
import os
import hashlib
from datetime import datetime
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI

# Create a custom image generation tool that returns a string instead of AgentImage
@tool
def text_to_image_generator(prompt: str) -> str:
    """Generate an image from text and return the result as a string with image details.

    Several Hugging Face Inference API models are tried in order. The first image received is saved under the generated_images directory and a summary naming the saved file is returned. If no model yields an image, a text-only concept description is returned instead. Failures are reported in the returned string and are never raised to the caller.

    Args:
        prompt: A detailed description of the image to generate
    """
    try:
        # A blank prompt cannot produce a meaningful image; fail fast instead
        # of spending up to a minute per model on requests that cannot succeed.
        if not prompt or not prompt.strip():
            error_msg = "❌ Image generation failed: prompt must be a non-empty string"
            print(error_msg)
            return error_msg

        print(f"🎨 Starting image generation for: {prompt}")

        # Create output directory if it doesn't exist
        output_dir = "generated_images"
        os.makedirs(output_dir, exist_ok=True)

        # Method 1: Try Hugging Face Inference API (models are tried in this order)
        api_urls = [
            "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1",
            "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5",
            "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
        ]

        # HUGGINGFACE_TOKEN keeps priority for backward compatibility; HF_TOKEN
        # is the variable name used by the huggingface_hub tooling.
        hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
        headers = {}
        if hf_token:
            headers['Authorization'] = f'Bearer {hf_token}'
            print("🔐 Using Hugging Face authentication token")
        else:
            print("⚠️ No HF token found, using public access")

        # The request payload does not depend on the model, so build it once.
        payload = {
            "inputs": prompt,
            "parameters": {
                "num_inference_steps": 30,
                "guidance_scale": 7.5,
                "width": 512,
                "height": 512
            }
        }

        # File extension per reported image type; anything else falls back to .png.
        extensions = {
            "image/png": ".png",
            "image/jpeg": ".jpg",
            "image/webp": ".webp",
        }

        # Status codes that simply mean "move on to the next model".
        retry_messages = {
            503: "⏳ Model is loading, trying next option...",
            429: "⚠️ Rate limit exceeded, trying next option...",
            401: "🔐 Authentication required, trying next option...",
        }

        for api_url in api_urls:
            try:
                print(f"🔄 Trying API: {api_url}")

                response = requests.post(
                    api_url,
                    json=payload,
                    headers=headers,
                    timeout=60  # Increased timeout for image generation
                )

                print(f"📡 API Response: {response.status_code}")

                if response.status_code == 200:
                    image_bytes = response.content
                    content_type = (
                        response.headers.get("Content-Type", "")
                        .split(";")[0]
                        .strip()
                        .lower()
                    )

                    # A 200 carrying JSON/HTML or an empty body is not an image;
                    # saving it would produce a corrupt file reported as success.
                    if not image_bytes or (content_type and not content_type.startswith("image/")):
                        print(
                            "❌ API returned no image data "
                            f"(Content-Type: {content_type or 'unknown'}), trying next option..."
                        )
                        continue

                    print("✅ Image generated successfully!")

                    # Generate unique filename from the time and a short prompt digest
                    generated_at = datetime.now()
                    timestamp = generated_at.strftime("%Y%m%d_%H%M%S")
                    prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:8]
                    extension = extensions.get(content_type, ".png")
                    image_filename = f"{output_dir}/generated_image_{timestamp}_{prompt_hash}{extension}"

                    # Save the image
                    with open(image_filename, 'wb') as f:
                        f.write(image_bytes)

                    image_size = len(image_bytes)
                    print(f"💾 Image saved as: {image_filename} (Size: {image_size} bytes)")

                    return f"""✅ IMAGE GENERATED SUCCESSFULLY!

📝 Prompt: {prompt}
🖼️ File: {image_filename}
📦 Size: {image_size} bytes
🌐 API: {api_url.split('/')[-1]}
⏰ Generated: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}

The image has been generated and saved successfully. You can view it at: {os.path.abspath(image_filename)}"""

                retry_message = retry_messages.get(response.status_code)
                if retry_message:
                    print(retry_message)
                else:
                    print(f"❌ API Error: {response.status_code}")
                    if response.text:
                        print(f"Error details: {response.text[:200]}...")

            # Timeout and ConnectionError are RequestException subclasses, so
            # they must be listed first to keep their specific messages.
            except requests.exceptions.Timeout:
                print("⏰ Request timeout, trying next option...")
            except requests.exceptions.ConnectionError:
                print("🔗 Connection error, trying next option...")
            except requests.exceptions.RequestException as e:
                print(f"🔗 Request error: {e}")
            except Exception as e:
                # Best effort: one failing model must not prevent trying the next.
                print(f"🚫 Unexpected error: {e}")

        # Method 2: If all APIs fail, create a detailed description
        print("🎭 All APIs failed, generating detailed description...")

        description = f"""🎨 IMAGE CONCEPT GENERATED

📝 Original Prompt: "{prompt}"

🖼️ Detailed Visual Description:
This image would feature {prompt.lower()}, rendered in high quality with:

🎯 Main Elements:
- Primary subject matter as described in the prompt
- Professional composition and framing
- Balanced lighting and shadows
- Rich, vibrant colors

🎨 Style Characteristics:
- Digital art quality
- Realistic rendering
- High resolution output
- Professional photography aesthetic

📐 Technical Specs:
- Dimensions: 512x512 pixels (standard)
- Format: PNG with transparency support
- Quality: Professional grade
- Style: Photorealistic

💡 Note: This is a detailed concept description. To generate actual images, please:
1. Set up a Hugging Face token: export HUGGINGFACE_TOKEN=your_token_here
2. Ensure stable internet connection
3. Try again with specific, detailed prompts

⚠️ Status: Image generation APIs unavailable - concept description provided instead."""

        print("📋 Description generated successfully!")
        return description

    except Exception as e:
        # Tool boundary: report the failure as text so the agent can relay it.
        error_msg = f"❌ Image generation failed: {str(e)}"
        print(error_msg)
        return error_msg

def initialize_agent():
    """Build the image-generation CodeAgent.

    Creates the final-answer tool and the HfApiModel, loads custom prompt
    templates from ``prompts.yaml`` when that file is present and parses, and
    assembles a CodeAgent exposing the final-answer and
    text_to_image_generator tools.

    Returns:
        The configured CodeAgent instance.

    Raises:
        Exception: Any error raised while building the model, tools or agent
            is printed and then re-raised unchanged.
    """
    try:
        print("🚀 Initializing Image Generation Agent...")

        # Initialize final answer tool
        final_answer = FinalAnswerTool()

        # Create model with optimized settings
        model = HfApiModel(
            max_tokens=1024,
            temperature=0.7,
            model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
            custom_role_conversions=None,
        )

        # None tells CodeAgent to use its built-in prompt templates. The
        # previous hand-written fallback used "system"/"user" keys, which is
        # not the template schema smolagents agents read ("system_prompt",
        # ...), so the fallback path could not produce a working agent.
        prompt_templates = None
        try:
            with open("prompts.yaml", 'r', encoding='utf-8') as stream:
                # An empty file parses to None, which also selects the defaults.
                prompt_templates = yaml.safe_load(stream)
            print("✅ Loaded custom prompts from prompts.yaml")
        except FileNotFoundError:
            print("⚠️ prompts.yaml not found, using default prompts")
        except yaml.YAMLError as e:
            print(f"⚠️ Error parsing prompts.yaml: {e}, using default prompts")

        # Create agent with simplified tool set
        agent = CodeAgent(
            model=model,
            tools=[final_answer, text_to_image_generator],
            max_steps=3,
            verbosity_level=1,
            grammar=None,
            planning_interval=None,
            name="StringBasedImageAgent",
            description="AI agent that generates images and returns detailed text descriptions",
            prompt_templates=prompt_templates
        )

        print("✅ Agent initialized successfully")
        print("🎯 Available tools: FinalAnswer, TextToImageGenerator")
        return agent

    except Exception as e:
        # Report for the console log, then let the caller decide how to react.
        print(f"❌ Agent initialization failed: {e}")
        raise

def launch_agent():
    """Start the agent behind the Gradio UI, dropping to the CLI if that fails.

    The agent is built first. If constructing or launching the Gradio UI
    raises, troubleshooting hints are printed and the command-line interface
    is started with the same agent. Any other failure (including agent
    initialization) is printed and swallowed; nothing is raised or returned.
    """
    gradio_hints = (
        "💡 Possible solutions:",
        "1. Install Gradio: pip install gradio",
        "2. Check GradioUI import path",
        "3. Ensure all dependencies are installed",
    )

    try:
        agent = initialize_agent()
        print("🌐 Starting Gradio interface...")

        try:
            GradioUI(agent).launch(share=False, debug=True)
        except Exception as ui_error:
            print(f"❌ Gradio launch failed: {ui_error}")
            for hint in gradio_hints:
                print(hint)

            # Fallback: simple command line interface
            print("\n🔄 Falling back to command line interface...")
            run_cli_interface(agent)

    except Exception as launch_error:
        print(f"❌ Launch failed: {launch_error}")
        print("💡 Please check your dependencies and configuration")

def run_cli_interface(agent):
    """Run a simple interactive command-line loop as a fallback interface.

    Reads prompts from standard input, passes each non-empty one to
    ``agent.run`` and prints the result. The loop ends when the user enters
    'quit', 'exit' or 'q' (case-insensitive), presses Ctrl+C, or when
    standard input reaches end-of-file. Errors raised while processing a
    prompt are printed and the loop continues.

    Args:
        agent: Object exposing a ``run(prompt)`` method; its return value is
            printed as the result.
    """
    print("\n" + "=" * 50)
    print("🎨 IMAGE GENERATION AGENT - CLI MODE")
    print("=" * 50)
    print("Type 'quit' to exit")

    while True:
        try:
            user_input = input("\n🎯 Enter your image prompt: ").strip()
            if user_input.lower() in ['quit', 'exit', 'q']:
                print("👋 Goodbye!")
                break

            if user_input:
                print(f"\n🔄 Processing: {user_input}")
                result = agent.run(user_input)
                print(f"\n📋 Result:\n{result}")
            else:
                print("⚠️ Please enter a valid prompt")

        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is closed or not attached; input() would
            # fail again immediately, so retrying would spin forever.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            # Keep the session alive when a single prompt fails.
            print(f"❌ Error: {e}")

if __name__ == "__main__":
    # Startup banner with setup reminders, printed before the agent launches.
    divider = "=" * 50
    startup_lines = (
        divider,
        "🎨 STRING-BASED IMAGE GENERATION AGENT",
        divider,
        "💡 Setup tips:",
        "1. Set HF token: export HUGGINGFACE_TOKEN=your_token",
        "2. Install dependencies: pip install gradio pyyaml requests",
        "3. Ensure tools/ directory exists with required modules",
        divider,
    )
    for startup_line in startup_lines:
        print(startup_line)

    launch_agent()