"""String-based image generation agent.

Exposes a smolagents tool that asks the Hugging Face Inference API for an
image, stores the returned bytes on disk and reports the outcome as plain
text, plus the glue to run the agent behind a Gradio UI with a command line
fallback.
"""
import base64
import hashlib
import io
import os
from datetime import datetime
from typing import Optional

import requests
import yaml
from smolagents import CodeAgent, HfApiModel, tool

from Gradio_UI import GradioUI
from tools.final_answer import FinalAnswerTool

# Directory (relative to the working directory) where images are written.
OUTPUT_DIR = "generated_images"

# Inference endpoints, tried in order until one returns an image.
IMAGE_API_URLS = (
    "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1",
    "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5",
    "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0",
)

# Image generation is slow, so the HTTP timeout is generous.
REQUEST_TIMEOUT_SECONDS = 60

# Optional file holding custom prompt templates for the agent.
PROMPTS_PATH = "prompts.yaml"

# Log lines for HTTP statuses that simply mean "try the next endpoint".
_RETRYABLE_STATUS_MESSAGES = {
    503: "⏳ Model is loading, trying next option...",
    429: "⚠️ Rate limit exceeded, trying next option...",
    401: "🔐 Authentication required, trying next option...",
}


def _build_auth_headers() -> dict:
    """Return request headers, authenticated when HUGGINGFACE_TOKEN is set."""
    hf_token = os.getenv('HUGGINGFACE_TOKEN')
    if hf_token:
        print("🔐 Using Hugging Face authentication token")
        return {'Authorization': f'Bearer {hf_token}'}
    print("⚠️ No HF token found, using public access")
    return {}


def _request_image(api_url: str, prompt: str, headers: dict) -> Optional[bytes]:
    """Ask one endpoint for an image; return its bytes, or None on any failure."""
    print(f"🔄 Trying API: {api_url}")
    payload = {
        "inputs": prompt,
        "parameters": {
            "num_inference_steps": 30,
            "guidance_scale": 7.5,
            "width": 512,
            "height": 512,
        },
    }

    try:
        response = requests.post(
            api_url,
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.exceptions.Timeout:
        print("⏰ Request timeout, trying next option...")
        return None
    except requests.exceptions.ConnectionError:
        print("🔗 Connection error, trying next option...")
        return None
    except requests.exceptions.RequestException as e:
        print(f"🔗 Request error: {e}")
        return None

    print(f"📡 API Response: {response.status_code}")

    if response.status_code == 200:
        # A 200 with an empty or JSON/text body is not an image; saving it
        # would produce a corrupt file and a false success report.
        content_type = response.headers.get("Content-Type", "")
        if not response.content or content_type.startswith(("application/json", "text/")):
            print(
                "❌ API Error: 200 response did not contain image data "
                f"(Content-Type: {content_type or 'unknown'})"
            )
            return None
        print("✅ Image generated successfully!")
        return response.content

    retry_message = _RETRYABLE_STATUS_MESSAGES.get(response.status_code)
    if retry_message is not None:
        print(retry_message)
    else:
        print(f"❌ API Error: {response.status_code}")
        if response.text:
            print(f"Error details: {response.text[:200]}...")
    return None


def _save_image(content: bytes, prompt: str) -> str:
    """Write image bytes to OUTPUT_DIR under a unique name and return the path."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # The digest only disambiguates filenames; it is not used for security.
    prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:8]
    image_filename = f"{OUTPUT_DIR}/generated_image_{timestamp}_{prompt_hash}.png"
    with open(image_filename, 'wb') as f:
        f.write(content)
    return image_filename


def _format_success_report(prompt: str, image_filename: str, image_size: int, api_url: str) -> str:
    """Build the text summary returned to the agent after a successful save."""
    return "\n".join([
        "✅ IMAGE GENERATED SUCCESSFULLY!",
        "",
        f"📝 Prompt: {prompt}",
        f"🖼️ File: {image_filename}",
        f"📦 Size: {image_size} bytes",
        f"🌐 API: {api_url.split('/')[-1]}",
        f"⏰ Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "The image has been generated and saved successfully. "
        f"You can view it at: {os.path.abspath(image_filename)}",
    ])


def _format_fallback_description(prompt: str) -> str:
    """Build the textual concept description used when every endpoint fails."""
    return "\n".join([
        "🎨 IMAGE CONCEPT GENERATED",
        "",
        f'📝 Original Prompt: "{prompt}"',
        "",
        "🖼️ Detailed Visual Description:",
        f"This image would feature {prompt.lower()}, rendered in high quality with:",
        "",
        "🎯 Main Elements:",
        "- Primary subject matter as described in the prompt",
        "- Professional composition and framing",
        "- Balanced lighting and shadows",
        "- Rich, vibrant colors",
        "",
        "🎨 Style Characteristics:",
        "- Digital art quality",
        "- Realistic rendering",
        "- High resolution output",
        "- Professional photography aesthetic",
        "",
        "📐 Technical Specs:",
        "- Dimensions: 512x512 pixels (standard)",
        "- Format: PNG with transparency support",
        "- Quality: Professional grade",
        "- Style: Photorealistic",
        "",
        "💡 Note: This is a detailed concept description. To generate actual images, please:",
        "1. Set up a Hugging Face token: export HUGGINGFACE_TOKEN=your_token_here",
        "2. Ensure stable internet connection",
        "3. Try again with specific, detailed prompts",
        "",
        "⚠️ Status: Image generation APIs unavailable - concept description provided instead.",
    ])


# Create a custom image generation tool that returns a string instead of AgentImage
@tool
def text_to_image_generator(prompt: str) -> str:
    """Generate an image from text and return the result as a string with image details.

    Args:
        prompt: A detailed description of the image to generate

    Returns:
        A text report: the saved file details on success, a concept
        description when no API produced an image, or an error message.
    """
    try:
        print(f"🎨 Starting image generation for: {prompt}")
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        headers = _build_auth_headers()

        # Method 1: try each Hugging Face Inference API endpoint in turn.
        for api_url in IMAGE_API_URLS:
            try:
                content = _request_image(api_url, prompt, headers)
                if content is None:
                    continue

                image_filename = _save_image(content, prompt)
                image_size = len(content)
                print(f"💾 Image saved as: {image_filename} (Size: {image_size} bytes)")
                return _format_success_report(prompt, image_filename, image_size, api_url)
            except Exception as e:
                # Best effort: a failure on one endpoint (including a failed
                # write) must not stop the remaining endpoints being tried.
                print(f"🚫 Unexpected error: {e}")
                continue

        # Method 2: every endpoint failed, so describe the image instead.
        print("🎭 All APIs failed, generating detailed description...")
        description = _format_fallback_description(prompt)
        print("📋 Description generated successfully!")
        return description

    except Exception as e:
        # The tool reports failures as text rather than raising into the agent.
        error_msg = f"❌ Image generation failed: {str(e)}"
        print(error_msg)
        return error_msg


def _load_prompt_templates(path: str = PROMPTS_PATH):
    """Load custom prompt templates from YAML; return None to use the defaults.

    None is returned when the file is missing or malformed so that CodeAgent
    falls back to its built-in templates. A hand-written substitute dict would
    have to follow the library's full template schema to be usable.
    """
    try:
        with open(path, 'r', encoding='utf-8') as stream:
            prompt_templates = yaml.safe_load(stream)
    except FileNotFoundError:
        print("⚠️ prompts.yaml not found, using default prompts")
        return None
    except yaml.YAMLError as e:
        print(f"⚠️ Error parsing prompts.yaml: {e}, using default prompts")
        return None
    print("✅ Loaded custom prompts from prompts.yaml")
    return prompt_templates


def initialize_agent():
    """Build and return the image generation CodeAgent.

    Returns:
        The configured agent, equipped with the final-answer tool and
        text_to_image_generator.

    Raises:
        Exception: any error raised while building the model or the agent is
            logged and re-raised.
    """
    try:
        print("🚀 Initializing Image Generation Agent...")

        final_answer = FinalAnswerTool()

        model = HfApiModel(
            max_tokens=1024,
            temperature=0.7,
            model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
            custom_role_conversions=None,
        )

        prompt_templates = _load_prompt_templates()

        agent = CodeAgent(
            model=model,
            tools=[final_answer, text_to_image_generator],
            max_steps=3,
            verbosity_level=1,
            grammar=None,
            planning_interval=None,
            name="StringBasedImageAgent",
            description="AI agent that generates images and returns detailed text descriptions",
            prompt_templates=prompt_templates,
        )

        print("✅ Agent initialized successfully")
        print("🎯 Available tools: FinalAnswer, TextToImageGenerator")
        return agent

    except Exception as e:
        print(f"❌ Agent initialization failed: {e}")
        raise


def launch_agent():
    """Start the agent behind the Gradio UI, falling back to the CLI on failure.

    Errors are reported on stdout rather than raised.
    """
    try:
        agent = initialize_agent()
        print("🌐 Starting Gradio interface...")

        try:
            ui = GradioUI(agent)
            ui.launch(share=False, debug=True)
        except Exception as e:
            print(f"❌ Gradio launch failed: {e}")
            print("💡 Possible solutions:")
            print("1. Install Gradio: pip install gradio")
            print("2. Check GradioUI import path")
            print("3. Ensure all dependencies are installed")

            # Fallback: simple command line interface
            print("\n🔄 Falling back to command line interface...")
            run_cli_interface(agent)

    except Exception as e:
        print(f"❌ Launch failed: {e}")
        print("💡 Please check your dependencies and configuration")


def run_cli_interface(agent):
    """Run a simple read-eval-print loop around ``agent.run`` as a fallback UI.

    Args:
        agent: object exposing ``run(prompt)``; its result is printed.

    The loop ends on 'quit', 'exit' or 'q', on Ctrl-C, or when stdin reaches
    end-of-file.
    """
    print("\n" + "=" * 50)
    print("🎨 IMAGE GENERATION AGENT - CLI MODE")
    print("=" * 50)
    print("Type 'quit' to exit")

    while True:
        try:
            user_input = input("\n🎯 Enter your image prompt: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                print("👋 Goodbye!")
                break

            if user_input:
                print(f"\n🔄 Processing: {user_input}")
                result = agent.run(user_input)
                print(f"\n📋 Result:\n{result}")
            else:
                print("⚠️ Please enter a valid prompt")

        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is closed (e.g. a headless container);
            # retrying input() would fail forever, so exit like Ctrl-C does.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")


if __name__ == "__main__":
    print("=" * 50)
    print("🎨 STRING-BASED IMAGE GENERATION AGENT")
    print("=" * 50)
    print("💡 Setup tips:")
    print("1. Set HF token: export HUGGINGFACE_TOKEN=your_token")
    print("2. Install dependencies: pip install gradio pyyaml requests")
    print("3. Ensure tools/ directory exists with required modules")
    print("=" * 50)

    launch_agent()