Spaces:
Runtime error
Runtime error
File size: 10,922 Bytes
5d0f983 c19d193 5d0f983 d026c7e 6aae614 9b5b26a 5d0f983 ea4b819 5d0f983 ea4b819 5d0f983 ea4b819 5d0f983 ea4b819 d026c7e 5d0f983 d3c227f d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d026c7e 5d0f983 d3c227f 5d0f983 d026c7e d3c227f 5d0f983 ea4b819 5d0f983 d026c7e 5d0f983 6c4d748 d026c7e 8fe992b 5d0f983 d026c7e ea4b819 d026c7e 5d0f983 d026c7e ea4b819 5d0f983 d026c7e 5d0f983 d026c7e ea4b819 5d0f983 d026c7e 5d0f983 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | from smolagents import CodeAgent, HfApiModel, tool
import yaml
import requests
import base64
import io
import os
import hashlib
from datetime import datetime
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
# Create a custom image generation tool that returns a string instead of AgentImage
@tool
def text_to_image_generator(prompt: str) -> str:
    """Generate an image from text and return the result as a string with image details.

    Each hosted Stable Diffusion endpoint is tried in turn and the first image
    payload received is saved under the generated_images directory. If no
    endpoint yields an image, a textual concept description is returned instead.

    Args:
        prompt: A detailed description of the image to generate

    Returns:
        A human-readable report: the saved file details on success, a concept
        description when every endpoint fails, or an error message.
    """
    # A blank prompt cannot produce a meaningful image; report it instead of
    # spending a request on every endpoint.
    if not isinstance(prompt, str) or not prompt.strip():
        error_msg = "❌ Image generation failed: prompt must be a non-empty string"
        print(error_msg)
        return error_msg

    try:
        print(f"🎨 Starting image generation for: {prompt}")

        # Create output directory if it doesn't exist
        output_dir = "generated_images"
        os.makedirs(output_dir, exist_ok=True)

        # Method 1: Try Hugging Face Inference API
        api_urls = [
            "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1",
            "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5",
            "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0",
        ]

        # Get HF token from the environment. HUGGINGFACE_TOKEN keeps priority;
        # HF_TOKEN is accepted as a fallback name for the same secret.
        hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
        headers = {}
        if hf_token:
            headers['Authorization'] = f'Bearer {hf_token}'
            print("🔐 Using Hugging Face authentication token")
        else:
            print("⚠️ No HF token found, using public access")

        # The request payload does not depend on the endpoint, so build it once.
        payload = {
            "inputs": prompt,
            "parameters": {
                "num_inference_steps": 30,
                "guidance_scale": 7.5,
                "width": 512,
                "height": 512,
            },
        }

        # Status codes that just mean "move on to the next endpoint".
        retry_notes = {
            503: "⏳ Model is loading, trying next option...",
            429: "⚠️ Rate limit exceeded, trying next option...",
            401: "🔐 Authentication required, trying next option...",
        }

        # File extension per returned media type; anything else keeps ".png".
        extensions = {
            "image/png": "png",
            "image/jpeg": "jpg",
            "image/webp": "webp",
        }

        for api_url in api_urls:
            try:
                print(f"🔄 Trying API: {api_url}")
                response = requests.post(
                    api_url,
                    json=payload,
                    headers=headers,
                    timeout=60,  # Increased timeout for image generation
                )
                print(f"📡 API Response: {response.status_code}")

                if response.status_code == 200:
                    # A 200 is only a success if the body is really an image;
                    # otherwise a JSON/HTML body would be saved as a broken file.
                    # A missing Content-Type header is tolerated.
                    content_type = (
                        response.headers.get("Content-Type", "")
                        .split(";")[0]
                        .strip()
                        .lower()
                    )
                    if not response.content or (
                        content_type and not content_type.startswith("image/")
                    ):
                        print(
                            "❌ API returned a non-image payload "
                            f"({content_type or 'empty body'}), trying next option..."
                        )
                        continue

                    print("✅ Image generated successfully!")

                    # Generate unique filename from the time and a short prompt digest
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:8]
                    extension = extensions.get(content_type, "png")
                    image_filename = (
                        f"{output_dir}/generated_image_{timestamp}_{prompt_hash}.{extension}"
                    )

                    # Save the image
                    with open(image_filename, 'wb') as f:
                        f.write(response.content)
                    image_size = len(response.content)
                    print(f"💾 Image saved as: {image_filename} (Size: {image_size} bytes)")

                    return "\n".join([
                        "✅ IMAGE GENERATED SUCCESSFULLY!",
                        f"📝 Prompt: {prompt}",
                        f"🖼️ File: {image_filename}",
                        f"📦 Size: {image_size} bytes",
                        f"🌐 API: {api_url.split('/')[-1]}",
                        f"⏰ Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
                        "The image has been generated and saved successfully. "
                        f"You can view it at: {os.path.abspath(image_filename)}",
                    ])
                elif response.status_code in retry_notes:
                    print(retry_notes[response.status_code])
                else:
                    print(f"❌ API Error: {response.status_code}")
                    if response.text:
                        print(f"Error details: {response.text[:200]}...")
            except requests.exceptions.Timeout:
                print("⏰ Request timeout, trying next option...")
            except requests.exceptions.ConnectionError:
                print("🔗 Connection error, trying next option...")
            except requests.exceptions.RequestException as e:
                print(f"🔗 Request error: {e}")
            except Exception as e:
                # Best effort: a failure on one endpoint (e.g. the file write)
                # must not prevent trying the remaining ones.
                print(f"🚫 Unexpected error: {e}")

        # Method 2: If all APIs fail, create a detailed description
        print("🎭 All APIs failed, generating detailed description...")
        description = "\n".join([
            "🎨 IMAGE CONCEPT GENERATED",
            f'📝 Original Prompt: "{prompt}"',
            "🖼️ Detailed Visual Description:",
            f"This image would feature {prompt.lower()}, rendered in high quality with:",
            "🎯 Main Elements:",
            "- Primary subject matter as described in the prompt",
            "- Professional composition and framing",
            "- Balanced lighting and shadows",
            "- Rich, vibrant colors",
            "🎨 Style Characteristics:",
            "- Digital art quality",
            "- Realistic rendering",
            "- High resolution output",
            "- Professional photography aesthetic",
            "📐 Technical Specs:",
            "- Dimensions: 512x512 pixels (standard)",
            "- Format: PNG with transparency support",
            "- Quality: Professional grade",
            "- Style: Photorealistic",
            "💡 Note: This is a detailed concept description. To generate actual images, please:",
            "1. Set up a Hugging Face token: export HUGGINGFACE_TOKEN=your_token_here",
            "2. Ensure stable internet connection",
            "3. Try again with specific, detailed prompts",
            "⚠️ Status: Image generation APIs unavailable - concept description provided instead.",
        ])
        print("📋 Description generated successfully!")
        return description
    except Exception as e:
        # The tool always answers with a string so the agent can relay the failure.
        error_msg = f"❌ Image generation failed: {str(e)}"
        print(error_msg)
        return error_msg
def initialize_agent():
    """Build the image-generation CodeAgent.

    Creates the final-answer tool and the hosted model, loads optional prompt
    templates from ``prompts.yaml``, and assembles the agent with the
    ``text_to_image_generator`` tool.

    Returns:
        The configured ``CodeAgent`` instance.

    Raises:
        Exception: Any failure while constructing the tools, model, or agent
            is printed and re-raised unchanged.
    """
    try:
        print("🚀 Initializing Image Generation Agent...")

        # Initialize final answer tool
        final_answer = FinalAnswerTool()

        # Create model with optimized settings
        model = HfApiModel(
            max_tokens=1024,
            temperature=0.7,
            model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
            custom_role_conversions=None,
        )

        # None makes CodeAgent use its own bundled templates. A hand-written
        # partial dict (for example only "system"/"user" entries) is not the
        # template layout the agent expects, so it is not used as a fallback.
        prompt_templates = None
        try:
            with open("prompts.yaml", 'r', encoding='utf-8') as stream:
                loaded_templates = yaml.safe_load(stream)
        except FileNotFoundError:
            print("⚠️ prompts.yaml not found, using default prompts")
        except yaml.YAMLError as e:
            print(f"⚠️ Error parsing prompts.yaml: {e}, using default prompts")
        else:
            # safe_load returns None for an empty file and may return a scalar
            # or list for malformed content; only a non-empty mapping is usable.
            if isinstance(loaded_templates, dict) and loaded_templates:
                prompt_templates = loaded_templates
                print("✅ Loaded custom prompts from prompts.yaml")
            else:
                print("⚠️ prompts.yaml is empty or not a mapping, using default prompts")

        # Create agent with simplified tool set. grammar and planning_interval
        # are left at the library defaults instead of being passed as None.
        agent = CodeAgent(
            model=model,
            tools=[final_answer, text_to_image_generator],
            max_steps=3,
            verbosity_level=1,
            name="StringBasedImageAgent",
            description="AI agent that generates images and returns detailed text descriptions",
            prompt_templates=prompt_templates,
        )
        print("✅ Agent initialized successfully")
        print("🎯 Available tools: FinalAnswer, TextToImageGenerator")
        return agent
    except Exception as e:
        print(f"❌ Agent initialization failed: {e}")
        raise
def launch_agent():
    """Start the agent behind the Gradio UI, dropping to the CLI if that fails.

    Any exception raised while building the agent, or while running the CLI
    fallback, is reported on stdout rather than propagated.
    """
    try:
        image_agent = initialize_agent()
        print("🌐 Starting Gradio interface...")

        # Check if GradioUI is available
        try:
            GradioUI(image_agent).launch(share=False, debug=True)
        except Exception as gradio_error:
            troubleshooting = (
                f"❌ Gradio launch failed: {gradio_error}",
                "💡 Possible solutions:",
                "1. Install Gradio: pip install gradio",
                "2. Check GradioUI import path",
                "3. Ensure all dependencies are installed",
                # Fallback: simple command line interface
                "\n🔄 Falling back to command line interface...",
            )
            for line in troubleshooting:
                print(line)
            run_cli_interface(image_agent)
    except Exception as launch_error:
        print(f"❌ Launch failed: {launch_error}")
        print("💡 Please check your dependencies and configuration")
def run_cli_interface(agent):
    """Simple command line interface as fallback.

    Reads prompts from stdin in a loop and prints whatever ``agent.run``
    returns. The loop ends on 'quit', 'exit' or 'q', on Ctrl-C, or when stdin
    is exhausted or unreadable.

    Args:
        agent: Any object exposing ``run(prompt)``.
    """
    print("\n" + "=" * 50)
    print("🎨 IMAGE GENERATION AGENT - CLI MODE")
    print("=" * 50)
    print("Type 'quit' to exit")

    while True:
        try:
            user_input = input("\n🎯 Enter your image prompt: ").strip()
        except (KeyboardInterrupt, EOFError):
            # EOF means stdin is closed or non-interactive; retrying would
            # raise again immediately and spin forever.
            print("\n👋 Goodbye!")
            break
        except OSError as e:
            # An unreadable stdin will not recover either, so stop here.
            print(f"❌ Error: {e}")
            break

        if user_input.lower() in ('quit', 'exit', 'q'):
            print("👋 Goodbye!")
            break
        if not user_input:
            print("⚠️ Please enter a valid prompt")
            continue

        try:
            print(f"\n🔄 Processing: {user_input}")
            result = agent.run(user_input)
            print(f"\n📋 Result:\n{result}")
        except KeyboardInterrupt:
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            # One failed request should not end the session.
            print(f"❌ Error: {e}")
if __name__ == "__main__":
    # Script entry point: show the banner and setup tips, then start the app.
    banner_rule = "=" * 50
    print(banner_rule)
    print("🎨 STRING-BASED IMAGE GENERATION AGENT")
    print(banner_rule)
    print("💡 Setup tips:")
    print("1. Set HF token: export HUGGINGFACE_TOKEN=your_token")
    print("2. Install dependencies: pip install gradio pyyaml requests")
    print("3. Ensure tools/ directory exists with required modules")
    print(banner_rule)
    launch_agent()