Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,8 @@ import logging
|
|
| 10 |
import time
|
| 11 |
from typing import Optional, Tuple
|
| 12 |
import warnings
|
|
|
|
|
|
|
| 13 |
warnings.filterwarnings("ignore")
|
| 14 |
|
| 15 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -23,6 +25,33 @@ MAX_IMAGE_SIZE = 1024
|
|
| 23 |
RATE_LIMIT_DELAY = 3
|
| 24 |
API_RETRY_COUNT = 3
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
if GEMINI_API_KEY:
|
| 27 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 28 |
logger.info("Gemini API configured")
|
|
@@ -77,28 +106,34 @@ class NanoBananaApp:
|
|
| 77 |
def _apply_rate_limiting(self):
|
| 78 |
time.sleep(RATE_LIMIT_DELAY)
|
| 79 |
|
| 80 |
-
def
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
try:
|
| 85 |
buffered = io.BytesIO()
|
| 86 |
image.save(buffered, format='PNG')
|
| 87 |
image_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
| 88 |
|
| 89 |
-
|
| 90 |
-
"realistic": "photorealistic, high-quality construction, professional architecture",
|
| 91 |
-
"futuristic": "futuristic, high-tech, modern glass and steel, sci-fi architecture",
|
| 92 |
-
"artistic": "artistic, creative design, unique architecture, colorful and innovative"
|
| 93 |
-
}
|
| 94 |
-
|
| 95 |
-
mode_descriptions = {
|
| 96 |
-
"complete": "Complete this unfinished construction",
|
| 97 |
-
"edit": "Edit and transform this construction image",
|
| 98 |
-
"blend": "Blend and reimagine this construction"
|
| 99 |
-
}
|
| 100 |
-
|
| 101 |
-
full_prompt = f"{mode_descriptions.get(editing_mode, 'Transform')} {prompt}. Style: {style_modifiers.get(style, '')}. Make it look professional and realistic."
|
| 102 |
|
| 103 |
url = "https://api.freepik.com/v1/ai/gemini-2-5-flash-image-preview"
|
| 104 |
payload = {
|
|
@@ -107,7 +142,7 @@ class NanoBananaApp:
|
|
| 107 |
"webhook_url": None
|
| 108 |
}
|
| 109 |
headers = {
|
| 110 |
-
"x-freepik-api-key":
|
| 111 |
"Content-Type": "application/json"
|
| 112 |
}
|
| 113 |
|
|
@@ -119,13 +154,123 @@ class NanoBananaApp:
|
|
| 119 |
img_response = requests.get(result['image_url'], timeout=30)
|
| 120 |
if img_response.status_code == 200:
|
| 121 |
generated_image = Image.open(io.BytesIO(img_response.content))
|
| 122 |
-
return generated_image, "Generated with Freepik Gemini 2.5 Flash"
|
| 123 |
|
| 124 |
return None, f"Freepik API error: {response.status_code}"
|
| 125 |
|
| 126 |
except Exception as e:
|
| 127 |
logger.error(f"Freepik generation failed: {e}")
|
| 128 |
return None, f"Freepik error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
def load_yolo_optional(self):
|
| 131 |
if not yolo_available:
|
|
@@ -151,29 +296,36 @@ class NanoBananaApp:
|
|
| 151 |
except Exception as e:
|
| 152 |
return image, f"Detection failed: {str(e)}"
|
| 153 |
|
| 154 |
-
def
|
| 155 |
-
if not self.gemini_model:
|
| 156 |
-
if not GEMINI_API_KEY:
|
| 157 |
-
return image, "π API key required for Nano Banana. Add GEMINI_API_KEY to use this feature."
|
| 158 |
-
return image, "Gemini Nano Banana not available"
|
| 159 |
-
|
| 160 |
if not prompt.strip():
|
| 161 |
return image, "Please provide a transformation prompt"
|
| 162 |
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
try:
|
| 166 |
image = self._resize_image_if_needed(image)
|
| 167 |
self._apply_rate_limiting()
|
| 168 |
|
| 169 |
-
|
| 170 |
-
if
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
if editing_mode == "complete":
|
| 178 |
base_prompt = self._get_completion_prompt(style)
|
| 179 |
analysis_prompt = f"Analyze this construction image and describe how to {base_prompt.lower()} User request: {prompt}. Provide detailed description of the completed construction."
|
|
@@ -189,7 +341,7 @@ class NanoBananaApp:
|
|
| 189 |
buffered = io.BytesIO()
|
| 190 |
image.save(buffered, format='PNG', quality=85)
|
| 191 |
image_bytes = buffered.getvalue()
|
| 192 |
-
|
| 193 |
if len(image_bytes) > 10 * 1024 * 1024:
|
| 194 |
return image, "Image too large. Please use a smaller image."
|
| 195 |
|
|
@@ -316,6 +468,56 @@ class NanoBananaApp:
|
|
| 316 |
|
| 317 |
app = NanoBananaApp()
|
| 318 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
def process_nano_banana(image, prompt, style, editing_mode, enable_detection, enable_voice):
|
| 320 |
if not image:
|
| 321 |
return None, None, None, None, "π· Please upload an image to get started", None
|
|
@@ -430,15 +632,47 @@ with gr.Blocks(title="π Nano Banana - Dynamic Image Creation", theme=gr.theme
|
|
| 430 |
value="edit",
|
| 431 |
label="Nano Banana Mode",
|
| 432 |
info="Complete: Finish construction β’ Edit: Modify image β’ Blend: Fuse elements"
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
label="Style",
|
| 439 |
info="Choose the aesthetic approach"
|
| 440 |
)
|
| 441 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
with gr.Group():
|
| 443 |
gr.Markdown("### βοΈ Optional Features")
|
| 444 |
enable_detection = gr.Checkbox(
|
|
@@ -475,16 +709,16 @@ with gr.Blocks(title="π Nano Banana - Dynamic Image Creation", theme=gr.theme
|
|
| 475 |
with gr.Row():
|
| 476 |
gr.Examples(
|
| 477 |
examples=[
|
| 478 |
-
["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", False, False],
|
| 479 |
-
["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", True, False],
|
| 480 |
-
["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", False, True],
|
| 481 |
-
["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", False, False],
|
| 482 |
-
["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", True, False],
|
| 483 |
-
["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", False, False],
|
| 484 |
-
["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", False, False],
|
| 485 |
-
["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", False, True]
|
| 486 |
],
|
| 487 |
-
inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
|
| 488 |
label="π― Try These Examples"
|
| 489 |
)
|
| 490 |
|
|
@@ -498,9 +732,15 @@ with gr.Blocks(title="π Nano Banana - Dynamic Image Creation", theme=gr.theme
|
|
| 498 |
- **π Smart Fallbacks**: Multiple processing modes for reliability
|
| 499 |
""")
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
process_btn.click(
|
| 502 |
-
fn=
|
| 503 |
-
inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
|
| 504 |
outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
|
| 505 |
)
|
| 506 |
|
|
|
|
| 10 |
import time
|
| 11 |
from typing import Optional, Tuple
|
| 12 |
import warnings
|
| 13 |
+
import requests
|
| 14 |
+
import json
|
| 15 |
warnings.filterwarnings("ignore")
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 25 |
RATE_LIMIT_DELAY = 3
|
| 26 |
API_RETRY_COUNT = 3
|
| 27 |
|
| 28 |
+
# Registry of the image-generation backends a user can pick from.
# The dictionary keys are the labels offered in the model dropdown.
# Each entry holds:
#   "name"        - longer display name of the model
#   "api"         - backend identifier; _generate_with_model dispatches on it
#   "url"         - HTTP endpoint of the backend (None for the Gemini demo entry);
#                   NOTE(review): the generator methods visible in this file
#                   hard-code their endpoints instead of reading this field
#   "description" - short blurb returned by update_model_description
IMAGE_MODELS = {
    "Freepik Gemini 2.5 Flash": {
        "name": "Freepik Gemini 2.5 Flash Image Preview",
        "api": "freepik",
        "url": "https://api.freepik.com/v1/ai/gemini-2-5-flash-image-preview",
        "description": "π Best for competition - Real Gemini 2.5 Flash generation"
    },
    "OpenAI DALL-E 3": {
        "name": "DALL-E 3",
        "api": "openai",
        "url": "https://api.openai.com/v1/images/generations",
        "description": "π¨ High-quality creative image generation"
    },
    "Stable Diffusion XL": {
        "name": "Stable Diffusion XL",
        "api": "stabilityai",
        "url": "https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
        "description": "π Open-source high-resolution generation"
    },
    # Fallback entry: no remote image endpoint, hence url is None.
    "Gemini Analysis + Demo": {
        "name": "Gemini Analysis + Visual Demo",
        "api": "gemini",
        "url": None,
        "description": "π Analysis + visual overlay (fallback mode)"
    }
}
|
| 54 |
+
|
| 55 |
if GEMINI_API_KEY:
|
| 56 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 57 |
logger.info("Gemini API configured")
|
|
|
|
| 106 |
def _apply_rate_limiting(self):
|
| 107 |
time.sleep(RATE_LIMIT_DELAY)
|
| 108 |
|
| 109 |
+
def _generate_with_model(self, image, prompt, style, editing_mode, model_name, api_keys):
    """Route a generation request to the backend registered for ``model_name``.

    The model is looked up in ``IMAGE_MODELS``; its ``"api"`` value selects
    the generator method, and the key stored under that same identifier in
    ``api_keys`` is handed to it.

    Returns:
        Whatever the selected generator returns, or ``(None, message)`` when
        the model name or its api type is not recognised.
    """
    entry = IMAGE_MODELS.get(model_name)
    if not entry:
        return None, f"Unknown model: {model_name}"

    backend = entry["api"]

    # Backend identifier -> name of the method that implements it. The
    # identifier doubles as the lookup key inside ``api_keys``.
    method_by_backend = {
        "freepik": "_generate_with_freepik",
        "openai": "_generate_with_dalle",
        "stabilityai": "_generate_with_stable_diffusion",
        "gemini": "_generate_with_gemini_demo",
    }

    method_name = method_by_backend.get(backend)
    if method_name is None:
        return None, f"Unsupported API type: {backend}"

    generate = getattr(self, method_name)
    return generate(image, prompt, style, editing_mode, api_keys.get(backend))
|
| 126 |
+
|
| 127 |
+
def _generate_with_freepik(self, image, prompt, style, editing_mode, api_key):
|
| 128 |
+
if not api_key:
|
| 129 |
+
return None, "Freepik API key not provided"
|
| 130 |
|
| 131 |
try:
|
| 132 |
buffered = io.BytesIO()
|
| 133 |
image.save(buffered, format='PNG')
|
| 134 |
image_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
|
| 135 |
|
| 136 |
+
full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
url = "https://api.freepik.com/v1/ai/gemini-2-5-flash-image-preview"
|
| 139 |
payload = {
|
|
|
|
| 142 |
"webhook_url": None
|
| 143 |
}
|
| 144 |
headers = {
|
| 145 |
+
"x-freepik-api-key": api_key,
|
| 146 |
"Content-Type": "application/json"
|
| 147 |
}
|
| 148 |
|
|
|
|
| 154 |
img_response = requests.get(result['image_url'], timeout=30)
|
| 155 |
if img_response.status_code == 200:
|
| 156 |
generated_image = Image.open(io.BytesIO(img_response.content))
|
| 157 |
+
return generated_image, "π Generated with Freepik Gemini 2.5 Flash"
|
| 158 |
|
| 159 |
return None, f"Freepik API error: {response.status_code}"
|
| 160 |
|
| 161 |
except Exception as e:
|
| 162 |
logger.error(f"Freepik generation failed: {e}")
|
| 163 |
return None, f"Freepik error: {str(e)}"
|
| 164 |
+
|
| 165 |
+
def _generate_with_dalle(self, image, prompt, style, editing_mode, api_key):
|
| 166 |
+
if not api_key:
|
| 167 |
+
return None, "OpenAI API key not provided"
|
| 168 |
+
|
| 169 |
+
try:
|
| 170 |
+
full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
|
| 171 |
+
|
| 172 |
+
headers = {
|
| 173 |
+
"Authorization": f"Bearer {api_key}",
|
| 174 |
+
"Content-Type": "application/json"
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
payload = {
|
| 178 |
+
"model": "dall-e-3",
|
| 179 |
+
"prompt": full_prompt,
|
| 180 |
+
"n": 1,
|
| 181 |
+
"size": "1024x1024"
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
response = requests.post(
|
| 185 |
+
"https://api.openai.com/v1/images/generations",
|
| 186 |
+
headers=headers,
|
| 187 |
+
json=payload,
|
| 188 |
+
timeout=60
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
if response.status_code == 200:
|
| 192 |
+
result = response.json()
|
| 193 |
+
if result.get('data') and len(result['data']) > 0:
|
| 194 |
+
image_url = result['data'][0]['url']
|
| 195 |
+
img_response = requests.get(image_url, timeout=30)
|
| 196 |
+
if img_response.status_code == 200:
|
| 197 |
+
generated_image = Image.open(io.BytesIO(img_response.content))
|
| 198 |
+
return generated_image, "π¨ Generated with DALL-E 3"
|
| 199 |
+
|
| 200 |
+
return None, f"DALL-E API error: {response.status_code}"
|
| 201 |
+
|
| 202 |
+
except Exception as e:
|
| 203 |
+
logger.error(f"DALL-E generation failed: {e}")
|
| 204 |
+
return None, f"DALL-E error: {str(e)}"
|
| 205 |
+
|
| 206 |
+
def _generate_with_stable_diffusion(self, image, prompt, style, editing_mode, api_key):
|
| 207 |
+
if not api_key:
|
| 208 |
+
return None, "Stability AI API key not provided"
|
| 209 |
+
|
| 210 |
+
try:
|
| 211 |
+
full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
|
| 212 |
+
|
| 213 |
+
headers = {
|
| 214 |
+
"Authorization": f"Bearer {api_key}",
|
| 215 |
+
"Content-Type": "application/json"
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
payload = {
|
| 219 |
+
"text_prompts": [{"text": full_prompt}],
|
| 220 |
+
"cfg_scale": 7,
|
| 221 |
+
"height": 1024,
|
| 222 |
+
"width": 1024,
|
| 223 |
+
"samples": 1,
|
| 224 |
+
"steps": 30
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
response = requests.post(
|
| 228 |
+
"https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
|
| 229 |
+
headers=headers,
|
| 230 |
+
json=payload,
|
| 231 |
+
timeout=60
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
if response.status_code == 200:
|
| 235 |
+
result = response.json()
|
| 236 |
+
if result.get('artifacts') and len(result['artifacts']) > 0:
|
| 237 |
+
image_b64 = result['artifacts'][0]['base64']
|
| 238 |
+
image_data = base64.b64decode(image_b64)
|
| 239 |
+
generated_image = Image.open(io.BytesIO(image_data))
|
| 240 |
+
return generated_image, "π Generated with Stable Diffusion XL"
|
| 241 |
+
|
| 242 |
+
return None, f"Stability AI error: {response.status_code}"
|
| 243 |
+
|
| 244 |
+
except Exception as e:
|
| 245 |
+
logger.error(f"Stable Diffusion generation failed: {e}")
|
| 246 |
+
return None, f"Stable Diffusion error: {str(e)}"
|
| 247 |
+
|
| 248 |
+
def _generate_with_gemini_demo(self, image, prompt, style, editing_mode, api_key):
|
| 249 |
+
if not api_key:
|
| 250 |
+
try:
|
| 251 |
+
genai.configure(api_key=api_key)
|
| 252 |
+
except:
|
| 253 |
+
return None, "Gemini API key not provided or invalid"
|
| 254 |
+
|
| 255 |
+
return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
|
| 256 |
+
|
| 257 |
+
def _build_enhanced_prompt(self, prompt, style, editing_mode):
|
| 258 |
+
style_modifiers = {
|
| 259 |
+
"realistic": "photorealistic, high-quality construction, professional architecture",
|
| 260 |
+
"futuristic": "futuristic, high-tech, modern glass and steel, sci-fi architecture",
|
| 261 |
+
"artistic": "artistic, creative design, unique architecture, colorful and innovative"
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
mode_descriptions = {
|
| 265 |
+
"complete": "Complete this unfinished construction",
|
| 266 |
+
"edit": "Edit and transform this construction image",
|
| 267 |
+
"blend": "Blend and reimagine this construction"
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
base_desc = mode_descriptions.get(editing_mode, 'Transform')
|
| 271 |
+
style_desc = style_modifiers.get(style, '')
|
| 272 |
+
|
| 273 |
+
return f"{base_desc} {prompt}. Style: {style_desc}. Make it look professional and realistic. Architecture, construction, building."
|
| 274 |
|
| 275 |
def load_yolo_optional(self):
|
| 276 |
if not yolo_available:
|
|
|
|
| 296 |
except Exception as e:
|
| 297 |
return image, f"Detection failed: {str(e)}"
|
| 298 |
|
| 299 |
+
def nano_banana_edit_with_model(self, image, prompt, style="realistic", editing_mode="complete", model_name="Freepik Gemini 2.5 Flash", api_keys=None):
    """Transform ``image`` with the chosen model, falling back to demo mode.

    Args:
        image: Input image; passed through ``_resize_image_if_needed`` before use.
        prompt: User's transformation request. ``None``, empty, or
            whitespace-only prompts are rejected without any API call.
        style: Style key forwarded to the generator.
        editing_mode: Mode key forwarded to the generator.
        model_name: Key into ``IMAGE_MODELS`` selecting the backend.
        api_keys: Mapping of backend identifier to API key; ``None`` is
            treated as an empty mapping.

    Returns:
        ``(image, message)``. When the selected backend yields no image or
        anything raises, the result of ``_fallback_to_gemini_demo`` is
        returned instead and the failure is only logged.
    """
    # ``prompt`` may be None; guard before calling .strip(), matching the
    # check in process_nano_banana_with_settings.
    if not prompt or not prompt.strip():
        return image, "Please provide a transformation prompt"

    if api_keys is None:
        api_keys = {}

    logger.info(f"Processing with {model_name}: {editing_mode} mode, {style} style, prompt: {prompt[:50]}...")

    try:
        image = self._resize_image_if_needed(image)
        # Fixed pause before every backend call (see _apply_rate_limiting).
        self._apply_rate_limiting()

        result, message = self._generate_with_model(image, prompt, style, editing_mode, model_name, api_keys)
        if result is not None:
            return result, message

        logger.warning(f"{model_name} failed: {message}, falling back to demo mode")
        return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)

    except Exception as e:
        logger.error(f"Processing failed: {e}")
        return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
|
| 322 |
+
|
| 323 |
+
def _fallback_to_gemini_demo(self, image, prompt, style, editing_mode):
|
| 324 |
+
if not self.gemini_model:
|
| 325 |
+
demo_result = self._create_nano_banana_demo(image, f"Demo: {editing_mode} mode with {style} style", style, editing_mode)
|
| 326 |
+
return demo_result, f"π Demo Mode: {editing_mode} mode with {style} style"
|
| 327 |
+
|
| 328 |
+
try:
|
| 329 |
if editing_mode == "complete":
|
| 330 |
base_prompt = self._get_completion_prompt(style)
|
| 331 |
analysis_prompt = f"Analyze this construction image and describe how to {base_prompt.lower()} User request: {prompt}. Provide detailed description of the completed construction."
|
|
|
|
| 341 |
buffered = io.BytesIO()
|
| 342 |
image.save(buffered, format='PNG', quality=85)
|
| 343 |
image_bytes = buffered.getvalue()
|
| 344 |
+
|
| 345 |
if len(image_bytes) > 10 * 1024 * 1024:
|
| 346 |
return image, "Image too large. Please use a smaller image."
|
| 347 |
|
|
|
|
| 468 |
|
| 469 |
app = NanoBananaApp()
|
| 470 |
|
| 471 |
+
def update_model_description(model_name):
    """Return the description text registered for ``model_name``.

    Falls back to a fixed placeholder when the model is unknown or its entry
    has no ``"description"`` field.
    """
    entry = IMAGE_MODELS.get(model_name, {})
    return entry.get("description", "Model description not available")
|
| 473 |
+
|
| 474 |
+
def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_name, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice):
    """Handle one click of the process button.

    Optionally runs structure detection, transforms the image with the
    selected model, builds a before/after comparison and optionally a voice
    summary.

    Returns:
        A 6-tuple in the order of the click handler's outputs: original
        image, detection image, result image, comparison image, status text,
        audio.
    """
    if not image:
        return None, None, None, None, "π· Please upload an image to get started", None

    if not prompt or not prompt.strip():
        return image, image, image, None, "π Please provide a transformation prompt", None

    # Keys typed into the UI win; Freepik and Gemini fall back to module-level keys.
    user_api_keys = {
        "freepik": freepik_key or FREEPIK_API_KEY,
        "openai": openai_key,
        "stabilityai": stability_key,
        "gemini": gemini_key or GEMINI_API_KEY
    }

    try:
        if enable_detection:
            detected, detection_note = app.detect_structures_optional(image)
        else:
            detected, detection_note = image, "Detection disabled"

        edited, edit_note = app.nano_banana_edit_with_model(
            image, prompt, style, editing_mode, model_name, user_api_keys
        )

        # Input handed back untouched together with an API-key message:
        # surface that message on its own and skip comparison and voice.
        if edited == image and "API key" in edit_note:
            return image, detected, image, None, f"π {edit_note}", None

        side_by_side = app.create_comparison(image, edited)

        audio_clip = None
        voice_note = ""
        if not enable_voice:
            pass
        elif edited != image:
            narration = f"Using {model_name}, the AI processed this construction image with {editing_mode} mode and {style} style. Request: {prompt}"
            audio_clip = app.generate_voice_optional(narration)
            voice_note = "π Voice generated" if audio_clip else "π Voice unavailable"
        else:
            voice_note = "π Voice skipped (no changes)"

        summary = f"π {edit_note}\nπ Detection: {detection_note}\nπ΅ Voice: {voice_note}"
        return image, detected, edited, side_by_side, summary, audio_clip

    except Exception as exc:
        logger.error(f"Processing error: {exc}")
        return image, image, image, None, f"β Error: {str(exc)}", None
|
| 520 |
+
|
| 521 |
def process_nano_banana(image, prompt, style, editing_mode, enable_detection, enable_voice):
|
| 522 |
if not image:
|
| 523 |
return None, None, None, None, "π· Please upload an image to get started", None
|
|
|
|
| 632 |
value="edit",
|
| 633 |
label="Nano Banana Mode",
|
| 634 |
info="Complete: Finish construction β’ Edit: Modify image β’ Blend: Fuse elements"
|
| 635 |
+
)
|
| 636 |
+
|
| 637 |
+
style_selector = gr.Radio(
|
| 638 |
+
choices=["realistic", "futuristic", "artistic"],
|
| 639 |
+
value="realistic",
|
| 640 |
label="Style",
|
| 641 |
info="Choose the aesthetic approach"
|
| 642 |
)
|
| 643 |
|
| 644 |
+
with gr.Group():
|
| 645 |
+
gr.Markdown("### π€ AI Model & API Settings")
|
| 646 |
+
model_selector = gr.Dropdown(
|
| 647 |
+
choices=list(IMAGE_MODELS.keys()),
|
| 648 |
+
value="Freepik Gemini 2.5 Flash",
|
| 649 |
+
label="Image Generation Model",
|
| 650 |
+
info="Choose your preferred AI model"
|
| 651 |
+
)
|
| 652 |
+
model_description = gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
|
| 653 |
+
|
| 654 |
+
with gr.Accordion("π API Keys (Optional - Use Your Own)", open=False):
|
| 655 |
+
freepik_key = gr.Textbox(
|
| 656 |
+
label="Freepik API Key",
|
| 657 |
+
placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
|
| 658 |
+
type="password"
|
| 659 |
+
)
|
| 660 |
+
openai_key = gr.Textbox(
|
| 661 |
+
label="OpenAI API Key",
|
| 662 |
+
placeholder="Enter your OpenAI API key for DALL-E 3",
|
| 663 |
+
type="password"
|
| 664 |
+
)
|
| 665 |
+
stability_key = gr.Textbox(
|
| 666 |
+
label="Stability AI API Key",
|
| 667 |
+
placeholder="Enter your Stability AI key for Stable Diffusion XL",
|
| 668 |
+
type="password"
|
| 669 |
+
)
|
| 670 |
+
gemini_key = gr.Textbox(
|
| 671 |
+
label="Gemini API Key",
|
| 672 |
+
placeholder="Enter your Gemini API key for analysis mode",
|
| 673 |
+
type="password"
|
| 674 |
+
)
|
| 675 |
+
|
| 676 |
with gr.Group():
|
| 677 |
gr.Markdown("### βοΈ Optional Features")
|
| 678 |
enable_detection = gr.Checkbox(
|
|
|
|
| 709 |
with gr.Row():
|
| 710 |
gr.Examples(
|
| 711 |
examples=[
|
| 712 |
+
["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "Freepik Gemini 2.5 Flash", "", "", "", "", False, False],
|
| 713 |
+
["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "OpenAI DALL-E 3", "", "", "", "", True, False],
|
| 714 |
+
["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "Stable Diffusion XL", "", "", "", "", False, True],
|
| 715 |
+
["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "Freepik Gemini 2.5 Flash", "", "", "", "", False, False],
|
| 716 |
+
["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "Gemini Analysis + Demo", "", "", "", "", True, False],
|
| 717 |
+
["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "OpenAI DALL-E 3", "", "", "", "", False, False],
|
| 718 |
+
["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "Stable Diffusion XL", "", "", "", "", False, False],
|
| 719 |
+
["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "Freepik Gemini 2.5 Flash", "", "", "", "", False, True]
|
| 720 |
],
|
| 721 |
+
inputs=[image_input, prompt_input, style_selector, editing_mode, model_selector, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice],
|
| 722 |
label="π― Try These Examples"
|
| 723 |
)
|
| 724 |
|
|
|
|
| 732 |
- **π Smart Fallbacks**: Multiple processing modes for reliability
|
| 733 |
""")
|
| 734 |
|
| 735 |
+
model_selector.change(
|
| 736 |
+
fn=update_model_description,
|
| 737 |
+
inputs=[model_selector],
|
| 738 |
+
outputs=[model_description]
|
| 739 |
+
)
|
| 740 |
+
|
| 741 |
process_btn.click(
|
| 742 |
+
fn=process_nano_banana_with_settings,
|
| 743 |
+
inputs=[image_input, prompt_input, style_selector, editing_mode, model_selector, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice],
|
| 744 |
outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
|
| 745 |
)
|
| 746 |
|