Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,25 +30,7 @@ IMAGE_MODELS = {
|
|
| 30 |
"name": "Freepik Gemini 2.5 Flash Image Preview",
|
| 31 |
"api": "freepik",
|
| 32 |
"url": "https://api.freepik.com/v1/ai/text-to-image",
|
| 33 |
-
"description": "π <strong>
|
| 34 |
-
},
|
| 35 |
-
"OpenAI DALL-E 3": {
|
| 36 |
-
"name": "DALL-E 3",
|
| 37 |
-
"api": "openai",
|
| 38 |
-
"url": "https://api.openai.com/v1/images/generations",
|
| 39 |
-
"description": "π¨ <strong>High-quality creative images</strong> - Excellent for artistic styles<br/>π³ <em>Requires OpenAI API key + billing account</em>"
|
| 40 |
-
},
|
| 41 |
-
"Stable Diffusion XL": {
|
| 42 |
-
"name": "Stable Diffusion XL",
|
| 43 |
-
"api": "stabilityai",
|
| 44 |
-
"url": "https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
|
| 45 |
-
"description": "π <strong>Open-source powerhouse</strong> - High-resolution generation<br/>π° <em>Requires Stability AI credits + API key</em>"
|
| 46 |
-
},
|
| 47 |
-
"Gemini Analysis + Demo": {
|
| 48 |
-
"name": "Gemini Analysis + Visual Demo",
|
| 49 |
-
"api": "gemini",
|
| 50 |
-
"url": None,
|
| 51 |
-
"description": "π <strong>Smart fallback</strong> - AI analysis + visual demo overlay<br/>β
<em>Works with basic Gemini key or demo mode</em>"
|
| 52 |
}
|
| 53 |
}
|
| 54 |
|
|
@@ -118,12 +100,6 @@ class NanoBananaApp:
|
|
| 118 |
|
| 119 |
if api_type == "freepik":
|
| 120 |
return self._generate_with_freepik(image, prompt, style, editing_mode, api_keys.get("freepik"))
|
| 121 |
-
elif api_type == "openai":
|
| 122 |
-
return self._generate_with_dalle(image, prompt, style, editing_mode, api_keys.get("openai"))
|
| 123 |
-
elif api_type == "stabilityai":
|
| 124 |
-
return self._generate_with_stable_diffusion(image, prompt, style, editing_mode, api_keys.get("stabilityai"))
|
| 125 |
-
elif api_type == "gemini":
|
| 126 |
-
return self._generate_with_gemini_demo(image, prompt, style, editing_mode, api_keys.get("gemini"))
|
| 127 |
else:
|
| 128 |
return None, f"Unsupported API type: {api_type}"
|
| 129 |
|
|
@@ -223,105 +199,6 @@ class NanoBananaApp:
|
|
| 223 |
logger.error(f"Freepik generation failed: {e}")
|
| 224 |
return None, f"Freepik error: {str(e)}"
|
| 225 |
|
| 226 |
-
def _generate_with_dalle(self, image, prompt, style, editing_mode, api_key):
|
| 227 |
-
if not api_key:
|
| 228 |
-
return None, "OpenAI API key not provided"
|
| 229 |
-
|
| 230 |
-
try:
|
| 231 |
-
full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
|
| 232 |
-
|
| 233 |
-
headers = {
|
| 234 |
-
"Authorization": f"Bearer {api_key}",
|
| 235 |
-
"Content-Type": "application/json"
|
| 236 |
-
}
|
| 237 |
-
|
| 238 |
-
payload = {
|
| 239 |
-
"model": "dall-e-3",
|
| 240 |
-
"prompt": full_prompt,
|
| 241 |
-
"n": 1,
|
| 242 |
-
"size": "1024x1024"
|
| 243 |
-
}
|
| 244 |
-
|
| 245 |
-
response = requests.post(
|
| 246 |
-
"https://api.openai.com/v1/images/generations",
|
| 247 |
-
headers=headers,
|
| 248 |
-
json=payload,
|
| 249 |
-
timeout=60
|
| 250 |
-
)
|
| 251 |
-
|
| 252 |
-
if response.status_code == 200:
|
| 253 |
-
result = response.json()
|
| 254 |
-
if result.get('data') and len(result['data']) > 0:
|
| 255 |
-
image_url = result['data'][0]['url']
|
| 256 |
-
img_response = requests.get(image_url, timeout=30)
|
| 257 |
-
if img_response.status_code == 200:
|
| 258 |
-
generated_image = Image.open(io.BytesIO(img_response.content))
|
| 259 |
-
return generated_image, "π¨ Generated with DALL-E 3"
|
| 260 |
-
|
| 261 |
-
return None, f"DALL-E API error: {response.status_code}"
|
| 262 |
-
|
| 263 |
-
except Exception as e:
|
| 264 |
-
logger.error(f"DALL-E generation failed: {e}")
|
| 265 |
-
return None, f"DALL-E error: {str(e)}"
|
| 266 |
-
|
| 267 |
-
def _generate_with_stable_diffusion(self, image, prompt, style, editing_mode, api_key):
|
| 268 |
-
if not api_key:
|
| 269 |
-
return None, "Stability AI API key not provided"
|
| 270 |
-
|
| 271 |
-
try:
|
| 272 |
-
full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
|
| 273 |
-
|
| 274 |
-
headers = {
|
| 275 |
-
"Authorization": f"Bearer {api_key}",
|
| 276 |
-
"Content-Type": "application/json"
|
| 277 |
-
}
|
| 278 |
-
|
| 279 |
-
payload = {
|
| 280 |
-
"text_prompts": [{"text": full_prompt}],
|
| 281 |
-
"cfg_scale": 7,
|
| 282 |
-
"height": 1024,
|
| 283 |
-
"width": 1024,
|
| 284 |
-
"samples": 1,
|
| 285 |
-
"steps": 30
|
| 286 |
-
}
|
| 287 |
-
|
| 288 |
-
response = requests.post(
|
| 289 |
-
"https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
|
| 290 |
-
headers=headers,
|
| 291 |
-
json=payload,
|
| 292 |
-
timeout=60
|
| 293 |
-
)
|
| 294 |
-
|
| 295 |
-
if response.status_code == 200:
|
| 296 |
-
result = response.json()
|
| 297 |
-
if result.get('artifacts') and len(result['artifacts']) > 0:
|
| 298 |
-
image_b64 = result['artifacts'][0]['base64']
|
| 299 |
-
image_data = base64.b64decode(image_b64)
|
| 300 |
-
generated_image = Image.open(io.BytesIO(image_data))
|
| 301 |
-
return generated_image, "π Generated with Stable Diffusion XL"
|
| 302 |
-
|
| 303 |
-
return None, f"Stability AI error: {response.status_code}"
|
| 304 |
-
|
| 305 |
-
except Exception as e:
|
| 306 |
-
logger.error(f"Stable Diffusion generation failed: {e}")
|
| 307 |
-
return None, f"Stable Diffusion error: {str(e)}"
|
| 308 |
-
|
| 309 |
-
def _generate_with_gemini_demo(self, image, prompt, style, editing_mode, api_key):
|
| 310 |
-
if api_key:
|
| 311 |
-
try:
|
| 312 |
-
# Temporarily configure with user's key
|
| 313 |
-
original_key = GEMINI_API_KEY
|
| 314 |
-
genai.configure(api_key=api_key)
|
| 315 |
-
result = self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
|
| 316 |
-
# Restore original key
|
| 317 |
-
if original_key:
|
| 318 |
-
genai.configure(api_key=original_key)
|
| 319 |
-
return result
|
| 320 |
-
except Exception as e:
|
| 321 |
-
logger.warning(f"User Gemini key failed: {e}")
|
| 322 |
-
return None, f"Invalid Gemini API key: {str(e)}"
|
| 323 |
-
|
| 324 |
-
return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
|
| 325 |
|
| 326 |
def _build_enhanced_prompt(self, prompt, style, editing_mode):
|
| 327 |
style_modifiers = {
|
|
@@ -407,9 +284,9 @@ class NanoBananaApp:
|
|
| 407 |
|
| 408 |
for attempt in range(API_RETRY_COUNT):
|
| 409 |
try:
|
| 410 |
-
|
| 411 |
image.save(buffered, format='PNG', quality=85)
|
| 412 |
-
|
| 413 |
|
| 414 |
if len(image_bytes) > 10 * 1024 * 1024:
|
| 415 |
return image, "Image too large. Please use a smaller image."
|
|
@@ -417,11 +294,11 @@ class NanoBananaApp:
|
|
| 417 |
try:
|
| 418 |
response = self.gemini_model.generate_content([
|
| 419 |
analysis_prompt,
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
except Exception as img_error:
|
| 426 |
if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
|
| 427 |
logger.warning("API key doesn't support image processing, using demo mode")
|
|
@@ -537,10 +414,8 @@ class NanoBananaApp:
|
|
| 537 |
|
| 538 |
app = NanoBananaApp()
|
| 539 |
|
| 540 |
-
def update_model_description(model_name):
|
| 541 |
-
return IMAGE_MODELS.get(model_name, {}).get("description", "Model description not available")
|
| 542 |
|
| 543 |
-
def
|
| 544 |
if not image:
|
| 545 |
return None, None, None, None, "π· Please upload an image to get started", None
|
| 546 |
|
|
@@ -548,10 +423,7 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
|
|
| 548 |
return image, image, image, None, "π Please provide a transformation prompt", None
|
| 549 |
|
| 550 |
user_api_keys = {
|
| 551 |
-
"freepik": freepik_key or FREEPIK_API_KEY
|
| 552 |
-
"openai": openai_key,
|
| 553 |
-
"stabilityai": stability_key,
|
| 554 |
-
"gemini": gemini_key or GEMINI_API_KEY
|
| 555 |
}
|
| 556 |
|
| 557 |
try:
|
|
@@ -561,12 +433,14 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
|
|
| 561 |
if enable_detection:
|
| 562 |
detection_result, detection_msg = app.detect_structures_optional(image)
|
| 563 |
|
|
|
|
| 564 |
processed_image, process_msg = app.nano_banana_edit_with_model(
|
| 565 |
-
image, prompt, style, editing_mode,
|
| 566 |
)
|
| 567 |
|
|
|
|
| 568 |
if processed_image == image and "API key" in process_msg:
|
| 569 |
-
|
| 570 |
|
| 571 |
comparison = app.create_comparison(image, processed_image)
|
| 572 |
|
|
@@ -574,7 +448,7 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
|
|
| 574 |
voice_msg = ""
|
| 575 |
if enable_voice:
|
| 576 |
if processed_image != image:
|
| 577 |
-
voice_text = f"Using
|
| 578 |
audio = app.generate_voice_optional(voice_text)
|
| 579 |
voice_msg = "π Voice generated" if audio else "π Voice unavailable"
|
| 580 |
else:
|
|
@@ -657,21 +531,13 @@ custom_css = """
|
|
| 657 |
"""
|
| 658 |
|
| 659 |
demo_mode_notice = ""
|
| 660 |
-
if not FREEPIK_API_KEY
|
| 661 |
-
demo_mode_notice = """
|
| 662 |
-
<div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;">
|
| 663 |
-
<h3>π API Keys Required</h3>
|
| 664 |
-
<p><strong>For best results:</strong> Add <strong>FREEPIK_API_KEY</strong> for real image generation</p>
|
| 665 |
-
<p><strong>For analysis:</strong> Add <strong>GEMINI_API_KEY</strong> from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p>
|
| 666 |
-
<p><em>Demo mode available without API keys</em></p>
|
| 667 |
-
</div>
|
| 668 |
-
"""
|
| 669 |
-
elif not FREEPIK_API_KEY:
|
| 670 |
demo_mode_notice = """
|
| 671 |
-
<div style="background: #
|
| 672 |
-
<h3>π
|
| 673 |
-
<p>Add <strong>FREEPIK_API_KEY</strong>
|
| 674 |
-
<p
|
|
|
|
| 675 |
</div>
|
| 676 |
"""
|
| 677 |
|
|
@@ -711,35 +577,15 @@ with gr.Blocks(title="π Nano Banana - Dynamic Image Creation", theme=gr.theme
|
|
| 711 |
)
|
| 712 |
|
| 713 |
with gr.Group():
|
| 714 |
-
gr.Markdown("###
|
| 715 |
-
|
| 716 |
-
choices=list(IMAGE_MODELS.keys()),
|
| 717 |
-
value="Freepik Gemini 2.5 Flash",
|
| 718 |
-
label="Image Generation Model",
|
| 719 |
-
info="Choose your preferred AI model"
|
| 720 |
-
)
|
| 721 |
-
model_description = gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
|
| 722 |
|
| 723 |
-
with gr.Accordion("π API
|
| 724 |
freepik_key = gr.Textbox(
|
| 725 |
label="Freepik API Key",
|
| 726 |
placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
|
| 727 |
-
type="password"
|
| 728 |
-
|
| 729 |
-
openai_key = gr.Textbox(
|
| 730 |
-
label="OpenAI API Key",
|
| 731 |
-
placeholder="Enter your OpenAI API key for DALL-E 3",
|
| 732 |
-
type="password"
|
| 733 |
-
)
|
| 734 |
-
stability_key = gr.Textbox(
|
| 735 |
-
label="Stability AI API Key",
|
| 736 |
-
placeholder="Enter your Stability AI key for Stable Diffusion XL",
|
| 737 |
-
type="password"
|
| 738 |
-
)
|
| 739 |
-
gemini_key = gr.Textbox(
|
| 740 |
-
label="Gemini API Key",
|
| 741 |
-
placeholder="Enter your Gemini API key for analysis mode",
|
| 742 |
-
type="password"
|
| 743 |
)
|
| 744 |
|
| 745 |
with gr.Group():
|
|
@@ -778,38 +624,32 @@ with gr.Blocks(title="π Nano Banana - Dynamic Image Creation", theme=gr.theme
|
|
| 778 |
with gr.Row():
|
| 779 |
gr.Examples(
|
| 780 |
examples=[
|
| 781 |
-
["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "
|
| 782 |
-
["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "
|
| 783 |
-
["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "
|
| 784 |
-
["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "
|
| 785 |
-
["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "
|
| 786 |
-
["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "
|
| 787 |
-
["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "
|
| 788 |
-
["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "
|
| 789 |
],
|
| 790 |
-
inputs=[image_input, prompt_input, style_selector, editing_mode,
|
| 791 |
label="π― Try These Examples"
|
| 792 |
)
|
| 793 |
|
| 794 |
gr.Markdown("""
|
| 795 |
-
### π Competition Features
|
| 796 |
-
- **π Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview
|
| 797 |
-
- **π¨ Word-Based Editing**: Transform images with natural language prompts
|
| 798 |
-
- **π Reality Blending**:
|
| 799 |
-
- **β‘ Real-time Processing**: Fast image transformations
|
| 800 |
- **π οΈ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
|
| 801 |
-
- **π Smart Fallbacks**:
|
| 802 |
""")
|
| 803 |
|
| 804 |
-
model_selector.change(
|
| 805 |
-
fn=update_model_description,
|
| 806 |
-
inputs=[model_selector],
|
| 807 |
-
outputs=[model_description]
|
| 808 |
-
)
|
| 809 |
-
|
| 810 |
process_btn.click(
|
| 811 |
-
fn=
|
| 812 |
-
inputs=[image_input, prompt_input, style_selector, editing_mode,
|
| 813 |
outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
|
| 814 |
)
|
| 815 |
|
|
|
|
| 30 |
"name": "Freepik Gemini 2.5 Flash Image Preview",
|
| 31 |
"api": "freepik",
|
| 32 |
"url": "https://api.freepik.com/v1/ai/text-to-image",
|
| 33 |
+
"description": "π <strong>Nano Banana Competition Model</strong> - Real Gemini 2.5 Flash generation<br/>β οΈ <em>Requires paid Freepik subscription + valid API key</em>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
}
|
| 36 |
|
|
|
|
| 100 |
|
| 101 |
if api_type == "freepik":
|
| 102 |
return self._generate_with_freepik(image, prompt, style, editing_mode, api_keys.get("freepik"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
else:
|
| 104 |
return None, f"Unsupported API type: {api_type}"
|
| 105 |
|
|
|
|
| 199 |
logger.error(f"Freepik generation failed: {e}")
|
| 200 |
return None, f"Freepik error: {str(e)}"
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
def _build_enhanced_prompt(self, prompt, style, editing_mode):
|
| 204 |
style_modifiers = {
|
|
|
|
| 284 |
|
| 285 |
for attempt in range(API_RETRY_COUNT):
|
| 286 |
try:
|
| 287 |
+
buffered = io.BytesIO()
|
| 288 |
image.save(buffered, format='PNG', quality=85)
|
| 289 |
+
image_bytes = buffered.getvalue()
|
| 290 |
|
| 291 |
if len(image_bytes) > 10 * 1024 * 1024:
|
| 292 |
return image, "Image too large. Please use a smaller image."
|
|
|
|
| 294 |
try:
|
| 295 |
response = self.gemini_model.generate_content([
|
| 296 |
analysis_prompt,
|
| 297 |
+
{
|
| 298 |
+
'mime_type': 'image/png',
|
| 299 |
+
'data': base64.b64encode(image_bytes).decode('utf-8')
|
| 300 |
+
}
|
| 301 |
+
])
|
| 302 |
except Exception as img_error:
|
| 303 |
if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
|
| 304 |
logger.warning("API key doesn't support image processing, using demo mode")
|
|
|
|
| 414 |
|
| 415 |
app = NanoBananaApp()
|
| 416 |
|
|
|
|
|
|
|
| 417 |
|
| 418 |
+
def process_nano_banana_with_freepik(image, prompt, style, editing_mode, freepik_key, enable_detection, enable_voice):
|
| 419 |
if not image:
|
| 420 |
return None, None, None, None, "π· Please upload an image to get started", None
|
| 421 |
|
|
|
|
| 423 |
return image, image, image, None, "π Please provide a transformation prompt", None
|
| 424 |
|
| 425 |
user_api_keys = {
|
| 426 |
+
"freepik": freepik_key or FREEPIK_API_KEY
|
|
|
|
|
|
|
|
|
|
| 427 |
}
|
| 428 |
|
| 429 |
try:
|
|
|
|
| 433 |
if enable_detection:
|
| 434 |
detection_result, detection_msg = app.detect_structures_optional(image)
|
| 435 |
|
| 436 |
+
# Try Freepik first, fallback to demo mode if needed
|
| 437 |
processed_image, process_msg = app.nano_banana_edit_with_model(
|
| 438 |
+
image, prompt, style, editing_mode, "Freepik Gemini 2.5 Flash", user_api_keys
|
| 439 |
)
|
| 440 |
|
| 441 |
+
# If Freepik fails, fallback to Gemini demo mode
|
| 442 |
if processed_image == image and "API key" in process_msg:
|
| 443 |
+
processed_image, process_msg = app._fallback_to_gemini_demo(image, prompt, style, editing_mode)
|
| 444 |
|
| 445 |
comparison = app.create_comparison(image, processed_image)
|
| 446 |
|
|
|
|
| 448 |
voice_msg = ""
|
| 449 |
if enable_voice:
|
| 450 |
if processed_image != image:
|
| 451 |
+
voice_text = f"Using Gemini 2.5 Flash, the AI processed this construction image with {editing_mode} mode and {style} style. Request: {prompt}"
|
| 452 |
audio = app.generate_voice_optional(voice_text)
|
| 453 |
voice_msg = "π Voice generated" if audio else "π Voice unavailable"
|
| 454 |
else:
|
|
|
|
| 531 |
"""
|
| 532 |
|
| 533 |
demo_mode_notice = ""
|
| 534 |
+
if not FREEPIK_API_KEY:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
demo_mode_notice = """
|
| 536 |
+
<div style="background: #fff3e0; border: 1px solid #ff9800; border-radius: 8px; padding: 15px; margin: 10px 0;">
|
| 537 |
+
<h3>π Nano Banana - Competition Mode</h3>
|
| 538 |
+
<p><strong>For real image generation:</strong> Add your <strong>FREEPIK_API_KEY</strong> in the API settings below</p>
|
| 539 |
+
<p>Get your key from: <a href="https://www.freepik.com/api" target="_blank">Freepik API Portal</a></p>
|
| 540 |
+
<p><em>Demo mode with visual overlay available without API key</em></p>
|
| 541 |
</div>
|
| 542 |
"""
|
| 543 |
|
|
|
|
| 577 |
)
|
| 578 |
|
| 579 |
with gr.Group():
|
| 580 |
+
gr.Markdown("### π Nano Banana - Gemini 2.5 Flash")
|
| 581 |
+
gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
|
| 583 |
+
with gr.Accordion("π Freepik API Key (Required for Image Generation)", open=True):
|
| 584 |
freepik_key = gr.Textbox(
|
| 585 |
label="Freepik API Key",
|
| 586 |
placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
|
| 587 |
+
type="password",
|
| 588 |
+
info="Get your key from: https://www.freepik.com/api"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
)
|
| 590 |
|
| 591 |
with gr.Group():
|
|
|
|
| 624 |
with gr.Row():
|
| 625 |
gr.Examples(
|
| 626 |
examples=[
|
| 627 |
+
["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "", False, False],
|
| 628 |
+
["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "", True, False],
|
| 629 |
+
["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "", False, True],
|
| 630 |
+
["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "", False, False],
|
| 631 |
+
["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "", True, False],
|
| 632 |
+
["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "", False, False],
|
| 633 |
+
["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "", False, False],
|
| 634 |
+
["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "", False, True]
|
| 635 |
],
|
| 636 |
+
inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
|
| 637 |
label="π― Try These Examples"
|
| 638 |
)
|
| 639 |
|
| 640 |
gr.Markdown("""
|
| 641 |
+
### π Competition Features - Gemini 2.5 Flash
|
| 642 |
+
- **π Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview - the official competition model
|
| 643 |
+
- **π¨ Word-Based Editing**: Transform construction images with natural language prompts
|
| 644 |
+
- **π Reality Blending**: Complete unfinished buildings, edit existing structures, blend architectural elements
|
| 645 |
+
- **β‘ Real-time Processing**: Fast image transformations powered by Gemini 2.5 Flash
|
| 646 |
- **π οΈ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
|
| 647 |
+
- **π Smart Fallbacks**: Demo mode with visual overlays when API key not available
|
| 648 |
""")
|
| 649 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
process_btn.click(
|
| 651 |
+
fn=process_nano_banana_with_freepik,
|
| 652 |
+
inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
|
| 653 |
outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
|
| 654 |
)
|
| 655 |
|