Abs6187 committed on
Commit
7a259c7
·
verified ·
1 Parent(s): cf991a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -203
app.py CHANGED
@@ -30,25 +30,7 @@ IMAGE_MODELS = {
30
  "name": "Freepik Gemini 2.5 Flash Image Preview",
31
  "api": "freepik",
32
  "url": "https://api.freepik.com/v1/ai/text-to-image",
33
- "description": "🍌 <strong>Best for competition</strong> - Real Gemini 2.5 Flash generation<br/>⚠️ <em>Requires paid Freepik subscription + valid API key</em>"
34
- },
35
- "OpenAI DALL-E 3": {
36
- "name": "DALL-E 3",
37
- "api": "openai",
38
- "url": "https://api.openai.com/v1/images/generations",
39
- "description": "🎨 <strong>High-quality creative images</strong> - Excellent for artistic styles<br/>💳 <em>Requires OpenAI API key + billing account</em>"
40
- },
41
- "Stable Diffusion XL": {
42
- "name": "Stable Diffusion XL",
43
- "api": "stabilityai",
44
- "url": "https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
45
- "description": "🚀 <strong>Open-source powerhouse</strong> - High-resolution generation<br/>💰 <em>Requires Stability AI credits + API key</em>"
46
- },
47
- "Gemini Analysis + Demo": {
48
- "name": "Gemini Analysis + Visual Demo",
49
- "api": "gemini",
50
- "url": None,
51
- "description": "📊 <strong>Smart fallback</strong> - AI analysis + visual demo overlay<br/>✅ <em>Works with basic Gemini key or demo mode</em>"
52
  }
53
  }
54
 
@@ -118,12 +100,6 @@ class NanoBananaApp:
118
 
119
  if api_type == "freepik":
120
  return self._generate_with_freepik(image, prompt, style, editing_mode, api_keys.get("freepik"))
121
- elif api_type == "openai":
122
- return self._generate_with_dalle(image, prompt, style, editing_mode, api_keys.get("openai"))
123
- elif api_type == "stabilityai":
124
- return self._generate_with_stable_diffusion(image, prompt, style, editing_mode, api_keys.get("stabilityai"))
125
- elif api_type == "gemini":
126
- return self._generate_with_gemini_demo(image, prompt, style, editing_mode, api_keys.get("gemini"))
127
  else:
128
  return None, f"Unsupported API type: {api_type}"
129
 
@@ -223,105 +199,6 @@ class NanoBananaApp:
223
  logger.error(f"Freepik generation failed: {e}")
224
  return None, f"Freepik error: {str(e)}"
225
 
226
- def _generate_with_dalle(self, image, prompt, style, editing_mode, api_key):
227
- if not api_key:
228
- return None, "OpenAI API key not provided"
229
-
230
- try:
231
- full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
232
-
233
- headers = {
234
- "Authorization": f"Bearer {api_key}",
235
- "Content-Type": "application/json"
236
- }
237
-
238
- payload = {
239
- "model": "dall-e-3",
240
- "prompt": full_prompt,
241
- "n": 1,
242
- "size": "1024x1024"
243
- }
244
-
245
- response = requests.post(
246
- "https://api.openai.com/v1/images/generations",
247
- headers=headers,
248
- json=payload,
249
- timeout=60
250
- )
251
-
252
- if response.status_code == 200:
253
- result = response.json()
254
- if result.get('data') and len(result['data']) > 0:
255
- image_url = result['data'][0]['url']
256
- img_response = requests.get(image_url, timeout=30)
257
- if img_response.status_code == 200:
258
- generated_image = Image.open(io.BytesIO(img_response.content))
259
- return generated_image, "🎨 Generated with DALL-E 3"
260
-
261
- return None, f"DALL-E API error: {response.status_code}"
262
-
263
- except Exception as e:
264
- logger.error(f"DALL-E generation failed: {e}")
265
- return None, f"DALL-E error: {str(e)}"
266
-
267
- def _generate_with_stable_diffusion(self, image, prompt, style, editing_mode, api_key):
268
- if not api_key:
269
- return None, "Stability AI API key not provided"
270
-
271
- try:
272
- full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
273
-
274
- headers = {
275
- "Authorization": f"Bearer {api_key}",
276
- "Content-Type": "application/json"
277
- }
278
-
279
- payload = {
280
- "text_prompts": [{"text": full_prompt}],
281
- "cfg_scale": 7,
282
- "height": 1024,
283
- "width": 1024,
284
- "samples": 1,
285
- "steps": 30
286
- }
287
-
288
- response = requests.post(
289
- "https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
290
- headers=headers,
291
- json=payload,
292
- timeout=60
293
- )
294
-
295
- if response.status_code == 200:
296
- result = response.json()
297
- if result.get('artifacts') and len(result['artifacts']) > 0:
298
- image_b64 = result['artifacts'][0]['base64']
299
- image_data = base64.b64decode(image_b64)
300
- generated_image = Image.open(io.BytesIO(image_data))
301
- return generated_image, "🚀 Generated with Stable Diffusion XL"
302
-
303
- return None, f"Stability AI error: {response.status_code}"
304
-
305
- except Exception as e:
306
- logger.error(f"Stable Diffusion generation failed: {e}")
307
- return None, f"Stable Diffusion error: {str(e)}"
308
-
309
- def _generate_with_gemini_demo(self, image, prompt, style, editing_mode, api_key):
310
- if api_key:
311
- try:
312
- # Temporarily configure with user's key
313
- original_key = GEMINI_API_KEY
314
- genai.configure(api_key=api_key)
315
- result = self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
316
- # Restore original key
317
- if original_key:
318
- genai.configure(api_key=original_key)
319
- return result
320
- except Exception as e:
321
- logger.warning(f"User Gemini key failed: {e}")
322
- return None, f"Invalid Gemini API key: {str(e)}"
323
-
324
- return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
325
 
326
  def _build_enhanced_prompt(self, prompt, style, editing_mode):
327
  style_modifiers = {
@@ -407,9 +284,9 @@ class NanoBananaApp:
407
 
408
  for attempt in range(API_RETRY_COUNT):
409
  try:
410
- buffered = io.BytesIO()
411
  image.save(buffered, format='PNG', quality=85)
412
- image_bytes = buffered.getvalue()
413
 
414
  if len(image_bytes) > 10 * 1024 * 1024:
415
  return image, "Image too large. Please use a smaller image."
@@ -417,11 +294,11 @@ class NanoBananaApp:
417
  try:
418
  response = self.gemini_model.generate_content([
419
  analysis_prompt,
420
- {
421
- 'mime_type': 'image/png',
422
- 'data': base64.b64encode(image_bytes).decode('utf-8')
423
- }
424
- ])
425
  except Exception as img_error:
426
  if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
427
  logger.warning("API key doesn't support image processing, using demo mode")
@@ -537,10 +414,8 @@ class NanoBananaApp:
537
 
538
  app = NanoBananaApp()
539
 
540
- def update_model_description(model_name):
541
- return IMAGE_MODELS.get(model_name, {}).get("description", "Model description not available")
542
 
543
- def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_name, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice):
544
  if not image:
545
  return None, None, None, None, "📷 Please upload an image to get started", None
546
 
@@ -548,10 +423,7 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
548
  return image, image, image, None, "💭 Please provide a transformation prompt", None
549
 
550
  user_api_keys = {
551
- "freepik": freepik_key or FREEPIK_API_KEY,
552
- "openai": openai_key,
553
- "stabilityai": stability_key,
554
- "gemini": gemini_key or GEMINI_API_KEY
555
  }
556
 
557
  try:
@@ -561,12 +433,14 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
561
  if enable_detection:
562
  detection_result, detection_msg = app.detect_structures_optional(image)
563
 
 
564
  processed_image, process_msg = app.nano_banana_edit_with_model(
565
- image, prompt, style, editing_mode, model_name, user_api_keys
566
  )
567
 
 
568
  if processed_image == image and "API key" in process_msg:
569
- return image, detection_result, image, None, f"🔑 {process_msg}", None
570
 
571
  comparison = app.create_comparison(image, processed_image)
572
 
@@ -574,7 +448,7 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
574
  voice_msg = ""
575
  if enable_voice:
576
  if processed_image != image:
577
- voice_text = f"Using {model_name}, the AI processed this construction image with {editing_mode} mode and {style} style. Request: {prompt}"
578
  audio = app.generate_voice_optional(voice_text)
579
  voice_msg = "🔊 Voice generated" if audio else "🔇 Voice unavailable"
580
  else:
@@ -657,21 +531,13 @@ custom_css = """
657
  """
658
 
659
  demo_mode_notice = ""
660
- if not FREEPIK_API_KEY and not GEMINI_API_KEY:
661
- demo_mode_notice = """
662
- <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;">
663
- <h3>🔑 API Keys Required</h3>
664
- <p><strong>For best results:</strong> Add <strong>FREEPIK_API_KEY</strong> for real image generation</p>
665
- <p><strong>For analysis:</strong> Add <strong>GEMINI_API_KEY</strong> from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p>
666
- <p><em>Demo mode available without API keys</em></p>
667
- </div>
668
- """
669
- elif not FREEPIK_API_KEY:
670
  demo_mode_notice = """
671
- <div style="background: #e3f2fd; border: 1px solid #2196f3; border-radius: 8px; padding: 15px; margin: 10px 0;">
672
- <h3>🍌 Enhanced Mode Available</h3>
673
- <p>Add <strong>FREEPIK_API_KEY</strong> for real Gemini 2.5 Flash image generation!</p>
674
- <p><em>Currently using Gemini analysis + demo mode</em></p>
 
675
  </div>
676
  """
677
 
@@ -711,35 +577,15 @@ with gr.Blocks(title="🍌 Nano Banana - Dynamic Image Creation", theme=gr.theme
711
  )
712
 
713
  with gr.Group():
714
- gr.Markdown("### 🤖 AI Model & API Settings")
715
- model_selector = gr.Dropdown(
716
- choices=list(IMAGE_MODELS.keys()),
717
- value="Freepik Gemini 2.5 Flash",
718
- label="Image Generation Model",
719
- info="Choose your preferred AI model"
720
- )
721
- model_description = gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
722
 
723
- with gr.Accordion("🔑 API Keys (Optional - Use Your Own)", open=False):
724
  freepik_key = gr.Textbox(
725
  label="Freepik API Key",
726
  placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
727
- type="password"
728
- )
729
- openai_key = gr.Textbox(
730
- label="OpenAI API Key",
731
- placeholder="Enter your OpenAI API key for DALL-E 3",
732
- type="password"
733
- )
734
- stability_key = gr.Textbox(
735
- label="Stability AI API Key",
736
- placeholder="Enter your Stability AI key for Stable Diffusion XL",
737
- type="password"
738
- )
739
- gemini_key = gr.Textbox(
740
- label="Gemini API Key",
741
- placeholder="Enter your Gemini API key for analysis mode",
742
- type="password"
743
  )
744
 
745
  with gr.Group():
@@ -778,38 +624,32 @@ with gr.Blocks(title="🍌 Nano Banana - Dynamic Image Creation", theme=gr.theme
778
  with gr.Row():
779
  gr.Examples(
780
  examples=[
781
- ["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "Freepik Gemini 2.5 Flash", "", "", "", "", False, False],
782
- ["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "OpenAI DALL-E 3", "", "", "", "", True, False],
783
- ["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "Stable Diffusion XL", "", "", "", "", False, True],
784
- ["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "Freepik Gemini 2.5 Flash", "", "", "", "", False, False],
785
- ["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "Gemini Analysis + Demo", "", "", "", "", True, False],
786
- ["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "OpenAI DALL-E 3", "", "", "", "", False, False],
787
- ["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "Stable Diffusion XL", "", "", "", "", False, False],
788
- ["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "Freepik Gemini 2.5 Flash", "", "", "", "", False, True]
789
  ],
790
- inputs=[image_input, prompt_input, style_selector, editing_mode, model_selector, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice],
791
  label="🎯 Try These Examples"
792
  )
793
 
794
  gr.Markdown("""
795
- ### 🏆 Competition Features
796
- - **🍌 Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview for real image generation
797
- - **🎨 Word-Based Editing**: Transform images with natural language prompts
798
- - **🌟 Reality Blending**: Seamlessly fuse different visual elements
799
- - **⚡ Real-time Processing**: Fast image transformations and generation
800
  - **🛠️ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
801
- - **🔄 Smart Fallbacks**: Multiple processing modes for reliability
802
  """)
803
 
804
- model_selector.change(
805
- fn=update_model_description,
806
- inputs=[model_selector],
807
- outputs=[model_description]
808
- )
809
-
810
  process_btn.click(
811
- fn=process_nano_banana_with_settings,
812
- inputs=[image_input, prompt_input, style_selector, editing_mode, model_selector, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice],
813
  outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
814
  )
815
 
 
30
  "name": "Freepik Gemini 2.5 Flash Image Preview",
31
  "api": "freepik",
32
  "url": "https://api.freepik.com/v1/ai/text-to-image",
33
+ "description": "🍌 <strong>Nano Banana Competition Model</strong> - Real Gemini 2.5 Flash generation<br/>⚠️ <em>Requires paid Freepik subscription + valid API key</em>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  }
36
 
 
100
 
101
  if api_type == "freepik":
102
  return self._generate_with_freepik(image, prompt, style, editing_mode, api_keys.get("freepik"))
 
 
 
 
 
 
103
  else:
104
  return None, f"Unsupported API type: {api_type}"
105
 
 
199
  logger.error(f"Freepik generation failed: {e}")
200
  return None, f"Freepik error: {str(e)}"
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
  def _build_enhanced_prompt(self, prompt, style, editing_mode):
204
  style_modifiers = {
 
284
 
285
  for attempt in range(API_RETRY_COUNT):
286
  try:
287
+ buffered = io.BytesIO()
288
  image.save(buffered, format='PNG', quality=85)
289
+ image_bytes = buffered.getvalue()
290
 
291
  if len(image_bytes) > 10 * 1024 * 1024:
292
  return image, "Image too large. Please use a smaller image."
 
294
  try:
295
  response = self.gemini_model.generate_content([
296
  analysis_prompt,
297
+ {
298
+ 'mime_type': 'image/png',
299
+ 'data': base64.b64encode(image_bytes).decode('utf-8')
300
+ }
301
+ ])
302
  except Exception as img_error:
303
  if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
304
  logger.warning("API key doesn't support image processing, using demo mode")
 
414
 
415
  app = NanoBananaApp()
416
 
 
 
417
 
418
+ def process_nano_banana_with_freepik(image, prompt, style, editing_mode, freepik_key, enable_detection, enable_voice):
419
  if not image:
420
  return None, None, None, None, "📷 Please upload an image to get started", None
421
 
 
423
  return image, image, image, None, "💭 Please provide a transformation prompt", None
424
 
425
  user_api_keys = {
426
+ "freepik": freepik_key or FREEPIK_API_KEY
 
 
 
427
  }
428
 
429
  try:
 
433
  if enable_detection:
434
  detection_result, detection_msg = app.detect_structures_optional(image)
435
 
436
+ # Try Freepik first, fallback to demo mode if needed
437
  processed_image, process_msg = app.nano_banana_edit_with_model(
438
+ image, prompt, style, editing_mode, "Freepik Gemini 2.5 Flash", user_api_keys
439
  )
440
 
441
+ # If Freepik fails, fallback to Gemini demo mode
442
  if processed_image == image and "API key" in process_msg:
443
+ processed_image, process_msg = app._fallback_to_gemini_demo(image, prompt, style, editing_mode)
444
 
445
  comparison = app.create_comparison(image, processed_image)
446
 
 
448
  voice_msg = ""
449
  if enable_voice:
450
  if processed_image != image:
451
+ voice_text = f"Using Gemini 2.5 Flash, the AI processed this construction image with {editing_mode} mode and {style} style. Request: {prompt}"
452
  audio = app.generate_voice_optional(voice_text)
453
  voice_msg = "🔊 Voice generated" if audio else "🔇 Voice unavailable"
454
  else:
 
531
  """
532
 
533
  demo_mode_notice = ""
534
+ if not FREEPIK_API_KEY:
 
 
 
 
 
 
 
 
 
535
  demo_mode_notice = """
536
+ <div style="background: #fff3e0; border: 1px solid #ff9800; border-radius: 8px; padding: 15px; margin: 10px 0;">
537
+ <h3>🍌 Nano Banana - Competition Mode</h3>
538
+ <p><strong>For real image generation:</strong> Add your <strong>FREEPIK_API_KEY</strong> in the API settings below</p>
539
+ <p>Get your key from: <a href="https://www.freepik.com/api" target="_blank">Freepik API Portal</a></p>
540
+ <p><em>Demo mode with visual overlay available without API key</em></p>
541
  </div>
542
  """
543
 
 
577
  )
578
 
579
  with gr.Group():
580
+ gr.Markdown("### 🍌 Nano Banana - Gemini 2.5 Flash")
581
+ gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
 
 
 
 
 
 
582
 
583
+ with gr.Accordion("🔑 Freepik API Key (Required for Image Generation)", open=True):
584
  freepik_key = gr.Textbox(
585
  label="Freepik API Key",
586
  placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
587
+ type="password",
588
+ info="Get your key from: https://www.freepik.com/api"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
  )
590
 
591
  with gr.Group():
 
624
  with gr.Row():
625
  gr.Examples(
626
  examples=[
627
+ ["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "", False, False],
628
+ ["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "", True, False],
629
+ ["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "", False, True],
630
+ ["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "", False, False],
631
+ ["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "", True, False],
632
+ ["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "", False, False],
633
+ ["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "", False, False],
634
+ ["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "", False, True]
635
  ],
636
+ inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
637
  label="🎯 Try These Examples"
638
  )
639
 
640
  gr.Markdown("""
641
+ ### 🏆 Competition Features - Gemini 2.5 Flash
642
+ - **🍌 Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview - the official competition model
643
+ - **🎨 Word-Based Editing**: Transform construction images with natural language prompts
644
+ - **🌟 Reality Blending**: Complete unfinished buildings, edit existing structures, blend architectural elements
645
+ - **⚡ Real-time Processing**: Fast image transformations powered by Gemini 2.5 Flash
646
  - **🛠️ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
647
+ - **🔄 Smart Fallbacks**: Demo mode with visual overlays when API key not available
648
  """)
649
 
 
 
 
 
 
 
650
  process_btn.click(
651
+ fn=process_nano_banana_with_freepik,
652
+ inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
653
  outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
654
  )
655