"""Gradio app for prompt-driven image editing ("Nano Banana") via Gemini.

The core feature sends an uploaded image plus a text prompt to a Gemini model
and displays the returned image. Two optional extras are wired in when their
packages/keys are present: YOLO structure detection (ultralytics) and voice
narration (ElevenLabs).
"""

import base64
import io
import logging
import os
import time
import warnings
from typing import Optional, Tuple

import cv2
import google.generativeai as genai
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Suppress all Python warnings process-wide.
warnings.filterwarnings("ignore")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

GEMINI_MODEL_NAME = "gemini-2.0-flash-exp"
MAX_IMAGE_SIZE = 1024  # longest allowed edge (pixels) before downscaling
RATE_LIMIT_DELAY = 3  # minimum seconds between two Gemini requests
API_RETRY_COUNT = 3
MAX_UPLOAD_BYTES = 10 * 1024 * 1024  # encoded PNG size limit for one request

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
    logger.info("Gemini API configured")
else:
    logger.warning("GEMINI_API_KEY not found - using demo mode")

try:
    from elevenlabs import generate, set_api_key
except ImportError:
    # Keep the name defined so callers can test for availability explicitly.
    generate = None
    logger.info("ElevenLabs not available - optional feature")
else:
    if ELEVENLABS_API_KEY:
        set_api_key(ELEVENLABS_API_KEY)
        logger.info("ElevenLabs configured")
    else:
        logger.info("ElevenLabs not configured - optional feature")

try:
    from ultralytics import YOLO
    yolo_available = True
except ImportError:
    yolo_available = False
    logger.info("YOLO not available - optional feature")


class NanoBananaApp:
    """Bundle the Gemini editor with the optional YOLO and voice helpers.

    Attributes are plain handles: ``gemini_model`` and ``yolo_model`` stay
    ``None`` until (and unless) the corresponding backend loads.
    """

    def __init__(self):
        self.gemini_model = None
        self.yolo_model = None
        # time.monotonic() value of the last Gemini request, None before any.
        self._last_request_time = None
        self._initialize_gemini()

    def _initialize_gemini(self) -> None:
        """Create the Gemini model handle; leave it ``None`` on any failure."""
        if not GEMINI_API_KEY:
            logger.warning("No API key - demo mode")
            return
        try:
            self.gemini_model = genai.GenerativeModel(GEMINI_MODEL_NAME)
            # Log the model actually instantiated rather than a marketing name.
            logger.info(f"Nano Banana initialized with Gemini model {GEMINI_MODEL_NAME}")
        except Exception as e:
            logger.error(f"Failed to initialize Gemini: {e}")

    def _resize_image_if_needed(self, image: Image.Image) -> Image.Image:
        """Return ``image`` scaled so neither edge exceeds ``MAX_IMAGE_SIZE``.

        The aspect ratio is preserved. When the image already fits, the very
        same object is returned (no copy).
        """
        if image.width <= MAX_IMAGE_SIZE and image.height <= MAX_IMAGE_SIZE:
            return image
        ratio = min(MAX_IMAGE_SIZE / image.width, MAX_IMAGE_SIZE / image.height)
        # Clamp to 1px so extreme aspect ratios never produce a zero dimension.
        new_size = (
            max(1, int(image.width * ratio)),
            max(1, int(image.height * ratio)),
        )
        return image.resize(new_size, Image.Resampling.LANCZOS)

    def _apply_rate_limiting(self) -> None:
        """Block until ``RATE_LIMIT_DELAY`` seconds separate two requests.

        Only the remaining part of the interval is slept, so the first request
        and requests that are already spaced out proceed immediately.
        """
        last = getattr(self, "_last_request_time", None)
        if last is not None:
            remaining = RATE_LIMIT_DELAY - (time.monotonic() - last)
            if remaining > 0:
                time.sleep(remaining)
        self._last_request_time = time.monotonic()

    def load_yolo_optional(self) -> bool:
        """Try to load YOLO weights; return ``True`` when a model is ready."""
        if not yolo_available:
            return False
        # Prefer weights already on disk (custom 'best.pt', then the legacy
        # 'yolov11n.pt' name). Otherwise use 'yolo11n.pt', the file name
        # Ultralytics publishes for YOLO11-nano.
        # NOTE(review): presumably ultralytics downloads that file on demand -
        # confirm against the installed version.
        model_path = next(
            (path for path in ("best.pt", "yolov11n.pt") if os.path.exists(path)),
            "yolo11n.pt",
        )
        try:
            self.yolo_model = YOLO(model_path)
            return True
        except Exception as e:
            logger.warning(f"YOLO loading failed: {e}")
            return False

    def detect_structures_optional(self, image: Image.Image) -> Tuple[Image.Image, str]:
        """Run YOLO on ``image`` and return ``(annotated_image, message)``.

        On any failure (model unavailable or inference error) the input image
        is returned unchanged together with an explanatory message.
        """
        if not self.yolo_model and not self.load_yolo_optional():
            return image, "Structure detection unavailable (optional feature)"
        try:
            # The model is fed an OpenCV-style BGR array; the plotted result is
            # converted back to RGB for PIL.
            img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            results = self.yolo_model(img_cv)
            annotated_img = results[0].plot()
            annotated_pil = Image.fromarray(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
            return annotated_pil, "Structures detected"
        except Exception as e:
            return image, f"Detection failed: {str(e)}"

    def _build_prompt(self, prompt: str, style: str, editing_mode: str) -> str:
        """Compose the text instruction sent to Gemini for ``editing_mode``."""
        if editing_mode == "complete":
            return f"{self._get_completion_prompt(style)} {prompt}"
        if editing_mode == "edit":
            return f"Edit this image: {prompt}. Make the changes look natural and maintain image quality."
        if editing_mode == "blend":
            return f"Blend and transform this image: {prompt}. Create a seamless fusion of elements."
        return prompt

    @staticmethod
    def _decode_inline_image(data) -> Image.Image:
        """Open an inline image payload given as raw bytes or base64 text.

        NOTE(review): the SDK appears to hand back raw image bytes, while other
        transports use base64 text - confirm for the pinned SDK version. Raw
        bytes are tried first, then base64, so both shapes are accepted.

        Raises:
            Exception: whatever PIL/base64 raise when neither decoding works.
        """
        payload = data.encode("ascii") if isinstance(data, str) else bytes(data)
        try:
            return Image.open(io.BytesIO(payload)).convert("RGB")
        except OSError:
            # PIL's "cannot identify image" error is an OSError subclass.
            decoded = base64.b64decode(payload)
            return Image.open(io.BytesIO(decoded)).convert("RGB")

    def _extract_image(self, response) -> Optional[Image.Image]:
        """Return the first inline image of the first candidate, else ``None``."""
        candidates = getattr(response, "candidates", None)
        if not candidates:
            return None
        for part in candidates[0].content.parts:
            inline = getattr(part, "inline_data", None)
            data = getattr(inline, "data", None) if inline else None
            if data:
                return self._decode_inline_image(data)
        return None

    def nano_banana_edit(self, image, prompt, style="realistic", editing_mode="complete"):
        """Ask Gemini to transform ``image`` according to ``prompt``.

        Args:
            image: PIL image to transform.
            prompt: User instruction; must be non-empty.
            style: Key for the completion prompt ("realistic", "futuristic",
                "artistic"); only used in "complete" mode.
            editing_mode: "complete", "edit", "blend"; any other value sends
                ``prompt`` verbatim.

        Returns:
            ``(result_image, message)``. On every failure path the *same
            object* that was passed in as ``image`` is returned, so callers can
            detect "nothing changed" with an identity check.
        """
        if not self.gemini_model:
            if not GEMINI_API_KEY:
                return image, "🔑 API key required for Nano Banana. Add GEMINI_API_KEY to use this feature."
            return image, "Gemini Nano Banana not available"
        if not prompt or not prompt.strip():
            return image, "Please provide a transformation prompt"

        try:
            # Work on a (possibly) downscaled copy; keep `image` untouched so
            # failure paths can hand the caller's original back.
            working = self._resize_image_if_needed(image)

            # Encode once: the payload is identical for every retry.
            buffered = io.BytesIO()
            working.save(buffered, format="PNG")
            image_bytes = buffered.getvalue()
            if len(image_bytes) > MAX_UPLOAD_BYTES:
                return image, "Image too large. Please use a smaller image."

            request = [
                self._build_prompt(prompt, style, editing_mode),
                {
                    "mime_type": "image/png",
                    "data": base64.b64encode(image_bytes).decode("utf-8"),
                },
            ]

            self._apply_rate_limiting()

            for attempt in range(API_RETRY_COUNT):
                is_last = attempt == API_RETRY_COUNT - 1
                try:
                    response = self.gemini_model.generate_content(request)
                    result_image = self._extract_image(response)
                except Exception as retry_error:
                    if is_last:
                        raise
                    logger.warning(f"Attempt {attempt + 1} failed: {retry_error}")
                    result_image = None
                if result_image is not None:
                    return result_image, f"✨ Nano Banana: {editing_mode} mode with {style} style"
                if not is_last:
                    # Exponential backoff: 1s, 2s, 4s, ...
                    time.sleep(2 ** attempt)
            return image, "No image generated - please try a different prompt"
        except Exception as e:
            logger.error(f"Nano Banana error: {e}")
            if "quota" in str(e).lower() or "limit" in str(e).lower():
                return image, "⏱️ API rate limit reached. Please try again in a few minutes."
            return image, f"Processing failed: {str(e)}"

    def _get_completion_prompt(self, style: str) -> str:
        """Return the base prompt for ``style``, defaulting to "realistic"."""
        prompts = {
            "realistic": "Complete this unfinished construction realistically with proper materials and architectural details.",
            "futuristic": "Transform this construction into a futuristic high-tech building with modern elements.",
            "artistic": "Complete this construction with creative artistic elements and unique design features.",
        }
        return prompts.get(style, prompts["realistic"])

    def generate_voice_optional(self, text: str):
        """Return ElevenLabs audio for ``text``, or ``None`` when unavailable."""
        if not ELEVENLABS_API_KEY or generate is None:
            return None
        try:
            return generate(text=text, voice="Rachel", model="eleven_monolingual_v1")
        except Exception as e:
            logger.warning(f"Voice generation failed: {e}")
            return None

    def create_comparison(self, original, processed) -> Optional[Image.Image]:
        """Build a side-by-side BEFORE/AFTER image, or ``None`` on failure.

        Both images are resized to a common size capped at 512x512 (the
        smaller of the two in each dimension), so aspect ratios may change.
        """
        if original is None or processed is None:
            return None
        try:
            height = min(original.height, processed.height, 512)
            width = min(original.width, processed.width, 512)
            orig_resized = original.resize((width, height), Image.Resampling.LANCZOS)
            proc_resized = processed.resize((width, height), Image.Resampling.LANCZOS)

            # 20px gutter between the panels, 20px header strip for the labels.
            comparison = Image.new('RGB', (width * 2 + 20, height + 40), 'white')
            comparison.paste(orig_resized, (0, 20))
            comparison.paste(proc_resized, (width + 20, 20))

            draw = ImageDraw.Draw(comparison)
            try:
                font = ImageFont.load_default()
                draw.text((width // 2 - 30, 5), "BEFORE", fill='black', font=font)
                draw.text((width + 20 + width // 2 - 30, 5), "AFTER", fill='black', font=font)
            except Exception as label_error:
                # Labels are best-effort; the comparison is still useful without them.
                logger.warning(f"Comparison labels skipped: {label_error}")
            return comparison
        except Exception as e:
            logger.warning(f"Comparison creation failed: {e}")
            return None


app = NanoBananaApp()


def process_nano_banana(image, prompt, style, editing_mode, enable_detection, enable_voice):
    """Gradio click handler: run the full pipeline for one request.

    Returns:
        A 6-tuple matching the UI outputs:
        ``(original, detection, result, comparison, status_text, audio)``.
    """
    if image is None:
        return None, None, None, None, "📷 Please upload an image to get started", None
    if not prompt or not prompt.strip():
        return image, image, image, None, "💭 Please provide a transformation prompt", None

    try:
        detection_result = image
        detection_msg = "Detection disabled"
        if enable_detection:
            detection_result, detection_msg = app.detect_structures_optional(image)

        processed_image, process_msg = app.nano_banana_edit(image, prompt, style, editing_mode)
        # nano_banana_edit returns the very same object when nothing was generated.
        transformed = processed_image is not image

        if not transformed and "API key required" in process_msg:
            # The message already carries its own key emoji; do not prefix another.
            return image, detection_result, image, None, process_msg, None

        comparison = app.create_comparison(image, processed_image)

        audio = None
        voice_msg = ""
        if enable_voice:
            if transformed:
                voice_text = f"Image transformed using Nano Banana with {editing_mode} mode and {style} style. {prompt}"
                audio = app.generate_voice_optional(voice_text)
                voice_msg = "🔊 Voice generated" if audio else "🔇 Voice unavailable"
            else:
                voice_msg = "🔇 Voice skipped (no changes)"

        status_parts = [f"🍌 {process_msg}"]
        if enable_detection:
            status_parts.append(f"📍 Detection: {detection_msg}")
        if enable_voice:
            status_parts.append(f"🎵 Voice: {voice_msg}")
        status = "\n".join(status_parts)

        return image, detection_result, processed_image, comparison, status, audio
    except Exception as e:
        logger.error(f"Processing error: {e}")
        return image, image, image, None, f"❌ Unexpected error: {str(e)}", None


# Stylesheet passed to gr.Blocks. `.feature-highlight` is attached to the result
# image below; `.nano-banner` is not referenced by any visible markup.
custom_css = (
    " .nano-banner {"
    " background: linear-gradient(45deg, #ff6b6b, #feca57, #48dbfb, #ff9ff3);"
    " background-size: 400% 400%;"
    " animation: gradient 15s ease infinite;"
    " padding: 20px;"
    " border-radius: 10px;"
    " text-align: center;"
    " margin-bottom: 20px;"
    " }"
    " @keyframes gradient {"
    " 0% { background-position: 0% 50%; }"
    " 50% { background-position: 100% 50%; }"
    " 100% { background-position: 0% 50%; }"
    " }"
    " .feature-highlight {"
    " border: 2px solid #4CAF50;"
    " border-radius: 8px;"
    " padding: 15px;"
    " margin: 10px 0;"
    " } "
)

# Extra banner text shown only when no Gemini key is configured.
demo_mode_notice = ""
if not GEMINI_API_KEY:
    demo_mode_notice = (
        "\n\n🔑 API Key Required\n\n"
        "To use Nano Banana features, add your GEMINI_API_KEY in the Space settings.\n\n"
        "Get your free API key from Google AI Studio\n\n"
    )

# NOTE(review): container nesting below is reconstructed from statement order
# (columns inside the first row, audio row under the tabs) - confirm the
# intended layout.
with gr.Blocks(title="🍌 Nano Banana - Dynamic Image Creation", theme=gr.themes.Soft(), css=custom_css) as demo:
    # NOTE(review): this banner is plain text without tags, so the browser will
    # collapse its line breaks; presumably it was meant to be wrapped in
    # `.nano-banner` markup - confirm.
    gr.HTML(
        "\n\n🍌 Nano Banana: Dynamic Image Creation\n\n"
        "Powered by Gemini 2.5 Flash Image Preview\n\n"
        "Edit with words • Blend realities • Transform visuals\n\n"
        f"{demo_mode_notice} "
    )

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 🎨 Core Nano Banana Features")
                image_input = gr.Image(label="Upload Image", type="pil", height=300)
                prompt_input = gr.Textbox(
                    label="Transformation Prompt",
                    placeholder="Describe how you want to transform this image...",
                    lines=3
                )
                editing_mode = gr.Radio(
                    choices=["complete", "edit", "blend"],
                    value="edit",
                    label="Nano Banana Mode",
                    info="Complete: Finish construction • Edit: Modify image • Blend: Fuse elements"
                )
                style_selector = gr.Radio(
                    choices=["realistic", "futuristic", "artistic"],
                    value="realistic",
                    label="Style",
                    info="Choose the aesthetic approach"
                )

            with gr.Group():
                gr.Markdown("### ⚙️ Optional Features")
                enable_detection = gr.Checkbox(
                    label="🔍 Structure Detection (YOLO)",
                    value=False,
                    info="Optional: Detect and highlight structures"
                )
                enable_voice = gr.Checkbox(
                    label="🔊 Voice Narration (ElevenLabs)",
                    value=False,
                    info="Optional: Generate audio description"
                )

            process_btn = gr.Button("🚀 Transform with Nano Banana", variant="primary", size="lg")
            status_output = gr.Textbox(label="Status", interactive=False, lines=4)

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("📷 Original"):
                    original_output = gr.Image(label="Original Image", height=400)
                with gr.Tab("🔍 Detection (Optional)"):
                    detection_output = gr.Image(label="Structure Detection", height=400)
                with gr.Tab("🍌 Nano Banana Result"):
                    result_output = gr.Image(label="Transformed Image", height=400, elem_classes=["feature-highlight"])
                with gr.Tab("📊 Before/After"):
                    comparison_output = gr.Image(label="Comparison View", height=400)

            with gr.Row():
                audio_output = gr.Audio(label="🔊 Voice Description (Optional)", visible=True)

    with gr.Row():
        gr.Examples(
            examples=[
                ["samples_imagen/skyscraper_construction.jpg", "Complete this modern skyscraper with glass facades", "futuristic", "complete", True, False],
                ["samples_imagen/suspension_bridge.jpg", "Add a golden sunset reflection on the bridge", "artistic", "edit", False, True],
                ["samples_imagen/highway_construction.jpg", "Transform into a smart highway with digital elements", "futuristic", "blend", True, False],
                ["samples_imagen/residential_construction.jpg", "Complete as a sustainable eco-friendly home", "realistic", "complete", False, False]
            ],
            inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
            label="🎯 Try These Examples"
        )

    # One bullet per line: Markdown only renders a list when each item starts
    # its own line below the heading.
    gr.Markdown(
        "### 🏆 Competition Features\n"
        "- **Nano Banana Core**: Gemini 2.5 Flash Image for dynamic creation\n"
        "- **Word-Based Editing**: Transform images with natural language\n"
        "- **Reality Blending**: Seamlessly fuse different visual elements\n"
        "- **Optional Enhancements**: Structure detection and voice narration\n"
        "- **Real-time Processing**: Fast image transformations\n"
    )

    # Input order must match process_nano_banana's parameters; output order
    # must match its 6-tuple return value.
    process_btn.click(
        fn=process_nano_banana,
        inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
        outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )