# Hugging Face Spaces page chrome (status: Sleeping) removed from scraped source.
# Standard library
import base64
import io
import logging
import os
import time
import warnings
from typing import Optional, Tuple

# Third-party
import cv2
import google.generativeai as genai
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont

warnings.filterwarnings("ignore")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration (API keys come from the environment; both are optional,
# the app degrades to demo mode without them).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
MAX_IMAGE_SIZE = 1024      # max width/height (px) sent to the API
RATE_LIMIT_DELAY = 3       # seconds slept before each Gemini call
API_RETRY_COUNT = 3        # attempts per Gemini request
# Configure Gemini if a key is available; otherwise run in demo mode.
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
    logger.info("Gemini API configured")
else:
    logger.warning("GEMINI_API_KEY not found - using demo mode")

# Optional voice narration: the elevenlabs package may be absent, and the
# key may be unset - both cases are tolerated.
try:
    from elevenlabs import generate, set_api_key
    if ELEVENLABS_API_KEY:
        set_api_key(ELEVENLABS_API_KEY)
        logger.info("ElevenLabs configured")
    else:
        logger.info("ElevenLabs not configured - optional feature")
except ImportError:
    logger.info("ElevenLabs not available - optional feature")

# Optional structure detection: flag records whether ultralytics imported.
try:
    from ultralytics import YOLO
    yolo_available = True
except ImportError:
    yolo_available = False
    logger.info("YOLO not available - optional feature")
class NanoBananaApp:
    """Image-transformation backend for the "Nano Banana" demo.

    Wraps three services:
      * Gemini generative image editing (required for real output),
      * YOLO structure detection (optional),
      * ElevenLabs voice narration (optional).

    Missing services degrade gracefully: methods return the input image
    plus a human-readable status message instead of raising.
    """

    def __init__(self):
        # None means "model unavailable"; both are set up lazily/optionally.
        self.gemini_model = None
        self.yolo_model = None
        self._initialize_gemini()

    def _initialize_gemini(self):
        """Instantiate the Gemini model when an API key is present."""
        if not GEMINI_API_KEY:
            logger.warning("No API key - demo mode")
            return
        try:
            # NOTE(review): model id is 'gemini-2.0-flash-exp' while the log
            # message claims 2.5 Flash Image - confirm which is intended.
            self.gemini_model = genai.GenerativeModel('gemini-2.0-flash-exp')
            logger.info("Nano Banana (Gemini 2.5 Flash Image) initialized")
        except Exception as e:
            logger.error(f"Failed to initialize Gemini: {e}")

    def _resize_image_if_needed(self, image):
        """Return *image* scaled so neither side exceeds MAX_IMAGE_SIZE.

        Aspect ratio is preserved; the original object is returned
        unchanged when it is already small enough.
        """
        if image.width > MAX_IMAGE_SIZE or image.height > MAX_IMAGE_SIZE:
            ratio = min(MAX_IMAGE_SIZE / image.width, MAX_IMAGE_SIZE / image.height)
            new_size = (int(image.width * ratio), int(image.height * ratio))
            return image.resize(new_size, Image.Resampling.LANCZOS)
        return image

    def _apply_rate_limiting(self):
        """Crude client-side throttling: fixed sleep before every API call."""
        time.sleep(RATE_LIMIT_DELAY)

    def load_yolo_optional(self):
        """Try to load a YOLO model; return True on success.

        Prefers a local fine-tuned 'best.pt', falling back to a
        pretrained nano checkpoint.
        """
        if not yolo_available:
            return False
        try:
            # NOTE(review): ultralytics names the nano checkpoint
            # 'yolo11n.pt'; confirm 'yolov11n.pt' actually resolves.
            model_path = 'best.pt' if os.path.exists('best.pt') else 'yolov11n.pt'
            self.yolo_model = YOLO(model_path)
            return True
        except Exception as e:
            logger.warning(f"YOLO loading failed: {e}")
            return False

    def detect_structures_optional(self, image):
        """Run YOLO detection; return (annotated_image, status_message).

        Falls back to the unmodified input when the model is unavailable
        or inference fails.
        """
        if not self.yolo_model and not self.load_yolo_optional():
            return image, "Structure detection unavailable (optional feature)"
        try:
            # PIL (RGB) -> OpenCV (BGR) for inference, then back for display.
            img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            results = self.yolo_model(img_cv)
            annotated_img = results[0].plot()
            annotated_pil = Image.fromarray(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
            return annotated_pil, "Structures detected"
        except Exception as e:
            return image, f"Detection failed: {str(e)}"

    def _compose_prompt(self, prompt, style, editing_mode):
        """Build the full instruction sent to Gemini for *editing_mode*."""
        if editing_mode == "complete":
            return f"{self._get_completion_prompt(style)} {prompt}"
        if editing_mode == "edit":
            return f"Edit this image: {prompt}. Make the changes look natural and maintain image quality."
        if editing_mode == "blend":
            return f"Blend and transform this image: {prompt}. Create a seamless fusion of elements."
        # Unknown mode: forward the user's prompt verbatim.
        return prompt

    def nano_banana_edit(self, image, prompt, style="realistic", editing_mode="complete"):
        """Send *image* plus a composed prompt to Gemini.

        Returns (result_image, status_message). On any failure the
        ORIGINAL image object is returned, so callers can detect
        "no change" with an identity check. Requests are retried up to
        API_RETRY_COUNT times with exponential backoff.
        """
        if not self.gemini_model:
            if not GEMINI_API_KEY:
                return image, "π API key required for Nano Banana. Add GEMINI_API_KEY to use this feature."
            return image, "Gemini Nano Banana not available"
        if not prompt.strip():
            return image, "Please provide a transformation prompt"
        try:
            image = self._resize_image_if_needed(image)
            self._apply_rate_limiting()
            full_prompt = self._compose_prompt(prompt, style, editing_mode)
            for attempt in range(API_RETRY_COUNT):
                try:
                    buffered = io.BytesIO()
                    # BUGFIX: dropped quality=85 - Pillow's PNG writer has no
                    # 'quality' parameter (it was silently ignored).
                    image.save(buffered, format='PNG')
                    image_bytes = buffered.getvalue()
                    if len(image_bytes) > 10 * 1024 * 1024:  # 10 MB payload guard
                        return image, "Image too large. Please use a smaller image."
                    response = self.gemini_model.generate_content([
                        full_prompt,
                        {
                            'mime_type': 'image/png',
                            'data': base64.b64encode(image_bytes).decode('utf-8')
                        }
                    ])
                    # Scan the first candidate for inline image data.
                    if hasattr(response, 'candidates') and response.candidates:
                        for part in response.candidates[0].content.parts:
                            if hasattr(part, 'inline_data') and part.inline_data:
                                if hasattr(part.inline_data, 'data'):
                                    image_data = base64.b64decode(part.inline_data.data)
                                    result_image = Image.open(io.BytesIO(image_data)).convert('RGB')
                                    return result_image, f"β¨ Nano Banana: {editing_mode} mode with {style} style"
                    # No image in the response: back off and retry.
                    if attempt < API_RETRY_COUNT - 1:
                        time.sleep(2 ** attempt)
                        continue
                    return image, "No image generated - please try a different prompt"
                except Exception as retry_error:
                    if attempt < API_RETRY_COUNT - 1:
                        logger.warning(f"Attempt {attempt + 1} failed: {retry_error}")
                        time.sleep(2 ** attempt)
                        continue
                    raise retry_error
        except Exception as e:
            logger.error(f"Nano Banana error: {e}")
            if "quota" in str(e).lower() or "limit" in str(e).lower():
                return image, "β±οΈ API rate limit reached. Please try again in a few minutes."
            return image, f"Processing failed: {str(e)}"

    def _get_completion_prompt(self, style):
        """Base instruction for 'complete' mode; unknown styles fall back
        to the realistic prompt."""
        prompts = {
            "realistic": "Complete this unfinished construction realistically with proper materials and architectural details.",
            "futuristic": "Transform this construction into a futuristic high-tech building with modern elements.",
            "artistic": "Complete this construction with creative artistic elements and unique design features."
        }
        return prompts.get(style, prompts["realistic"])

    def generate_voice_optional(self, text):
        """Return ElevenLabs audio for *text*, or None when unavailable.

        Strictly best-effort: any failure (missing import leaves 'generate'
        undefined and raises NameError; API errors raise too) is logged
        and swallowed.
        """
        if not ELEVENLABS_API_KEY:
            return None
        try:
            audio = generate(text=text, voice="Rachel", model="eleven_monolingual_v1")
            return audio
        except Exception as e:
            logger.warning(f"Voice generation failed: {e}")
            return None

    def create_comparison(self, original, processed):
        """Build a side-by-side BEFORE/AFTER composite, or None on failure.

        Both images are resized to a common size capped at 512px per side.
        """
        if not original or not processed:
            return None
        try:
            height = min(original.height, processed.height, 512)
            width = min(original.width, processed.width, 512)
            orig_resized = original.resize((width, height), Image.Resampling.LANCZOS)
            proc_resized = processed.resize((width, height), Image.Resampling.LANCZOS)
            # 20px gutter between panels, 20px header strip for the labels.
            comparison = Image.new('RGB', (width * 2 + 20, height + 40), 'white')
            comparison.paste(orig_resized, (0, 20))
            comparison.paste(proc_resized, (width + 20, 20))
            draw = ImageDraw.Draw(comparison)
            try:
                font = ImageFont.load_default()
                draw.text((width//2 - 30, 5), "BEFORE", fill='black', font=font)
                draw.text((width + 20 + width//2 - 30, 5), "AFTER", fill='black', font=font)
            except Exception:
                # BUGFIX: was a bare 'except:'. Labels stay best-effort but
                # SystemExit/KeyboardInterrupt are no longer swallowed.
                pass
            return comparison
        except Exception as e:
            logger.warning(f"Comparison creation failed: {e}")
            return None
# Single shared backend instance used by the Gradio callback below.
app = NanoBananaApp()


def process_nano_banana(image, prompt, style, editing_mode, enable_detection, enable_voice):
    """Main Gradio handler: optional detection, the Gemini edit, and
    optional narration.

    Returns a 6-tuple matching the UI outputs:
    (original, detection_view, result, comparison, status_text, audio).
    """
    if image is None:  # explicit None check instead of truthiness on a PIL image
        return None, None, None, None, "π· Please upload an image to get started", None
    if not prompt or not prompt.strip():
        return image, image, image, None, "π Please provide a transformation prompt", None
    try:
        detection_result = image
        detection_msg = "Detection disabled"
        if enable_detection:
            detection_result, detection_msg = app.detect_structures_optional(image)
        processed_image, process_msg = app.nano_banana_edit(image, prompt, style, editing_mode)
        # BUGFIX: nano_banana_edit returns the SAME object on failure, so an
        # identity check is correct and cheap; PIL's __eq__ does a full pixel
        # comparison and could misclassify an equal-but-new result image.
        if processed_image is image and "API key required" in process_msg:
            return image, detection_result, image, None, f"π {process_msg}", None
        comparison = app.create_comparison(image, processed_image)
        audio = None
        voice_msg = ""
        if enable_voice:
            if processed_image is not image:
                voice_text = f"Image transformed using Nano Banana with {editing_mode} mode and {style} style. {prompt}"
                audio = app.generate_voice_optional(voice_text)
                voice_msg = "π Voice generated" if audio else "π Voice unavailable"
            else:
                voice_msg = "π Voice skipped (no changes)"
        status_parts = [f"π {process_msg}"]
        if enable_detection:
            status_parts.append(f"π Detection: {detection_msg}")
        if enable_voice:
            status_parts.append(f"π΅ Voice: {voice_msg}")
        status = "\n".join(status_parts)
        return image, detection_result, processed_image, comparison, status, audio
    except Exception as e:
        logger.error(f"Processing error: {e}")
        return image, image, image, None, f"β Unexpected error: {str(e)}", None
| custom_css = """ | |
| .nano-banner { | |
| background: linear-gradient(45deg, #ff6b6b, #feca57, #48dbfb, #ff9ff3); | |
| background-size: 400% 400%; | |
| animation: gradient 15s ease infinite; | |
| padding: 20px; | |
| border-radius: 10px; | |
| text-align: center; | |
| margin-bottom: 20px; | |
| } | |
| @keyframes gradient { | |
| 0% { background-position: 0% 50%; } | |
| 50% { background-position: 100% 50%; } | |
| 100% { background-position: 0% 50%; } | |
| } | |
| .feature-highlight { | |
| border: 2px solid #4CAF50; | |
| border-radius: 8px; | |
| padding: 15px; | |
| margin: 10px 0; | |
| } | |
| """ | |
| demo_mode_notice = "" | |
| if not GEMINI_API_KEY: | |
| demo_mode_notice = """ | |
| <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;"> | |
| <h3>π API Key Required</h3> | |
| <p>To use Nano Banana features, add your <strong>GEMINI_API_KEY</strong> in the Space settings.</p> | |
| <p>Get your free API key from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p> | |
| </div> | |
| """ | |
| with gr.Blocks(title="π Nano Banana - Dynamic Image Creation", theme=gr.themes.Soft(), css=custom_css) as demo: | |
| gr.HTML(f""" | |
| <div class="nano-banner"> | |
| <h1>π Nano Banana: Dynamic Image Creation</h1> | |
| <p><strong>Powered by Gemini 2.5 Flash Image Preview</strong></p> | |
| <p>Edit with words β’ Blend realities β’ Transform visuals</p> | |
| </div> | |
| {demo_mode_notice} | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| with gr.Group(): | |
| gr.Markdown("### π¨ Core Nano Banana Features") | |
| image_input = gr.Image(label="Upload Image", type="pil", height=300) | |
| prompt_input = gr.Textbox( | |
| label="Transformation Prompt", | |
| placeholder="Describe how you want to transform this image...", | |
| lines=3 | |
| ) | |
| editing_mode = gr.Radio( | |
| choices=["complete", "edit", "blend"], | |
| value="edit", | |
| label="Nano Banana Mode", | |
| info="Complete: Finish construction β’ Edit: Modify image β’ Blend: Fuse elements" | |
| ) | |
| style_selector = gr.Radio( | |
| choices=["realistic", "futuristic", "artistic"], | |
| value="realistic", | |
| label="Style", | |
| info="Choose the aesthetic approach" | |
| ) | |
| with gr.Group(): | |
| gr.Markdown("### βοΈ Optional Features") | |
| enable_detection = gr.Checkbox( | |
| label="π Structure Detection (YOLO)", | |
| value=False, | |
| info="Optional: Detect and highlight structures" | |
| ) | |
| enable_voice = gr.Checkbox( | |
| label="π Voice Narration (ElevenLabs)", | |
| value=False, | |
| info="Optional: Generate audio description" | |
| ) | |
| process_btn = gr.Button("π Transform with Nano Banana", variant="primary", size="lg") | |
| status_output = gr.Textbox(label="Status", interactive=False, lines=4) | |
| with gr.Column(scale=2): | |
| with gr.Tabs(): | |
| with gr.Tab("π· Original"): | |
| original_output = gr.Image(label="Original Image", height=400) | |
| with gr.Tab("π Detection (Optional)"): | |
| detection_output = gr.Image(label="Structure Detection", height=400) | |
| with gr.Tab("π Nano Banana Result"): | |
| result_output = gr.Image(label="Transformed Image", height=400, elem_classes=["feature-highlight"]) | |
| with gr.Tab("π Before/After"): | |
| comparison_output = gr.Image(label="Comparison View", height=400) | |
| with gr.Row(): | |
| audio_output = gr.Audio(label="π Voice Description (Optional)", visible=True) | |
| with gr.Row(): | |
| gr.Examples( | |
| examples=[ | |
| ["samples_imagen/skyscraper_construction.jpg", "Complete this modern skyscraper with glass facades", "futuristic", "complete", True, False], | |
| ["samples_imagen/suspension_bridge.jpg", "Add a golden sunset reflection on the bridge", "artistic", "edit", False, True], | |
| ["samples_imagen/highway_construction.jpg", "Transform into a smart highway with digital elements", "futuristic", "blend", True, False], | |
| ["samples_imagen/residential_construction.jpg", "Complete as a sustainable eco-friendly home", "realistic", "complete", False, False] | |
| ], | |
| inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice], | |
| label="π― Try These Examples" | |
| ) | |
| gr.Markdown(""" | |
| ### π Competition Features | |
| - **Nano Banana Core**: Gemini 2.5 Flash Image for dynamic creation | |
| - **Word-Based Editing**: Transform images with natural language | |
| - **Reality Blending**: Seamlessly fuse different visual elements | |
| - **Optional Enhancements**: Structure detection and voice narration | |
| - **Real-time Processing**: Fast image transformations | |
| """) | |
| process_btn.click( | |
| fn=process_nano_banana, | |
| inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice], | |
| outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output] | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch( | |
| server_name="0.0.0.0", | |
| server_port=7860, | |
| share=True | |
| ) |