Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import google.generativeai as genai | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image, ImageDraw, ImageFont | |
| import os | |
| import base64 | |
| import io | |
| import logging | |
| import time | |
| from typing import Optional, Tuple, Dict, List | |
| import warnings | |
| import requests | |
| import json | |
| import random | |
| from functools import wraps | |
# Suppress every Python warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# Root logger: INFO and above go to the console and to nano_banana.log.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[logging.StreamHandler(), logging.FileHandler('nano_banana.log')],
)

# Module-level logger used by everything below.
logger = logging.getLogger(__name__)
def validate_api_key(api_key, api_name):
    """Run basic format checks on an API key.

    Args:
        api_key: The key to check; may be None or empty.
        api_name: Human-readable service name used in the returned message.

    Returns:
        A ``(is_valid, message)`` tuple. The key is rejected when it is
        missing, when it is shorter than 10 characters after stripping, or
        when it starts or ends with a space character.
    """
    if not api_key:
        verdict = (False, f"{api_name} API key not provided")
    elif len(api_key.strip()) < 10:
        verdict = (False, f"{api_name} API key appears to be too short")
    elif ' ' in (api_key[0], api_key[-1]):
        # Only a literal space is rejected here; tabs/newlines are not.
        verdict = (False, f"{api_name} API key has leading/trailing whitespace")
    else:
        verdict = (True, f"{api_name} API key format validated")
    return verdict
def get_secure_api_key(env_var_name, api_name):
    """Load an API key from the environment and return it if it validates.

    Args:
        env_var_name: Name of the environment variable holding the key.
        api_name: Human-readable service name used in log messages.

    Returns:
        The key with surrounding whitespace removed, or None when the
        variable is unset/empty or the key fails ``validate_api_key``.
    """
    raw_value = os.getenv(env_var_name)
    if not raw_value:
        return None

    # Strip *before* validating: the validator rejects keys with a leading or
    # trailing space, so validating the raw value made the strip on return
    # unreachable for exactly the inputs it was meant to clean up.
    api_key = raw_value.strip()
    is_valid, message = validate_api_key(api_key, api_name)
    if not is_valid:
        logger.warning(f"{api_name} API key validation failed: {message}")
        return None

    logger.info(f"{api_name} API key loaded and validated")
    return api_key
# Credentials are resolved once at import time, in this order; each name is
# None when get_secure_api_key() returns None for its environment variable.
GEMINI_API_KEY, ELEVENLABS_API_KEY, FREEPIK_API_KEY = (
    get_secure_api_key(env_var, label)
    for env_var, label in (
        ("GEMINI_API_KEY", "Gemini"),
        ("ELEVENLABS_API_KEY", "ElevenLabs"),
        ("FREEPIK_API_KEY", "Freepik"),
    )
)

# Largest width/height allowed before an image is downscaled.
MAX_IMAGE_SIZE = 1024
# Seconds slept before each generation request.
RATE_LIMIT_DELAY = 3
# Number of attempts used by the retry helpers.
API_RETRY_COUNT = 5
# Upper bound, in seconds, for a single backoff sleep.
MAX_BACKOFF_TIME = 60
# Base delay, in seconds, that the exponential backoff starts from.
BASE_BACKOFF = 1

# Registry of selectable image models, keyed by the name shown in the UI.
IMAGE_MODELS = {
    "Freepik Gemini 2.5 Flash": dict(
        name="Freepik Gemini 2.5 Flash Image Preview",
        api="freepik",
        url="https://api.freepik.com/v1/ai/text-to-image",
        description=(
            "π <strong>Nano Banana Competition Model</strong> - Real Gemini 2.5 Flash generation<br/>"
            "β οΈ <em>Requires paid Freepik subscription + valid API key</em>"
        ),
    ),
}
# Configure the Gemini client only when a key was loaded; otherwise the app
# runs in demo mode.
if not GEMINI_API_KEY:
    logger.warning("GEMINI_API_KEY not found - using demo mode")
else:
    genai.configure(api_key=GEMINI_API_KEY)
    logger.info("Gemini API configured")
# ElevenLabs is optional: the feature is enabled only when the package can be
# imported AND an API key was loaded.
elevenlabs_available = False
try:
    from elevenlabs import generate, set_api_key
except ImportError:
    logger.info("ElevenLabs not available - optional feature")
else:
    if ELEVENLABS_API_KEY:
        set_api_key(ELEVENLABS_API_KEY)
        logger.info("ElevenLabs configured")
        elevenlabs_available = True
    else:
        logger.info("ElevenLabs not configured - optional feature")
# YOLO (ultralytics) is optional; record whether the import succeeded.
yolo_available = True
try:
    from ultralytics import YOLO
except ImportError:
    yolo_available = False
    logger.info("YOLO not available - optional feature")
def retry_with_backoff(max_retries=API_RETRY_COUNT, base_delay=BASE_BACKOFF):
    """Build a decorator that retries a callable on transient HTTP failures.

    Retry policy, per ``requests.exceptions.RequestException``:
      * HTTP 429: exponential backoff plus up to one second of random jitter.
      * HTTP 500/502/503/504: exponential backoff.
      * Any other HTTP status: not retried.
      * No response attached (network-level failure): exponential backoff.
    Every wait is capped at ``MAX_BACKOFF_TIME`` seconds. Any exception that
    is not a ``RequestException`` stops retrying immediately.

    Args:
        max_retries: Maximum number of calls to the wrapped function.
        base_delay: Base delay in seconds; attempt ``k`` (0-based) waits
            ``base_delay * 2**k`` before the next call.

    Returns:
        A decorator. The wrapped function returns the callable's result, or
        re-raises the last exception once retries stop. It returns None only
        when ``max_retries`` is 0 (the callable is never invoked).
    """
    retryable_server_errors = (500, 502, 503, 504)

    def decorator(func):
        # Keep the wrapped function's name/docstring visible to callers.
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except requests.exceptions.RequestException as e:
                    last_exception = e
                    response = getattr(e, 'response', None)
                    status_code = response.status_code if response is not None else None

                    jitter = 0
                    if status_code is None:
                        reason = "Network error"
                    elif status_code == 429:
                        reason = "Rate limit hit"
                        jitter = random.uniform(0, 1)
                    elif status_code in retryable_server_errors:
                        reason = f"Server error {status_code}"
                    else:
                        logger.error(f"Non-retryable error: {status_code}")
                        break

                    # No further attempt will follow, so sleeping here would
                    # only delay the error reaching the caller.
                    if attempt == max_retries - 1:
                        break

                    wait_time = min(base_delay * (2 ** attempt) + jitter, MAX_BACKOFF_TIME)
                    logger.warning(
                        f"{reason}, retrying in {wait_time:.2f}s (attempt {attempt + 1}/{max_retries})"
                    )
                    time.sleep(wait_time)
                except Exception as e:
                    last_exception = e
                    logger.error(f"Unexpected error on attempt {attempt + 1}: {e}")
                    break

            if last_exception is not None:
                raise last_exception
            return None

        return wrapper

    return decorator
class NanoBananaApp:
    """Backend for the Nano Banana demo.

    Combines a Freepik text-to-image request, a Gemini text-analysis
    fallback that produces a locally drawn "demo" overlay, optional YOLO
    structure detection and optional ElevenLabs narration.

    Attributes:
        gemini_model: ``genai.GenerativeModel`` instance, or None in demo mode.
        yolo_model: Lazily loaded YOLO model, or None until first use.
        session: Shared ``requests.Session`` used for Freepik API calls.
    """

    _FREEPIK_URL = "https://api.freepik.com/v1/ai/text-to-image"

    # Prompt fragments per visual style; unknown styles use "realistic".
    _STYLE_MODIFIERS = {
        "realistic": {
            "base": "photorealistic, high-quality construction, professional architecture",
            "materials": "concrete, steel, glass, brick, modern building materials",
            "lighting": "natural lighting, architectural photography, professional composition",
            "details": "precise construction details, realistic textures, accurate proportions"
        },
        "futuristic": {
            "base": "futuristic, high-tech, modern glass and steel, sci-fi architecture",
            "materials": "smart glass, carbon fiber, LED lighting, metallic surfaces",
            "lighting": "neon accents, ambient lighting, dynamic color schemes",
            "details": "sleek lines, geometric patterns, technological integration"
        },
        "artistic": {
            "base": "artistic, creative design, unique architecture, colorful and innovative",
            "materials": "mixed media, colorful facades, creative textures, artistic elements",
            "lighting": "dramatic lighting, vibrant colors, artistic composition",
            "details": "creative patterns, artistic flourishes, unique design elements"
        }
    }

    # Prompt fragments per editing mode; unknown modes use "edit".
    _MODE_DESCRIPTIONS = {
        "complete": {
            "action": "Complete and finish this unfinished construction project",
            "focus": "seamless integration, structural completion, architectural consistency",
            "outcome": "fully realized building with complete structure and details"
        },
        "edit": {
            "action": "Edit and transform specific elements of this construction image",
            "focus": "targeted modifications, enhanced features, improved design",
            "outcome": "enhanced version with specific improvements and modifications"
        },
        "blend": {
            "action": "Blend and reimagine this construction with new architectural elements",
            "focus": "creative fusion, architectural harmony, innovative design integration",
            "outcome": "seamlessly blended design combining existing and new elements"
        }
    }

    def __init__(self):
        """Create the HTTP session and try to initialise the Gemini model."""
        self.gemini_model = None
        self.yolo_model = None
        self.session = self._create_session()
        self._initialize_gemini()

    def _create_session(self):
        """Return a ``requests.Session`` with JSON headers and transport retries."""
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'NanoBanana/1.0',
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        })
        adapter = requests.adapters.HTTPAdapter(
            max_retries=requests.adapters.Retry(
                total=3,
                backoff_factor=0.3,
                status_forcelist=[500, 502, 503, 504]
            )
        )
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    def _log_api_response(self, response, api_name="API"):
        """Log the status of an HTTP response (and a body excerpt on failure)."""
        logger.info(f"{api_name} Response: {response.status_code}")
        if response.status_code != 200:
            logger.error(f"{api_name} Error: {response.status_code} - {response.text[:500]}")
        else:
            logger.info(f"{api_name} Success: Request completed")

    def _validate_api_response(self, response, expected_keys=None):
        """Check that a decoded response is non-empty and has the expected keys.

        Returns:
            ``(is_valid, message)``.
        """
        if not response:
            return False, "Empty response"
        if expected_keys:
            missing_keys = [key for key in expected_keys if key not in response]
            if missing_keys:
                return False, f"Missing keys: {missing_keys}"
        return True, "Valid response"

    def _initialize_gemini(self):
        """Instantiate the Gemini model, falling back to the Pro variant.

        Leaves ``self.gemini_model`` as None when no key is configured or both
        constructions fail.
        """
        if not GEMINI_API_KEY:
            logger.warning("No API key - demo mode")
            return
        try:
            self.gemini_model = genai.GenerativeModel('gemini-2.5-flash')
            # Message now names the model that is actually loaded (was "1.5").
            logger.info("Nano Banana (Gemini 2.5 Flash) initialized")
        except Exception as e:
            logger.error(f"Failed to initialize Gemini: {e}")
            try:
                self.gemini_model = genai.GenerativeModel('gemini-2.5-pro')
                logger.info("Fallback: Gemini 2.5 Pro initialized")
            except Exception as e2:
                logger.error(f"Fallback also failed: {e2}")

    def _resize_image_if_needed(self, image):
        """Downscale ``image`` so neither side exceeds ``MAX_IMAGE_SIZE``.

        The aspect ratio is preserved. Images already within the limit are
        returned unchanged (same object).
        """
        if image.width > MAX_IMAGE_SIZE or image.height > MAX_IMAGE_SIZE:
            ratio = min(MAX_IMAGE_SIZE / image.width, MAX_IMAGE_SIZE / image.height)
            # Clamp to 1px so extreme aspect ratios never round down to 0.
            new_size = (
                max(1, int(image.width * ratio)),
                max(1, int(image.height * ratio)),
            )
            return image.resize(new_size, Image.Resampling.LANCZOS)
        return image

    def _apply_rate_limiting(self):
        """Block for ``RATE_LIMIT_DELAY`` seconds before a generation request."""
        time.sleep(RATE_LIMIT_DELAY)

    def _generate_with_model(self, image, prompt, style, editing_mode, model_name, api_keys):
        """Dispatch generation to the backend registered for ``model_name``.

        Returns:
            ``(image_or_None, message)``.
        """
        model_info = IMAGE_MODELS.get(model_name)
        if not model_info:
            return None, f"Unknown model: {model_name}"
        api_type = model_info["api"]
        if api_type == "freepik":
            return self._generate_with_freepik(image, prompt, style, editing_mode, api_keys.get("freepik"))
        return None, f"Unsupported API type: {api_type}"

    def _generate_with_freepik(self, image, prompt, style, editing_mode, api_key):
        """Request an image from the Freepik text-to-image endpoint.

        Only the enhanced text prompt is sent; ``image`` is not part of the
        request payload and is kept in the signature for interface parity.

        Args:
            image: Source image (unused by the request).
            prompt: User prompt.
            style: Style key forwarded to the prompt builder and the payload.
            editing_mode: Mode key forwarded to the prompt builder and payload.
            api_key: Freepik API key.

        Returns:
            ``(PIL image, message)`` on success, ``(None, message)`` otherwise.
            Never raises: unexpected errors are logged and reported in the
            message.
        """
        if not api_key:
            return None, "Freepik API key not provided"
        try:
            full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
            logger.info(f"Enhanced prompt (first 200 chars): {full_prompt[:200]}...")
            payload = {
                "prompt": full_prompt,
                "num_images": 1,
                "image": {
                    "size": "1024x1024"
                },
                "style": style,
                "mode": editing_mode
            }
            # Send the key as a per-request header. Writing it into
            # self.session.headers would keep one user's key on the shared
            # session for every later request.
            response = self.session.post(
                self._FREEPIK_URL,
                json=payload,
                headers={"x-freepik-api-key": api_key},
                timeout=60,
            )
            self._log_api_response(response, "Freepik")
            if response.status_code == 200:
                return self._parse_freepik_success(response.json())
            return self._describe_freepik_error(response)
        except Exception as e:
            logger.error(f"Freepik generation failed: {e}")
            return None, f"Freepik error: {str(e)}"

    def _parse_freepik_success(self, result):
        """Turn a decoded HTTP-200 Freepik payload into ``(image, message)``."""
        logger.info(f"Freepik response data keys: {list(result.keys())}")
        is_valid, validation_msg = self._validate_api_response(result, ['data'])
        if not is_valid:
            logger.warning(f"Response validation failed: {validation_msg}")

        data = result.get('data')
        if data:
            data_item = data[0]
            # Inline base64 payload takes precedence over a download URL.
            if 'base64' in data_item:
                try:
                    image_data = base64.b64decode(data_item['base64'])
                    generated_image = Image.open(io.BytesIO(image_data))
                    return generated_image, "π Generated with Freepik Gemini 2.5 Flash"
                except Exception as e:
                    logger.error(f"Error decoding base64 image: {e}")
                    return None, f"Error processing generated image: {str(e)}"
            image_url = data_item.get('url') or data_item.get('image_url')
            if not image_url:
                return None, "Freepik API: No image data or URL found in response"
            return self._download_image(image_url)

        # Some responses may carry the URL at the top level instead.
        image_url = result.get('image_url') or result.get('url')
        if image_url:
            return self._download_image(image_url)
        return None, f"Freepik API: Unexpected response format - available keys: {list(result.keys())}"

    def _download_image(self, image_url):
        """Fetch ``image_url`` and return ``(image, message)``.

        A non-200 download always yields an explicit ``(None, message)`` so
        callers can unpack the result safely.
        """
        img_response = requests.get(image_url, timeout=30)
        if img_response.status_code == 200:
            generated_image = Image.open(io.BytesIO(img_response.content))
            return generated_image, "π Generated with Freepik API"
        return None, f"Failed to download image: {img_response.status_code}"

    def _describe_freepik_error(self, response):
        """Map a non-200 Freepik response to ``(None, user-facing message)``."""
        status = response.status_code
        if status == 401:
            logger.error("Freepik API authentication failed")
            return None, "π Invalid or expired Freepik API key. Please check your API key."
        if status == 403:
            logger.error("Freepik API access forbidden")
            return None, "π« Freepik API access forbidden. Please check your subscription plan and permissions."
        if status == 429:
            logger.warning("Freepik API rate limit exceeded")
            return None, "β±οΈ Freepik API rate limit exceeded. Please wait before making another request."
        if status == 400:
            logger.error("Freepik API bad request")
            return None, "π Invalid request parameters. Please check your prompt and settings."
        if status >= 500:
            logger.error(f"Freepik API server error: {status}")
            return None, f"π οΈ Freepik API server error ({status}). Please try again later."
        try:
            error_message = response.json().get('message', 'Unknown error')
        except (ValueError, AttributeError):
            # Body is not JSON, or not a JSON object: fall back to raw text.
            error_message = response.text[:200]
        logger.error(f"Freepik API error {status}: {error_message}")
        return None, f"β Freepik API error ({status}): {error_message}"

    def _build_enhanced_prompt(self, prompt, style, editing_mode):
        """Expand the user prompt with style- and mode-specific guidance.

        Unknown ``style`` values fall back to "realistic"; unknown
        ``editing_mode`` values fall back to "edit".

        Returns:
            A multi-line prompt string without leading/trailing whitespace.
        """
        style_info = self._STYLE_MODIFIERS.get(style, self._STYLE_MODIFIERS["realistic"])
        mode_info = self._MODE_DESCRIPTIONS.get(editing_mode, self._MODE_DESCRIPTIONS["edit"])
        lines = [
            f"{mode_info['action']}: {prompt}",
            f"Style Requirements: {style_info['base']}",
            f"Materials: {style_info['materials']}",
            f"Lighting: {style_info['lighting']}",
            f"Details: {style_info['details']}",
            f"Focus: {mode_info['focus']}",
            f"Desired Outcome: {mode_info['outcome']}",
            "Quality: High-resolution, professional architecture rendering, detailed construction imagery",
            "Context: Modern construction, building architecture, structural engineering",
        ]
        return "\n".join(lines).strip()

    def load_yolo_optional(self):
        """Load a YOLO model if ultralytics is installed.

        Weight selection order: a local ``best.pt``, then a local
        ``yolov11n.pt`` (the name this code used previously), otherwise the
        published Ultralytics weight name ``yolo11n.pt``.

        Returns:
            True when a model was loaded into ``self.yolo_model``.
        """
        if not yolo_available:
            return False
        try:
            for candidate in ('best.pt', 'yolov11n.pt'):
                if os.path.exists(candidate):
                    model_path = candidate
                    break
            else:
                model_path = 'yolo11n.pt'
            self.yolo_model = YOLO(model_path)
            return True
        except Exception as e:
            logger.warning(f"YOLO loading failed: {e}")
            return False

    def detect_structures_optional(self, image):
        """Run YOLO on ``image`` and return ``(annotated_image, message)``.

        When the model is unavailable or detection fails, the original image
        is returned together with an explanatory message.
        """
        if not self.yolo_model and not self.load_yolo_optional():
            return image, "Structure detection unavailable (optional feature)"
        try:
            # Normalise to 3-channel RGB first: RGBA / greyscale / palette
            # inputs would otherwise make the RGB->BGR conversion fail.
            img_cv = cv2.cvtColor(np.array(image.convert('RGB')), cv2.COLOR_RGB2BGR)
            results = self.yolo_model(img_cv)
            annotated_img = results[0].plot()
            annotated_pil = Image.fromarray(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
            return annotated_pil, "Structures detected"
        except Exception as e:
            return image, f"Detection failed: {str(e)}"

    def nano_banana_edit_with_model(self, image, prompt, style="realistic", editing_mode="complete", model_name="Freepik Gemini 2.5 Flash", api_keys=None):
        """Transform ``image`` with the selected model, falling back to demo mode.

        Args:
            image: Source PIL image.
            prompt: Transformation prompt; None/blank is rejected.
            style: Style key.
            editing_mode: Mode key.
            model_name: Key into ``IMAGE_MODELS``.
            api_keys: Optional mapping of API name to key.

        Returns:
            ``(image, message)``. Never raises; any failure routes to
            ``_fallback_to_gemini_demo``.
        """
        if not prompt or not prompt.strip():
            return image, "Please provide a transformation prompt"
        if api_keys is None:
            api_keys = {}
        logger.info(f"Processing with {model_name}: {editing_mode} mode, {style} style, prompt: {prompt[:50]}...")
        try:
            image = self._resize_image_if_needed(image)
            self._apply_rate_limiting()
            result, message = self._generate_with_model(image, prompt, style, editing_mode, model_name, api_keys)
            if result is not None:
                return result, message
            logger.warning(f"{model_name} failed: {message}, falling back to demo mode")
            return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
        except Exception as e:
            logger.error(f"Processing failed: {e}")
            return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)

    def _build_analysis_prompt(self, prompt, style, editing_mode):
        """Compose the text instruction sent to Gemini for ``editing_mode``."""
        if editing_mode == "complete":
            base_prompt = self._get_completion_prompt(style)
            return f"Analyze this construction image and describe how to {base_prompt.lower()} User request: {prompt}. Provide detailed description of the completed construction."
        if editing_mode == "edit":
            return f"Analyze this construction image and describe how to edit it: {prompt}. Explain the changes that would transform this image."
        if editing_mode == "blend":
            return f"Analyze this construction image and describe how to blend and transform it: {prompt}. Explain how to create a seamless fusion."
        return f"Analyze this construction image: {prompt}"

    def _fallback_to_gemini_demo(self, image, prompt, style, editing_mode):
        """Produce a demo overlay, optionally backed by a Gemini text analysis.

        Without a Gemini model the overlay is drawn directly. Otherwise the
        image and an analysis prompt are sent to Gemini (up to
        ``API_RETRY_COUNT`` attempts with exponential sleeps) and the overlay
        is drawn once text comes back.

        Returns:
            ``(image, message)``; on failure the input image is returned with
            an error message. Never raises.
        """
        if not self.gemini_model:
            demo_result = self._create_nano_banana_demo(image, f"Demo: {editing_mode} mode with {style} style", style, editing_mode)
            return demo_result, f"π Demo Mode: {editing_mode} mode with {style} style"
        try:
            analysis_prompt = self._build_analysis_prompt(prompt, style, editing_mode)

            # Encode once: the payload is identical on every retry.
            buffered = io.BytesIO()
            image.save(buffered, format='PNG')
            image_bytes = buffered.getvalue()
            if len(image_bytes) > 10 * 1024 * 1024:
                return image, "Image too large. Please use a smaller image."
            image_part = {
                'mime_type': 'image/png',
                'data': base64.b64encode(image_bytes).decode('utf-8')
            }

            for attempt in range(API_RETRY_COUNT):
                is_last_attempt = attempt == API_RETRY_COUNT - 1
                try:
                    try:
                        response = self.gemini_model.generate_content([analysis_prompt, image_part])
                    except Exception as img_error:
                        if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
                            logger.warning("API key doesn't support image processing, using demo mode")
                            processed_image = self._create_nano_banana_demo(image, f"Demo mode: {prompt}", style, editing_mode)
                            return processed_image, f"π Nano Banana Demo: {editing_mode} mode with {style} style (Image analysis unavailable)"
                        raise
                    if hasattr(response, 'text') and response.text:
                        processed_image = self._create_nano_banana_demo(image, response.text, style, editing_mode)
                        return processed_image, f"β¨ Nano Banana Analysis: {editing_mode} mode with {style} style"
                    if is_last_attempt:
                        break
                    time.sleep(2 ** attempt)
                except Exception as retry_error:
                    if is_last_attempt:
                        raise
                    logger.warning(f"Attempt {attempt + 1} failed: {retry_error}")
                    time.sleep(2 ** attempt)
            # Also reached when API_RETRY_COUNT is 0, so a tuple is always returned.
            return image, "No analysis generated - please try a different prompt"
        except Exception as e:
            logger.error(f"Nano Banana error: {e}")
            if "quota" in str(e).lower() or "limit" in str(e).lower():
                return image, "β±οΈ API rate limit reached. Please try again in a few minutes."
            return image, f"Processing failed: {str(e)}"

    def _create_nano_banana_demo(self, image, analysis_text, style, editing_mode):
        """Draw a coloured border and caption banner over a copy of ``image``.

        ``analysis_text`` is accepted for interface compatibility but is not
        rendered. On any failure the original image is returned.
        """
        try:
            demo_image = image.copy()
            style_colors = {
                "realistic": (34, 139, 34),
                "futuristic": (0, 191, 255),
                "artistic": (255, 20, 147)
            }
            color = style_colors.get(style, (100, 100, 100))
            width, height = demo_image.size
            overlay = Image.new('RGBA', (width, height), (0, 0, 0, 0))
            overlay_draw = ImageDraw.Draw(overlay)
            border_width = 8
            overlay_draw.rectangle([0, 0, width, height], outline=color + (180,), width=border_width)
            text_bg_height = 60
            overlay_draw.rectangle([0, height - text_bg_height, width, height], fill=color + (200,))
            try:
                font = ImageFont.load_default()
                text = f"π Nano Banana: {editing_mode.title()} - {style.title()}"
                text_width = overlay_draw.textlength(text, font=font)
                text_x = (width - text_width) // 2
                overlay_draw.text((text_x, height - 35), text, fill=(255, 255, 255), font=font)
            except Exception as e:
                # The caption is best-effort (e.g. the font cannot render the
                # text); keep the border/banner and record why it was skipped.
                logger.debug(f"Demo caption skipped: {e}")
            demo_image = Image.alpha_composite(demo_image.convert('RGBA'), overlay).convert('RGB')
            return demo_image
        except Exception as e:
            logger.warning(f"Demo overlay failed: {e}")
            return image

    def _get_completion_prompt(self, style):
        """Return the completion instruction for ``style`` (default: realistic)."""
        prompts = {
            "realistic": "Complete this unfinished construction realistically with proper materials and architectural details.",
            "futuristic": "Transform this construction into a futuristic high-tech building with modern elements.",
            "artistic": "Complete this construction with creative artistic elements and unique design features."
        }
        return prompts.get(style, prompts["realistic"])

    def generate_voice_optional(self, text):
        """Synthesize ``text`` with ElevenLabs; return the audio or None."""
        if not elevenlabs_available or not ELEVENLABS_API_KEY:
            return None
        try:
            audio = generate(text=text, voice="Rachel", model="eleven_monolingual_v1")
            return audio
        except Exception as e:
            logger.warning(f"Voice generation failed: {e}")
            return None

    def create_comparison(self, original, processed):
        """Build a side-by-side BEFORE/AFTER image.

        Both inputs are resized to a common size capped at 512x512.

        Returns:
            The comparison image, or None when an input is missing or
            composition fails.
        """
        if not original or not processed:
            return None
        try:
            height = min(original.height, processed.height, 512)
            width = min(original.width, processed.width, 512)
            orig_resized = original.resize((width, height), Image.Resampling.LANCZOS)
            proc_resized = processed.resize((width, height), Image.Resampling.LANCZOS)
            comparison = Image.new('RGB', (width * 2 + 20, height + 40), 'white')
            comparison.paste(orig_resized, (0, 20))
            comparison.paste(proc_resized, (width + 20, 20))
            draw = ImageDraw.Draw(comparison)
            try:
                font = ImageFont.load_default()
                draw.text((width // 2 - 30, 5), "BEFORE", fill='black', font=font)
                draw.text((width + 20 + width // 2 - 30, 5), "AFTER", fill='black', font=font)
            except Exception as e:
                # Labels are cosmetic; return the unlabelled comparison.
                logger.debug(f"Comparison labels skipped: {e}")
            return comparison
        except Exception as e:
            logger.warning(f"Comparison creation failed: {e}")
            return None
# Single shared backend instance used by the Gradio handlers below; it is
# constructed at import time.
app = NanoBananaApp()
def process_nano_banana_with_freepik(image, prompt, style, editing_mode, freepik_key, enable_detection, enable_voice):
    """Gradio handler: transform an image, preferring the Freepik backend.

    Args:
        image: Uploaded PIL image (or None).
        prompt: Transformation prompt.
        style: Style key.
        editing_mode: Mode key.
        freepik_key: Key typed into the UI; falls back to ``FREEPIK_API_KEY``.
        enable_detection: Whether to run optional structure detection.
        enable_voice: Whether to generate optional narration.

    Returns:
        A 6-tuple ``(original, detection, result, comparison, status, audio)``
        matching the handler's Gradio outputs. Never raises.
    """
    if not image:
        return None, None, None, None, "π· Please upload an image to get started", None
    if not prompt or not prompt.strip():
        return image, image, image, None, "π Please provide a transformation prompt", None

    # Pasted keys often carry stray whitespace or a trailing newline; strip
    # them so they are neither rejected by validation nor sent in the header.
    user_key = (freepik_key or "").strip()
    final_freepik_key = user_key or FREEPIK_API_KEY
    if final_freepik_key:
        is_valid, validation_msg = validate_api_key(final_freepik_key, "Freepik")
        if not is_valid:
            return image, image, image, None, f"π API Key Error: {validation_msg}", None
        key_source = "User-provided" if user_key else "Environment"
        logger.info(f"{key_source} Freepik API key validated")
    user_api_keys = {
        "freepik": final_freepik_key
    }
    try:
        detection_result = image
        detection_msg = "Detection disabled"
        if enable_detection:
            detection_result, detection_msg = app.detect_structures_optional(image)
        # Try Freepik first; the backend falls back to demo mode on failure.
        processed_image, process_msg = app.nano_banana_edit_with_model(
            image, prompt, style, editing_mode, "Freepik Gemini 2.5 Flash", user_api_keys
        )
        # If the image came back unchanged because of an API-key problem,
        # fall back to the Gemini demo mode explicitly.
        if processed_image == image and "API key" in process_msg:
            processed_image, process_msg = app._fallback_to_gemini_demo(image, prompt, style, editing_mode)
        comparison = app.create_comparison(image, processed_image)
        audio = None
        voice_msg = ""
        if enable_voice:
            if processed_image != image:
                voice_text = f"Using Gemini 2.5 Flash, the AI processed this construction image with {editing_mode} mode and {style} style. Request: {prompt}"
                audio = app.generate_voice_optional(voice_text)
                voice_msg = "π Voice generated" if audio else "π Voice unavailable"
            else:
                voice_msg = "π Voice skipped (no changes)"
        status = f"π {process_msg}\nπ Detection: {detection_msg}\nπ΅ Voice: {voice_msg}"
        return image, detection_result, processed_image, comparison, status, audio
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Processing error: {e}")
        return image, image, image, None, f"β Error: {str(e)}", None
def process_nano_banana(image, prompt, style, editing_mode, enable_detection, enable_voice):
    """Gradio-style handler that uses only the environment-configured key.

    Variant of the Freepik handler without a user-supplied key field.

    Args:
        image: Uploaded PIL image (or None).
        prompt: Transformation prompt.
        style: Style key.
        editing_mode: Mode key.
        enable_detection: Whether to run optional structure detection.
        enable_voice: Whether to generate optional narration.

    Returns:
        A 6-tuple ``(original, detection, result, comparison, status, audio)``.
        Never raises.
    """
    if not image:
        return None, None, None, None, "π· Please upload an image to get started", None
    if not prompt or not prompt.strip():
        return image, image, image, None, "π Please provide a transformation prompt", None
    try:
        detection_result = image
        detection_msg = "Detection disabled"
        if enable_detection:
            detection_result, detection_msg = app.detect_structures_optional(image)
        # The backend exposes nano_banana_edit_with_model(); the previously
        # called app.nano_banana_edit() does not exist, so every request ended
        # in the generic error branch below.
        processed_image, process_msg = app.nano_banana_edit_with_model(
            image, prompt, style, editing_mode, api_keys={"freepik": FREEPIK_API_KEY}
        )
        if processed_image == image and "API key required" in process_msg:
            return image, detection_result, image, None, f"π {process_msg}", None
        comparison = app.create_comparison(image, processed_image)
        audio = None
        voice_msg = ""
        if enable_voice:
            if processed_image != image:
                voice_text = f"Nano Banana analyzed this construction image using {editing_mode} mode with {style} style. The AI has processed the request: {prompt}"
                audio = app.generate_voice_optional(voice_text)
                voice_msg = "π Voice generated" if audio else "π Voice unavailable"
            else:
                voice_msg = "π Voice skipped (no changes)"
        # Only report the optional features that were actually requested.
        status_parts = [f"π {process_msg}"]
        if enable_detection:
            status_parts.append(f"π Detection: {detection_msg}")
        if enable_voice:
            status_parts.append(f"π΅ Voice: {voice_msg}")
        status = "\n".join(status_parts)
        return image, detection_result, processed_image, comparison, status, audio
    except Exception as e:
        logger.exception(f"Processing error: {e}")
        return image, image, image, None, f"β Unexpected error: {str(e)}", None
# Stylesheet passed to gr.Blocks: animated gradient banner plus the green
# frame applied to elements with the "feature-highlight" class.
custom_css = """
.nano-banner {
background: linear-gradient(45deg, #ff6b6b, #feca57, #48dbfb, #ff9ff3);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
padding: 20px;
border-radius: 10px;
text-align: center;
margin-bottom: 20px;
}
@keyframes gradient {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
.feature-highlight {
border: 2px solid #4CAF50;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
}
"""

# HTML notice rendered under the banner; empty when a Freepik key was loaded
# from the environment.
demo_mode_notice = "" if FREEPIK_API_KEY else """
<div style="background: #fff3e0; border: 1px solid #ff9800; border-radius: 8px; padding: 15px; margin: 10px 0;">
<h3>π Nano Banana - Competition Mode</h3>
<p><strong>For real image generation:</strong> Add your <strong>FREEPIK_API_KEY</strong> in the API settings below</p>
<p>Get your key from: <a href="https://www.freepik.com/api" target="_blank">Freepik API Portal</a></p>
<p><em>Demo mode with visual overlay available without API key</em></p>
</div>
"""
# Gradio UI definition. Components are created in layout order inside the
# nested context managers; the button is wired to the Freepik handler at the
# end of the block.
# NOTE(review): the source this was recovered from had lost its indentation,
# so the nesting below is reconstructed — confirm against the deployed layout.
with gr.Blocks(title="π Nano Banana - Dynamic Image Creation", theme=gr.themes.Soft(), css=custom_css) as demo:
    # Header banner, followed by the demo-mode notice (empty when a Freepik
    # key is configured in the environment).
    gr.HTML(f"""
<div class="nano-banner">
<h1>π Nano Banana: Dynamic Image Creation</h1>
<p><strong>Powered by Gemini 2.5 Flash Image Preview</strong></p>
<p>Edit with words β’ Blend realities β’ Transform visuals</p>
</div>
{demo_mode_notice}
""")
    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### π¨ Core Nano Banana Features")
                # type="pil" so handlers receive a PIL image.
                image_input = gr.Image(label="Upload Image", type="pil", height=300)
                prompt_input = gr.Textbox(
                    label="Transformation Prompt",
                    placeholder="Describe how you want to transform this image...",
                    lines=3
                )
                editing_mode = gr.Radio(
                    choices=["complete", "edit", "blend"],
                    value="edit",
                    label="Nano Banana Mode",
                    info="Complete: Finish construction β’ Edit: Modify image β’ Blend: Fuse elements"
                )
                style_selector = gr.Radio(
                    choices=["realistic", "futuristic", "artistic"],
                    value="realistic",
                    label="Style",
                    info="Choose the aesthetic approach"
                )
            with gr.Group():
                gr.Markdown("### π Nano Banana - Gemini 2.5 Flash")
                # Model description comes from the IMAGE_MODELS registry.
                gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
                with gr.Accordion("π Freepik API Key (Required for Image Generation)", open=True):
                    # Password-type textbox so the key is masked in the UI.
                    freepik_key = gr.Textbox(
                        label="Freepik API Key",
                        placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
                        type="password",
                        info="Get your key from: https://www.freepik.com/api"
                    )
            with gr.Group():
                gr.Markdown("### βοΈ Optional Features")
                enable_detection = gr.Checkbox(
                    label="π Structure Detection (YOLO)",
                    value=False,
                    info="Optional: Detect and highlight structures"
                )
                enable_voice = gr.Checkbox(
                    label="π Voice Narration (ElevenLabs)",
                    value=False,
                    info="Optional: Generate audio description"
                )
            process_btn = gr.Button("π Transform with Nano Banana", variant="primary", size="lg")
            status_output = gr.Textbox(label="Status", interactive=False, lines=4)
        # Right column: one tab per image output of the handler.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("π· Original"):
                    original_output = gr.Image(label="Original Image", height=400)
                with gr.Tab("π Detection (Optional)"):
                    detection_output = gr.Image(label="Structure Detection", height=400)
                with gr.Tab("π Nano Banana Result"):
                    result_output = gr.Image(label="Transformed Image", height=400, elem_classes=["feature-highlight"])
                with gr.Tab("π Before/After"):
                    comparison_output = gr.Image(label="Comparison View", height=400)
    with gr.Row():
        audio_output = gr.Audio(label="π Voice Description (Optional)", visible=True)
    with gr.Row():
        # Each example row follows the order of `inputs` below:
        # image path, prompt, style, editing mode, Freepik key, detection, voice.
        gr.Examples(
            examples=[
                ["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "", False, False],
                ["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "", True, False],
                ["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "", False, True],
                ["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "", False, False],
                ["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "", True, False],
                ["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "", False, False],
                ["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "", False, False],
                ["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "", False, True]
            ],
            inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
            label="π― Try These Examples"
        )
    gr.Markdown("""
### π Competition Features - Gemini 2.5 Flash
- **π Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview - the official competition model
- **π¨ Word-Based Editing**: Transform construction images with natural language prompts
- **π Reality Blending**: Complete unfinished buildings, edit existing structures, blend architectural elements
- **β‘ Real-time Processing**: Fast image transformations powered by Gemini 2.5 Flash
- **π οΈ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
- **π Smart Fallbacks**: Demo mode with visual overlays when API key not available
""")
    # Wire the button: input order matches the handler's parameters and the
    # output order matches the 6-tuple it returns.
    process_btn.click(
        fn=process_nano_banana_with_freepik,
        inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
        outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
    )
if __name__ == "__main__":
    # Launch only when run as a script: bind on all interfaces, port 7860,
    # and request a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)