BuildTheFuture / app.py
Abs6187's picture
Upload 16 files
e98d661 verified
raw
history blame
16.7 kB
import gradio as gr
import google.generativeai as genai
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import base64
import io
import logging
import time
from typing import Optional, Tuple
import warnings
# Suppress every Python warning process-wide so third-party deprecation
# noise does not flood the application log.
warnings.filterwarnings("ignore")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Credentials are read from the environment; either may be absent (None).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

# Longest allowed image side in pixels before an upload is downscaled.
MAX_IMAGE_SIZE = 1024
# Seconds slept before each Gemini request.
RATE_LIMIT_DELAY = 3
# Maximum number of attempts for a single Gemini request.
API_RETRY_COUNT = 3

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
    logger.info("Gemini API configured")
else:
    logger.warning("GEMINI_API_KEY not found - using demo mode")

# ElevenLabs is optional. When the import fails, bind the names to None so
# later code can test for availability instead of hitting a NameError.
try:
    from elevenlabs import generate, set_api_key
except ImportError:
    generate = None
    set_api_key = None
    logger.info("ElevenLabs not available - optional feature")
else:
    if ELEVENLABS_API_KEY:
        set_api_key(ELEVENLABS_API_KEY)
        logger.info("ElevenLabs configured")
    else:
        logger.info("ElevenLabs not configured - optional feature")

# Ultralytics YOLO is optional; `yolo_available` records whether it imported.
try:
    from ultralytics import YOLO
    yolo_available = True
except ImportError:
    yolo_available = False
    logger.info("YOLO not available - optional feature")
class NanoBananaApp:
    """Backend for the image-transformation UI.

    Wraps a Gemini generative model for prompt-driven image editing and
    exposes two optional helpers: YOLO structure detection and ElevenLabs
    voice narration. Every public method is best-effort: failures are
    logged and reported through the returned status string (or ``None``)
    rather than raised to the caller.
    """

    # Model identifier handed to ``genai.GenerativeModel``.
    # NOTE(review): the UI advertises "Gemini 2.5 Flash Image" while this is
    # a 2.0 experimental id - confirm which model is actually intended.
    GEMINI_MODEL_NAME = 'gemini-2.0-flash-exp'

    # Largest encoded PNG (in bytes) that will be sent to the API.
    MAX_UPLOAD_BYTES = 10 * 1024 * 1024

    # Base instruction per style, used by the "complete" editing mode.
    _COMPLETION_PROMPTS = {
        "realistic": "Complete this unfinished construction realistically with proper materials and architectural details.",
        "futuristic": "Transform this construction into a futuristic high-tech building with modern elements.",
        "artistic": "Complete this construction with creative artistic elements and unique design features."
    }

    def __init__(self):
        """Create the app and try to initialise the Gemini model."""
        self.gemini_model = None
        self.yolo_model = None
        self._initialize_gemini()

    def _initialize_gemini(self):
        """Instantiate the Gemini model; leave it ``None`` without an API key or on error."""
        if not GEMINI_API_KEY:
            logger.warning("No API key - demo mode")
            return
        try:
            self.gemini_model = genai.GenerativeModel(self.GEMINI_MODEL_NAME)
            # Log the identifier that is really in use instead of a marketing name.
            logger.info("Nano Banana initialized with model %s", self.GEMINI_MODEL_NAME)
        except Exception as e:
            logger.error(f"Failed to initialize Gemini: {e}")

    def _resize_image_if_needed(self, image):
        """Return ``image`` downscaled so neither side exceeds ``MAX_IMAGE_SIZE``.

        The aspect ratio is preserved. Images already within the limit are
        returned unchanged (same object).
        """
        if image.width <= MAX_IMAGE_SIZE and image.height <= MAX_IMAGE_SIZE:
            return image
        ratio = min(MAX_IMAGE_SIZE / image.width, MAX_IMAGE_SIZE / image.height)
        # Clamp to 1px: extreme aspect ratios would otherwise truncate to 0
        # and make ``resize`` fail.
        new_size = (
            max(1, int(image.width * ratio)),
            max(1, int(image.height * ratio)),
        )
        return image.resize(new_size, Image.Resampling.LANCZOS)

    def _apply_rate_limiting(self):
        """Block for ``RATE_LIMIT_DELAY`` seconds before an API request."""
        time.sleep(RATE_LIMIT_DELAY)

    def load_yolo_optional(self):
        """Load YOLO weights into ``self.yolo_model``.

        Returns:
            True when a model was loaded, False when ultralytics is missing
            or loading failed.
        """
        if not yolo_available:
            return False
        try:
            # Prefer weights shipped next to the app. Otherwise fall back to
            # 'yolo11n.pt', believed to be the name Ultralytics publishes the
            # YOLO11 nano weights under (TODO confirm); the 'yolov11n.pt'
            # spelling is only honoured if such a file exists locally.
            local_weights = [p for p in ('best.pt', 'yolov11n.pt') if os.path.exists(p)]
            model_path = local_weights[0] if local_weights else 'yolo11n.pt'
            self.yolo_model = YOLO(model_path)
            return True
        except Exception as e:
            logger.warning(f"YOLO loading failed: {e}")
            return False

    def detect_structures_optional(self, image):
        """Run YOLO on ``image`` and return ``(annotated_image, status_message)``.

        When detection is unavailable or fails, the input image is returned
        unchanged together with an explanatory message.
        """
        if not self.yolo_model and not self.load_yolo_optional():
            return image, "Structure detection unavailable (optional feature)"
        try:
            # Force 3-channel RGB first: RGBA / palette / grayscale inputs
            # would make the RGB->BGR conversion fail.
            rgb_pixels = np.array(image.convert('RGB'))
            img_cv = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
            results = self.yolo_model(img_cv)
            annotated_img = results[0].plot()
            annotated_pil = Image.fromarray(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
            return annotated_pil, "Structures detected"
        except Exception as e:
            return image, f"Detection failed: {str(e)}"

    def _build_prompt(self, prompt, style, editing_mode):
        """Compose the text instruction sent to Gemini for the given mode."""
        if editing_mode == "complete":
            return f"{self._get_completion_prompt(style)} {prompt}"
        if editing_mode == "edit":
            return f"Edit this image: {prompt}. Make the changes look natural and maintain image quality."
        if editing_mode == "blend":
            return f"Blend and transform this image: {prompt}. Create a seamless fusion of elements."
        return prompt

    @staticmethod
    def _decode_inline_image(payload):
        """Turn an ``inline_data.data`` payload into an RGB PIL image.

        The payload is tried as raw image bytes first; if PIL cannot
        identify it, it is treated as base64 text (the only form the
        previous implementation handled).
        """
        if isinstance(payload, str):
            payload = base64.b64decode(payload)
        try:
            return Image.open(io.BytesIO(payload)).convert('RGB')
        except OSError:
            decoded = base64.b64decode(payload)
            return Image.open(io.BytesIO(decoded)).convert('RGB')

    def _extract_image(self, response):
        """Return the first inline image found in ``response``, or ``None``."""
        candidates = getattr(response, 'candidates', None)
        if not candidates:
            return None
        for part in candidates[0].content.parts:
            inline = getattr(part, 'inline_data', None)
            payload = getattr(inline, 'data', None) if inline else None
            if payload:
                return self._decode_inline_image(payload)
        return None

    def nano_banana_edit(self, image, prompt, style="realistic", editing_mode="complete"):
        """Transform ``image`` with Gemini according to ``prompt``.

        Args:
            image: PIL image to transform.
            prompt: Free-text description of the desired change.
            style: Key into the completion prompts; only used by "complete" mode.
            editing_mode: "complete", "edit" or "blend"; any other value
                sends ``prompt`` verbatim.

        Returns:
            ``(result_image, status_message)``. On every failure path the
            caller's original ``image`` object is returned, so callers can
            detect "nothing changed" with an identity check.
        """
        if not self.gemini_model:
            if not GEMINI_API_KEY:
                return image, "πŸ”‘ API key required for Nano Banana. Add GEMINI_API_KEY to use this feature."
            return image, "Gemini Nano Banana not available"
        if not prompt or not prompt.strip():
            return image, "Please provide a transformation prompt"

        original = image
        try:
            working = self._resize_image_if_needed(image)
            self._apply_rate_limiting()
            full_prompt = self._build_prompt(prompt, style, editing_mode)

            # Encode once, outside the retry loop: the payload is identical
            # for every attempt. PNG has no `quality` setting, so none is passed.
            buffered = io.BytesIO()
            working.save(buffered, format='PNG')
            image_bytes = buffered.getvalue()
            if len(image_bytes) > self.MAX_UPLOAD_BYTES:
                return original, "Image too large. Please use a smaller image."
            request = [full_prompt, {'mime_type': 'image/png', 'data': image_bytes}]

            for attempt in range(API_RETRY_COUNT):
                is_last_attempt = attempt == API_RETRY_COUNT - 1
                try:
                    response = self.gemini_model.generate_content(request)
                    result_image = self._extract_image(response)
                    if result_image is not None:
                        return result_image, f"✨ Nano Banana: {editing_mode} mode with {style} style"
                except Exception as retry_error:
                    if is_last_attempt:
                        raise
                    logger.warning(f"Attempt {attempt + 1} failed: {retry_error}")
                if not is_last_attempt:
                    # Exponential backoff: 1s, 2s, 4s, ...
                    time.sleep(2 ** attempt)
            return original, "No image generated - please try a different prompt"
        except Exception as e:
            logger.error(f"Nano Banana error: {e}")
            error_text = str(e).lower()
            if "quota" in error_text or "limit" in error_text:
                return original, "⏱️ API rate limit reached. Please try again in a few minutes."
            return original, f"Processing failed: {str(e)}"

    def _get_completion_prompt(self, style):
        """Return the base completion prompt for ``style`` (defaults to "realistic")."""
        return self._COMPLETION_PROMPTS.get(style, self._COMPLETION_PROMPTS["realistic"])

    def generate_voice_optional(self, text):
        """Synthesize ``text`` with ElevenLabs.

        Returns:
            Whatever the ElevenLabs ``generate`` call returns, or ``None``
            when no API key is set, the client is not importable, or the
            call fails.
        """
        if not ELEVENLABS_API_KEY:
            return None
        # `generate` only exists at module level when the optional import
        # succeeded; look it up defensively instead of relying on NameError.
        synthesize = globals().get("generate")
        if not callable(synthesize):
            logger.warning("Voice generation failed: ElevenLabs client is not available")
            return None
        try:
            return synthesize(text=text, voice="Rachel", model="eleven_monolingual_v1")
        except Exception as e:
            logger.warning(f"Voice generation failed: {e}")
            return None

    def create_comparison(self, original, processed):
        """Build a side-by-side BEFORE/AFTER image.

        Both inputs are resized to a common size capped at 512x512 and
        pasted onto a white canvas with a 20px gap between them.

        Returns:
            The comparison image, or ``None`` when either input is missing
            or composition fails.
        """
        if original is None or processed is None:
            return None
        try:
            height = min(original.height, processed.height, 512)
            width = min(original.width, processed.width, 512)
            orig_resized = original.resize((width, height), Image.Resampling.LANCZOS)
            proc_resized = processed.resize((width, height), Image.Resampling.LANCZOS)

            comparison = Image.new('RGB', (width * 2 + 20, height + 40), 'white')
            comparison.paste(orig_resized, (0, 20))
            comparison.paste(proc_resized, (width + 20, 20))

            draw = ImageDraw.Draw(comparison)
            try:
                font = ImageFont.load_default()
                draw.text((width // 2 - 30, 5), "BEFORE", fill='black', font=font)
                draw.text((width + 20 + width // 2 - 30, 5), "AFTER", fill='black', font=font)
            except Exception as label_error:
                # Labels are cosmetic; keep the unlabeled comparison.
                logger.debug(f"Comparison labels skipped: {label_error}")
            return comparison
        except Exception as e:
            logger.warning(f"Comparison creation failed: {e}")
            return None
# Single module-level NanoBananaApp instance; process_nano_banana calls its
# methods. Constructing it attempts Gemini initialisation immediately.
app = NanoBananaApp()
def process_nano_banana(image, prompt, style, editing_mode, enable_detection, enable_voice):
    """Gradio click handler: run detection, editing, comparison and narration.

    Args:
        image: Uploaded PIL image, or None when nothing was uploaded.
        prompt: Transformation prompt typed by the user.
        style: Style key forwarded to the editor.
        editing_mode: Editing mode forwarded to the editor.
        enable_detection: Whether to run optional structure detection.
        enable_voice: Whether to generate optional voice narration.

    Returns:
        A 6-tuple ``(original, detection, result, comparison, status, audio)``
        matching the six output components wired to the button.
    """
    if image is None:
        return None, None, None, None, "πŸ“· Please upload an image to get started", None
    if not prompt or not prompt.strip():
        return image, image, image, None, "πŸ’­ Please provide a transformation prompt", None
    try:
        detection_result = image
        detection_msg = "Detection disabled"
        if enable_detection:
            detection_result, detection_msg = app.detect_structures_optional(image)

        processed_image, process_msg = app.nano_banana_edit(image, prompt, style, editing_mode)
        # Identity, not equality: PIL's `==` compares full pixel buffers,
        # while the question here is only "did the editor hand back the
        # input object untouched?".
        unchanged = processed_image is image
        if unchanged and "API key required" in process_msg:
            return image, detection_result, image, None, f"πŸ”‘ {process_msg}", None

        comparison = app.create_comparison(image, processed_image)

        audio = None
        voice_msg = ""
        if enable_voice:
            if unchanged:
                voice_msg = "πŸ”‡ Voice skipped (no changes)"
            else:
                voice_text = f"Image transformed using Nano Banana with {editing_mode} mode and {style} style. {prompt}"
                audio = app.generate_voice_optional(voice_text)
                voice_msg = "πŸ”Š Voice generated" if audio else "πŸ”‡ Voice unavailable"

        status_parts = [f"🍌 {process_msg}"]
        if enable_detection:
            status_parts.append(f"πŸ“ Detection: {detection_msg}")
        if enable_voice:
            status_parts.append(f"🎡 Voice: {voice_msg}")
        status = "\n".join(status_parts)
        return image, detection_result, processed_image, comparison, status, audio
    except Exception as e:
        # Top-level UI boundary: report the failure in the status box
        # instead of letting the handler raise.
        logger.error(f"Processing error: {e}")
        return image, image, image, None, f"❌ Unexpected error: {str(e)}", None
# Stylesheet handed to gr.Blocks(css=...). Defines the animated gradient
# banner (.nano-banner, driven by the `gradient` keyframes) and a
# green-bordered box (.feature-highlight) applied to the result image.
custom_css = """
.nano-banner {
background: linear-gradient(45deg, #ff6b6b, #feca57, #48dbfb, #ff9ff3);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
padding: 20px;
border-radius: 10px;
text-align: center;
margin-bottom: 20px;
}
@keyframes gradient {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
.feature-highlight {
border: 2px solid #4CAF50;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
}
"""
# HTML banner injected into the page header: empty when a Gemini key is
# configured, otherwise a notice telling the user how to provide one.
if GEMINI_API_KEY:
    demo_mode_notice = ""
else:
    demo_mode_notice = """
<div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;">
<h3>πŸ”‘ API Key Required</h3>
<p>To use Nano Banana features, add your <strong>GEMINI_API_KEY</strong> in the Space settings.</p>
<p>Get your free API key from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p>
</div>
"""
# Gradio UI definition. Multi-line string literals keep their content at
# column 0 so the emitted HTML/Markdown text is unchanged.
# NOTE(review): the source lost its indentation; nesting below is inferred
# from the `with` statements. In particular, the button and status box are
# assumed to sit in the left column after the "Optional Features" group -
# confirm against the deployed layout.
with gr.Blocks(
    title="🍌 Nano Banana - Dynamic Image Creation",
    theme=gr.themes.Soft(),
    css=custom_css,
) as demo:
    # Page header: animated banner plus the optional "API key required" notice.
    gr.HTML(f"""
<div class="nano-banner">
<h1>🍌 Nano Banana: Dynamic Image Creation</h1>
<p><strong>Powered by Gemini 2.5 Flash Image Preview</strong></p>
<p>Edit with words β€’ Blend realities β€’ Transform visuals</p>
</div>
{demo_mode_notice}
""")

    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 🎨 Core Nano Banana Features")
                image_input = gr.Image(label="Upload Image", type="pil", height=300)
                prompt_input = gr.Textbox(
                    label="Transformation Prompt",
                    placeholder="Describe how you want to transform this image...",
                    lines=3
                )
                editing_mode = gr.Radio(
                    choices=["complete", "edit", "blend"],
                    value="edit",
                    label="Nano Banana Mode",
                    info="Complete: Finish construction β€’ Edit: Modify image β€’ Blend: Fuse elements"
                )
                style_selector = gr.Radio(
                    choices=["realistic", "futuristic", "artistic"],
                    value="realistic",
                    label="Style",
                    info="Choose the aesthetic approach"
                )

            with gr.Group():
                gr.Markdown("### βš™οΈ Optional Features")
                enable_detection = gr.Checkbox(
                    label="πŸ” Structure Detection (YOLO)",
                    value=False,
                    info="Optional: Detect and highlight structures"
                )
                enable_voice = gr.Checkbox(
                    label="πŸ”Š Voice Narration (ElevenLabs)",
                    value=False,
                    info="Optional: Generate audio description"
                )

            process_btn = gr.Button("πŸš€ Transform with Nano Banana", variant="primary", size="lg")
            status_output = gr.Textbox(label="Status", interactive=False, lines=4)

        # Right column: one tab per output image.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("πŸ“· Original"):
                    original_output = gr.Image(label="Original Image", height=400)
                with gr.Tab("πŸ” Detection (Optional)"):
                    detection_output = gr.Image(label="Structure Detection", height=400)
                with gr.Tab("🍌 Nano Banana Result"):
                    result_output = gr.Image(label="Transformed Image", height=400, elem_classes=["feature-highlight"])
                with gr.Tab("πŸ“Š Before/After"):
                    comparison_output = gr.Image(label="Comparison View", height=400)

    with gr.Row():
        audio_output = gr.Audio(label="πŸ”Š Voice Description (Optional)", visible=True)

    # Clickable presets; each row fills the six input components in order.
    with gr.Row():
        gr.Examples(
            examples=[
                ["samples_imagen/skyscraper_construction.jpg", "Complete this modern skyscraper with glass facades", "futuristic", "complete", True, False],
                ["samples_imagen/suspension_bridge.jpg", "Add a golden sunset reflection on the bridge", "artistic", "edit", False, True],
                ["samples_imagen/highway_construction.jpg", "Transform into a smart highway with digital elements", "futuristic", "blend", True, False],
                ["samples_imagen/residential_construction.jpg", "Complete as a sustainable eco-friendly home", "realistic", "complete", False, False]
            ],
            inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
            label="🎯 Try These Examples"
        )

    gr.Markdown("""
### πŸ† Competition Features
- **Nano Banana Core**: Gemini 2.5 Flash Image for dynamic creation
- **Word-Based Editing**: Transform images with natural language
- **Reality Blending**: Seamlessly fuse different visual elements
- **Optional Enhancements**: Structure detection and voice narration
- **Real-time Processing**: Fast image transformations
""")

    # Wire the button: six inputs in, six outputs back, in the order
    # process_nano_banana accepts and returns them.
    process_btn.click(
        fn=process_nano_banana,
        inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
        outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
    )
if __name__ == "__main__":
    # Listen on all interfaces at port 7860 and request a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)