Spaces:

Abs6187
/

BuildTheFuture

Sleeping

App Files Files Community

BuildTheFuture / app.py

Abs6187

Upload 16 files

e98d661 verified 4 months ago

raw

history blame

16.7 kB

	import gradio as gr
	import google.generativeai as genai
	import cv2
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	import os
	import base64
	import io
	import logging
	import time
	from typing import Optional, Tuple
	import warnings
	warnings.filterwarnings("ignore")

	# Application-wide logging: INFO and above, one logger named after this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Credentials are read from the environment; either may be absent.
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
	ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

	# Tunables for the Gemini request path.
	MAX_IMAGE_SIZE = 1024    # longest allowed image edge, in pixels
	RATE_LIMIT_DELAY = 3     # seconds used to throttle Gemini requests
	API_RETRY_COUNT = 3      # attempts per edit request

	# Gemini is the core feature; without a key the app only logs and carries on.
	if not GEMINI_API_KEY:
	    logger.warning("GEMINI_API_KEY not found - using demo mode")
	else:
	    genai.configure(api_key=GEMINI_API_KEY)
	    logger.info("Gemini API configured")

	# ElevenLabs narration is optional: a missing package or key only disables it.
	# NOTE: when the import fails, `generate` and `set_api_key` stay undefined.
	try:
	    from elevenlabs import generate, set_api_key
	    if not ELEVENLABS_API_KEY:
	        logger.info("ElevenLabs not configured - optional feature")
	    else:
	        set_api_key(ELEVENLABS_API_KEY)
	        logger.info("ElevenLabs configured")
	except ImportError:
	    logger.info("ElevenLabs not available - optional feature")

	# YOLO structure detection is optional too; record whether the package imported.
	yolo_available = True
	try:
	    from ultralytics import YOLO
	except ImportError:
	    yolo_available = False
	    logger.info("YOLO not available - optional feature")

	class NanoBananaApp:
	    """Backend for the Nano Banana UI.

	    Wraps three services: Gemini image editing (core), YOLO structure
	    detection (optional) and ElevenLabs narration (optional). Every public
	    method reports failure through its return value instead of raising, so
	    the Gradio callback can always render something.
	    """

	    # Model id requested from google.generativeai; also echoed in the startup log.
	    GEMINI_MODEL_NAME = 'gemini-2.0-flash-exp'
	    # Largest PNG payload, in bytes, that will be sent to Gemini.
	    MAX_UPLOAD_BYTES = 10 * 1024 * 1024
	    # Longest edge, in pixels, of each panel in the before/after comparison.
	    COMPARISON_PANEL_SIZE = 512

	    def __init__(self):
	        """Create the app and try to initialise the Gemini model."""
	        self.gemini_model = None
	        self.yolo_model = None
	        # time.monotonic() stamp of the previous Gemini request, None before the first.
	        self._last_request_time = None
	        self._initialize_gemini()

	    def _initialize_gemini(self) -> None:
	        """Build the Gemini model handle; ``gemini_model`` stays None on any failure."""
	        if not GEMINI_API_KEY:
	            logger.warning("No API key - demo mode")
	            return
	        try:
	            self.gemini_model = genai.GenerativeModel(self.GEMINI_MODEL_NAME)
	            # Log the model actually requested rather than a hard-coded marketing name.
	            logger.info("Nano Banana initialized with model %s", self.GEMINI_MODEL_NAME)
	        except Exception as e:
	            logger.error(f"Failed to initialize Gemini: {e}")

	    def _resize_image_if_needed(self, image):
	        """Return ``image`` scaled so its longest edge is at most MAX_IMAGE_SIZE.

	        Images already within the limit are returned unchanged (same object).
	        """
	        longest_edge = max(image.width, image.height)
	        if longest_edge <= MAX_IMAGE_SIZE:
	            return image
	        ratio = MAX_IMAGE_SIZE / longest_edge
	        # Clamp to 1 px so extreme aspect ratios never produce a zero-sized edge.
	        new_size = (
	            max(1, int(image.width * ratio)),
	            max(1, int(image.height * ratio)),
	        )
	        return image.resize(new_size, Image.Resampling.LANCZOS)

	    def _apply_rate_limiting(self) -> None:
	        """Keep at least RATE_LIMIT_DELAY seconds between Gemini requests.

	        Only the time still owed since the previous request is slept, so the
	        first request (and any request after a long pause) is not delayed.
	        """
	        previous = getattr(self, "_last_request_time", None)
	        if previous is not None:
	            remaining = RATE_LIMIT_DELAY - (time.monotonic() - previous)
	            if remaining > 0:
	                time.sleep(remaining)
	        self._last_request_time = time.monotonic()

	    def load_yolo_optional(self) -> bool:
	        """Load YOLO weights into ``yolo_model``; return True on success."""
	        if not yolo_available:
	            return False
	        try:
	            # Prefer weights shipped next to the app, custom-trained ones first.
	            local_weights = next(
	                (path for path in ('best.pt', 'yolov11n.pt') if os.path.exists(path)),
	                None,
	            )
	            # NOTE(review): 'yolo11n.pt' is believed to be the published Ultralytics
	            # weight name (no "v"), so it is used when nothing local exists and the
	            # library has to fetch the file itself - confirm against Ultralytics docs.
	            self.yolo_model = YOLO(local_weights or 'yolo11n.pt')
	            return True
	        except Exception as e:
	            logger.warning(f"YOLO loading failed: {e}")
	            return False

	    def detect_structures_optional(self, image):
	        """Run YOLO on ``image`` and return ``(annotated_image, status_message)``.

	        When detection is unavailable or fails, the input image is returned
	        untouched together with an explanatory message.
	        """
	        if self.yolo_model is None and not self.load_yolo_optional():
	            return image, "Structure detection unavailable (optional feature)"

	        try:
	            # Force three channels first: COLOR_RGB2BGR rejects RGBA / greyscale arrays.
	            rgb_pixels = np.array(image.convert('RGB'))
	            img_cv = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
	            results = self.yolo_model(img_cv)
	            annotated_img = results[0].plot()
	            annotated_pil = Image.fromarray(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
	            return annotated_pil, "Structures detected"
	        except Exception as e:
	            return image, f"Detection failed: {str(e)}"

	    def nano_banana_edit(self, image, prompt, style="realistic", editing_mode="complete"):
	        """Ask Gemini to transform ``image`` according to ``prompt``.

	        Args:
	            image: PIL image to edit.
	            prompt: User instruction; None or blank is rejected with a message.
	            style: Key for the completion prompt ("realistic", "futuristic",
	                "artistic"); only used when ``editing_mode`` is "complete".
	            editing_mode: "complete", "edit" or "blend"; any other value sends
	                ``prompt`` verbatim.

	        Returns:
	            ``(result_image, status_message)``. On every failure path the
	            first element is the very object passed in as ``image``, so
	            callers can detect "nothing changed" with an identity check.
	        """
	        if not self.gemini_model:
	            if not GEMINI_API_KEY:
	                return image, "🔑 API key required for Nano Banana. Add GEMINI_API_KEY to use this feature."
	            return image, "Gemini Nano Banana not available"

	        if not prompt or not prompt.strip():
	            return image, "Please provide a transformation prompt"

	        try:
	            # Keep the caller's image in `image`; only the upload copy is resized.
	            upload_image = self._resize_image_if_needed(image)
	            full_prompt = self._build_prompt(prompt, style, editing_mode)

	            # Encode once: the payload is identical for every retry.
	            buffered = io.BytesIO()
	            upload_image.save(buffered, format='PNG')
	            image_bytes = buffered.getvalue()
	            if len(image_bytes) > self.MAX_UPLOAD_BYTES:
	                return image, "Image too large. Please use a smaller image."

	            # NOTE(review): the google.generativeai blob dict is understood to take
	            # raw bytes in 'data'; confirm against the SDK documentation.
	            request = [full_prompt, {'mime_type': 'image/png', 'data': image_bytes}]

	            self._apply_rate_limiting()

	            for attempt in range(API_RETRY_COUNT):
	                is_last_attempt = attempt == API_RETRY_COUNT - 1
	                try:
	                    response = self.gemini_model.generate_content(request)
	                    result_image = self._extract_image(response)
	                except Exception as retry_error:
	                    if is_last_attempt:
	                        # Let the outer handler turn the final failure into a message.
	                        raise
	                    logger.warning(f"Attempt {attempt + 1} failed: {retry_error}")
	                    time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, ...
	                    continue

	                if result_image is not None:
	                    return result_image, f"✨ Nano Banana: {editing_mode} mode with {style} style"
	                if not is_last_attempt:
	                    time.sleep(2 ** attempt)

	            return image, "No image generated - please try a different prompt"

	        except Exception as e:
	            logger.error(f"Nano Banana error: {e}")
	            error_text = str(e).lower()
	            if "quota" in error_text or "limit" in error_text:
	                return image, "⏱️ API rate limit reached. Please try again in a few minutes."
	            return image, f"Processing failed: {str(e)}"

	    def _build_prompt(self, prompt, style, editing_mode) -> str:
	        """Wrap the user's prompt in the instruction text for ``editing_mode``."""
	        if editing_mode == "complete":
	            return f"{self._get_completion_prompt(style)} {prompt}"
	        if editing_mode == "edit":
	            return f"Edit this image: {prompt}. Make the changes look natural and maintain image quality."
	        if editing_mode == "blend":
	            return f"Blend and transform this image: {prompt}. Create a seamless fusion of elements."
	        return prompt

	    def _extract_image(self, response):
	        """Return the first inline image of the first candidate as RGB, or None."""
	        candidates = getattr(response, 'candidates', None)
	        if not candidates:
	            return None
	        for part in candidates[0].content.parts:
	            inline = getattr(part, 'inline_data', None)
	            payload = getattr(inline, 'data', None) if inline else None
	            if payload:
	                return self._decode_inline_image(payload)
	        return None

	    @staticmethod
	    def _decode_inline_image(payload):
	        """Open an inline image payload given as raw bytes or base64 text."""
	        if isinstance(payload, str):
	            return Image.open(io.BytesIO(base64.b64decode(payload))).convert('RGB')
	        try:
	            # NOTE(review): the SDK is understood to expose inline_data.data as raw
	            # image bytes, so try them as-is first - confirm against the SDK docs.
	            return Image.open(io.BytesIO(payload)).convert('RGB')
	        except OSError:
	            # Not a readable image as-is; fall back to treating it as base64.
	            return Image.open(io.BytesIO(base64.b64decode(payload))).convert('RGB')

	    def _get_completion_prompt(self, style: str) -> str:
	        """Return the base instruction for ``style``; unknown styles use "realistic"."""
	        prompts = {
	            "realistic": "Complete this unfinished construction realistically with proper materials and architectural details.",
	            "futuristic": "Transform this construction into a futuristic high-tech building with modern elements.",
	            "artistic": "Complete this construction with creative artistic elements and unique design features."
	        }
	        return prompts.get(style, prompts["realistic"])

	    def generate_voice_optional(self, text):
	        """Return ElevenLabs audio for ``text``, or None when narration is unavailable."""
	        if not ELEVENLABS_API_KEY:
	            return None
	        try:
	            # If the elevenlabs import failed at module load, `generate` is undefined;
	            # the resulting NameError is handled below like any other failure.
	            return generate(text=text, voice="Rachel", model="eleven_monolingual_v1")
	        except Exception as e:
	            logger.warning(f"Voice generation failed: {e}")
	            return None

	    @staticmethod
	    def _fit_within(picture, limit):
	        """Shrink ``picture`` to fit a ``limit`` x ``limit`` box, keeping aspect ratio.

	        Never upscales; a picture that already fits is returned as-is.
	        """
	        scale = min(limit / picture.width, limit / picture.height, 1.0)
	        target = (
	            max(1, round(picture.width * scale)),
	            max(1, round(picture.height * scale)),
	        )
	        if target == picture.size:
	            return picture
	        return picture.resize(target, Image.Resampling.LANCZOS)

	    def create_comparison(self, original, processed):
	        """Build a side-by-side BEFORE/AFTER image, or return None on failure.

	        Each panel is scaled independently to fit COMPARISON_PANEL_SIZE so that
	        neither picture is stretched to the other's proportions.
	        """
	        if original is None or processed is None:
	            return None
	        try:
	            gap = 20      # white gutter between the two panels
	            margin = 20   # white band above (labels) and below the panels

	            before = self._fit_within(original, self.COMPARISON_PANEL_SIZE)
	            after = self._fit_within(processed, self.COMPARISON_PANEL_SIZE)
	            after_x = before.width + gap

	            canvas_size = (
	                after_x + after.width,
	                max(before.height, after.height) + 2 * margin,
	            )
	            comparison = Image.new('RGB', canvas_size, 'white')
	            comparison.paste(before, (0, margin))
	            comparison.paste(after, (after_x, margin))

	            draw = ImageDraw.Draw(comparison)
	            try:
	                font = ImageFont.load_default()
	                draw.text((before.width // 2 - 30, 5), "BEFORE", fill='black', font=font)
	                draw.text((after_x + after.width // 2 - 30, 5), "AFTER", fill='black', font=font)
	            except Exception as label_error:
	                # Labels are cosmetic; keep the comparison even if text drawing fails.
	                logger.warning(f"Comparison labels skipped: {label_error}")

	            return comparison
	        except Exception as e:
	            logger.warning(f"Comparison creation failed: {e}")
	            return None

	# Single shared backend instance; process_nano_banana below calls into it.
	app = NanoBananaApp()

	def process_nano_banana(image, prompt, style, editing_mode, enable_detection, enable_voice):
	    """Gradio callback: run the optional detection, the edit and the optional narration.

	    Args:
	        image: Uploaded PIL image, or None when nothing was uploaded.
	        prompt: Transformation text typed by the user.
	        style: Style key forwarded to the edit.
	        editing_mode: Mode key forwarded to the edit.
	        enable_detection: Whether to run YOLO structure detection.
	        enable_voice: Whether to request an audio narration.

	    Returns:
	        A 6-tuple matching the click handler's outputs:
	        ``(original, detection, result, comparison, status_text, audio)``.
	    """
	    if image is None:
	        return None, None, None, None, "📷 Please upload an image to get started", None

	    if not prompt or not prompt.strip():
	        return image, image, image, None, "💭 Please provide a transformation prompt", None

	    try:
	        detection_result, detection_msg = image, "Detection disabled"
	        if enable_detection:
	            detection_result, detection_msg = app.detect_structures_optional(image)

	        processed_image, process_msg = app.nano_banana_edit(image, prompt, style, editing_mode)

	        # Getting the same object back means no edit happened; an identity check
	        # says exactly that without comparing pixel data.
	        unchanged = processed_image is image

	        if unchanged and "API key required" in process_msg:
	            # The message already begins with the 🔑 marker, so pass it through as-is.
	            return image, detection_result, image, None, process_msg, None

	        comparison = app.create_comparison(image, processed_image)

	        audio = None
	        voice_msg = ""
	        if enable_voice:
	            if unchanged:
	                voice_msg = "🔇 Voice skipped (no changes)"
	            else:
	                voice_text = f"Image transformed using Nano Banana with {editing_mode} mode and {style} style. {prompt}"
	                audio = app.generate_voice_optional(voice_text)
	                voice_msg = "🔊 Voice generated" if audio else "🔇 Voice unavailable"

	        status_parts = [f"🍌 {process_msg}"]
	        if enable_detection:
	            status_parts.append(f"📍 Detection: {detection_msg}")
	        if enable_voice:
	            status_parts.append(f"🎵 Voice: {voice_msg}")

	        return image, detection_result, processed_image, comparison, "\n".join(status_parts), audio

	    except Exception as e:
	        logger.error(f"Processing error: {e}")
	        return image, image, image, None, f"❌ Unexpected error: {str(e)}", None

	# Stylesheet handed to gr.Blocks(css=...): an animated gradient banner
	# (.nano-banner) and a green outline for highlighted outputs (.feature-highlight).
	custom_css = """
	.nano-banner {
	background: linear-gradient(45deg, #ff6b6b, #feca57, #48dbfb, #ff9ff3);
	background-size: 400% 400%;
	animation: gradient 15s ease infinite;
	padding: 20px;
	border-radius: 10px;
	text-align: center;
	margin-bottom: 20px;
	}

	@keyframes gradient {
	0% { background-position: 0% 50%; }
	50% { background-position: 100% 50%; }
	100% { background-position: 0% 50%; }
	}

	.feature-highlight {
	border: 2px solid #4CAF50;
	border-radius: 8px;
	padding: 15px;
	margin: 10px 0;
	}
	"""

	# HTML banner shown under the page header only when no Gemini key is configured.
	if GEMINI_API_KEY:
	    demo_mode_notice = ""
	else:
	    demo_mode_notice = """
	<div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;">
	<h3>🔑 API Key Required</h3>
	<p>To use Nano Banana features, add your <strong>GEMINI_API_KEY</strong> in the Space settings.</p>
	<p>Get your free API key from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p>
	</div>
	"""

	# Gradio page: controls on the left, tabbed outputs on the right, examples below.
	# NOTE(review): the extracted source had lost its indentation, so the container
	# nesting below is reconstructed from component order - confirm against the
	# deployed layout.
	with gr.Blocks(title="🍌 Nano Banana - Dynamic Image Creation", theme=gr.themes.Soft(), css=custom_css) as demo:
	    gr.HTML(f"""
	<div class="nano-banner">
	<h1>🍌 Nano Banana: Dynamic Image Creation</h1>
	<p><strong>Powered by Gemini 2.5 Flash Image Preview</strong></p>
	<p>Edit with words • Blend realities • Transform visuals</p>
	</div>
	{demo_mode_notice}
	""")

	    with gr.Row():
	        # Left column: inputs and controls.
	        with gr.Column(scale=1):
	            with gr.Group():
	                gr.Markdown("### 🎨 Core Nano Banana Features")
	                image_input = gr.Image(label="Upload Image", type="pil", height=300)
	                prompt_input = gr.Textbox(
	                    label="Transformation Prompt",
	                    placeholder="Describe how you want to transform this image...",
	                    lines=3
	                )

	                editing_mode = gr.Radio(
	                    choices=["complete", "edit", "blend"],
	                    value="edit",
	                    label="Nano Banana Mode",
	                    info="Complete: Finish construction • Edit: Modify image • Blend: Fuse elements"
	                )

	                style_selector = gr.Radio(
	                    choices=["realistic", "futuristic", "artistic"],
	                    value="realistic",
	                    label="Style",
	                    info="Choose the aesthetic approach"
	                )

	            with gr.Group():
	                gr.Markdown("### ⚙️ Optional Features")
	                enable_detection = gr.Checkbox(
	                    label="🔍 Structure Detection (YOLO)",
	                    value=False,
	                    info="Optional: Detect and highlight structures"
	                )
	                enable_voice = gr.Checkbox(
	                    label="🔊 Voice Narration (ElevenLabs)",
	                    value=False,
	                    info="Optional: Generate audio description"
	                )

	            process_btn = gr.Button("🚀 Transform with Nano Banana", variant="primary", size="lg")
	            status_output = gr.Textbox(label="Status", interactive=False, lines=4)

	        # Right column: one tab per output image, narration player underneath.
	        with gr.Column(scale=2):
	            with gr.Tabs():
	                with gr.Tab("📷 Original"):
	                    original_output = gr.Image(label="Original Image", height=400)

	                with gr.Tab("🔍 Detection (Optional)"):
	                    detection_output = gr.Image(label="Structure Detection", height=400)

	                with gr.Tab("🍌 Nano Banana Result"):
	                    result_output = gr.Image(label="Transformed Image", height=400, elem_classes=["feature-highlight"])

	                with gr.Tab("📊 Before/After"):
	                    comparison_output = gr.Image(label="Comparison View", height=400)

	            with gr.Row():
	                audio_output = gr.Audio(label="🔊 Voice Description (Optional)", visible=True)

	    # Each row: image path, prompt, style, mode, detection flag, voice flag.
	    example_rows = [
	        ["samples_imagen/skyscraper_construction.jpg", "Complete this modern skyscraper with glass facades", "futuristic", "complete", True, False],
	        ["samples_imagen/suspension_bridge.jpg", "Add a golden sunset reflection on the bridge", "artistic", "edit", False, True],
	        ["samples_imagen/highway_construction.jpg", "Transform into a smart highway with digital elements", "futuristic", "blend", True, False],
	        ["samples_imagen/residential_construction.jpg", "Complete as a sustainable eco-friendly home", "realistic", "complete", False, False]
	    ]
	    # Offer only examples whose image is actually present.
	    # NOTE(review): a missing example file is believed to make gr.Examples fail
	    # while the page is being built - confirm against the Gradio docs.
	    available_examples = [row for row in example_rows if os.path.exists(row[0])]
	    if available_examples:
	        with gr.Row():
	            gr.Examples(
	                examples=available_examples,
	                inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
	                label="🎯 Try These Examples"
	            )

	    gr.Markdown("""
	### 🏆 Competition Features
	- Nano Banana Core: Gemini 2.5 Flash Image for dynamic creation
	- Word-Based Editing: Transform images with natural language
	- Reality Blending: Seamlessly fuse different visual elements
	- Optional Enhancements: Structure detection and voice narration
	- Real-time Processing: Fast image transformations
	""")

	    # Input order must match process_nano_banana's parameters; output order
	    # must match the 6-tuple it returns.
	    process_btn.click(
	        fn=process_nano_banana,
	        inputs=[image_input, prompt_input, style_selector, editing_mode, enable_detection, enable_voice],
	        outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
	    )

	if __name__ == "__main__":
	    # Listen on every interface at port 7860 and request a public share link.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	    }
	    demo.launch(**launch_options)