Spaces:

Abs6187
/

BuildTheFuture

Sleeping

App Files Community

Abs6187 committed on Sep 7, 2025

Commit

7a259c7

verified ·

1 Parent(s): cf991a3

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -203

app.py CHANGED Viewed

@@ -30,25 +30,7 @@ IMAGE_MODELS = {
         "name": "Freepik Gemini 2.5 Flash Image Preview",
         "api": "freepik",
         "url": "https://api.freepik.com/v1/ai/text-to-image",
-        "description": "🍌 <strong>Best for competition</strong> - Real Gemini 2.5 Flash generation<br/>⚠️ <em>Requires paid Freepik subscription + valid API key</em>"
-    },
-    "OpenAI DALL-E 3": {
-        "name": "DALL-E 3",
-        "api": "openai",
-        "url": "https://api.openai.com/v1/images/generations",
-        "description": "🎨 <strong>High-quality creative images</strong> - Excellent for artistic styles<br/>💳 <em>Requires OpenAI API key + billing account</em>"
-    },
-    "Stable Diffusion XL": {
-        "name": "Stable Diffusion XL",
-        "api": "stabilityai",
-        "url": "https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
-        "description": "🚀 <strong>Open-source powerhouse</strong> - High-resolution generation<br/>💰 <em>Requires Stability AI credits + API key</em>"
-    },
-    "Gemini Analysis + Demo": {
-        "name": "Gemini Analysis + Visual Demo",
-        "api": "gemini",
-        "url": None,
-        "description": "📊 <strong>Smart fallback</strong> - AI analysis + visual demo overlay<br/>✅ <em>Works with basic Gemini key or demo mode</em>"
     }
 }
@@ -118,12 +100,6 @@ class NanoBananaApp:
         if api_type == "freepik":
             return self._generate_with_freepik(image, prompt, style, editing_mode, api_keys.get("freepik"))
-        elif api_type == "openai":
-            return self._generate_with_dalle(image, prompt, style, editing_mode, api_keys.get("openai"))
-        elif api_type == "stabilityai":
-            return self._generate_with_stable_diffusion(image, prompt, style, editing_mode, api_keys.get("stabilityai"))
-        elif api_type == "gemini":
-            return self._generate_with_gemini_demo(image, prompt, style, editing_mode, api_keys.get("gemini"))
         else:
             return None, f"Unsupported API type: {api_type}"
@@ -223,105 +199,6 @@ class NanoBananaApp:
             logger.error(f"Freepik generation failed: {e}")
             return None, f"Freepik error: {str(e)}"
-    def _generate_with_dalle(self, image, prompt, style, editing_mode, api_key):
-        if not api_key:
-            return None, "OpenAI API key not provided"
-        try:
-            full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
-            headers = {
-                "Authorization": f"Bearer {api_key}",
-                "Content-Type": "application/json"
-            }
-            payload = {
-                "model": "dall-e-3",
-                "prompt": full_prompt,
-                "n": 1,
-                "size": "1024x1024"
-            }
-            response = requests.post(
-                "https://api.openai.com/v1/images/generations",
-                headers=headers,
-                json=payload,
-                timeout=60
-            )
-            if response.status_code == 200:
-                result = response.json()
-                if result.get('data') and len(result['data']) > 0:
-                    image_url = result['data'][0]['url']
-                    img_response = requests.get(image_url, timeout=30)
-                    if img_response.status_code == 200:
-                        generated_image = Image.open(io.BytesIO(img_response.content))
-                        return generated_image, "🎨 Generated with DALL-E 3"
-            return None, f"DALL-E API error: {response.status_code}"
-        except Exception as e:
-            logger.error(f"DALL-E generation failed: {e}")
-            return None, f"DALL-E error: {str(e)}"
-    def _generate_with_stable_diffusion(self, image, prompt, style, editing_mode, api_key):
-        if not api_key:
-            return None, "Stability AI API key not provided"
-        try:
-            full_prompt = self._build_enhanced_prompt(prompt, style, editing_mode)
-            headers = {
-                "Authorization": f"Bearer {api_key}",
-                "Content-Type": "application/json"
-            }
-            payload = {
-                "text_prompts": [{"text": full_prompt}],
-                "cfg_scale": 7,
-                "height": 1024,
-                "width": 1024,
-                "samples": 1,
-                "steps": 30
-            }
-            response = requests.post(
-                "https://api.stability.ai/v1/generation/stable-diffusion-xl-1024-v1-0/text-to-image",
-                headers=headers,
-                json=payload,
-                timeout=60
-            )
-            if response.status_code == 200:
-                result = response.json()
-                if result.get('artifacts') and len(result['artifacts']) > 0:
-                    image_b64 = result['artifacts'][0]['base64']
-                    image_data = base64.b64decode(image_b64)
-                    generated_image = Image.open(io.BytesIO(image_data))
-                    return generated_image, "🚀 Generated with Stable Diffusion XL"
-            return None, f"Stability AI error: {response.status_code}"
-        except Exception as e:
-            logger.error(f"Stable Diffusion generation failed: {e}")
-            return None, f"Stable Diffusion error: {str(e)}"
-    def _generate_with_gemini_demo(self, image, prompt, style, editing_mode, api_key):
-        if api_key:
-            try:
-                # Temporarily configure with user's key
-                original_key = GEMINI_API_KEY
-                genai.configure(api_key=api_key)
-                result = self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
-                # Restore original key
-                if original_key:
-                    genai.configure(api_key=original_key)
-                return result
-            except Exception as e:
-                logger.warning(f"User Gemini key failed: {e}")
-                return None, f"Invalid Gemini API key: {str(e)}"
-        return self._fallback_to_gemini_demo(image, prompt, style, editing_mode)
     def _build_enhanced_prompt(self, prompt, style, editing_mode):
         style_modifiers = {
@@ -407,9 +284,9 @@ class NanoBananaApp:
             for attempt in range(API_RETRY_COUNT):
                 try:
-            buffered = io.BytesIO()
                     image.save(buffered, format='PNG', quality=85)
-            image_bytes = buffered.getvalue()
                     if len(image_bytes) > 10 * 1024 * 1024:
                         return image, "Image too large. Please use a smaller image."
@@ -417,11 +294,11 @@ class NanoBananaApp:
                     try:
                         response = self.gemini_model.generate_content([
                             analysis_prompt,
-                {
-                    'mime_type': 'image/png',
-                    'data': base64.b64encode(image_bytes).decode('utf-8')
-                }
-            ])
                     except Exception as img_error:
                         if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
                             logger.warning("API key doesn't support image processing, using demo mode")
@@ -537,10 +414,8 @@ class NanoBananaApp:
 app = NanoBananaApp()
-def update_model_description(model_name):
-    return IMAGE_MODELS.get(model_name, {}).get("description", "Model description not available")
-def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_name, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice):
     if not image:
         return None, None, None, None, "📷 Please upload an image to get started", None
@@ -548,10 +423,7 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
         return image, image, image, None, "💭 Please provide a transformation prompt", None
     user_api_keys = {
-        "freepik": freepik_key or FREEPIK_API_KEY,
-        "openai": openai_key,
-        "stabilityai": stability_key,
-        "gemini": gemini_key or GEMINI_API_KEY
     }
     try:
@@ -561,12 +433,14 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
         if enable_detection:
             detection_result, detection_msg = app.detect_structures_optional(image)
         processed_image, process_msg = app.nano_banana_edit_with_model(
-            image, prompt, style, editing_mode, model_name, user_api_keys
         )
         if processed_image == image and "API key" in process_msg:
-            return image, detection_result, image, None, f"🔑 {process_msg}", None
         comparison = app.create_comparison(image, processed_image)
@@ -574,7 +448,7 @@ def process_nano_banana_with_settings(image, prompt, style, editing_mode, model_
         voice_msg = ""
         if enable_voice:
             if processed_image != image:
-                voice_text = f"Using {model_name}, the AI processed this construction image with {editing_mode} mode and {style} style. Request: {prompt}"
                 audio = app.generate_voice_optional(voice_text)
                 voice_msg = "🔊 Voice generated" if audio else "🔇 Voice unavailable"
             else:
@@ -657,21 +531,13 @@ custom_css = """
 """
 demo_mode_notice = ""
-if not FREEPIK_API_KEY and not GEMINI_API_KEY:
-    demo_mode_notice = """
-    <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;">
-        <h3>🔑 API Keys Required</h3>
-        <p><strong>For best results:</strong> Add <strong>FREEPIK_API_KEY</strong> for real image generation</p>
-        <p><strong>For analysis:</strong> Add <strong>GEMINI_API_KEY</strong> from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p>
-        <p><em>Demo mode available without API keys</em></p>
-    </div>
-    """
-elif not FREEPIK_API_KEY:
     demo_mode_notice = """
-    <div style="background: #e3f2fd; border: 1px solid #2196f3; border-radius: 8px; padding: 15px; margin: 10px 0;">
-        <h3>🍌 Enhanced Mode Available</h3>
-        <p>Add <strong>FREEPIK_API_KEY</strong> for real Gemini 2.5 Flash image generation!</p>
-        <p><em>Currently using Gemini analysis + demo mode</em></p>
     </div>
     """
@@ -711,35 +577,15 @@ with gr.Blocks(title="🍌 Nano Banana - Dynamic Image Creation", theme=gr.theme
                 )
             with gr.Group():
-                gr.Markdown("### 🤖 AI Model & API Settings")
-                model_selector = gr.Dropdown(
-                    choices=list(IMAGE_MODELS.keys()),
-                    value="Freepik Gemini 2.5 Flash",
-                    label="Image Generation Model",
-                    info="Choose your preferred AI model"
-                )
-                model_description = gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
-                with gr.Accordion("🔑 API Keys (Optional - Use Your Own)", open=False):
                     freepik_key = gr.Textbox(
                         label="Freepik API Key",
                         placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
-                        type="password"
-                    )
-                    openai_key = gr.Textbox(
-                        label="OpenAI API Key",
-                        placeholder="Enter your OpenAI API key for DALL-E 3",
-                        type="password"
-                    )
-                    stability_key = gr.Textbox(
-                        label="Stability AI API Key",
-                        placeholder="Enter your Stability AI key for Stable Diffusion XL",
-                        type="password"
-                    )
-                    gemini_key = gr.Textbox(
-                        label="Gemini API Key",
-                        placeholder="Enter your Gemini API key for analysis mode",
-                        type="password"
                     )
             with gr.Group():
@@ -778,38 +624,32 @@ with gr.Blocks(title="🍌 Nano Banana - Dynamic Image Creation", theme=gr.theme
     with gr.Row():
         gr.Examples(
             examples=[
-                ["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "Freepik Gemini 2.5 Flash", "", "", "", "", False, False],
-                ["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "OpenAI DALL-E 3", "", "", "", "", True, False],
-                ["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "Stable Diffusion XL", "", "", "", "", False, True],
-                ["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "Freepik Gemini 2.5 Flash", "", "", "", "", False, False],
-                ["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "Gemini Analysis + Demo", "", "", "", "", True, False],
-                ["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "OpenAI DALL-E 3", "", "", "", "", False, False],
-                ["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "Stable Diffusion XL", "", "", "", "", False, False],
-                ["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "Freepik Gemini 2.5 Flash", "", "", "", "", False, True]
             ],
-            inputs=[image_input, prompt_input, style_selector, editing_mode, model_selector, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice],
             label="🎯 Try These Examples"
         )
     gr.Markdown("""
-    ### 🏆 Competition Features
-    - **🍌 Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview for real image generation
-    - **🎨 Word-Based Editing**: Transform images with natural language prompts
-    - **🌟 Reality Blending**: Seamlessly fuse different visual elements
-    - **⚡ Real-time Processing**: Fast image transformations and generation
     - **🛠️ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
-    - **🔄 Smart Fallbacks**: Multiple processing modes for reliability
     """)
-    model_selector.change(
-        fn=update_model_description,
-        inputs=[model_selector],
-        outputs=[model_description]
-    )
     process_btn.click(
-        fn=process_nano_banana_with_settings,
-        inputs=[image_input, prompt_input, style_selector, editing_mode, model_selector, freepik_key, openai_key, stability_key, gemini_key, enable_detection, enable_voice],
         outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
     )

         "name": "Freepik Gemini 2.5 Flash Image Preview",
         "api": "freepik",
         "url": "https://api.freepik.com/v1/ai/text-to-image",
+        "description": "🍌 <strong>Nano Banana Competition Model</strong> - Real Gemini 2.5 Flash generation<br/>⚠️ <em>Requires paid Freepik subscription + valid API key</em>"
     }
 }
         if api_type == "freepik":
             return self._generate_with_freepik(image, prompt, style, editing_mode, api_keys.get("freepik"))
         else:
             return None, f"Unsupported API type: {api_type}"
             logger.error(f"Freepik generation failed: {e}")
             return None, f"Freepik error: {str(e)}"
     def _build_enhanced_prompt(self, prompt, style, editing_mode):
         style_modifiers = {
             for attempt in range(API_RETRY_COUNT):
                 try:
+                    buffered = io.BytesIO()
                     image.save(buffered, format='PNG', quality=85)
+                    image_bytes = buffered.getvalue()
                     if len(image_bytes) > 10 * 1024 * 1024:
                         return image, "Image too large. Please use a smaller image."
                     try:
                         response = self.gemini_model.generate_content([
                             analysis_prompt,
+                            {
+                                'mime_type': 'image/png',
+                                'data': base64.b64encode(image_bytes).decode('utf-8')
+                            }
+                        ])
                     except Exception as img_error:
                         if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
                             logger.warning("API key doesn't support image processing, using demo mode")
 app = NanoBananaApp()
+def process_nano_banana_with_freepik(image, prompt, style, editing_mode, freepik_key, enable_detection, enable_voice):
     if not image:
         return None, None, None, None, "📷 Please upload an image to get started", None
         return image, image, image, None, "💭 Please provide a transformation prompt", None
     user_api_keys = {
+        "freepik": freepik_key or FREEPIK_API_KEY
     }
     try:
         if enable_detection:
             detection_result, detection_msg = app.detect_structures_optional(image)
+        # Try Freepik first, fallback to demo mode if needed
         processed_image, process_msg = app.nano_banana_edit_with_model(
+            image, prompt, style, editing_mode, "Freepik Gemini 2.5 Flash", user_api_keys
         )
+        # If Freepik fails, fallback to Gemini demo mode
         if processed_image == image and "API key" in process_msg:
+            processed_image, process_msg = app._fallback_to_gemini_demo(image, prompt, style, editing_mode)
         comparison = app.create_comparison(image, processed_image)
         voice_msg = ""
         if enable_voice:
             if processed_image != image:
+                voice_text = f"Using Gemini 2.5 Flash, the AI processed this construction image with {editing_mode} mode and {style} style. Request: {prompt}"
                 audio = app.generate_voice_optional(voice_text)
                 voice_msg = "🔊 Voice generated" if audio else "🔇 Voice unavailable"
             else:
 """
 demo_mode_notice = ""
+if not FREEPIK_API_KEY:
     demo_mode_notice = """
+    <div style="background: #fff3e0; border: 1px solid #ff9800; border-radius: 8px; padding: 15px; margin: 10px 0;">
+        <h3>🍌 Nano Banana - Competition Mode</h3>
+        <p><strong>For real image generation:</strong> Add your <strong>FREEPIK_API_KEY</strong> in the API settings below</p>
+        <p>Get your key from: <a href="https://www.freepik.com/api" target="_blank">Freepik API Portal</a></p>
+        <p><em>Demo mode with visual overlay available without API key</em></p>
     </div>
     """
                 )
             with gr.Group():
+                gr.Markdown("### 🍌 Nano Banana - Gemini 2.5 Flash")
+                gr.HTML(value=IMAGE_MODELS["Freepik Gemini 2.5 Flash"]["description"])
+                with gr.Accordion("🔑 Freepik API Key (Required for Image Generation)", open=True):
                     freepik_key = gr.Textbox(
                         label="Freepik API Key",
                         placeholder="Enter your Freepik API key for real Gemini 2.5 Flash generation",
+                        type="password",
+                        info="Get your key from: https://www.freepik.com/api"
                     )
             with gr.Group():
     with gr.Row():
         gr.Examples(
             examples=[
+                ["samples/building_001.jpg", "Complete this modern building with glass facade", "realistic", "complete", "", False, False],
+                ["samples/bridge_049.jpg", "Transform into futuristic suspension bridge", "futuristic", "edit", "", True, False],
+                ["samples/road_088.jpg", "Complete as smart highway with LED lights", "futuristic", "blend", "", False, True],
+                ["samples/construction_019.jpg", "Add artistic elements and colorful design", "artistic", "edit", "", False, False],
+                ["samples/infrastructure_015.jpg", "Complete with sustainable green technology", "realistic", "complete", "", True, False],
+                ["samples/residential_004.jpg", "Transform into eco-friendly smart home", "futuristic", "blend", "", False, False],
+                ["samples/commercial_010.jpg", "Add modern commercial design elements", "realistic", "edit", "", False, False],
+                ["samples/construction_111.jpg", "Complete with artistic architectural details", "artistic", "complete", "", False, True]
             ],
+            inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
             label="🎯 Try These Examples"
         )
     gr.Markdown("""
+    ### 🏆 Competition Features - Gemini 2.5 Flash
+    - **🍌 Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview - the official competition model
+    - **🎨 Word-Based Editing**: Transform construction images with natural language prompts
+    - **🌟 Reality Blending**: Complete unfinished buildings, edit existing structures, blend architectural elements
+    - **⚡ Real-time Processing**: Fast image transformations powered by Gemini 2.5 Flash
     - **🛠️ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
+    - **🔄 Smart Fallbacks**: Demo mode with visual overlays when API key not available
     """)
     process_btn.click(
+        fn=process_nano_banana_with_freepik,
+        inputs=[image_input, prompt_input, style_selector, editing_mode, freepik_key, enable_detection, enable_voice],
         outputs=[original_output, detection_output, result_output, comparison_output, status_output, audio_output]
     )