Abs6187 committed on
Commit
41875bc
·
verified ·
1 Parent(s): 4699c39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -20
app.py CHANGED
@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
17
 
18
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
19
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 
20
 
21
  MAX_IMAGE_SIZE = 1024
22
  RATE_LIMIT_DELAY = 3
@@ -56,13 +57,13 @@ class NanoBananaApp:
56
  logger.warning("No API key - demo mode")
57
  return
58
  try:
59
- self.gemini_model = genai.GenerativeModel('gemini-2.5-flash-image-preview')
60
- logger.info("Nano Banana (gemini-2.5-flash-image-preview) initialized")
61
  except Exception as e:
62
  logger.error(f"Failed to initialize Gemini: {e}")
63
  try:
64
- self.gemini_model = genai.GenerativeModel('gemini-2.5-flash-image-preview')
65
- logger.info("Fallback: gemini-2.5-flash-image-preview")
66
  except Exception as e2:
67
  logger.error(f"Fallback also failed: {e2}")
68
 
@@ -75,6 +76,56 @@ class NanoBananaApp:
75
 
76
  def _apply_rate_limiting(self):
77
  time.sleep(RATE_LIMIT_DELAY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  def load_yolo_optional(self):
80
  if not yolo_available:
@@ -109,10 +160,20 @@ class NanoBananaApp:
109
  if not prompt.strip():
110
  return image, "Please provide a transformation prompt"
111
 
 
 
112
  try:
113
  image = self._resize_image_if_needed(image)
114
  self._apply_rate_limiting()
115
 
 
 
 
 
 
 
 
 
116
  if editing_mode == "complete":
117
  base_prompt = self._get_completion_prompt(style)
118
  analysis_prompt = f"Analyze this construction image and describe how to {base_prompt.lower()} User request: {prompt}. Provide detailed description of the completed construction."
@@ -132,13 +193,21 @@ class NanoBananaApp:
132
  if len(image_bytes) > 10 * 1024 * 1024:
133
  return image, "Image too large. Please use a smaller image."
134
 
135
- response = self.gemini_model.generate_content([
136
- analysis_prompt,
137
- {
138
- 'mime_type': 'image/png',
139
- 'data': base64.b64encode(image_bytes).decode('utf-8')
140
- }
141
- ])
 
 
 
 
 
 
 
 
142
 
143
  if hasattr(response, 'text') and response.text:
144
  processed_image = self._create_nano_banana_demo(image, response.text, style, editing_mode)
@@ -317,12 +386,21 @@ custom_css = """
317
  """
318
 
319
  demo_mode_notice = ""
320
- if not GEMINI_API_KEY:
321
  demo_mode_notice = """
322
  <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;">
323
- <h3>🔑 API Key Required</h3>
324
- <p>To use Nano Banana features, add your <strong>GEMINI_API_KEY</strong> in the Space settings.</p>
325
- <p>Get your free API key from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p>
 
 
 
 
 
 
 
 
 
326
  </div>
327
  """
328
 
@@ -412,11 +490,12 @@ with gr.Blocks(title="🍌 Nano Banana - Dynamic Image Creation", theme=gr.theme
412
 
413
  gr.Markdown("""
414
  ### πŸ† Competition Features
415
- - **Nano Banana Core**: Gemini 2.5 Flash Image for dynamic creation
416
- - **Word-Based Editing**: Transform images with natural language
417
- - **Reality Blending**: Seamlessly fuse different visual elements
418
- - **Optional Enhancements**: Structure detection and voice narration
419
- - **Real-time Processing**: Fast image transformations
 
420
  """)
421
 
422
  process_btn.click(
 
17
 
18
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
19
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
20
+ FREEPIK_API_KEY = os.getenv("FREEPIK_API_KEY")
21
 
22
  MAX_IMAGE_SIZE = 1024
23
  RATE_LIMIT_DELAY = 3
 
57
  logger.warning("No API key - demo mode")
58
  return
59
  try:
60
+ self.gemini_model = genai.GenerativeModel('gemini-2.5-flash')
61
+ logger.info("Nano Banana (Gemini 1.5 Flash) initialized")
62
  except Exception as e:
63
  logger.error(f"Failed to initialize Gemini: {e}")
64
  try:
65
+ self.gemini_model = genai.GenerativeModel('gemini-2.5-pro')
66
+ logger.info("Fallback: Gemini 1.5 Pro initialized")
67
  except Exception as e2:
68
  logger.error(f"Fallback also failed: {e2}")
69
 
 
76
 
77
  def _apply_rate_limiting(self):
78
  time.sleep(RATE_LIMIT_DELAY)
79
+
80
+ def _generate_with_freepik(self, image, prompt, style, editing_mode):
81
+ if not FREEPIK_API_KEY:
82
+ return None, "Freepik API key not available"
83
+
84
+ try:
85
+ buffered = io.BytesIO()
86
+ image.save(buffered, format='PNG')
87
+ image_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
88
+
89
+ style_modifiers = {
90
+ "realistic": "photorealistic, high-quality construction, professional architecture",
91
+ "futuristic": "futuristic, high-tech, modern glass and steel, sci-fi architecture",
92
+ "artistic": "artistic, creative design, unique architecture, colorful and innovative"
93
+ }
94
+
95
+ mode_descriptions = {
96
+ "complete": "Complete this unfinished construction",
97
+ "edit": "Edit and transform this construction image",
98
+ "blend": "Blend and reimagine this construction"
99
+ }
100
+
101
+ full_prompt = f"{mode_descriptions.get(editing_mode, 'Transform')} {prompt}. Style: {style_modifiers.get(style, '')}. Make it look professional and realistic."
102
+
103
+ url = "https://api.freepik.com/v1/ai/gemini-2-5-flash-image-preview"
104
+ payload = {
105
+ "prompt": full_prompt,
106
+ "reference_images": [image_b64],
107
+ "webhook_url": None
108
+ }
109
+ headers = {
110
+ "x-freepik-api-key": FREEPIK_API_KEY,
111
+ "Content-Type": "application/json"
112
+ }
113
+
114
+ response = requests.post(url, json=payload, headers=headers, timeout=30)
115
+
116
+ if response.status_code == 200:
117
+ result = response.json()
118
+ if 'image_url' in result:
119
+ img_response = requests.get(result['image_url'], timeout=30)
120
+ if img_response.status_code == 200:
121
+ generated_image = Image.open(io.BytesIO(img_response.content))
122
+ return generated_image, "Generated with Freepik Gemini 2.5 Flash"
123
+
124
+ return None, f"Freepik API error: {response.status_code}"
125
+
126
+ except Exception as e:
127
+ logger.error(f"Freepik generation failed: {e}")
128
+ return None, f"Freepik error: {str(e)}"
129
 
130
  def load_yolo_optional(self):
131
  if not yolo_available:
 
160
  if not prompt.strip():
161
  return image, "Please provide a transformation prompt"
162
 
163
+ logger.info(f"Processing: {editing_mode} mode, {style} style, prompt: {prompt[:50]}...")
164
+
165
  try:
166
  image = self._resize_image_if_needed(image)
167
  self._apply_rate_limiting()
168
 
169
+ # Try Freepik Gemini 2.5 Flash first (best quality)
170
+ if FREEPIK_API_KEY:
171
+ freepik_result, freepik_msg = self._generate_with_freepik(image, prompt, style, editing_mode)
172
+ if freepik_result is not None:
173
+ return freepik_result, f"🍌 Nano Banana (Freepik): {freepik_msg}"
174
+ logger.warning(f"Freepik failed: {freepik_msg}, falling back to Gemini analysis")
175
+
176
+ # Fallback to Gemini analysis + demo overlay
177
  if editing_mode == "complete":
178
  base_prompt = self._get_completion_prompt(style)
179
  analysis_prompt = f"Analyze this construction image and describe how to {base_prompt.lower()} User request: {prompt}. Provide detailed description of the completed construction."
 
193
  if len(image_bytes) > 10 * 1024 * 1024:
194
  return image, "Image too large. Please use a smaller image."
195
 
196
+ try:
197
+ response = self.gemini_model.generate_content([
198
+ analysis_prompt,
199
+ {
200
+ 'mime_type': 'image/png',
201
+ 'data': base64.b64encode(image_bytes).decode('utf-8')
202
+ }
203
+ ])
204
+ except Exception as img_error:
205
+ if "API_KEY_INVALID" in str(img_error) or "API key not valid" in str(img_error):
206
+ logger.warning("API key doesn't support image processing, using demo mode")
207
+ processed_image = self._create_nano_banana_demo(image, f"Demo mode: {prompt}", style, editing_mode)
208
+ return processed_image, f"🍌 Nano Banana Demo: {editing_mode} mode with {style} style (Image analysis unavailable)"
209
+ else:
210
+ raise img_error
211
 
212
  if hasattr(response, 'text') and response.text:
213
  processed_image = self._create_nano_banana_demo(image, response.text, style, editing_mode)
 
386
  """
387
 
388
  demo_mode_notice = ""
389
+ if not FREEPIK_API_KEY and not GEMINI_API_KEY:
390
  demo_mode_notice = """
391
  <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 8px; padding: 15px; margin: 10px 0;">
392
+ <h3>🔑 API Keys Required</h3>
393
+ <p><strong>For best results:</strong> Add <strong>FREEPIK_API_KEY</strong> for real image generation</p>
394
+ <p><strong>For analysis:</strong> Add <strong>GEMINI_API_KEY</strong> from <a href="https://makersuite.google.com/app/apikey" target="_blank">Google AI Studio</a></p>
395
+ <p><em>Demo mode available without API keys</em></p>
396
+ </div>
397
+ """
398
+ elif not FREEPIK_API_KEY:
399
+ demo_mode_notice = """
400
+ <div style="background: #e3f2fd; border: 1px solid #2196f3; border-radius: 8px; padding: 15px; margin: 10px 0;">
401
+ <h3>🍌 Enhanced Mode Available</h3>
402
+ <p>Add <strong>FREEPIK_API_KEY</strong> for real Gemini 2.5 Flash image generation!</p>
403
+ <p><em>Currently using Gemini analysis + demo mode</em></p>
404
  </div>
405
  """
406
 
 
490
 
491
  gr.Markdown("""
492
  ### πŸ† Competition Features
493
+ - **🍌 Nano Banana Core**: Freepik's Gemini 2.5 Flash Image Preview for real image generation
494
+ - **🎨 Word-Based Editing**: Transform images with natural language prompts
495
+ - **🌟 Reality Blending**: Seamlessly fuse different visual elements
496
+ - **⚡ Real-time Processing**: Fast image transformations and generation
497
+ - **🛠️ Optional Enhancements**: Structure detection (YOLO) and voice narration (ElevenLabs)
498
+ - **🔄 Smart Fallbacks**: Multiple processing modes for reliability
499
  """)
500
 
501
  process_btn.click(