Varhal committed on
Commit
b35c170
·
verified ·
1 Parent(s): 6fd2189

updated generation flow (added prompt to get tags and description)

Browse files
Files changed (1) hide show
  1. app.py +310 -103
app.py CHANGED
@@ -7,107 +7,315 @@ from PIL import Image, ImageDraw, ImageFont
7
  import gradio as gr
8
  import base64
9
  import mimetypes
 
 
10
  from google import genai
11
- from google.genai import types
12
 
13
- # Функція для збереження бінарного файлу (залишаємо без змін)
14
  def save_binary_file(file_name, data):
15
- with open(file_name, "wb") as f:
16
- f.write(data)
 
 
 
 
 
 
 
17
 
18
- # Модифікована функція generate - прибираємо api_key як параметр
19
  def generate(text, file_name, model="gemini-2.0-flash-exp"):
20
- # Ініціалізуємо клієнта, читаючи ключ зі змінної оточення geminigoogle
21
- # Переконайтеся, що змінна geminigoogle встановлена у ваших налаштуваннях Space
 
 
 
22
  api_key = os.environ.get("geminigoogle")
23
  if not api_key:
24
- raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")
25
-
26
- client = genai.Client(api_key=api_key)
27
-
28
- # Решта функції generate залишається без змін
29
- files = [ client.files.upload(file=file_name) ]
30
-
31
- contents = [
32
- types.Content(
33
- role="user",
34
- parts=[
35
- types.Part.from_uri(
36
- file_uri=files[0].uri,
37
- mime_type=files[0].mime_type,
38
- ),
39
- types.Part.from_text(text=text),
40
- ],
41
- ),
42
- ]
43
 
44
- generate_content_config = types.GenerateContentConfig(
45
- temperature=1,
46
- top_p=0.95,
47
- top_k=40,
48
- max_output_tokens=8192,
49
- response_modalities=["image", "text"],
50
- response_mime_type="text/plain",
51
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- text_response = ""
54
- image_path = None
55
-
56
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
57
- temp_path = tmp.name
58
- for chunk in client.models.generate_content_stream(
59
- model=model,
60
- contents=contents,
61
- config=generate_content_config,
62
- ):
63
- if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
64
- continue
65
- candidate = chunk.candidates[0].content.parts[0]
66
-
67
- text_part = getattr(candidate, "text", "")
68
- if text_part:
69
- text_response += text_part + "\n"
70
-
71
- if candidate.inline_data:
72
- save_binary_file(temp_path, candidate.inline_data.data)
73
- print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path} and prompt input: {text}")
74
- image_path = temp_path
75
- break
76
-
77
- # Видаляємо завантажені файли після використання
78
- del files
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  return image_path, text_response
81
 
82
- # Модифікована функція process_image_and_prompt - прибираємо gemini_api_key як параметр
83
- def process_image_and_prompt(composite_pil, prompt):
 
 
 
 
 
 
 
 
84
  try:
 
85
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
86
  composite_path = tmp.name
 
 
 
87
  composite_pil.save(composite_path)
 
88
 
89
- file_name = composite_path
90
- input_text = prompt
91
- model = "gemini-2.0-flash-exp" # Модель вказується тут
 
 
 
 
 
 
 
 
 
92
 
93
- # Викликаємо generate без api_key
94
- image_path, text_response = generate(text=input_text, file_name=file_name, model=model)
 
 
 
 
95
 
96
- if image_path:
97
- result_img = Image.open(image_path)
98
- if result_img.mode == "RGBA":
99
- result_img = result_img.convert("RGB")
100
- return [result_img], text_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  else:
102
- return None, text_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  except Exception as e:
105
- # Важливо видалити тимчасовий файл у разі помилки
106
- if 'composite_path' in locals() and os.path.exists(composite_path):
107
- os.remove(composite_path)
108
- raise gr.Error(f"Error Getting {e}", duration=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- # Gradio інтерфейс
111
  with gr.Blocks( # css_paths="style.css", # Тимчасово закоментували цей рядок
112
  ) as demo:
113
  gr.HTML(
@@ -127,19 +335,23 @@ with gr.Blocks( # css_paths="style.css", # Тимчасово закоменту
127
  """
128
  )
129
 
130
- # Прибираємо секцію API Configuration або змінюємо її опис, оскільки ключ більше не вводиться
131
  with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
132
  gr.Markdown("""
133
- - **Ваш Gemini API ключ має бути збережений у змінній оточення `geminigoogle` в налаштуваннях Hugging Face Space.**
134
- - ❗ Іноді модель повертає текст замість зображення.
 
135
  """)
136
 
137
  with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
138
  gr.Markdown("""
139
  ### 📌 Usage
140
- - Upload an image and enter a prompt to generate outputs.
141
- - If text is returned instead of an image, it will appear in the text output.
142
- - Upload Only PNG Image
 
 
 
 
143
  - ❌ **Do not use NSFW images!**
144
  """)
145
 
@@ -148,43 +360,38 @@ with gr.Blocks( # css_paths="style.css", # Тимчасово закоменту
148
  image_input = gr.Image(
149
  type="pil",
150
  label="Upload Image",
151
- image_mode="RGBA",
152
  elem_id="image-input",
153
  elem_classes="upload-box"
154
  )
155
- # Прибираємо поле введення API ключа з інтерфейсу
156
- # gemini_api_key = gr.Textbox(
157
- # lines=1,
158
- # placeholder="Enter Gemini API Key (optional)",
159
- # label="Gemini API Key (optional)",
160
- # elem_classes="api-key-input"
161
- # )
162
  prompt_input = gr.Textbox(
163
  lines=2,
164
- placeholder="Enter prompt here...",
165
- label="Prompt",
166
  elem_classes="prompt-input"
167
  )
168
- submit_btn = gr.Button("Generate", elem_classes="generate-btn")
169
 
170
  with gr.Column(elem_classes="output-column"):
171
- output_gallery = gr.Gallery(label="Generated Outputs", elem_classes="output-gallery")
172
  output_text = gr.Textbox(
173
- label="Gemini Output",
174
- placeholder="Text response will appear here if no image is generated.",
175
- elem_classes="output-text"
 
 
176
  )
177
 
178
- # Налаштовуємо взаємодію - прибираємо gemini_api_key з inputs
179
  submit_btn.click(
180
  fn=process_image_and_prompt,
181
- inputs=[image_input, prompt_input], # Передаємо лише image_input та prompt_input
182
  outputs=[output_gallery, output_text],
183
  )
184
 
185
  gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
186
 
187
- # Приклади залишаємо без змін, API ключ в них не потрібен
188
  examples = [
189
  ["data/1.webp", 'change text to "AMEER"'],
190
  ["data/2.webp", "remove the spoon from hand only"],
@@ -198,7 +405,7 @@ with gr.Blocks( # css_paths="style.css", # Тимчасово закоменту
198
 
199
  gr.Examples(
200
  examples=examples,
201
- inputs=[image_input, prompt_input], # Приклади також не потребують API ключа
202
  elem_id="examples-grid"
203
  )
204
 
 
7
  import gradio as gr
8
  import base64
9
  import mimetypes
10
+ # Requires the google-genai SDK (the package that provides `from google import genai`)
11
+ # pip install google-genai Pillow gradio
12
  from google import genai
13
+ from google.genai import types # Using the newer client API structure if available
14
 
15
+ # Function to save binary file (kept as is)
16
def save_binary_file(file_name, data):
    """Write binary ``data`` to ``file_name``, replacing any existing content.

    Args:
        file_name: Path of the file to create or overwrite.
        data: Bytes-like payload to write.

    Raises:
        Exception: Any error raised while opening or writing the file is
            logged to stdout and then re-raised unchanged, so callers decide
            how to recover.
    """
    try:
        with open(file_name, "wb") as f:
            f.write(data)
    except Exception as e:
        # Log for the Space console, then propagate: the caller owns recovery.
        print(f"Error saving binary data to {file_name}: {e}")
        raise
25
+
26
 
27
+ # Modified generate function to handle stream and collect both text and image
28
def generate(text, file_name, model="gemini-2.0-flash-exp"):
    """Send an image plus a text prompt to Gemini and collect the streamed reply.

    The input file is uploaded through the google-genai client, the model is
    asked for both image and text output, every text part of the stream is
    accumulated, and the first inline image part is written to a temporary
    PNG file.

    Args:
        text: Prompt text sent alongside the image.
        file_name: Path of the local image file to upload.
        model: Gemini model name to call.

    Returns:
        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
        the temporary file holding the first generated image, or ``None`` when
        the model returned no image (or it could not be saved). The caller is
        responsible for deleting that file. ``text_response`` is the
        concatenation of all text parts received.

    Raises:
        gr.Error: If the API key is missing, the client cannot be created,
            the upload fails after all retries, or generation fails.
    """
    # Local import: the file's top-level import block is not guaranteed to
    # provide `time`, and it is only needed for the retry backoff below.
    import time

    api_key = os.environ.get("geminigoogle")
    if not api_key:
        raise gr.Error("GEMINI_API_KEY environment variable (geminigoogle) not set.", duration=10)

    try:
        client = genai.Client(api_key=api_key)
    except Exception as e:
        raise gr.Error(f"Failed to initialize Gemini client: {e}", duration=10) from e

    # Upload the input image, retrying with a simple linear backoff (1s, 2s).
    upload_attempts = 3
    uploaded_file = None
    for attempt in range(1, upload_attempts + 1):
        try:
            print(f"Attempting to upload input file: {file_name}")
            uploaded_file = client.files.upload(file=file_name)
            print(f"Input file uploaded successfully: {uploaded_file.uri}")
            break
        except Exception as upload_e:
            if attempt == upload_attempts:
                raise gr.Error(
                    f"Failed to upload input file after multiple attempts: {upload_e}",
                    duration=10,
                ) from upload_e
            print(f"Upload attempt {attempt}/{upload_attempts} failed: {upload_e}. Retrying...")
            time.sleep(attempt)

    text_chunks = []
    image_path = None  # path of the *first* generated image, once saved
    temp_generated_img_path = None
    completed = False

    try:
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded_file.uri,
                        mime_type=uploaded_file.mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]

        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],  # ask for BOTH image and text
            response_mime_type="text/plain",
        )

        print(f"\n--- Sending Request to Model '{model}' ---")
        print(f"Prompt: {text}")
        print(f"Input Image URI: {uploaded_file.uri}")

        # Reserve a path for the generated image; delete=False so the file
        # outlives this `with` and can be handed back to the caller.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_generated_img_path = tmp.name

        # Streaming goes through the client's `models` service; the
        # `google.genai` module has no `configure()` / `get_model()` helpers.
        stream = client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        for chunk in stream:
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                continue

            for part in chunk.candidates[0].content.parts:
                text_part = getattr(part, "text", "")
                if text_part:
                    text_chunks.append(text_part)

                inline = getattr(part, "inline_data", None)
                # Keep only the first image, but do NOT stop reading: text
                # parts may still follow the image in the stream.
                if inline and inline.data and image_path is None:
                    print(f"Received image data of mime type {inline.mime_type}")
                    try:
                        save_binary_file(temp_generated_img_path, inline.data)
                        image_path = temp_generated_img_path
                        print(f"Image data saved to: {image_path}")
                    except Exception as e:
                        # Best effort: a failed save leaves image_path as None
                        # so the text response can still be returned.
                        print(f"Error saving image data to {temp_generated_img_path}: {e}")

        completed = True
        print("Response stream complete.")
        print(f"Final Image Path: {image_path}")

    except Exception as e:
        print(f"\nAn error occurred during content generation stream: {e}")
        raise gr.Error(f"Gemini generation error: {e}", duration=10) from e

    finally:
        # The temp image is only handed to the caller on success with an
        # image; in every other case remove it here so it does not leak.
        if temp_generated_img_path and not (completed and image_path):
            try:
                if os.path.exists(temp_generated_img_path):
                    os.remove(temp_generated_img_path)
            except OSError as ce:
                print(f"Error cleaning up temp generated file {temp_generated_img_path}: {ce}")

        # Always delete the uploaded file from Google's service.
        if uploaded_file is not None:
            try:
                print(f"Deleting uploaded file: {uploaded_file.name}")
                client.files.delete(name=uploaded_file.name)  # `name` is keyword-only
                print("Uploaded file deleted.")
            except Exception as e:
                print(f"Error deleting uploaded file {uploaded_file.name}: {e}")

    return image_path, "".join(text_chunks)
207
 
208
+ # Modified function to prepare input and handle output for Gradio
209
def process_image_and_prompt(composite_pil: Image.Image, prompt: str):
    """Gradio handler: analyse the uploaded image and run the requested edit.

    Saves the uploaded PIL image to a temporary PNG, wraps the user's prompt
    in instructions asking for a description and tags of the input image
    first, calls ``generate`` once, and converts its result into the values
    expected by the gallery and text outputs.

    Args:
        composite_pil: The uploaded image as a PIL image (``None`` when the
            user submitted without uploading anything).
        prompt: The user's edit/generation instruction.

    Returns:
        A ``(gallery_content, text_response)`` tuple. ``gallery_content`` is a
        one-element list holding the generated image, or ``None`` when no
        image was produced or it could not be loaded.

    Raises:
        gr.Error: If no image was supplied or any processing step fails.
    """
    if composite_pil is None:
        raise gr.Error("Please upload an image before generating.", duration=10)

    composite_path = None  # temp file holding the uploaded input image
    generated_path = None  # temp file returned by generate(), if any

    try:
        # 1. Persist the input image so it can be uploaded by path.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
        if composite_pil.mode not in ("RGB", "RGBA"):
            composite_pil = composite_pil.convert("RGBA")  # keep potential transparency
        composite_pil.save(composite_path)
        print(f"Input image saved to temporary path for upload: {composite_path}")

        # 2. Ask for the analysis text first, then the actual image task.
        task = (prompt or "").strip()
        combined_prompt = f"""
Analyze the input image carefully.
Provide a detailed description of the image, including key objects, actions, setting, and style.
Then, provide a comma-separated list of relevant tags for the input image.
Structure this analysis clearly, for example:
Description: [Detailed description here]
Tags: [tag1, tag2, tag3, ...]

After the analysis, perform the following task based on the input image and these instructions:
{task}
"""
        print("\n--- Combined Prompt Sent to Model ---")
        print(combined_prompt)

        # 3. Single API round trip: image + text in, image + text out.
        generated_path, text_response = generate(
            text=combined_prompt,
            file_name=composite_path,
            model="gemini-2.0-flash-exp",
        )

        # 4. Load the generated image fully into memory. Image.open is lazy,
        #    and the backing file is deleted in `finally` below, so the pixel
        #    data must be read before this function returns.
        result_img = None
        if generated_path and os.path.exists(generated_path):
            try:
                with Image.open(generated_path) as opened:
                    opened.load()
                    if opened.mode == "RGBA":
                        result_img = opened.convert("RGB")
                    else:
                        result_img = opened.copy()
                print(f"\nGenerated image loaded successfully from {generated_path}.")
            except Exception as img_e:
                # Treat an unreadable file as "no image"; the text is still useful.
                print(f"\nError loading generated image from {generated_path}: {img_e}")
                result_img = None
        else:
            print("\nNo valid generated image path returned or file not found after generation.")

        # 5. The gallery expects a list of images or None.
        output_gallery_content = [result_img] if result_img is not None else None

        print("\n--- Final Output Prepared for Gradio ---")
        print("Image Generated Successfully:", result_img is not None)
        print(f"Text Response Length: {len(text_response)}")

        return output_gallery_content, text_response

    except gr.Error:
        # Already a user-facing error (e.g. raised by generate); do not re-wrap.
        raise
    except Exception as e:
        print(f"\nAn error occurred in process_image_and_prompt: {e}")
        raise gr.Error(f"Processing Error: {e}", duration=10) from e

    finally:
        # 6. Remove both temporary files regardless of success or failure.
        for label, path in (("input", composite_path), ("generated", generated_path)):
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                    print(f"Removed temporary {label} file: {path}")
                except OSError as ce:
                    print(f"Error removing {label} temp file {path}: {ce}")
316
+
317
 
318
+ # Gradio interface - Keep this section mostly the same
319
  with gr.Blocks( # css_paths="style.css", # Тимчасово закоментували цей рядок
320
  ) as demo:
321
  gr.HTML(
 
335
  """
336
  )
337
 
 
338
  with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
339
  gr.Markdown("""
340
+ - **Your Gemini API key must be stored in the environment variable `geminigoogle` in your Hugging Face Space settings (Settings -> Repository secrets).**
341
+ - ❗ Sometimes the model may return only text or encounter errors.
342
+ - The text output box below should contain the model's analysis of the *input image* (description and tags) followed by any commentary related to the edit/generation.
343
  """)
344
 
345
  with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
346
  gr.Markdown("""
347
  ### 📌 Usage
348
+ - Upload an image and enter a prompt describing the *image edit or generation* you want.
349
+ - The model will analyze the input image and attempt to perform the edit/generation.
350
+ - The generated image will appear in the gallery (if successful).
351
+ - The text output will contain:
352
+ 1. A description and tags of the **input image**.
353
+ 2. Any commentary from the model about the edit/generation task.
354
+ - Upload Only PNG Image (recommended for transparent edits, but JPG often works)
355
  - ❌ **Do not use NSFW images!**
356
  """)
357
 
 
360
  image_input = gr.Image(
361
  type="pil",
362
  label="Upload Image",
363
+ image_mode="RGBA", # Use RGBA to handle transparency
364
  elem_id="image-input",
365
  elem_classes="upload-box"
366
  )
 
 
 
 
 
 
 
367
  prompt_input = gr.Textbox(
368
  lines=2,
369
+ placeholder="Enter your image edit or generation prompt here (e.g., 'add a red hat', 'change background to a beach', 'make the eyes green').",
370
+ label="Image Task Prompt",
371
  elem_classes="prompt-input"
372
  )
373
+ submit_btn = gr.Button("Generate & Analyze", elem_classes="generate-btn") # Button text reflects dual task
374
 
375
  with gr.Column(elem_classes="output-column"):
376
+ output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", preview=True)
377
  output_text = gr.Textbox(
378
+ label="Gemini Text Output (Input Image Analysis + Edit Commentary)",
379
+ placeholder="Analysis of the input image (description, tags) and commentary on the image task will appear here.",
380
+ elem_classes="output-text",
381
+ lines=10, # Give more space for the text output
382
+ show_copy_button=True # Allow easy copying of the text
383
  )
384
 
385
+ # Set up the interaction
386
  submit_btn.click(
387
  fn=process_image_and_prompt,
388
+ inputs=[image_input, prompt_input],
389
  outputs=[output_gallery, output_text],
390
  )
391
 
392
  gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
393
 
394
+ # Examples (adjust if necessary based on new prompt structure)
395
  examples = [
396
  ["data/1.webp", 'change text to "AMEER"'],
397
  ["data/2.webp", "remove the spoon from hand only"],
 
405
 
406
  gr.Examples(
407
  examples=examples,
408
+ inputs=[image_input, prompt_input],
409
  elem_id="examples-grid"
410
  )
411