Varhal committed on
Commit
2c1f8ae
·
verified ·
1 Parent(s): b35c170

updated logic to tag and generate flow

Browse files
Files changed (1) hide show
  1. app.py +238 -282
app.py CHANGED
@@ -7,351 +7,309 @@ from PIL import Image, ImageDraw, ImageFont
7
  import gradio as gr
8
  import base64
9
  import mimetypes
10
- # Make sure you have installed the google-generativeai library
11
- # pip install google-generativeai Pillow gradio
12
  from google import genai
13
- from google.genai import types # Using the newer client API structure if available
14
 
15
- # Function to save binary file (kept as is)
16
  def save_binary_file(file_name, data):
17
- """Saves binary data to a specified file."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  try:
19
- with open(file_name, "wb") as f:
20
- f.write(data)
21
- # print(f"Binary data saved successfully to {file_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  except Exception as e:
23
- print(f"Error saving binary data to {file_name}: {e}")
24
- raise # Re-raise the exception after printing
 
 
 
 
 
 
 
 
 
25
 
26
 
27
- # Modified generate function to handle stream and collect both text and image
28
  def generate(text, file_name, model="gemini-2.0-flash-exp"):
29
  """
30
- Sends image and text prompt to the Gemini model and streams the response.
31
- Collects all text parts and saves the first image part encountered.
32
- Returns the path to the generated image and the accumulated text response.
33
  """
34
  api_key = os.environ.get("geminigoogle")
35
  if not api_key:
36
- # Use gr.Error for Gradio interface display
37
- raise gr.Error("GEMINI_API_KEY environment variable (geminigoogle) not set.", duration=10)
38
 
39
- # Configure the generative AI library
40
- # This is the recommended way to configure the API key
41
- genai.configure(api_key=api_key)
42
-
43
- client = None # Placeholder for the client if needed for file upload
44
-
45
- uploaded_file = None # To store the reference to the uploaded file
46
- temp_generated_img_path = None # Path for saving generated image data
47
 
48
  try:
49
- # Attempt to use the genai.Client if available for file upload
50
- # This is the method used in your original code, so we'll keep it.
51
- # If this fails, consider falling back to models directly if they accept paths/bytes.
52
- try:
53
- client = genai.Client(api_key=api_key)
54
- print("genai.Client initialized successfully.")
55
- except Exception as e:
56
- print(f"Warning: Failed to initialize genai.Client ({e}). Attempting direct model access.")
57
- # In some library versions, you might interact directly via genai.get_model
58
- # For this specific code structure using client.files.upload, the Client is needed.
59
- # If the Client fails, file upload will likely fail too.
60
- client = None
61
- raise gr.Error(f"Failed to initialize Gemini client: {e}", duration=10)
62
-
63
-
64
- # Upload the input file to Google's service using the client
65
- if client and hasattr(client, 'files'):
66
- try:
67
- print(f"Attempting to upload input file: {file_name}")
68
- # Use a loop with retry for file upload as it can sometimes be flaky
69
- upload_attempts = 3
70
- for i in range(upload_attempts):
71
- try:
72
- uploaded_file = client.files.upload(file=file_name)
73
- print(f"Input file uploaded successfully: {uploaded_file.uri}")
74
- break # Exit retry loop on success
75
- except Exception as upload_e:
76
- if i < upload_attempts - 1:
77
- print(f"Upload attempt {i+1}/{upload_attempts} failed: {upload_e}. Retrying...")
78
- time.sleep(1 * (i + 1)) # Simple backoff
79
- else:
80
- raise gr.Error(f"Failed to upload input file after multiple attempts: {upload_e}", duration=10)
81
-
82
- except Exception as e:
83
- # This catches errors from the upload loop
84
- raise gr.Error(f"Fatal error during input file upload: {e}", duration=10)
85
- else:
86
- raise gr.Error("Gemini client or file upload capability not available.", duration=10)
87
 
88
- # Construct the contents for the model input (image + text)
89
  contents = [
90
  types.Content(
91
  role="user",
92
  parts=[
93
  types.Part.from_uri(
94
- file_uri=uploaded_file.uri,
95
- mime_type=uploaded_file.mime_type,
96
  ),
97
- types.Part.from_text(text=text), # The combined text prompt
98
  ],
99
  ),
100
  ]
101
 
102
- # Configuration for generating content
103
  generate_content_config = types.GenerateContentConfig(
104
  temperature=1,
105
  top_p=0.95,
106
  top_k=40,
107
  max_output_tokens=8192,
108
- response_modalities=["image", "text"], # Crucial: Ask for BOTH image and text
109
- response_mime_type="text/plain", # Still want text parts as plain text
110
  )
111
 
112
  text_response = ""
113
- image_path = None # Store the path to the *first* generated image
114
-
115
- print(f"\n--- Sending Request to Model '{model}' ---")
116
- print(f"Prompt: {text}")
117
- print(f"Input Image URI: {uploaded_file.uri}")
118
-
119
- # Create a temporary file to save the generated image data
120
- # This file needs to exist before streaming data into it.
121
- try:
122
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
123
- temp_generated_img_path = tmp.name
124
- print(f"Temporary path created for generated image: {temp_generated_img_path}")
125
-
126
- # Get the model instance
127
- # Use the model name directly with get_model
128
- model_instance = genai.get_model(model)
129
- print("Model instance obtained.")
130
-
131
- # Stream the response from the model
132
- print("Starting response stream...")
133
- # Use the model instance's generate_content_stream method
134
- stream = model_instance.generate_content_stream(
135
- contents=contents,
136
- generation_config=generate_content_config, # Use generation_config
137
- )
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- for chunk in stream:
141
- # Check if the chunk and candidates are valid
142
- if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
143
- # print("Skipping empty or invalid chunk.") # Optional: uncomment for verbose logging
144
- continue
145
-
146
- # Process each part within the candidate
147
- for part in chunk.candidates[0].content.parts:
148
- # Check for text parts
149
- text_part = getattr(part, "text", "")
150
- if text_part:
151
- # Append text - the model's response might come in multiple text parts
152
- text_response += text_part
153
- # print(f"Received text part: {text_part[:50]}...") # Optional: log partial text
154
-
155
- # Check for inline image data parts
156
- if hasattr(part, 'inline_data') and part.inline_data and part.inline_data.data:
157
- # Only save the *first* image data encountered during the stream
158
- if image_path is None:
159
- print(f"Received image data of mime type {part.inline_data.mime_type}")
160
- try:
161
- # Save the binary image data to our temporary file
162
- save_binary_file(temp_generated_img_path, part.inline_data.data)
163
- # Store the path to the saved file
164
- image_path = temp_generated_img_path
165
- print(f"Image data saved to: {image_path}")
166
- # IMPORTANT: DO NOT BREAK HERE. Continue processing the stream
167
- # to capture all text parts that might follow the image.
168
- except Exception as e:
169
- print(f"Error saving image data to {temp_generated_img_path}: {e}")
170
- # If saving fails, image_path remains None
171
-
172
- print("Response stream complete.")
173
- print(f"Final Image Path: {image_path}")
174
- print(f"Accumulated Text Response Length: {len(text_response)}")
175
-
176
-
177
- except Exception as e:
178
- print(f"\nAn error occurred during content generation stream: {e}")
179
- # Clean up the temporary generated image file if it was created but not yet assigned to image_path
180
- if temp_generated_img_path and os.path.exists(temp_generated_img_path) and image_path is None:
181
- try:
182
- os.remove(temp_generated_img_path)
183
- print(f"Cleaned up temp generated file due to error: {temp_generated_img_path}")
184
- except Exception as ce:
185
- print(f"Error cleaning up temp generated file {temp_generated_img_path}: {ce}")
186
- # Re-raise the exception
187
- raise gr.Error(f"Gemini generation error: {e}", duration=10)
188
-
189
 
190
  finally:
191
- # Always delete the uploaded file from Google's service
192
- if uploaded_file and client and hasattr(client, 'files'):
193
- try:
194
- print(f"Deleting uploaded file: {uploaded_file.name}")
195
- client.files.delete(uploaded_file.name)
196
- print("Uploaded file deleted.")
197
- except Exception as e:
198
- print(f"Error deleting uploaded file {uploaded_file.name}: {e}")
199
- # Note: The temp_generated_img_path is cleaned up in process_image_and_prompt
200
- # if it was successfully returned and processed. If an error occurs
201
- # after temp_generated_img_path is created but before it's returned,
202
- # the except block above handles cleanup.
203
-
204
-
205
- # Return the path to the saved image and the accumulated text
206
- return image_path, text_response
207
-
208
- # Modified function to prepare input and handle output for Gradio
209
- def process_image_and_prompt(composite_pil: Image.Image, prompt: str):
210
- """
211
- Handles the Gradio input (PIL Image, prompt), prepares the model input,
212
- calls the generate function, and formats the output for Gradio.
213
- Constructs a combined prompt asking for both analysis and generation/edit.
214
- """
215
- composite_path = None # Path for the temporary input image file
216
- temp_generated_image_path_returned = None # Path for the temporary generated image file returned by generate
217
-
218
  try:
219
- # 1. Save the input PIL image to a temporary file that can be uploaded
220
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
221
  composite_path = tmp.name
222
- # Ensure image is RGB or RGBA before saving as PNG for compatibility
223
- if composite_pil.mode not in ["RGB", "RGBA"]:
224
- composite_pil = composite_pil.convert("RGBA") # Use RGBA for potential transparency
225
- composite_pil.save(composite_path)
226
- print(f"Input image saved to temporary path for upload: {composite_path}")
227
-
228
- # 2. Construct the combined prompt for the model
229
- # This prompt tells the model to FIRST describe/tag the image,
230
- # and THEN perform the requested image task (edit/generation).
231
- # The phrasing can influence the model's response format.
232
- # Let's be explicit: Ask for description and tags first, then the main task.
233
- combined_prompt = f"""
234
- Analyze the input image carefully.
235
- Provide a detailed description of the image, including key objects, actions, setting, and style.
236
- Then, provide a comma-separated list of relevant tags for the input image.
237
- Structure this analysis clearly, for example:
238
- Description: [Detailed description here]
239
- Tags: [tag1, tag2, tag3, ...]
240
-
241
- After the analysis, perform the following task based on the input image and these instructions:
242
- {prompt}
243
- """
244
- # You can adjust the formatting of the combined_prompt as needed.
245
- # The goal is to clearly tell the model you want analysis text *first*
246
- # or at least included in the text response, followed by the image task.
247
-
248
- print(f"\n--- Combined Prompt Sent to Model ---")
249
- print(combined_prompt)
250
-
251
-
252
- # 3. Call the generate function with the combined prompt and the input image file
253
- # generate will return the path to the generated image (if any) and the full text response from the stream
254
- # This is where the single API request happens, processing input image+text and yielding output image+text.
255
- temp_generated_image_path_returned, text_response = generate(text=combined_prompt, file_name=composite_path, model="gemini-2.0-flash-exp")
256
-
257
- # 4. Process the results from the generate function
258
- result_img = None
259
- if temp_generated_image_path_returned and os.path.exists(temp_generated_image_path_returned):
260
- try:
261
- # Load the generated image file into a PIL Image object
262
- result_img = Image.open(temp_generated_image_path_returned)
263
- # Convert to RGB if it's RGBA for compatibility with Gradio's Gallery
264
- # Gradio Gallery often expects RGB
265
- if result_img.mode == "RGBA":
266
- result_img = result_img.convert("RGB")
267
- print(f"\nGenerated image loaded successfully from {temp_generated_image_path_returned}.")
268
- except Exception as img_e:
269
- print(f"\nError loading generated image from {temp_generated_image_path_returned}: {img_e}")
270
- # If loading fails, treat it as if no image was successfully generated
271
- result_img = None
272
- else:
273
- print("\nNo valid generated image path returned or file not found after generation.")
274
- # The model might fail to generate an image but still provide text
275
 
276
- # 5. Prepare the output for Gradio
277
- # Gradio's Gallery expects a list of images or None
278
- output_gallery_content = [result_img] if result_img else None
279
 
280
- # The text_response will contain the accumulated text from the model,
281
- # which *should* now include the description/tags because we asked for them in the prompt,
282
- # as well as any other textual output related to the edit/generation task.
283
 
284
- print("\n--- Final Output Prepared for Gradio ---")
285
- print("Image Generated Successfully:", result_img is not None)
286
- print(f"Text Response Length: {len(text_response)}")
287
- print("Text Response (showing first 500 chars):\n", text_response[:500] + ('...' if len(text_response) > 500 else ''))
288
 
 
 
 
289
 
290
- return output_gallery_content, text_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  except Exception as e:
293
- # Exceptions from generate or above are caught here.
294
- print(f"\nAn error occurred in process_image_and_prompt: {e}")
295
- # Use gr.Error to display the error message nicely in the Gradio interface
296
- raise gr.Error(f"Processing Error: {e}", duration=10)
297
 
298
  finally:
299
- # 6. Clean up temporary files regardless of success or failure
300
- # Clean up the temporary input image file that was uploaded
301
  if composite_path and os.path.exists(composite_path):
 
 
 
 
 
 
 
 
 
 
302
  try:
303
- os.remove(composite_path)
304
- print(f"Removed temporary input file: {composite_path}")
305
- except Exception as ce:
306
- print(f"Error removing input temp file {composite_path}: {ce}")
307
-
308
- # Clean up the temporary generated image file *if it was created* and returned
309
- # The path `temp_generated_image_path_returned` holds the path returned by generate.
310
- if temp_generated_image_path_returned and os.path.exists(temp_generated_image_path_returned):
311
- try:
312
- os.remove(temp_generated_image_path_returned)
313
- print(f"Removed temporary generated file: {temp_generated_image_path_returned}")
314
- except Exception as ge:
315
- print(f"Error removing generated temp file {temp_generated_image_path_returned}: {ge}")
316
 
317
 
318
- # Gradio interface - Keep this section mostly the same
319
  with gr.Blocks( # css_paths="style.css", # Тимчасово закоментували цей рядок
320
  ) as demo:
321
  gr.HTML(
322
  """
323
  <div class="header-container">
324
  <div>
325
- <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
326
  </div>
327
  <div>
328
- <h1>Gemini for Image Editing</h1>
329
- <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
330
- <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
331
- <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
332
- Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
333
  </div>
334
  </div>
335
  """
336
  )
337
 
 
338
  with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
339
  gr.Markdown("""
340
- - **Your Gemini API key must be stored in the environment variable `geminigoogle` in your Hugging Face Space settings (Settings -> Repository secrets).**
341
- - ❗ Sometimes the model may return only text or encounter errors.
342
- - The text output box below should contain the model's analysis of the *input image* (description and tags) followed by any commentary related to the edit/generation.
343
  """)
344
 
345
  with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
346
  gr.Markdown("""
347
  ### 📌 Usage
348
- - Upload an image and enter a prompt describing the *image edit or generation* you want.
349
- - The model will analyze the input image and attempt to perform the edit/generation.
350
- - The generated image will appear in the gallery (if successful).
351
- - The text output will contain:
352
- 1. A description and tags of the **input image**.
353
- 2. Any commentary from the model about the edit/generation task.
354
- - Upload Only PNG Image (recommended for transparent edits, but JPG often works)
355
  - ❌ **Do not use NSFW images!**
356
  """)
357
 
@@ -360,29 +318,28 @@ with gr.Blocks( # css_paths="style.css", # Тимчасово закоменту
360
  image_input = gr.Image(
361
  type="pil",
362
  label="Upload Image",
363
- image_mode="RGBA", # Use RGBA to handle transparency
364
  elem_id="image-input",
365
  elem_classes="upload-box"
366
  )
367
  prompt_input = gr.Textbox(
368
  lines=2,
369
- placeholder="Enter your image edit or generation prompt here (e.g., 'add a red hat', 'change background to a beach', 'make the eyes green').",
370
- label="Image Task Prompt",
371
  elem_classes="prompt-input"
372
  )
373
- submit_btn = gr.Button("Generate & Analyze", elem_classes="generate-btn") # Button text reflects dual task
374
 
375
  with gr.Column(elem_classes="output-column"):
376
- output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", preview=True)
377
  output_text = gr.Textbox(
378
- label="Gemini Text Output (Input Image Analysis + Edit Commentary)",
379
- placeholder="Analysis of the input image (description, tags) and commentary on the image task will appear here.",
380
  elem_classes="output-text",
381
- lines=10, # Give more space for the text output
382
- show_copy_button=True # Allow easy copying of the text
383
  )
384
 
385
- # Set up the interaction
386
  submit_btn.click(
387
  fn=process_image_and_prompt,
388
  inputs=[image_input, prompt_input],
@@ -391,7 +348,6 @@ with gr.Blocks( # css_paths="style.css", # Тимчасово закоменту
391
 
392
  gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
393
 
394
- # Examples (adjust if necessary based on new prompt structure)
395
  examples = [
396
  ["data/1.webp", 'change text to "AMEER"'],
397
  ["data/2.webp", "remove the spoon from hand only"],
 
7
  import gradio as gr
8
  import base64
9
  import mimetypes
 
 
10
  from google import genai
11
+ from google.genai import types
12
 
13
+ # Helper function to save binary data
14
  def save_binary_file(file_name, data):
15
+ with open(file_name, "wb") as f:
16
+ f.write(data)
17
+
18
+ # Function to get tags from an image using Gemini
19
+ def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
20
+ """
21
+ Analyzes an image using a text prompt and returns the text response.
22
+ Used specifically for generating tags in this case.
23
+ """
24
+ api_key = os.environ.get("geminigoogle")
25
+ if not api_key:
26
+ # Return a clear message if API key is missing
27
+ return "Error: GEMINI_API_KEY environment variable (geminigoogle) not set for tagging."
28
+
29
+ client = genai.Client(api_key=api_key)
30
+ uploaded_files = [] # Keep track of uploaded files for cleanup
31
+
32
  try:
33
+ # Upload the file
34
+ uploaded_files = [client.files.upload(file=file_name)]
35
+ print(f"Uploaded file for tagging: {uploaded_files[0].uri}")
36
+
37
+ contents = [
38
+ types.Content(
39
+ role="user",
40
+ parts=[
41
+ types.Part.from_uri(
42
+ file_uri=uploaded_files[0].uri,
43
+ mime_type=uploaded_files[0].mime_type,
44
+ ),
45
+ types.Part.from_text(text=text_prompt),
46
+ ],
47
+ ),
48
+ ]
49
+
50
+ # Configure for text-only response (focus on getting JSON)
51
+ generate_content_config = types.GenerateContentConfig(
52
+ temperature=0.5, # Lower temperature might give more focused tags
53
+ top_p=0.95,
54
+ top_k=40,
55
+ max_output_tokens=1024, # Tags shouldn't need many tokens
56
+ response_modalities=["text"], # Explicitly ask for text
57
+ response_mime_type="text/plain", # Expect plain text
58
+ )
59
+
60
+ # Use generate_content for a single text response
61
+ response = client.models.generate_content(
62
+ model=model,
63
+ contents=contents,
64
+ config=generate_content_config,
65
+ )
66
+
67
+ tag_response = ""
68
+ if response and response.candidates and response.candidates[0].content and response.candidates[0].content.parts:
69
+ # Concatenate all text parts from the response
70
+ for part in response.candidates[0].content.parts:
71
+ if hasattr(part, 'text'):
72
+ tag_response += part.text
73
+ else:
74
+ tag_response = "Could not generate tags."
75
+
76
+ return tag_response
77
+
78
  except Exception as e:
79
+ print(f"Error during tagging API call: {e}")
80
+ # Return an error message if tagging fails
81
+ return f"Error generating tags: {e}"
82
+ finally:
83
+ # Clean up uploaded files from the tagging call
84
+ for file in uploaded_files:
85
+ try:
86
+ client.files.delete(name=file.name)
87
+ print(f"Deleted uploaded file after tagging: {file.name}")
88
+ except Exception as cleanup_e:
89
+ print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
90
 
91
 
92
+ # Function for the main image processing call
93
  def generate(text, file_name, model="gemini-2.0-flash-exp"):
94
  """
95
+ Sends the image and prompt to Gemini and processes the streamed response.
96
+ This function is used for the main user request (editing, analysis, etc.).
 
97
  """
98
  api_key = os.environ.get("geminigoogle")
99
  if not api_key:
100
+ raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")
 
101
 
102
+ client = genai.Client(api_key=api_key)
103
+ uploaded_files = [] # Keep track of uploaded files for cleanup
104
+ temp_output_image_path = None # Keep track of generated temp image for cleanup
 
 
 
 
 
105
 
106
  try:
107
+ # Upload the file for the main generation call
108
+ uploaded_files = [client.files.upload(file=file_name)]
109
+ print(f"Uploaded file for generation: {uploaded_files[0].uri}")
110
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
 
112
  contents = [
113
  types.Content(
114
  role="user",
115
  parts=[
116
  types.Part.from_uri(
117
+ file_uri=uploaded_files[0].uri,
118
+ mime_type=uploaded_files[0].mime_type,
119
  ),
120
+ types.Part.from_text(text=text),
121
  ],
122
  ),
123
  ]
124
 
 
125
  generate_content_config = types.GenerateContentConfig(
126
  temperature=1,
127
  top_p=0.95,
128
  top_k=40,
129
  max_output_tokens=8192,
130
+ response_modalities=["image", "text"], # Expecting potentially image and text
131
+ response_mime_type="text/plain",
132
  )
133
 
134
  text_response = ""
135
+ image_path = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
+ # Use NamedTemporaryFile with delete=False because we need to return the path
138
+ # We will handle deletion explicitly later.
139
+ with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
140
+ temp_output_image_path = tmp.name
141
+
142
+ print("Starting generation stream...")
143
+ # Stream the response
144
+ for chunk in client.models.generate_content_stream(
145
+ model=model,
146
+ contents=contents,
147
+ config=generate_content_config,
148
+ ):
149
+ if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
150
+ continue
151
+
152
+ # Process each part in the chunk
153
+ for part in chunk.candidates[0].content.parts:
154
+ # Check for text parts
155
+ text_part = getattr(part, "text", "")
156
+ if text_part:
157
+ text_response += text_part
158
+
159
+ # Check for inline image data
160
+ if part.inline_data:
161
+ print(f"Received image data with mime type {part.inline_data.mime_type}. Saving to {temp_output_image_path}")
162
+ save_binary_file(temp_output_image_path, part.inline_data.data)
163
+ image_path = temp_output_image_path # Set the output image path
164
+ # Note: If the model sends multiple images, this will only save the last one received in a part.
165
+ # For typical use cases where one image is expected, this is fine.
166
+ # If multiple images could be in different parts of the *same* chunk,
167
+ # you'd need more complex handling (e.g., saving each to a separate file).
168
+ # If the model sends an image and *then* more text, the loop continues.
169
+ # We set image_path here and let the loop finish collecting text.
170
+
171
+ print("Generation stream finished.")
172
+ # The loop finishes after processing all parts of all chunks.
173
+
174
+ # Check if an image was actually saved, otherwise set image_path to None
175
+ if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
176
+ print("No valid image data was received or saved.")
177
+ image_path = None # Ensure image_path is None if no image data was received/saved
178
+
179
+ return image_path, text_response.strip() # Return the path to the saved image (or None) and the collected text
180
 
181
+ except Exception as e:
182
+ print(f"Error during main generation API call: {e}")
183
+ # Ensure temporary files created before the error are cleaned up
184
+ if temp_output_image_path and os.path.exists(temp_output_image_path):
185
+ os.remove(temp_output_image_path)
186
+ raise e # Re-raise the exception after cleanup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  finally:
189
+ # Clean up uploaded files from the generation call
190
+ for file in uploaded_files:
191
+ try:
192
+ client.files.delete(name=file.name)
193
+ print(f"Deleted uploaded file after generation: {file.name}")
194
+ except Exception as cleanup_e:
195
+ print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
196
+
197
+ # Main processing function for Gradio
198
+ def process_image_and_prompt(composite_pil, prompt):
199
+ composite_path = None # Initialize input temp file path for finally block
200
+ output_image_path = None # Initialize output temp file path for finally block
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  try:
202
+ # 1. Save the input PIL image to a temporary file
203
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
204
  composite_path = tmp.name
205
+ # Ensure image is saved in a format compatible with Gemini, convert if necessary
206
+ if composite_pil.mode == "RGBA":
207
+ # Convert RGBA to RGB if necessary, as some models prefer RGB
208
+ # Or handle alpha channel depending on model capabilities.
209
+ # For simplicity here, saving as PNG should preserve alpha,
210
+ # but Gemini might interpret it differently. Let's save as PNG.
211
+ composite_pil.save(composite_path, format="PNG")
212
+ else:
213
+ composite_pil.save(composite_path, format="PNG") # Save as PNG by default
214
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ file_name = composite_path # This is the path to the saved input image file
217
+ model = "gemini-2.0-flash-exp" # Specify the model here
 
218
 
219
+ # 2. Call get_image_tags to get tags from the original image
220
+ tagging_prompt = "Analyze this image. Provide a JSON object containing a single key, 'tags', whose value is a JSON array of strings, representing relevant keywords or tags for the image content. Example: {\"tags\": [\"apple\", \"fruit\", \"red\"]}. Provide ONLY the JSON object and nothing else."
221
+ tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)
222
 
223
+ # 3. Call generate for the main image processing based on the user prompt
224
+ # This function returns the path to a generated image (if any) and text response
225
+ output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)
 
226
 
227
+ # 4. Combine the tag JSON string and the main text response
228
+ # Format the output clearly
229
+ final_text_output = f"Original Image Tags (JSON): {tag_json_string}\n\n---\n\nGemini Response:\n{main_text_response}"
230
 
231
+ # 5. Prepare the image output for the Gradio gallery
232
+ result_img = None
233
+ image_output_list = []
234
+ if output_image_path and os.path.exists(output_image_path):
235
+ try:
236
+ result_img = Image.open(output_image_path)
237
+ # Convert to RGB for display if it's RGBA (Gradio Gallery often expects RGB)
238
+ if result_img.mode == "RGBA":
239
+ result_img = result_img.convert("RGB")
240
+ image_output_list = [result_img] # Add the image to the list for the gallery
241
+ except Exception as img_e:
242
+ print(f"Error opening generated image {output_image_path}: {img_e}")
243
+ # If image opening fails, don't return an image
244
+ image_output_list = []
245
+ # Append error to text response
246
+ final_text_output += f"\n\n---\n\nError loading generated image: {img_e}"
247
+
248
+
249
+ # 6. Return results to Gradio
250
+ return image_output_list, final_text_output
251
 
252
  except Exception as e:
253
+ # Catch any exceptions during the process
254
+ print(f"An error occurred during processing: {e}")
255
+ # Use Gradio's error handling to display a message in the UI
256
+ raise gr.Error(f"Processing failed: {e}", duration=5)
257
 
258
  finally:
259
+ # 7. Clean up temporary files
260
+ # Clean up the temporary input file
261
  if composite_path and os.path.exists(composite_path):
262
+ try:
263
+ os.remove(composite_path)
264
+ print(f"Deleted input temporary file: {composite_path}")
265
+ except Exception as cleanup_e:
266
+ print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")
267
+
268
+ # Clean up the temporary output image file created by generate()
269
+ # Note: generate() might have already deleted the *uploaded* file via API,
270
+ # but this handles the local file saved from inline_data.
271
+ if output_image_path and os.path.exists(output_image_path):
272
  try:
273
+ os.remove(output_image_path)
274
+ print(f"Deleted output temporary file: {output_image_path}")
275
+ except Exception as cleanup_e:
276
+ print(f"Error deleting output temporary file {output_image_path}: {cleanup_e}")
 
 
 
 
 
 
 
 
 
277
 
278
 
279
+ # Gradio інтерфейс (unchanged from your original code, except connection)
280
  with gr.Blocks( # css_paths="style.css", # Тимчасово закоментували цей рядок
281
  ) as demo:
282
  gr.HTML(
283
  """
284
  <div class="header-container">
285
  <div>
286
+ <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
287
  </div>
288
  <div>
289
+ <h1>Gemini for Image Editing</h1>
290
+ <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
291
+ <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
292
+ <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
293
+ Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
294
  </div>
295
  </div>
296
  """
297
  )
298
 
299
+ # Прибираємо секцію API Configuration або змінюємо її опис, оскільки ключ більше не вводиться
300
  with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
301
  gr.Markdown("""
302
+ - **Ваш Gemini API ключ має бути збережений у змінній оточення `geminigoogle` в налаштуваннях Hugging Face Space.**
303
+ - ❗ Іноді модель повертає текст замість зображення.
 
304
  """)
305
 
306
  with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
307
  gr.Markdown("""
308
  ### 📌 Usage
309
+ - Upload an image and enter a prompt to generate outputs.
310
+ - The response will include generated tags for the original image (in JSON format) and Gemini's text output.
311
+ - If an edited image is returned, it will appear in the gallery. If not, only text will appear.
312
+ - Upload Only PNG Image
 
 
 
313
  - ❌ **Do not use NSFW images!**
314
  """)
315
 
 
318
  image_input = gr.Image(
319
  type="pil",
320
  label="Upload Image",
321
+ image_mode="RGBA",
322
  elem_id="image-input",
323
  elem_classes="upload-box"
324
  )
325
  prompt_input = gr.Textbox(
326
  lines=2,
327
+ placeholder="Enter prompt here (e.g., 'change text to \"HELLO\"', 'remove the background')",
328
+ label="Prompt for Gemini",
329
  elem_classes="prompt-input"
330
  )
331
+ submit_btn = gr.Button("Generate", elem_classes="generate-btn")
332
 
333
  with gr.Column(elem_classes="output-column"):
334
+ output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", allow_preview=True)
335
  output_text = gr.Textbox(
336
+ label="Gemini Output (Tags + Response)",
337
+ placeholder="Original image tags (JSON) and Gemini's response will appear here.",
338
  elem_classes="output-text",
339
+ lines=10 # Give more space for combined output
 
340
  )
341
 
342
+ # Connect the button click to the updated processing function
343
  submit_btn.click(
344
  fn=process_image_and_prompt,
345
  inputs=[image_input, prompt_input],
 
348
 
349
  gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
350
 
 
351
  examples = [
352
  ["data/1.webp", 'change text to "AMEER"'],
353
  ["data/2.webp", "remove the spoon from hand only"],