Gemini-Image-Edit

Running

App Files Files Community

Varhal committed on May 2, 2025

Commit

2c1f8ae

verified ·

1 Parent(s): b35c170

updated logic to tag and generate flow

Browse files

Files changed (1) hide show

app.py +238 -282

app.py CHANGED Viewed

@@ -7,351 +7,309 @@ from PIL import Image, ImageDraw, ImageFont
 import gradio as gr
 import base64
 import mimetypes
-# Make sure you have installed the google-generativeai library
-# pip install google-generativeai Pillow gradio
 from google import genai
-from google.genai import types # Using the newer client API structure if available
-# Function to save binary file (kept as is)
 def save_binary_file(file_name, data):
-    """Saves binary data to a specified file."""
     try:
-        with open(file_name, "wb") as f:
-            f.write(data)
-        # print(f"Binary data saved successfully to {file_name}")
     except Exception as e:
-        print(f"Error saving binary data to {file_name}: {e}")
-        raise # Re-raise the exception after printing
-# Modified generate function to handle stream and collect both text and image
 def generate(text, file_name, model="gemini-2.0-flash-exp"):
     """
-    Sends image and text prompt to the Gemini model and streams the response.
-    Collects all text parts and saves the first image part encountered.
-    Returns the path to the generated image and the accumulated text response.
     """
     api_key = os.environ.get("geminigoogle")
     if not api_key:
-        # Use gr.Error for Gradio interface display
-        raise gr.Error("GEMINI_API_KEY environment variable (geminigoogle) not set.", duration=10)
-    # Configure the generative AI library
-    # This is the recommended way to configure the API key
-    genai.configure(api_key=api_key)
-    client = None # Placeholder for the client if needed for file upload
-    uploaded_file = None # To store the reference to the uploaded file
-    temp_generated_img_path = None # Path for saving generated image data
     try:
-        # Attempt to use the genai.Client if available for file upload
-        # This is the method used in your original code, so we'll keep it.
-        # If this fails, consider falling back to models directly if they accept paths/bytes.
-        try:
-            client = genai.Client(api_key=api_key)
-            print("genai.Client initialized successfully.")
-        except Exception as e:
-            print(f"Warning: Failed to initialize genai.Client ({e}). Attempting direct model access.")
-            # In some library versions, you might interact directly via genai.get_model
-            # For this specific code structure using client.files.upload, the Client is needed.
-            # If the Client fails, file upload will likely fail too.
-            client = None
-            raise gr.Error(f"Failed to initialize Gemini client: {e}", duration=10)
-        # Upload the input file to Google's service using the client
-        if client and hasattr(client, 'files'):
-            try:
-                print(f"Attempting to upload input file: {file_name}")
-                # Use a loop with retry for file upload as it can sometimes be flaky
-                upload_attempts = 3
-                for i in range(upload_attempts):
-                    try:
-                        uploaded_file = client.files.upload(file=file_name)
-                        print(f"Input file uploaded successfully: {uploaded_file.uri}")
-                        break # Exit retry loop on success
-                    except Exception as upload_e:
-                         if i < upload_attempts - 1:
-                              print(f"Upload attempt {i+1}/{upload_attempts} failed: {upload_e}. Retrying...")
-                              time.sleep(1 * (i + 1)) # Simple backoff
-                         else:
-                              raise gr.Error(f"Failed to upload input file after multiple attempts: {upload_e}", duration=10)
-            except Exception as e:
-                 # This catches errors from the upload loop
-                 raise gr.Error(f"Fatal error during input file upload: {e}", duration=10)
-        else:
-             raise gr.Error("Gemini client or file upload capability not available.", duration=10)
-        # Construct the contents for the model input (image + text)
         contents = [
             types.Content(
                 role="user",
                 parts=[
                     types.Part.from_uri(
-                        file_uri=uploaded_file.uri,
-                        mime_type=uploaded_file.mime_type,
                     ),
-                    types.Part.from_text(text=text), # The combined text prompt
                 ],
             ),
         ]
-        # Configuration for generating content
         generate_content_config = types.GenerateContentConfig(
             temperature=1,
             top_p=0.95,
             top_k=40,
             max_output_tokens=8192,
-            response_modalities=["image", "text"], # Crucial: Ask for BOTH image and text
-            response_mime_type="text/plain", # Still want text parts as plain text
         )
         text_response = ""
-        image_path = None # Store the path to the *first* generated image
-        print(f"\n--- Sending Request to Model '{model}' ---")
-        print(f"Prompt: {text}")
-        print(f"Input Image URI: {uploaded_file.uri}")
-        # Create a temporary file to save the generated image data
-        # This file needs to exist before streaming data into it.
-        try:
-            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
-                 temp_generated_img_path = tmp.name
-            print(f"Temporary path created for generated image: {temp_generated_img_path}")
-            # Get the model instance
-            # Use the model name directly with get_model
-            model_instance = genai.get_model(model)
-            print("Model instance obtained.")
-            # Stream the response from the model
-            print("Starting response stream...")
-            # Use the model instance's generate_content_stream method
-            stream = model_instance.generate_content_stream(
-                contents=contents,
-                generation_config=generate_content_config, # Use generation_config
-            )
-            for chunk in stream:
-                # Check if the chunk and candidates are valid
-                if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
-                    # print("Skipping empty or invalid chunk.") # Optional: uncomment for verbose logging
-                    continue
-                # Process each part within the candidate
-                for part in chunk.candidates[0].content.parts:
-                    # Check for text parts
-                    text_part = getattr(part, "text", "")
-                    if text_part:
-                        # Append text - the model's response might come in multiple text parts
-                        text_response += text_part
-                        # print(f"Received text part: {text_part[:50]}...") # Optional: log partial text
-                    # Check for inline image data parts
-                    if hasattr(part, 'inline_data') and part.inline_data and part.inline_data.data:
-                        # Only save the *first* image data encountered during the stream
-                        if image_path is None:
-                            print(f"Received image data of mime type {part.inline_data.mime_type}")
-                            try:
-                                # Save the binary image data to our temporary file
-                                save_binary_file(temp_generated_img_path, part.inline_data.data)
-                                # Store the path to the saved file
-                                image_path = temp_generated_img_path
-                                print(f"Image data saved to: {image_path}")
-                                # IMPORTANT: DO NOT BREAK HERE. Continue processing the stream
-                                # to capture all text parts that might follow the image.
-                            except Exception as e:
-                                print(f"Error saving image data to {temp_generated_img_path}: {e}")
-                                # If saving fails, image_path remains None
-            print("Response stream complete.")
-            print(f"Final Image Path: {image_path}")
-            print(f"Accumulated Text Response Length: {len(text_response)}")
-        except Exception as e:
-            print(f"\nAn error occurred during content generation stream: {e}")
-            # Clean up the temporary generated image file if it was created but not yet assigned to image_path
-            if temp_generated_img_path and os.path.exists(temp_generated_img_path) and image_path is None:
-                 try:
-                      os.remove(temp_generated_img_path)
-                      print(f"Cleaned up temp generated file due to error: {temp_generated_img_path}")
-                 except Exception as ce:
-                      print(f"Error cleaning up temp generated file {temp_generated_img_path}: {ce}")
-            # Re-raise the exception
-            raise gr.Error(f"Gemini generation error: {e}", duration=10)
     finally:
-         # Always delete the uploaded file from Google's service
-         if uploaded_file and client and hasattr(client, 'files'):
-             try:
-                 print(f"Deleting uploaded file: {uploaded_file.name}")
-                 client.files.delete(uploaded_file.name)
-                 print("Uploaded file deleted.")
-             except Exception as e:
-                 print(f"Error deleting uploaded file {uploaded_file.name}: {e}")
-         # Note: The temp_generated_img_path is cleaned up in process_image_and_prompt
-         # if it was successfully returned and processed. If an error occurs
-         # after temp_generated_img_path is created but before it's returned,
-         # the except block above handles cleanup.
-    # Return the path to the saved image and the accumulated text
-    return image_path, text_response
-# Modified function to prepare input and handle output for Gradio
-def process_image_and_prompt(composite_pil: Image.Image, prompt: str):
-    """
-    Handles the Gradio input (PIL Image, prompt), prepares the model input,
-    calls the generate function, and formats the output for Gradio.
-    Constructs a combined prompt asking for both analysis and generation/edit.
-    """
-    composite_path = None # Path for the temporary input image file
-    temp_generated_image_path_returned = None # Path for the temporary generated image file returned by generate
     try:
-        # 1. Save the input PIL image to a temporary file that can be uploaded
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
             composite_path = tmp.name
-            # Ensure image is RGB or RGBA before saving as PNG for compatibility
-            if composite_pil.mode not in ["RGB", "RGBA"]:
-                 composite_pil = composite_pil.convert("RGBA") # Use RGBA for potential transparency
-            composite_pil.save(composite_path)
-        print(f"Input image saved to temporary path for upload: {composite_path}")
-        # 2. Construct the combined prompt for the model
-        # This prompt tells the model to FIRST describe/tag the image,
-        # and THEN perform the requested image task (edit/generation).
-        # The phrasing can influence the model's response format.
-        # Let's be explicit: Ask for description and tags first, then the main task.
-        combined_prompt = f"""
-Analyze the input image carefully.
-Provide a detailed description of the image, including key objects, actions, setting, and style.
-Then, provide a comma-separated list of relevant tags for the input image.
-Structure this analysis clearly, for example:
-Description: [Detailed description here]
-Tags: [tag1, tag2, tag3, ...]
-After the analysis, perform the following task based on the input image and these instructions:
-{prompt}
-"""
-        # You can adjust the formatting of the combined_prompt as needed.
-        # The goal is to clearly tell the model you want analysis text *first*
-        # or at least included in the text response, followed by the image task.
-        print(f"\n--- Combined Prompt Sent to Model ---")
-        print(combined_prompt)
-        # 3. Call the generate function with the combined prompt and the input image file
-        # generate will return the path to the generated image (if any) and the full text response from the stream
-        # This is where the single API request happens, processing input image+text and yielding output image+text.
-        temp_generated_image_path_returned, text_response = generate(text=combined_prompt, file_name=composite_path, model="gemini-2.0-flash-exp")
-        # 4. Process the results from the generate function
-        result_img = None
-        if temp_generated_image_path_returned and os.path.exists(temp_generated_image_path_returned):
-            try:
-                # Load the generated image file into a PIL Image object
-                result_img = Image.open(temp_generated_image_path_returned)
-                # Convert to RGB if it's RGBA for compatibility with Gradio's Gallery
-                # Gradio Gallery often expects RGB
-                if result_img.mode == "RGBA":
-                    result_img = result_img.convert("RGB")
-                print(f"\nGenerated image loaded successfully from {temp_generated_image_path_returned}.")
-            except Exception as img_e:
-                 print(f"\nError loading generated image from {temp_generated_image_path_returned}: {img_e}")
-                 # If loading fails, treat it as if no image was successfully generated
-                 result_img = None
-        else:
-            print("\nNo valid generated image path returned or file not found after generation.")
-            # The model might fail to generate an image but still provide text
-        # 5. Prepare the output for Gradio
-        # Gradio's Gallery expects a list of images or None
-        output_gallery_content = [result_img] if result_img else None
-        # The text_response will contain the accumulated text from the model,
-        # which *should* now include the description/tags because we asked for them in the prompt,
-        # as well as any other textual output related to the edit/generation task.
-        print("\n--- Final Output Prepared for Gradio ---")
-        print("Image Generated Successfully:", result_img is not None)
-        print(f"Text Response Length: {len(text_response)}")
-        print("Text Response (showing first 500 chars):\n", text_response[:500] + ('...' if len(text_response) > 500 else ''))
-        return output_gallery_content, text_response
     except Exception as e:
-        # Exceptions from generate or above are caught here.
-        print(f"\nAn error occurred in process_image_and_prompt: {e}")
-        # Use gr.Error to display the error message nicely in the Gradio interface
-        raise gr.Error(f"Processing Error: {e}", duration=10)
     finally:
-        # 6. Clean up temporary files regardless of success or failure
-        # Clean up the temporary input image file that was uploaded
         if composite_path and os.path.exists(composite_path):
              try:
-                  os.remove(composite_path)
-                  print(f"Removed temporary input file: {composite_path}")
-             except Exception as ce:
-                  print(f"Error removing input temp file {composite_path}: {ce}")
-        # Clean up the temporary generated image file *if it was created* and returned
-        # The path `temp_generated_image_path_returned` holds the path returned by generate.
-        if temp_generated_image_path_returned and os.path.exists(temp_generated_image_path_returned):
-             try:
-                 os.remove(temp_generated_image_path_returned)
-                 print(f"Removed temporary generated file: {temp_generated_image_path_returned}")
-             except Exception as ge:
-                 print(f"Error removing generated temp file {temp_generated_image_path_returned}: {ge}")
-# Gradio interface - Keep this section mostly the same
 with gr.Blocks( # css_paths="style.css", # Тимчасово закоментували цей рядок
     ) as demo:
     gr.HTML(
     """
     <div class="header-container">
       <div>
-          <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
       </div>
       <div>
-          <h1>Gemini for Image Editing</h1>
-          <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
-           <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
-           <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
-           Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
       </div>
     </div>
     """
     )
     with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
         gr.Markdown("""
-    - **Your Gemini API key must be stored in the environment variable `geminigoogle` in your Hugging Face Space settings (Settings -> Repository secrets).**
-    - ❗ Sometimes the model may return only text or encounter errors.
-    - The text output box below should contain the model's analysis of the *input image* (description and tags) followed by any commentary related to the edit/generation.
     """)
     with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
         gr.Markdown("""
     ### 📌 Usage
-      - Upload an image and enter a prompt describing the *image edit or generation* you want.
-      - The model will analyze the input image and attempt to perform the edit/generation.
-      - The generated image will appear in the gallery (if successful).
-      - The text output will contain:
-          1. A description and tags of the **input image**.
-          2. Any commentary from the model about the edit/generation task.
-      - Upload Only PNG Image (recommended for transparent edits, but JPG often works)
       - ❌ **Do not use NSFW images!**
     """)
@@ -360,29 +318,28 @@ with gr.Blocks( # css_paths="style.css", # Тимчасово закоменту
             image_input = gr.Image(
                 type="pil",
                 label="Upload Image",
-                image_mode="RGBA", # Use RGBA to handle transparency
                 elem_id="image-input",
                 elem_classes="upload-box"
             )
             prompt_input = gr.Textbox(
                 lines=2,
-                placeholder="Enter your image edit or generation prompt here (e.g., 'add a red hat', 'change background to a beach', 'make the eyes green').",
-                label="Image Task Prompt",
                 elem_classes="prompt-input"
             )
-            submit_btn = gr.Button("Generate & Analyze", elem_classes="generate-btn") # Button text reflects dual task
         with gr.Column(elem_classes="output-column"):
-            output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", preview=True)
             output_text = gr.Textbox(
-                label="Gemini Text Output (Input Image Analysis + Edit Commentary)",
-                placeholder="Analysis of the input image (description, tags) and commentary on the image task will appear here.",
                 elem_classes="output-text",
-                lines=10, # Give more space for the text output
-                show_copy_button=True # Allow easy copying of the text
             )
-    # Set up the interaction
     submit_btn.click(
         fn=process_image_and_prompt,
         inputs=[image_input, prompt_input],
@@ -391,7 +348,6 @@ with gr.Blocks( # css_paths="style.css", # Тимчасово закоменту
     gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
-    # Examples (adjust if necessary based on new prompt structure)
     examples = [
         ["data/1.webp", 'change text to "AMEER"'],
         ["data/2.webp", "remove the spoon from hand only"],

 import gradio as gr
 import base64
 import mimetypes
 from google import genai
+from google.genai import types
# Helper that writes raw bytes to a file on disk
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode, replacing any existing content."""
    with open(file_name, mode="wb") as sink:
        sink.write(data)
# Ask Gemini for a text-only analysis (e.g. tags) of a single image
def get_image_tags(file_name, text_prompt, model="gemini-2.0-flash-exp"):
    """
    Upload an image, send it with ``text_prompt`` to Gemini and return the text reply.

    Errors raised by the upload or the generation call are reported through
    the returned string instead of being raised.

    Args:
        file_name: Path of the local image file to upload.
        text_prompt: Instruction sent to the model alongside the image.
        model: Name of the Gemini model to query.

    Returns:
        The concatenated text parts of the first candidate; the string
        "Could not generate tags." when the response carries no content
        parts; or a message starting with "Error" when the API key is
        missing or the upload/generation call fails.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        # Return a clear message if the API key is missing
        return "Error: GEMINI_API_KEY environment variable (geminigoogle) not set for tagging."

    client = genai.Client(api_key=api_key)
    uploaded_files = []  # Tracked so the finally block can delete them remotely
    try:
        uploaded_files = [client.files.upload(file=file_name)]
        uploaded = uploaded_files[0]
        print(f"Uploaded file for tagging: {uploaded.uri}")

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type=uploaded.mime_type,
                    ),
                    types.Part.from_text(text=text_prompt),
                ],
            ),
        ]
        # Text-only response configuration
        generate_content_config = types.GenerateContentConfig(
            temperature=0.5,
            top_p=0.95,
            top_k=40,
            max_output_tokens=1024,
            response_modalities=["text"],
            response_mime_type="text/plain",
        )
        # Single (non-streamed) request
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        has_parts = (
            response
            and response.candidates
            and response.candidates[0].content
            and response.candidates[0].content.parts
        )
        if not has_parts:
            return "Could not generate tags."

        # A part may expose ``text`` as None (non-text part); concatenating
        # None would raise, so only keep parts that actually carry text.
        return "".join(
            part.text
            for part in response.candidates[0].content.parts
            if getattr(part, "text", None)
        )
    except Exception as e:
        print(f"Error during tagging API call: {e}")
        # Report the failure as text
        return f"Error generating tags: {e}"
    finally:
        # Best-effort removal of the files uploaded for this call
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after tagging: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Best-effort removal of a local temporary file
def _remove_temp_file(path):
    """Delete ``path`` if it exists; log instead of raising when removal fails."""
    if not path or not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError as cleanup_e:
        print(f"Error deleting temporary file {path}: {cleanup_e}")


# Function for the main image processing call
def generate(text, file_name, model="gemini-2.0-flash-exp"):
    """
    Send an image and a prompt to Gemini and collect the streamed response.

    Args:
        text: Prompt sent to the model alongside the image.
        file_name: Path of the local image file to upload.
        model: Name of the Gemini model to query.

    Returns:
        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
        a temporary ``.png`` file holding the last image received, or ``None``
        when no non-empty image was received. The caller is responsible for
        deleting that file. ``text_response`` is the concatenation of all text
        parts, stripped of surrounding whitespace.

    Raises:
        ValueError: If the ``geminigoogle`` environment variable is not set.
        Exception: Any error raised by the upload or generation calls is
            re-raised after local temporary files have been cleaned up.
    """
    api_key = os.environ.get("geminigoogle")
    if not api_key:
        raise ValueError("GEMINI_API_KEY environment variable (geminigoogle) not set.")

    client = genai.Client(api_key=api_key)
    uploaded_files = []  # Tracked so the finally block can delete them remotely
    temp_output_image_path = None  # Local temp file for the generated image
    try:
        uploaded_files = [client.files.upload(file=file_name)]
        uploaded = uploaded_files[0]
        print(f"Uploaded file for generation: {uploaded.uri}")

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type=uploaded.mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=["image", "text"],
            response_mime_type="text/plain",
        )

        text_chunks = []
        image_path = None

        # delete=False because the path is returned to the caller, who
        # removes the file once it has been consumed.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_output_image_path = tmp.name

        print("Starting generation stream...")
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                continue
            for part in chunk.candidates[0].content.parts:
                text_part = getattr(part, "text", "")
                if text_part:
                    text_chunks.append(text_part)
                # Skip inline_data entries without a payload; writing None
                # would otherwise abort the whole request.
                inline = getattr(part, "inline_data", None)
                if inline and inline.data:
                    print(f"Received image data with mime type {inline.mime_type}. Saving to {temp_output_image_path}")
                    save_binary_file(temp_output_image_path, inline.data)
                    # Every image overwrites the same file, so the last one
                    # received wins; keep looping to collect trailing text.
                    image_path = temp_output_image_path
        print("Generation stream finished.")

        if not image_path or not os.path.exists(image_path) or os.path.getsize(image_path) == 0:
            print("No valid image data was received or saved.")
            image_path = None
            # The caller only ever sees None here, so the placeholder temp
            # file must be removed now or it would be leaked.
            _remove_temp_file(temp_output_image_path)

        return image_path, "".join(text_chunks).strip()
    except Exception as e:
        print(f"Error during main generation API call: {e}")
        # Clean up the local temp file without masking the original error
        _remove_temp_file(temp_output_image_path)
        raise
    finally:
        # Best-effort removal of the files uploaded for this call
        for file in uploaded_files:
            try:
                client.files.delete(name=file.name)
                print(f"Deleted uploaded file after generation: {file.name}")
            except Exception as cleanup_e:
                print(f"Error deleting uploaded file {file.name}: {cleanup_e}")
# Main processing function for Gradio
def process_image_and_prompt(composite_pil, prompt):
    """
    Gradio handler: tag the uploaded image, run the user's prompt, return both results.

    Args:
        composite_pil: The uploaded image as a PIL image (``None`` when the
            user submitted without uploading anything).
        prompt: The user's instruction for the main generation call.

    Returns:
        A ``(images, text)`` tuple: ``images`` is a list holding the generated
        PIL image (empty when none was produced or it could not be loaded);
        ``text`` combines the tag response and the model's text response.

    Raises:
        gr.Error: If no image was provided or any processing step fails.
    """
    if composite_pil is None:
        # Fail early with a readable message instead of an AttributeError
        raise gr.Error("Please upload an image before submitting.", duration=5)

    composite_path = None  # Input temp file, removed in the finally block
    output_image_path = None  # Output temp file, removed in the finally block
    try:
        # 1. Save the input PIL image to a temporary PNG file. The handle is
        #    closed before saving so the file is not opened twice at once.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
        composite_pil.save(composite_path, format="PNG")

        file_name = composite_path
        model = "gemini-2.0-flash-exp"

        # 2. Get tags for the original image
        tagging_prompt = "Analyze this image. Provide a JSON object containing a single key, 'tags', whose value is a JSON array of strings, representing relevant keywords or tags for the image content. Example: {\"tags\": [\"apple\", \"fruit\", \"red\"]}. Provide ONLY the JSON object and nothing else."
        tag_json_string = get_image_tags(file_name, tagging_prompt, model=model)

        # 3. Run the main request for the user's prompt
        output_image_path, main_text_response = generate(text=prompt, file_name=file_name, model=model)

        # 4. Combine the tag response and the main text response
        final_text_output = f"Original Image Tags (JSON): {tag_json_string}\n\n---\n\nGemini Response:\n{main_text_response}"

        # 5. Load the generated image fully into memory, because its backing
        #    file is deleted in the finally block below.
        image_output_list = []
        if output_image_path and os.path.exists(output_image_path):
            try:
                with Image.open(output_image_path) as generated:
                    # convert()/copy() both return an in-memory image that no
                    # longer depends on the file handle.
                    if generated.mode == "RGBA":
                        result_img = generated.convert("RGB")
                    else:
                        result_img = generated.copy()
                image_output_list = [result_img]
            except Exception as img_e:
                print(f"Error opening generated image {output_image_path}: {img_e}")
                image_output_list = []
                final_text_output += f"\n\n---\n\nError loading generated image: {img_e}"

        # 6. Return results to Gradio
        return image_output_list, final_text_output
    except Exception as e:
        print(f"An error occurred during processing: {e}")
        # Surface the failure in the Gradio UI, keeping the original cause
        raise gr.Error(f"Processing failed: {e}", duration=5) from e
    finally:
        # 7. Clean up local temporary files (input first, then output)
        if composite_path and os.path.exists(composite_path):
            try:
                os.remove(composite_path)
                print(f"Deleted input temporary file: {composite_path}")
            except Exception as cleanup_e:
                print(f"Error deleting input temporary file {composite_path}: {cleanup_e}")
        if output_image_path and os.path.exists(output_image_path):
            try:
                os.remove(output_image_path)
                print(f"Deleted output temporary file: {output_image_path}")
            except Exception as cleanup_e:
                print(f"Error deleting output temporary file {output_image_path}: {cleanup_e}")
+# Gradio interface (unchanged from the original code, except for how the button click is connected to the processing function)
 with gr.Blocks( # css_paths="style.css", # Тимчасово закоментували цей рядок
     ) as demo:
     gr.HTML(
     """
     <div class="header-container">
       <div>
+        <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Gemini logo">
       </div>
       <div>
+        <h1>Gemini for Image Editing</h1>
+        <p>Powered by <a href="https://gradio.app/">Gradio</a>⚡️|
+         <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a> this Repo |
+         <a href="https://aistudio.google.com/apikey">Get an API Key</a> |
+         Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
       </div>
     </div>
     """
     )
+    # Прибираємо секцію API Configuration або змінюємо її опис, оскільки ключ більше не вводиться
     with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
         gr.Markdown("""
+    - **Ваш Gemini API ключ має бути збережений у змінній оточення `geminigoogle` в налаштуваннях Hugging Face Space.**
+    - ❗ Іноді модель повертає текст замість зображення.
     """)
     with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
         gr.Markdown("""
     ### 📌 Usage
+      - Upload an image and enter a prompt to generate outputs.
+      - The response will include generated tags for the original image (in JSON format) and Gemini's text output.
+      - If an edited image is returned, it will appear in the gallery. If not, only text will appear.
+      - Upload Only PNG Image
       - ❌ **Do not use NSFW images!**
     """)
             image_input = gr.Image(
                 type="pil",
                 label="Upload Image",
+                image_mode="RGBA",
                 elem_id="image-input",
                 elem_classes="upload-box"
             )
             prompt_input = gr.Textbox(
                 lines=2,
+                placeholder="Enter prompt here (e.g., 'change text to \"HELLO\"', 'remove the background')",
+                label="Prompt for Gemini",
                 elem_classes="prompt-input"
             )
+            submit_btn = gr.Button("Generate", elem_classes="generate-btn")
         with gr.Column(elem_classes="output-column"):
+            output_gallery = gr.Gallery(label="Generated Image Output", elem_classes="output-gallery", allow_preview=True)
             output_text = gr.Textbox(
+                label="Gemini Output (Tags + Response)",
+                placeholder="Original image tags (JSON) and Gemini's response will appear here.",
                 elem_classes="output-text",
+                lines=10 # Give more space for combined output
             )
+    # Connect the button click to the updated processing function
     submit_btn.click(
         fn=process_image_and_prompt,
         inputs=[image_input, prompt_input],
     gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
     examples = [
         ["data/1.webp", 'change text to "AMEER"'],
         ["data/2.webp", "remove the spoon from hand only"],